Browse Source

Fix BBRegressor: Apply modulation before PrRoIPool in predict_iou

resnet
mht 2 months ago
parent
commit
8ed90f273e
  1 changed file with 126 additions
      cimp/bb_regressor/bb_regressor.cpp

126
cimp/bb_regressor/bb_regressor.cpp

@@ -647,19 +647,56 @@ torch::Tensor BBRegressor::predict_iou(std::vector<torch::Tensor> modulation,
auto feat_device = feat[0].device();
roi = roi.to(feat_device);
// Apply ROI pooling to get features for each proposal
auto pooled_feat1 = prroi_pool3t->forward(feat[0], roi); // Output: [batch_size * num_proposals, C, 5, 5]
auto pooled_feat2 = prroi_pool4t->forward(feat[1], roi); // Output: [batch_size * num_proposals, C, 3, 3]
// Apply modulation vectors BEFORE PrRoIPooling
auto mod0_4d = modulation[0].to(feat_device);
auto mod1_4d = modulation[1].to(feat_device);
if (mod0_4d.dim() == 2) {
mod0_4d = mod0_4d.reshape({mod0_4d.size(0), mod0_4d.size(1), 1, 1});
}
if (mod1_4d.dim() == 2) {
mod1_4d = mod1_4d.reshape({mod1_4d.size(0), mod1_4d.size(1), 1, 1});
}
std::cout << " Pooled shapes:" << std::endl;
std::cout << " pooled_feat1 (from prroi_pool3t on feat[0]): [" << pooled_feat1.sizes() << "] dev: " << pooled_feat1.device() << std::endl;
std::cout << " pooled_feat2 (from prroi_pool4t on feat[1]): [" << pooled_feat2.sizes() << "] dev: " << pooled_feat2.device() << std::endl;
// Ensure modulation vectors are broadcastable with features
// Features (feat[0], feat[1]) are [batch_size, channels, H, W]
// Modulation (mod0_4d, mod1_4d) should be [batch_size, channels, 1, 1]
// If num_proposals > 1, the pooling happens on features that are effectively repeated.
// The modulation is per-image, not per-proposal before pooling.
torch::Tensor modulated_feat0 = feat[0] * mod0_4d;
torch::Tensor modulated_feat1 = feat[1] * mod1_4d;
// Apply ROI pooling to get features for each proposal from MODULATED features
auto pooled_feat1 = prroi_pool3t->forward(modulated_feat0, roi); // Output: [batch_size * num_proposals, C, 5, 5]
auto pooled_feat2 = prroi_pool4t->forward(modulated_feat1, roi);
std::cout << " Modulated and Pooled shapes:" << std::endl;
std::cout << " pooled_feat1 (from prroi_pool3t on modulated_feat0): [" << pooled_feat1.sizes() << "] dev: " << pooled_feat1.device() << std::endl;
std::cout << " pooled_feat2 (from prroi_pool4t on modulated_feat1): [" << pooled_feat2.sizes() << "] dev: " << pooled_feat2.device() << std::endl;
std::cout << " IoU predictor dimensions:" << std::endl;
std::cout << " weight: [" << iou_predictor->weight.sizes() << "]" << std::endl;
std::cout << " bias: [" << iou_predictor->bias.sizes() << "]" << std::endl;
try {
// The feat_prod_0 and feat_prod_1 are now directly the pooled_feat1 and pooled_feat2
// as modulation was applied before pooling.
auto x0 = fc3_rt.forward(pooled_feat1);
auto x1 = fc4_rt.forward(pooled_feat2);
auto ioufeat_final = torch::cat({x0, x1}, 1).contiguous();
// Ensure iou_predictor is on the correct device
iou_predictor->to(target_device);
auto iou_scores = iou_predictor->forward(ioufeat_final);
// Ensure iou_scores is on the correct device before returning
iou_scores = iou_scores.to(target_device);
// The following block for feat_prod_0 and feat_prod_1 is no longer needed as modulation is done pre-pool.
/*
auto mod0_4d = modulation[0].to(target_device);
auto mod1_4d = modulation[1].to(target_device);
@@ -682,59 +719,42 @@ torch::Tensor BBRegressor::predict_iou(std::vector<torch::Tensor> modulation,
std::cout << " mod1_4d: [" << mod1_4d.sizes() << "] dev: " << mod1_4d.device() << std::endl;
auto feat_prod_0 = pooled_feat1 * mod0_4d;
auto feat_prod_1 = pooled_feat2 * mod1_4d;
std::cout << " After element-wise product with modulation:\n feat_prod_0 (pooled_feat1 * mod0_4d): [" << feat_prod_0.sizes() << "] dev: " << feat_prod_0.device() << "\n feat_prod_1 (pooled_feat2 * mod1_4d): [" << feat_prod_1.sizes() << "] dev: " << feat_prod_1.device() << std::endl;
std::cout << " Applying fc3_rt to feat_prod_0..." << std::endl;
auto x0 = fc3_rt.forward(feat_prod_0); // Corrected: . instead of ->
std::cout << " Applying fc4_rt to feat_prod_1..." << std::endl;
auto x1 = fc4_rt.forward(feat_prod_1); // Corrected: . instead of ->
std::cout << " After fc_rt blocks:\n x0 (fc3_rt output): [" << x0.sizes() << "] dev: " << x0.device() << "\n x1 (fc4_rt output): [" << x1.sizes() << "] dev: " << x1.device() << std::endl;
auto feat_prod_1 = pooled_feat2 * mod1_4d;
auto ioufeat_final = torch::cat(std::vector<torch::Tensor>{x0, x1}, 1).contiguous(); // Corrected: std::vector wrapper
std::cout << " Concatenated ioufeat_final: [" << ioufeat_final.sizes() << "] dev: " << ioufeat_final.device() << std::endl;
std::cout << " Feature product shapes (pooled_feat * mod_vec):" << std::endl;
std::cout << " feat_prod_0: [" << feat_prod_0.sizes() << "] dev: " << feat_prod_0.device() << std::endl;
std::cout << " feat_prod_1: [" << feat_prod_1.sizes() << "] dev: " << feat_prod_1.device() << std::endl;
// Forward through linear blocks
// Ensure fc3_rt and fc4_rt are on the correct device
fc3_rt.to(target_device);
fc4_rt.to(target_device);
torch::Tensor iou_scores;
try {
std::cout << " Applying final iou_predictor on GPU" << std::endl;
iou_predictor->to(target_device);
iou_scores = iou_predictor->forward(ioufeat_final.to(target_device));
std::cout << " Final iou_predictor on GPU successful. Output scores shape: [" << iou_scores.sizes() << "]" << std::endl;
} catch (const std::exception& cuda_error) {
std::cout << " GPU iou_predictor->forward() failed: " << cuda_error.what() << std::endl;
std::cout << " Falling back to CPU for final iou_predictor" << std::endl;
auto ioufeat_final_cpu = ioufeat_final.to(torch::kCPU).contiguous();
auto weight_cpu = iou_predictor->weight.to(torch::kCPU).contiguous();
auto bias_cpu = torch::Tensor();
if (iou_predictor->bias.defined()) {
bias_cpu = iou_predictor->bias.to(torch::kCPU).contiguous();
}
auto x0 = fc3_rt.forward(feat_prod_0);
auto x1 = fc4_rt.forward(feat_prod_1);
std::cout << " fc_rt output shapes:" << std::endl;
std::cout << " x0 (fc3_rt output): [" << x0.sizes() << "] dev: " << x0.device() << std::endl;
std::cout << " x1 (fc4_rt output): [" << x1.sizes() << "] dev: " << x1.device() << std::endl;
std::cout << " DEBUG CPU Fallback: ioufeat_final_cpu device: " << ioufeat_final_cpu.device() << std::endl;
std::cout << " DEBUG CPU Fallback: weight_cpu device: " << weight_cpu.device() << std::endl;
if (bias_cpu.defined()) {
std::cout << " DEBUG CPU Fallback: bias_cpu device: " << bias_cpu.device() << std::endl;
} else {
std::cout << " DEBUG CPU Fallback: bias_cpu is undefined." << std::endl;
}
try {
iou_scores = torch::nn::functional::linear(ioufeat_final_cpu, weight_cpu, bias_cpu);
std::cout << " CPU fallback torch::nn::functional::linear() successful. Output device: " << iou_scores.device() << std::endl;
} catch (const std::exception& cpu_fwd_error) {
std::cerr << "ERROR during CPU torch::nn::functional::linear(): " << cpu_fwd_error.what() << std::endl;
iou_predictor->to(target_device);
throw;
}
iou_predictor->to(target_device);
auto ioufeat_final = torch::cat({x0, x1}, 1).contiguous();
std::cout << " ioufeat_final shape: [" << ioufeat_final.sizes() << "] dev: " << ioufeat_final.device() << std::endl;
// Ensure iou_predictor is on the correct device
iou_predictor->to(target_device);
auto iou_scores = iou_predictor->forward(ioufeat_final);
// Ensure iou_scores is on the correct device before returning
iou_scores = iou_scores.to(target_device);
*/
// Ensure iou_scores is on the correct device before returning.
// This was already done above, but as a final check:
if (iou_scores.device() != target_device) {
iou_scores = iou_scores.to(target_device);
}
std::cout << " iou_scores raw shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl;
iou_scores = iou_scores.reshape({batch_size, num_proposals});
std::cout << " Final iou_scores shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl;

Loading…
Cancel
Save