From 8ed90f273edb284a759f8afc194aecb8a28d9bdb Mon Sep 17 00:00:00 2001 From: mht Date: Sun, 1 Jun 2025 14:01:48 +0330 Subject: [PATCH] Fix BBRegressor: Apply modulation before PrRoIPool in predict_iou --- cimp/bb_regressor/bb_regressor.cpp | 126 +++++++++++++++++------------ 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/cimp/bb_regressor/bb_regressor.cpp b/cimp/bb_regressor/bb_regressor.cpp index b48c49c..121ca79 100644 --- a/cimp/bb_regressor/bb_regressor.cpp +++ b/cimp/bb_regressor/bb_regressor.cpp @@ -647,19 +647,56 @@ torch::Tensor BBRegressor::predict_iou(std::vector modulation, auto feat_device = feat[0].device(); roi = roi.to(feat_device); - // Apply ROI pooling to get features for each proposal - auto pooled_feat1 = prroi_pool3t->forward(feat[0], roi); // Output: [batch_size * num_proposals, C, 5, 5] - auto pooled_feat2 = prroi_pool4t->forward(feat[1], roi); // Output: [batch_size * num_proposals, C, 3, 3] + // Apply modulation vectors BEFORE PrRoIPooling + auto mod0_4d = modulation[0].to(feat_device); + auto mod1_4d = modulation[1].to(feat_device); + + if (mod0_4d.dim() == 2) { + mod0_4d = mod0_4d.reshape({mod0_4d.size(0), mod0_4d.size(1), 1, 1}); + } + if (mod1_4d.dim() == 2) { + mod1_4d = mod1_4d.reshape({mod1_4d.size(0), mod1_4d.size(1), 1, 1}); + } - std::cout << " Pooled shapes:" << std::endl; - std::cout << " pooled_feat1 (from prroi_pool3t on feat[0]): [" << pooled_feat1.sizes() << "] dev: " << pooled_feat1.device() << std::endl; - std::cout << " pooled_feat2 (from prroi_pool4t on feat[1]): [" << pooled_feat2.sizes() << "] dev: " << pooled_feat2.device() << std::endl; + // Ensure modulation vectors are broadcastable with features + // Features (feat[0], feat[1]) are [batch_size, channels, H, W] + // Modulation (mod0_4d, mod1_4d) should be [batch_size, channels, 1, 1] + // If num_proposals > 1, the pooling happens on features that are effectively repeated. + // The modulation is per-image, not per-proposal before pooling. + + torch::Tensor modulated_feat0 = feat[0] * mod0_4d; + torch::Tensor modulated_feat1 = feat[1] * mod1_4d; + + // Apply ROI pooling to get features for each proposal from MODULATED features + auto pooled_feat1 = prroi_pool3t->forward(modulated_feat0, roi); // Output: [batch_size * num_proposals, C, 5, 5] + auto pooled_feat2 = prroi_pool4t->forward(modulated_feat1, roi); + + std::cout << " Modulated and Pooled shapes:" << std::endl; + std::cout << " pooled_feat1 (from prroi_pool3t on modulated_feat0): [" << pooled_feat1.sizes() << "] dev: " << pooled_feat1.device() << std::endl; + std::cout << " pooled_feat2 (from prroi_pool4t on modulated_feat1): [" << pooled_feat2.sizes() << "] dev: " << pooled_feat2.device() << std::endl; std::cout << " IoU predictor dimensions:" << std::endl; std::cout << " weight: [" << iou_predictor->weight.sizes() << "]" << std::endl; std::cout << " bias: [" << iou_predictor->bias.sizes() << "]" << std::endl; try { + // The feat_prod_0 and feat_prod_1 are now directly the pooled_feat1 and pooled_feat2 + // as modulation was applied before pooling. + auto x0 = fc3_rt.forward(pooled_feat1); + auto x1 = fc4_rt.forward(pooled_feat2); + + auto ioufeat_final = torch::cat({x0, x1}, 1).contiguous(); + + // Ensure iou_predictor is on the correct device + iou_predictor->to(target_device); + + auto iou_scores = iou_predictor->forward(ioufeat_final); + + // Ensure iou_scores is on the correct device before returning + iou_scores = iou_scores.to(target_device); + + // The following block for feat_prod_0 and feat_prod_1 is no longer needed as modulation is done pre-pool. + /* auto mod0_4d = modulation[0].to(target_device); auto mod1_4d = modulation[1].to(target_device); @@ -682,59 +719,42 @@ torch::Tensor BBRegressor::predict_iou(std::vector modulation, std::cout << " mod1_4d: [" << mod1_4d.sizes() << "] dev: " << mod1_4d.device() << std::endl; auto feat_prod_0 = pooled_feat1 * mod0_4d; - auto feat_prod_1 = pooled_feat2 * mod1_4d; - std::cout << " After element-wise product with modulation:\n feat_prod_0 (pooled_feat1 * mod0_4d): [" << feat_prod_0.sizes() << "] dev: " << feat_prod_0.device() << "\n feat_prod_1 (pooled_feat2 * mod1_4d): [" << feat_prod_1.sizes() << "] dev: " << feat_prod_1.device() << std::endl; - - std::cout << " Applying fc3_rt to feat_prod_0..." << std::endl; - auto x0 = fc3_rt.forward(feat_prod_0); // Corrected: . instead of -> - std::cout << " Applying fc4_rt to feat_prod_1..." << std::endl; - auto x1 = fc4_rt.forward(feat_prod_1); // Corrected: . instead of -> - std::cout << " After fc_rt blocks:\n x0 (fc3_rt output): [" << x0.sizes() << "] dev: " << x0.device() << "\n x1 (fc4_rt output): [" << x1.sizes() << "] dev: " << x1.device() << std::endl; + auto feat_prod_1 = pooled_feat2 * mod1_4d; - auto ioufeat_final = torch::cat(std::vector{x0, x1}, 1).contiguous(); // Corrected: std::vector wrapper - std::cout << " Concatenated ioufeat_final: [" << ioufeat_final.sizes() << "] dev: " << ioufeat_final.device() << std::endl; + std::cout << " Feature product shapes (pooled_feat * mod_vec):" << std::endl; + std::cout << " feat_prod_0: [" << feat_prod_0.sizes() << "] dev: " << feat_prod_0.device() << std::endl; + std::cout << " feat_prod_1: [" << feat_prod_1.sizes() << "] dev: " << feat_prod_1.device() << std::endl; + + // Forward through linear blocks + // Ensure fc3_rt and fc4_rt are on the correct device + fc3_rt.to(target_device); + fc4_rt.to(target_device); - torch::Tensor iou_scores; - try { - std::cout << " Applying final iou_predictor on GPU" << std::endl; - iou_predictor->to(target_device); - iou_scores = iou_predictor->forward(ioufeat_final.to(target_device)); - std::cout << " Final iou_predictor on GPU successful. Output scores shape: [" << iou_scores.sizes() << "]" << std::endl; - - } catch (const std::exception& cuda_error) { - std::cout << " GPU iou_predictor->forward() failed: " << cuda_error.what() << std::endl; - std::cout << " Falling back to CPU for final iou_predictor" << std::endl; - - auto ioufeat_final_cpu = ioufeat_final.to(torch::kCPU).contiguous(); - auto weight_cpu = iou_predictor->weight.to(torch::kCPU).contiguous(); - auto bias_cpu = torch::Tensor(); - if (iou_predictor->bias.defined()) { - bias_cpu = iou_predictor->bias.to(torch::kCPU).contiguous(); - } + auto x0 = fc3_rt.forward(feat_prod_0); + auto x1 = fc4_rt.forward(feat_prod_1); + + std::cout << " fc_rt output shapes:" << std::endl; + std::cout << " x0 (fc3_rt output): [" << x0.sizes() << "] dev: " << x0.device() << std::endl; + std::cout << " x1 (fc4_rt output): [" << x1.sizes() << "] dev: " << x1.device() << std::endl; - std::cout << " DEBUG CPU Fallback: ioufeat_final_cpu device: " << ioufeat_final_cpu.device() << std::endl; - std::cout << " DEBUG CPU Fallback: weight_cpu device: " << weight_cpu.device() << std::endl; - if (bias_cpu.defined()) { - std::cout << " DEBUG CPU Fallback: bias_cpu device: " << bias_cpu.device() << std::endl; - } else { - std::cout << " DEBUG CPU Fallback: bias_cpu is undefined." << std::endl; - } - - try { - iou_scores = torch::nn::functional::linear(ioufeat_final_cpu, weight_cpu, bias_cpu); - std::cout << " CPU fallback torch::nn::functional::linear() successful. Output device: " << iou_scores.device() << std::endl; - } catch (const std::exception& cpu_fwd_error) { - std::cerr << "ERROR during CPU torch::nn::functional::linear(): " << cpu_fwd_error.what() << std::endl; - iou_predictor->to(target_device); - throw; - } - - iou_predictor->to(target_device); + auto ioufeat_final = torch::cat({x0, x1}, 1).contiguous(); + + std::cout << " ioufeat_final shape: [" << ioufeat_final.sizes() << "] dev: " << ioufeat_final.device() << std::endl; + + // Ensure iou_predictor is on the correct device + iou_predictor->to(target_device); + + auto iou_scores = iou_predictor->forward(ioufeat_final); + + // Ensure iou_scores is on the correct device before returning + iou_scores = iou_scores.to(target_device); + */ + // Ensure iou_scores is on the correct device before returning. + // This was already done above, but as a final check: + if (iou_scores.device() != target_device) { iou_scores = iou_scores.to(target_device); } - std::cout << " iou_scores raw shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl; - iou_scores = iou_scores.reshape({batch_size, num_proposals}); std::cout << " Final iou_scores shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl;