@@ -647,19 +647,56 @@ torch::Tensor BBRegressor::predict_iou(std::vector<torch::Tensor> modulation,

    auto feat_device = feat[0].device();
    roi = roi.to(feat_device);
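
    // Assumed ROI layout (typical for PrRoIPooling-style ops; not verified here):
    // roi is [K, 5] with columns (batch_index, x1, y1, x2, y2), where
    // K = batch_size * num_proposals, so row k = b * num_proposals + p holds
    // proposal p of image b and roi[k][0] == b.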

    // Apply ROI pooling to get features for each proposal
    auto pooled_feat1 = prroi_pool3t->forward(feat[0], roi);  // Output: [batch_size * num_proposals, C, 5, 5]
    auto pooled_feat2 = prroi_pool4t->forward(feat[1], roi);  // Output: [batch_size * num_proposals, C, 3, 3]

    // Apply modulation vectors BEFORE PrRoIPooling
    auto mod0_4d = modulation[0].to(feat_device);
    auto mod1_4d = modulation[1].to(feat_device);

    if (mod0_4d.dim() == 2) {
        mod0_4d = mod0_4d.reshape({mod0_4d.size(0), mod0_4d.size(1), 1, 1});
    }
    if (mod1_4d.dim() == 2) {
        mod1_4d = mod1_4d.reshape({mod1_4d.size(0), mod1_4d.size(1), 1, 1});
    }
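
    // With mod0_4d / mod1_4d reshaped to [batch_size, channels, 1, 1], the element-wise
    // product further below broadcasts over the spatial dims, e.g. (illustrative sizes only)
    // [1, 256, 1, 1] * [1, 256, 18, 18] -> [1, 256, 18, 18], i.e. a per-channel scaling.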

    std::cout << "  Pooled shapes:" << std::endl;
    std::cout << "    pooled_feat1 (from prroi_pool3t on feat[0]): [" << pooled_feat1.sizes() << "] dev: " << pooled_feat1.device() << std::endl;
    std::cout << "    pooled_feat2 (from prroi_pool4t on feat[1]): [" << pooled_feat2.sizes() << "] dev: " << pooled_feat2.device() << std::endl;

    // Ensure the modulation vectors are broadcastable with the features:
    // features (feat[0], feat[1]) are [batch_size, channels, H, W], while the
    // modulation (mod0_4d, mod1_4d) should be [batch_size, channels, 1, 1].
    // When num_proposals > 1, every proposal of an image is pooled from that image's
    // single feature map, so the modulation is applied per image, not per proposal,
    // before pooling.

    torch::Tensor modulated_feat0 = feat[0] * mod0_4d;
    torch::Tensor modulated_feat1 = feat[1] * mod1_4d;

    // Apply ROI pooling to get features for each proposal from the MODULATED features
    auto pooled_feat1 = prroi_pool3t->forward(modulated_feat0, roi);  // Output: [batch_size * num_proposals, C, 5, 5]
    auto pooled_feat2 = prroi_pool4t->forward(modulated_feat1, roi);
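
    // Note: PrRoIPooling averages (integrates) features over each ROI, which is linear per
    // channel, so scaling a channel before pooling is equivalent to pooling first and then
    // scaling the pooled output by the same factor. Modulating before pooling should therefore
    // match the earlier post-pool modulation, assuming each ROI is pooled from the feature map
    // of its own image.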

    std::cout << "  Modulated and Pooled shapes:" << std::endl;
    std::cout << "    pooled_feat1 (from prroi_pool3t on modulated_feat0): [" << pooled_feat1.sizes() << "] dev: " << pooled_feat1.device() << std::endl;
    std::cout << "    pooled_feat2 (from prroi_pool4t on modulated_feat1): [" << pooled_feat2.sizes() << "] dev: " << pooled_feat2.device() << std::endl;

    std::cout << "  IoU predictor dimensions:" << std::endl;
    std::cout << "    weight: [" << iou_predictor->weight.sizes() << "]" << std::endl;
    std::cout << "    bias: [" << iou_predictor->bias.sizes() << "]" << std::endl;
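
    // Expected relationship (assumed, not checked here): iou_predictor behaves as a single
    // linear layer (see the torch::nn::functional::linear fallback further below), so weight
    // should be [out_features, in_features] with in_features equal to the combined output
    // channels of fc3_rt and fc4_rt, and out_features presumably 1 (one IoU score per ROI).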

    try {
        // With modulation applied before pooling, pooled_feat1 and pooled_feat2 already play
        // the role of feat_prod_0 and feat_prod_1.
        auto x0 = fc3_rt.forward(pooled_feat1);
        auto x1 = fc4_rt.forward(pooled_feat2);

        auto ioufeat_final = torch::cat({x0, x1}, 1).contiguous();
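
        // ioufeat_final should be [batch_size * num_proposals, C0 + C1], where C0 and C1 are
        // the per-ROI output channel counts of fc3_rt and fc4_rt (assuming both blocks flatten
        // their pooled input to a 2-D [K, C] tensor).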

        // Ensure iou_predictor is on the correct device
        iou_predictor->to(target_device);

        auto iou_scores = iou_predictor->forward(ioufeat_final);

        // Ensure iou_scores is on the correct device before returning
        iou_scores = iou_scores.to(target_device);

        // The following block for feat_prod_0 and feat_prod_1 is no longer needed,
        // as modulation is done pre-pool.
        /*
        auto mod0_4d = modulation[0].to(target_device);
        auto mod1_4d = modulation[1].to(target_device);
@@ -682,59 +719,42 @@ torch::Tensor BBRegressor::predict_iou(std::vector<torch::Tensor> modulation,

        std::cout << "    mod1_4d: [" << mod1_4d.sizes() << "] dev: " << mod1_4d.device() << std::endl;

        auto feat_prod_0 = pooled_feat1 * mod0_4d;
        auto feat_prod_1 = pooled_feat2 * mod1_4d;

        std::cout << "  After element-wise product with modulation:\n    feat_prod_0 (pooled_feat1 * mod0_4d): [" << feat_prod_0.sizes() << "] dev: " << feat_prod_0.device()
                  << "\n    feat_prod_1 (pooled_feat2 * mod1_4d): [" << feat_prod_1.sizes() << "] dev: " << feat_prod_1.device() << std::endl;

        std::cout << "  Applying fc3_rt to feat_prod_0..." << std::endl;
        auto x0 = fc3_rt.forward(feat_prod_0); // Corrected: . instead of ->

        std::cout << "  Applying fc4_rt to feat_prod_1..." << std::endl;
        auto x1 = fc4_rt.forward(feat_prod_1); // Corrected: . instead of ->

        std::cout << "  After fc_rt blocks:\n    x0 (fc3_rt output): [" << x0.sizes() << "] dev: " << x0.device()
                  << "\n    x1 (fc4_rt output): [" << x1.sizes() << "] dev: " << x1.device() << std::endl;

        auto feat_prod_1 = pooled_feat2 * mod1_4d;

        auto ioufeat_final = torch::cat(std::vector<torch::Tensor>{x0, x1}, 1).contiguous(); // Corrected: std::vector wrapper

        std::cout << "  Concatenated ioufeat_final: [" << ioufeat_final.sizes() << "] dev: " << ioufeat_final.device() << std::endl;

        std::cout << "  Feature product shapes (pooled_feat * mod_vec):" << std::endl;
        std::cout << "    feat_prod_0: [" << feat_prod_0.sizes() << "] dev: " << feat_prod_0.device() << std::endl;
        std::cout << "    feat_prod_1: [" << feat_prod_1.sizes() << "] dev: " << feat_prod_1.device() << std::endl;

        // Forward through linear blocks
        // Ensure fc3_rt and fc4_rt are on the correct device
        fc3_rt.to(target_device);
        fc4_rt.to(target_device);

        torch::Tensor iou_scores;
        try {
            std::cout << "  Applying final iou_predictor on GPU" << std::endl;
            iou_predictor->to(target_device);
            iou_scores = iou_predictor->forward(ioufeat_final.to(target_device));
            std::cout << "  Final iou_predictor on GPU successful. Output scores shape: [" << iou_scores.sizes() << "]" << std::endl;
        } catch (const std::exception& cuda_error) {
            std::cout << "  GPU iou_predictor->forward() failed: " << cuda_error.what() << std::endl;
            std::cout << "  Falling back to CPU for final iou_predictor" << std::endl;

            auto ioufeat_final_cpu = ioufeat_final.to(torch::kCPU).contiguous();
            auto weight_cpu = iou_predictor->weight.to(torch::kCPU).contiguous();
            auto bias_cpu = torch::Tensor();
            if (iou_predictor->bias.defined()) {
                bias_cpu = iou_predictor->bias.to(torch::kCPU).contiguous();
            }

            auto x0 = fc3_rt.forward(feat_prod_0);
            auto x1 = fc4_rt.forward(feat_prod_1);

            std::cout << "  fc_rt output shapes:" << std::endl;
            std::cout << "    x0 (fc3_rt output): [" << x0.sizes() << "] dev: " << x0.device() << std::endl;
            std::cout << "    x1 (fc4_rt output): [" << x1.sizes() << "] dev: " << x1.device() << std::endl;

            std::cout << "  DEBUG CPU Fallback: ioufeat_final_cpu device: " << ioufeat_final_cpu.device() << std::endl;
            std::cout << "  DEBUG CPU Fallback: weight_cpu device: " << weight_cpu.device() << std::endl;
            if (bias_cpu.defined()) {
                std::cout << "  DEBUG CPU Fallback: bias_cpu device: " << bias_cpu.device() << std::endl;
            } else {
                std::cout << "  DEBUG CPU Fallback: bias_cpu is undefined." << std::endl;
            }

            try {
                iou_scores = torch::nn::functional::linear(ioufeat_final_cpu, weight_cpu, bias_cpu);
                std::cout << "  CPU fallback torch::nn::functional::linear() successful. Output device: " << iou_scores.device() << std::endl;
            } catch (const std::exception& cpu_fwd_error) {
                std::cerr << "ERROR during CPU torch::nn::functional::linear(): " << cpu_fwd_error.what() << std::endl;
                iou_predictor->to(target_device);
                throw;
            }

            iou_predictor->to(target_device);

            auto ioufeat_final = torch::cat({x0, x1}, 1).contiguous();

            std::cout << "  ioufeat_final shape: [" << ioufeat_final.sizes() << "] dev: " << ioufeat_final.device() << std::endl;

            // Ensure iou_predictor is on the correct device
            iou_predictor->to(target_device);

            auto iou_scores = iou_predictor->forward(ioufeat_final);

            // Ensure iou_scores is on the correct device before returning
            iou_scores = iou_scores.to(target_device);
        */

        // Ensure iou_scores is on the correct device before returning.
        // This was already done above, but as a final check:
        if (iou_scores.device() != target_device) {
            iou_scores = iou_scores.to(target_device);
        }

        std::cout << "  iou_scores raw shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl;

        iou_scores = iou_scores.reshape({batch_size, num_proposals});
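
        // The reshape to [batch_size, num_proposals] assumes the ROI rows are ordered
        // batch-major, i.e. row b * num_proposals + p is proposal p of image b.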

        std::cout << "  Final iou_scores shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl;