@@ -8,10 +8,13 @@
// CUDA includes required for the CUDA implementation
#include <cuda_runtime.h>
#include <ATen/cuda/CUDAContext.h>
#include <sys/stat.h>
#include <sys/types.h>

// Use the PrRoIPooling implementation
#include "prroi_pooling_gpu.h"
#include "prroi_pooling_gpu_impl.cuh"
#include "utils.h"

// PrRoIPool2D implementation (requires CUDA)
PrRoIPool2D::PrRoIPool2D(int pooled_height, int pooled_width, float spatial_scale)
@@ -507,7 +510,8 @@ void BBRegressor::to(torch::Device device) {
}

// Get IoU features from backbone features
-std::vector<torch::Tensor> BBRegressor::get_iou_feat(std::vector<torch::Tensor> feat2_input) {
+std::vector<torch::Tensor> BBRegressor::get_iou_feat(std::vector<torch::Tensor> feat2_input, int sample_idx) {
+    std::cout << "[DEBUG] Entered get_iou_feat with sample_idx=" << sample_idx << std::endl;
    torch::Tensor feat3_t_original = feat2_input[0];
    torch::Tensor feat4_t_original = feat2_input[1];

@@ -527,11 +531,62 @@ std::vector<torch::Tensor> BBRegressor::get_iou_feat(std::vector<torch::Tensor>

    torch::NoGradGuard no_grad;

-    torch::Tensor c3_t_1 = conv3_1t->forward(feat3_t);
-    torch::Tensor c3_t = conv3_2t->forward(c3_t_1);
-    torch::Tensor c4_t_1 = conv4_1t->forward(feat4_t);
-    torch::Tensor c4_t = conv4_2t->forward(c4_t_1);

+    // Ensure debug directory exists for sample 0
+    if (sample_idx == 0) {
+        const char* debug_dir = "test/output/bb_regressor";
+        struct stat st = {0};
+        if (stat(debug_dir, &st) == -1) {
+            mkdir(debug_dir, 0777);
+        }
+    }
+    // conv3_1t
+    auto c3_1t_conv = conv3_1t[0]->as<torch::nn::Conv2d>()->forward(feat3_t);
+    auto c3_1t_bn = conv3_1t[1]->as<torch::nn::BatchNorm2d>()->forward(c3_1t_conv);
+    auto c3_1t_relu = conv3_1t[2]->as<torch::nn::ReLU>()->forward(c3_1t_bn);
+    if (sample_idx == 0) {
+        std::cout << "[DEBUG] About to save debug tensors for sample_idx == 0" << std::endl;
+        save_tensor_to_file(c3_1t_bn.cpu(), "test/output/bb_regressor/sample_0_debug_conv3_1t_bn.pt");
+        save_tensor_to_file(c3_1t_relu.cpu(), "test/output/bb_regressor/sample_0_debug_conv3_1t_relu.pt");
+        std::cout << "conv3_1t_bn: dtype=" << c3_1t_bn.dtype() << ", device=" << c3_1t_bn.device() << ", shape=" << c3_1t_bn.sizes() << std::endl;
+        std::cout << "conv3_1t_relu: dtype=" << c3_1t_relu.dtype() << ", device=" << c3_1t_relu.device() << ", shape=" << c3_1t_relu.sizes() << std::endl;
+    }
+    auto c3_t_1 = c3_1t_relu;
+    // conv3_2t
+    auto c3_2t_conv = conv3_2t[0]->as<torch::nn::Conv2d>()->forward(c3_t_1);
+    auto c3_2t_bn = conv3_2t[1]->as<torch::nn::BatchNorm2d>()->forward(c3_2t_conv);
+    auto c3_2t_relu = conv3_2t[2]->as<torch::nn::ReLU>()->forward(c3_2t_bn);
+    if (sample_idx == 0) {
+        std::cout << "[DEBUG] About to save debug tensors for conv3_2t, sample_idx == 0" << std::endl;
+        save_tensor_to_file(c3_2t_bn.cpu(), "test/output/bb_regressor/sample_0_debug_conv3_2t_bn.pt");
+        save_tensor_to_file(c3_2t_relu.cpu(), "test/output/bb_regressor/sample_0_debug_conv3_2t_relu.pt");
+        std::cout << "conv3_2t_bn: dtype=" << c3_2t_bn.dtype() << ", device=" << c3_2t_bn.device() << ", shape=" << c3_2t_bn.sizes() << std::endl;
+        std::cout << "conv3_2t_relu: dtype=" << c3_2t_relu.dtype() << ", device=" << c3_2t_relu.device() << ", shape=" << c3_2t_relu.sizes() << std::endl;
+    }
+    auto c3_t = c3_2t_relu;
+    // conv4_1t
+    auto c4_1t_conv = conv4_1t[0]->as<torch::nn::Conv2d>()->forward(feat4_t);
+    auto c4_1t_bn = conv4_1t[1]->as<torch::nn::BatchNorm2d>()->forward(c4_1t_conv);
+    auto c4_1t_relu = conv4_1t[2]->as<torch::nn::ReLU>()->forward(c4_1t_bn);
+    if (sample_idx == 0) {
+        std::cout << "[DEBUG] About to save debug tensors for conv4_1t, sample_idx == 0" << std::endl;
+        save_tensor_to_file(c4_1t_bn.cpu(), "test/output/bb_regressor/sample_0_debug_conv4_1t_bn.pt");
+        save_tensor_to_file(c4_1t_relu.cpu(), "test/output/bb_regressor/sample_0_debug_conv4_1t_relu.pt");
+        std::cout << "conv4_1t_bn: dtype=" << c4_1t_bn.dtype() << ", device=" << c4_1t_bn.device() << ", shape=" << c4_1t_bn.sizes() << std::endl;
+        std::cout << "conv4_1t_relu: dtype=" << c4_1t_relu.dtype() << ", device=" << c4_1t_relu.device() << ", shape=" << c4_1t_relu.sizes() << std::endl;
+    }
+    auto c4_t_1 = c4_1t_relu;
+    // conv4_2t
+    auto c4_2t_conv = conv4_2t[0]->as<torch::nn::Conv2d>()->forward(c4_t_1);
+    auto c4_2t_bn = conv4_2t[1]->as<torch::nn::BatchNorm2d>()->forward(c4_2t_conv);
+    auto c4_2t_relu = conv4_2t[2]->as<torch::nn::ReLU>()->forward(c4_2t_bn);
+    if (sample_idx == 0) {
+        std::cout << "[DEBUG] About to save debug tensors for conv4_2t, sample_idx == 0" << std::endl;
+        save_tensor_to_file(c4_2t_bn.cpu(), "test/output/bb_regressor/sample_0_debug_conv4_2t_bn.pt");
+        save_tensor_to_file(c4_2t_relu.cpu(), "test/output/bb_regressor/sample_0_debug_conv4_2t_relu.pt");
+        std::cout << "conv4_2t_bn: dtype=" << c4_2t_bn.dtype() << ", device=" << c4_2t_bn.device() << ", shape=" << c4_2t_bn.sizes() << std::endl;
+        std::cout << "conv4_2t_relu: dtype=" << c4_2t_relu.dtype() << ", device=" << c4_2t_relu.device() << ", shape=" << c4_2t_relu.sizes() << std::endl;
+    }
+    auto c4_t = c4_2t_relu;
    return {c3_t.contiguous(), c4_t.contiguous()}; // Ensure output is contiguous and float32
}
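
Usage sketch (illustrative, not part of the diff): a minimal call into the updated entry point, assuming an already-constructed BBRegressor named regressor and a two-element vector backbone_feats holding the backbone feature maps on the CUDA device; both names are placeholders. Passing sample_idx == 0 is what triggers the per-layer debug dumps written under test/output/bb_regressor/.

    // Hypothetical caller of the new signature (regressor and backbone_feats are assumed names).
    std::vector<torch::Tensor> iou_feats =
        regressor.get_iou_feat(backbone_feats, /*sample_idx=*/0);
    // iou_feats[0] and iou_feats[1] are the contiguous conv3/conv4 IoU features.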