#include #include #include #include #include // Include the BBRegressor and Classifier headers #include "bb_regressor/bb_regressor.h" #include "classifier/classifier.h" // Generate random input tensors for testing torch::Tensor generate_random_feature_map(int batch_size, int channels, int height, int width, torch::Device device) { // Use a fixed seed for reproducibility static std::random_device rd; static std::mt19937 gen(rd()); static std::uniform_real_distribution<> dis(0.0, 1.0); // Create tensor with random values auto tensor = torch::zeros({batch_size, channels, height, width}, torch::TensorOptions().device(device)); // Fill with random values for (int b = 0; b < batch_size; b++) { for (int c = 0; c < channels; c++) { for (int h = 0; h < height; h++) { for (int w = 0; w < width; w++) { tensor[b][c][h][w] = dis(gen); } } } } return tensor; } torch::Tensor generate_random_bounding_box(int batch_size, torch::Device device) { // Generate bounding boxes in [x, y, w, h] format, values in [0, 1] auto tensor = torch::zeros({batch_size, 4}, torch::TensorOptions().device(device)); // Use a fixed seed for reproducibility static std::random_device rd; static std::mt19937 gen(rd()); static std::uniform_real_distribution<> dis_pos(0.2, 0.8); // Position in center area static std::uniform_real_distribution<> dis_size(0.1, 0.4); // Size is 10-40% of image for (int b = 0; b < batch_size; b++) { float x = dis_pos(gen); float y = dis_pos(gen); float w = dis_size(gen); float h = dis_size(gen); // Ensure box stays within image bounds w = std::min(w, 1.0f - x); h = std::min(h, 1.0f - y); tensor[b][0] = x; tensor[b][1] = y; tensor[b][2] = w; tensor[b][3] = h; } return tensor; } // Generate multiple random proposals (bounding boxes) torch::Tensor generate_random_proposals(int batch_size, int num_proposals, torch::Device device) { // Generate proposals in [x, y, w, h] format, values in [0, 1] auto tensor = torch::zeros({batch_size, num_proposals, 4}, torch::TensorOptions().device(device)); static std::random_device rd; static std::mt19937 gen(rd()); static std::uniform_real_distribution<> dis_pos(0.1, 0.9); // Wider position range static std::uniform_real_distribution<> dis_size(0.05, 0.3); // Size is 5-30% of image for (int b = 0; b < batch_size; b++) { for (int n = 0; n < num_proposals; n++) { float x = dis_pos(gen); float y = dis_pos(gen); float w = dis_size(gen); float h = dis_size(gen); // Ensure box stays within image bounds w = std::min(w, 1.0f - x); h = std::min(h, 1.0f - y); tensor[b][n][0] = x; tensor[b][n][1] = y; tensor[b][n][2] = w; tensor[b][n][3] = h; } } return tensor; } // Helper function to print tensor statistics void print_tensor_stats(const std::string& name, const torch::Tensor& tensor) { std::cout << name << " stats:" << std::endl; std::cout << " Shape: ["; for (int i = 0; i < tensor.dim(); i++) { std::cout << tensor.size(i); if (i < tensor.dim() - 1) std::cout << ", "; } std::cout << "]" << std::endl; std::cout << " Mean: " << tensor.mean().item() << std::endl; std::cout << " Min: " << tensor.min().item() << std::endl; std::cout << " Max: " << tensor.max().item() << std::endl; std::cout << " Device: " << tensor.device() << std::endl; std::cout << " Dtype: " << tensor.dtype() << std::endl; std::cout << std::endl; } // Convert bounding boxes from [x, y, w, h] to [batch_idx, x1, y1, x2, y2] format for ROI pooling torch::Tensor convert_bbox_to_roi(torch::Tensor bbox, int batch_idx = 0) { int num_boxes = bbox.size(0); auto roi = torch::zeros({num_boxes, 5}, bbox.options()); // Set batch index roi.index_put_({torch::indexing::Slice(), 0}, batch_idx); // Copy x, y coordinates roi.index_put_({torch::indexing::Slice(), 1}, bbox.index({torch::indexing::Slice(), 0})); roi.index_put_({torch::indexing::Slice(), 2}, bbox.index({torch::indexing::Slice(), 1})); // Calculate x2, y2 from width and height auto x2 = bbox.index({torch::indexing::Slice(), 0}) + bbox.index({torch::indexing::Slice(), 2}); auto y2 = bbox.index({torch::indexing::Slice(), 1}) + bbox.index({torch::indexing::Slice(), 3}); roi.index_put_({torch::indexing::Slice(), 3}, x2); roi.index_put_({torch::indexing::Slice(), 4}, y2); return roi; } int main(int argc, char* argv[]) { try { std::cout << "=== Object Tracking Demo with BBRegressor and Classifier ===" << std::endl; // Determine which device to use torch::Device device(torch::kCPU); // Add more detailed CUDA debugging std::cout << "Checking CUDA availability..." << std::endl; std::cout << "torch::cuda::is_available(): " << (torch::cuda::is_available() ? "true" : "false") << std::endl; if (torch::cuda::is_available()) { device = torch::Device(torch::kCUDA, 0); std::cout << "Using CUDA device: " << device << std::endl; std::cout << "CUDA Device Count: " << torch::cuda::device_count() << std::endl; } else { std::cout << "CUDA is not available, using CPU" << std::endl; } std::cout << std::endl; // Find the base directory containing exported weights std::string base_dir; for (const auto& dir : {".", "..", "../..", "../../.."}) { if (std::filesystem::exists(std::filesystem::path(dir) / "exported_weights")) { base_dir = dir; break; } } if (base_dir.empty()) { std::cerr << "Cannot find exported_weights directory!" << std::endl; return 1; } std::cout << "Using exported weights from: " << std::filesystem::absolute(base_dir).string() << std::endl << std::endl; // Initialize BBRegressor and Classifier std::cout << "Initializing BBRegressor..." << std::endl; BBRegressor bb_regressor(base_dir, device); bb_regressor.print_model_info(); std::cout << std::endl; std::cout << "Initializing Classifier..." << std::endl; Classifier classifier(base_dir, device); classifier.print_model_info(); std::cout << std::endl; // Parameters for the test int batch_size = 1; int num_proposals = 5; // Generate random inputs std::cout << "Generating random inputs..." << std::endl; auto feat_layer2 = generate_random_feature_map(batch_size, 512, 18, 18, device); auto feat_layer3 = generate_random_feature_map(batch_size, 1024, 9, 9, device); auto bb = generate_random_bounding_box(batch_size, device); auto proposals = generate_random_proposals(batch_size, num_proposals, device); // Create feature vector std::vector backbone_features = {feat_layer2, feat_layer3}; // Print tensor info print_tensor_stats("feat_layer2", feat_layer2); print_tensor_stats("feat_layer3", feat_layer3); print_tensor_stats("bb", bb); print_tensor_stats("proposals", proposals); // Test BBRegressor functionality std::cout << "\n=== Testing BBRegressor functionality ===" << std::endl; // 1. Get IoU features std::cout << "Step 1: Getting IoU features..." << std::endl; auto start_time = std::chrono::high_resolution_clock::now(); std::vector iou_features = bb_regressor.get_iou_feat(backbone_features); auto end_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end_time - start_time); std::cout << "get_iou_feat completed in " << duration.count() << " ms" << std::endl; print_tensor_stats("iou_feature[0]", iou_features[0]); print_tensor_stats("iou_feature[1]", iou_features[1]); // 2. Get modulation vectors std::cout << "\nStep 2: Getting modulation vectors..." << std::endl; start_time = std::chrono::high_resolution_clock::now(); std::vector modulation = bb_regressor.get_modulation(backbone_features, bb); end_time = std::chrono::high_resolution_clock::now(); duration = std::chrono::duration_cast(end_time - start_time); std::cout << "get_modulation completed in " << duration.count() << " ms" << std::endl; print_tensor_stats("modulation[0]", modulation[0]); print_tensor_stats("modulation[1]", modulation[1]); // 3. Predict IoU std::cout << "\nStep 3: Predicting IoU..." << std::endl; start_time = std::chrono::high_resolution_clock::now(); torch::Tensor iou_scores = bb_regressor.predict_iou(modulation, iou_features, proposals); end_time = std::chrono::high_resolution_clock::now(); duration = std::chrono::duration_cast(end_time - start_time); std::cout << "predict_iou completed in " << duration.count() << " ms" << std::endl; print_tensor_stats("iou_scores", iou_scores); // Test Classifier functionality std::cout << "\n=== Testing Classifier functionality ===" << std::endl; // Extract classification features std::cout << "Extracting classification features..." << std::endl; start_time = std::chrono::high_resolution_clock::now(); torch::Tensor cls_features = classifier.extract_features(feat_layer3); end_time = std::chrono::high_resolution_clock::now(); duration = std::chrono::duration_cast(end_time - start_time); std::cout << "extract_features completed in " << duration.count() << " ms" << std::endl; print_tensor_stats("cls_features", cls_features); // Save statistics std::cout << "\n=== Saving tensor statistics ===" << std::endl; // For BBRegressor std::vector bb_stats; bb_stats.push_back(bb_regressor.compute_stats(iou_features[0])); bb_stats.push_back(bb_regressor.compute_stats(iou_features[1])); bb_stats.push_back(bb_regressor.compute_stats(modulation[0])); bb_stats.push_back(bb_regressor.compute_stats(modulation[1])); bb_stats.push_back(bb_regressor.compute_stats(iou_scores)); std::string bb_stats_file = "bb_regressor_stats.txt"; bb_regressor.save_stats(bb_stats, bb_stats_file); std::cout << "BBRegressor stats saved to " << bb_stats_file << std::endl; // For Classifier std::vector cls_stats; cls_stats.push_back(classifier.compute_stats(cls_features)); std::string cls_stats_file = "classifier_stats.txt"; classifier.save_stats(cls_stats, cls_stats_file); std::cout << "Classifier stats saved to " << cls_stats_file << std::endl; std::cout << "\nDemo completed successfully!" << std::endl; return 0; } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; return 1; } }