17 changed files with 396 additions and 16 deletions
-
13CMakeLists.txt
-
4build/CMakeCache.txt
-
1build/CMakeFiles/Makefile.cmake
-
44build/CMakeFiles/Makefile2
-
1build/CMakeFiles/TargetDirectories.txt
-
BINbuild/CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o
-
2build/CMakeFiles/progress.marks
-
1build/CMakeFiles/tracking_demo.dir/DependInfo.cmake
-
1build/CMakeFiles/tracking_demo.dir/build.make
-
2build/CMakeFiles/tracking_demo.dir/link.txt
-
4build/CMakeFiles/tracking_demo.dir/progress.make
-
41build/Makefile
-
4build/cmake_install.cmake
-
BINbuild/libbb_regressor.a
-
BINbuild/tracking_demo
-
97cimp/dimp_tracker.cpp
-
197cimp/dimp_tracker.h
@ -1 +1 @@ |
|||
11 |
|||
13 |
@ -1 +1 @@ |
|||
/usr/bin/c++ -O3 -DNDEBUG CMakeFiles/tracking_demo.dir/cimp/demo.cpp.o -o tracking_demo -L/usr/local/cuda-11.8/targets/x86_64-linux/lib/stubs -L/usr/local/cuda-11.8/targets/x86_64-linux/lib -Wl,-rpath,/home/mht/libtorch_1.8.0_cu111/libtorch/lib:/usr/local/cuda-11.8/lib64/stubs:/usr/local/cuda-11.8/lib64: libbb_regressor.a libclassifier.a libresnet.a /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch.so /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10.so /usr/local/cuda-11.8/lib64/stubs/libcuda.so /usr/local/cuda-11.8/lib64/libnvrtc.so /usr/local/cuda-11.8/lib64/libnvToolsExt.so /usr/local/cuda-11.8/lib64/libcudart.so /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10_cuda.so -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cuda.so" -Wl,--as-needed -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cuda_cu.so" -Wl,--as-needed -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cpu.so" -Wl,--as-needed /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10_cuda.so /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10.so /usr/local/cuda-11.8/lib64/libcufft.so /usr/local/cuda-11.8/lib64/libcurand.so /usr/local/cuda-11.8/lib64/libcublas.so /usr/lib/x86_64-linux-gnu/libcudnn.so -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cuda_cpp.so" -Wl,--as-needed -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch.so" -Wl,--as-needed /usr/local/cuda-11.8/lib64/libnvToolsExt.so /usr/local/cuda-11.8/lib64/libcudart.so -lcudadevrt -lcudart_static -lrt -lpthread -ldl |
|||
/usr/bin/c++ -O3 -DNDEBUG CMakeFiles/tracking_demo.dir/cimp/demo.cpp.o -o tracking_demo -L/usr/local/cuda-11.8/targets/x86_64-linux/lib/stubs -L/usr/local/cuda-11.8/targets/x86_64-linux/lib -Wl,-rpath,/home/mht/libtorch_1.8.0_cu111/libtorch/lib:/usr/local/cuda-11.8/lib64/stubs:/usr/local/cuda-11.8/lib64: libbb_regressor.a libclassifier.a libresnet.a libdimp_tracker.a /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch.so /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10.so /usr/local/cuda-11.8/lib64/stubs/libcuda.so /usr/local/cuda-11.8/lib64/libnvrtc.so /usr/local/cuda-11.8/lib64/libnvToolsExt.so /usr/local/cuda-11.8/lib64/libcudart.so /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10_cuda.so -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cuda.so" -Wl,--as-needed -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cuda_cu.so" -Wl,--as-needed -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cpu.so" -Wl,--as-needed /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10_cuda.so /home/mht/libtorch_1.8.0_cu111/libtorch/lib/libc10.so /usr/local/cuda-11.8/lib64/libcufft.so /usr/local/cuda-11.8/lib64/libcurand.so /usr/local/cuda-11.8/lib64/libcublas.so /usr/lib/x86_64-linux-gnu/libcudnn.so -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch_cuda_cpp.so" -Wl,--as-needed -Wl,--no-as-needed,"/home/mht/libtorch_1.8.0_cu111/libtorch/lib/libtorch.so" -Wl,--as-needed /usr/local/cuda-11.8/lib64/libnvToolsExt.so /usr/local/cuda-11.8/lib64/libcudart.so -lcudadevrt -lcudart_static -lrt -lpthread -ldl |
@ -1,3 +1,3 @@ |
|||
CMAKE_PROGRESS_1 = 10 |
|||
CMAKE_PROGRESS_2 = 11 |
|||
CMAKE_PROGRESS_1 = 12 |
|||
CMAKE_PROGRESS_2 = 13 |
|||
|
@ -0,0 +1,97 @@ |
|||
#include "dimp_tracker.h"
|
|||
#include <iostream> // For debugging output
|
|||
|
|||
namespace cimp { |
|||
|
|||
// Constructs the tracker: builds the three networks (ResNet-50 backbone,
// online classifier, IoU-based bounding-box regressor) from their on-disk
// weight directories and places everything on `device` in eval mode.
//
// @param params                  Tracker hyper-parameters (copied into params_).
// @param resnet_weights_dir      Directory holding the ResNet-50 backbone weights.
// @param classifier_weights_dir  Directory holding the classifier weights.
// @param bbregressor_weights_dir Directory holding the BB-regressor weights.
// @param device                  Target device (CPU or CUDA).
DiMPTracker::DiMPTracker(const DiMPTrackerParams& params,
                         const std::string& resnet_weights_dir,
                         const std::string& classifier_weights_dir,
                         const std::string& bbregressor_weights_dir,
                         torch::Device device)
    : params_(params),
      device_(device),
      resnet_model_(cimp::resnet::resnet50(resnet_weights_dir, {"layer2", "layer3", "layer4"}, device)),
      classifier_model_(classifier_weights_dir, device),
      bbregressor_model_(bbregressor_weights_dir, device)
{
    // Move models to the specified device (should be handled by individual model constructors or their to() methods if they have one)
    // However, explicit to() calls ensure they are on the correct device if constructors don't guarantee it.
    this->resnet_model_->to(this->device_);
    // this->classifier_model_.to(this->device_); // Classifier constructor handles device
    // this->bbregressor_model_.to(this->device_); // BBRegressor constructor handles device

    // Set models to evaluation mode
    this->resnet_model_->eval();
    // this->classifier_model_.eval(); // Classifier does not have an explicit eval() method
    this->bbregressor_model_.eval();

    // Initialize some state variables from params
    // NOTE(review): image_sample_size is a torch::IntArrayRef (non-owning view);
    // .vec() copies it into an owning std::vector, but this is only safe if the
    // view still points at live storage — verify DiMPTrackerParams guarantees that.
    this->img_sample_sz_ = torch::tensor(params_.image_sample_size.vec(), torch::kInt64).to(torch::kFloat32); // Convert to float tensor for calculations
    this->img_support_sz_ = this->img_sample_sz_.clone();

    std::cout << "DiMPTracker initialized." << std::endl;
    std::cout << " Device: " << (this->device_.is_cuda() ? "CUDA" : "CPU") << std::endl;
    if (this->device_.is_cuda()) {
        std::cout << " CUDA Device Index: " << this->device_.index() << std::endl;
    }
    std::cout << " ResNet, Classifier, and BBRegressor models constructed and set to eval mode." << std::endl;
}
|||
|
|||
// --- Placeholder for initialize() ---
// Sets up tracker state from the first frame and the initial target box.
//
// @param image_tensor_hwc_uchar First frame as an HWC tensor (presumably uint8
//                               — TODO confirm caller contract; conversion
//                               happens in convert_image_to_tensor_chw_float).
// @param initial_bbox_xywh      1-D tensor [x, y, w, h]; indices 0..3 are read
//                               as scalars below, so it must hold at least 4
//                               elements.
void DiMPTracker::initialize(const torch::Tensor& image_tensor_hwc_uchar, const torch::Tensor& initial_bbox_xywh) {
    std::cout << "DiMPTracker::initialize() called (placeholder)." << std::endl;
    // TODO: Implement full initialization logic
    // 1. Convert image_tensor_hwc_uchar to CHW float tensor, normalize
    // 2. Set initial pos_, target_sz_, image_sz_, target_scale_, base_target_sz_
    // 3. Call generate_init_samples
    // 4. Call init_classifier_internal
    // 5. Call init_iou_net_internal

    // Example: Convert image (assuming HWC uchar input)
    auto image_chw_float = convert_image_to_tensor_chw_float(image_tensor_hwc_uchar);
    // After the HWC->CHW permute, size(1)=H and size(2)=W.
    this->image_sz_ = torch::tensor({image_chw_float.size(1), image_chw_float.size(2)}, torch::kFloat32).to(device_); // H, W

    // Example: Set initial state (ensure tensors are on device_)
    // Center of the box: top-left + (size - 1) / 2, stored as (y_center, x_center).
    this->pos_ = torch::tensor({initial_bbox_xywh[1].item<float>() + (initial_bbox_xywh[3].item<float>() - 1.0f) / 2.0f,
                                initial_bbox_xywh[0].item<float>() + (initial_bbox_xywh[2].item<float>() - 1.0f) / 2.0f},
                                torch::kFloat32).to(device_); // y_center, x_center
    this->target_sz_ = torch::tensor({initial_bbox_xywh[3].item<float>(), initial_bbox_xywh[2].item<float>()},
                                     torch::kFloat32).to(device_); // height, width

    // target_scale_ = sqrt(search_area) / sqrt(sample_area): ratio between the
    // desired search region and the fixed network input patch size.
    double search_area = torch::prod(this->target_sz_ * params_.search_area_scale).item<double>();
    this->target_scale_ = std::sqrt(search_area) / torch::prod(this->img_sample_sz_).sqrt().item<double>();
    // Target size normalized to scale 1.0 (used when re-deriving size from scale).
    this->base_target_sz_ = this->target_sz_ / this->target_scale_;

    this->init_sample_pos_ = this->pos_.round();
    this->init_sample_scale_ = this->target_scale_;

    // TODO: Call generate_init_samples, init_classifier_internal, init_iou_net_internal
}
|||
|
|||
// --- Placeholder for track() ---
|
|||
torch::Tensor DiMPTracker::track(const torch::Tensor& image_tensor_hwc_uchar) { |
|||
std::cout << "DiMPTracker::track() called (placeholder)." << std::endl; |
|||
// TODO: Implement full tracking logic
|
|||
// Return a dummy bounding box for now [x,y,w,h]
|
|||
return torch::tensor({0.0, 0.0, 0.0, 0.0}, torch::kFloat32); |
|||
} |
|||
|
|||
// --- Helper Method Implementations (Placeholders or Basic Forms) ---
|
|||
torch::Tensor DiMPTracker::convert_image_to_tensor_chw_float(const torch::Tensor& image_hwc_uchar) { |
|||
// Assuming image_hwc_uchar is HWC uint8 on CPU or CUDA
|
|||
auto img_float = image_hwc_uchar.to(torch::kFloat32); |
|||
img_float = img_float.permute({2, 0, 1}); // HWC to CHW
|
|||
|
|||
// Normalize: (img / 255.0 - mean) / std
|
|||
// These are standard ImageNet mean/std
|
|||
torch::Tensor mean = torch::tensor({0.485, 0.456, 0.406}, device_).reshape({3, 1, 1}); |
|||
torch::Tensor std_dev = torch::tensor({0.229, 0.224, 0.225}, device_).reshape({3, 1, 1}); |
|||
|
|||
img_float = img_float.div(255.0); |
|||
img_float = img_float.sub_(mean).div_(std_dev); |
|||
return img_float.contiguous(); |
|||
} |
|||
|
|||
// ... Other private method placeholders would go here ...
|
|||
|
|||
} // namespace cimp
|
@ -0,0 +1,197 @@ |
|||
#pragma once

#include <torch/torch.h>

#include <array>
#include <map>
#include <optional>
#include <string>
#include <vector>

// Forward declare model classes if headers are not included yet to avoid circular dependencies
// Or include them if they are fundamental. For now, let's assume they will be included.
#include "resnet/resnet.h"
#include "classifier/classifier.h"
#include "bb_regressor/bb_regressor.h"
|||
|
|||
namespace cimp { |
|||
|
|||
struct DiMPTrackerParams { |
|||
// --- Device --- |
|||
// torch::Device device = torch::kCUDA; // Will be set by DiMPTracker constructor |
|||
|
|||
// --- Input / Preprocessing --- |
|||
torch::IntArrayRef image_sample_size = {288, 288}; // Target size of the cropped image sample |
|||
std::string border_mode = "replicate"; // Border mode for patch extraction |
|||
double patch_max_scale_change = 1.5; // Max scale change for multiscale sampling |
|||
|
|||
// --- Target Model --- |
|||
double search_area_scale = 5.0; // Scale factor for the search area relative to the target size |
|||
double target_inside_ratio = 0.2; // Ratio for keeping target inside image boundaries |
|||
|
|||
// --- Classifier --- |
|||
// Augmentation parameters (can be a sub-struct if complex) |
|||
struct AugmentationParams { |
|||
double augmentation_expansion_factor = 2.0; |
|||
double random_shift_factor = 0.0; // Typically 0 for DiMP, but can be non-zero |
|||
std::vector<double> relativeshift = {0.0, 0.0}; // Example, usually more shifts |
|||
std::vector<double> blur = {}; // Sigmas for Gaussian blur |
|||
std::vector<double> rotate = {}; // Angles for rotation |
|||
struct DropoutAug { |
|||
int num = 0; // Number of dropout samples |
|||
float prob = 0.0f; // Dropout probability |
|||
} dropout; |
|||
} augmentation; |
|||
|
|||
bool use_augmentation = true; |
|||
int sample_memory_size = 50; // For classifier's target_boxes memory |
|||
int net_opt_iter = 10; // Optimizer iterations for filter learning |
|||
|
|||
// --- IoU Net (BB Regressor) --- |
|||
bool use_iou_net = true; |
|||
double box_jitter_pos = 0.1; // Jitter for proposal generation (relative to square_box_sz) |
|||
double box_jitter_sz = 0.1; // Jitter for proposal generation |
|||
int box_refinement_iter = 5; // Iterations for box optimization |
|||
double box_refinement_step_length = 1.0; |
|||
double box_refinement_step_decay = 1.0; |
|||
double maximal_aspect_ratio = 5.0; |
|||
int iounet_k = 5; // Number of top proposals to average for final box |
|||
|
|||
// --- Localization --- |
|||
double target_not_found_threshold = 0.25; // Threshold to consider target lost |
|||
double target_neighborhood_scale = 2.2; // Scale for masking neighborhood around max score |
|||
bool update_scale_when_uncertain = true; |
|||
|
|||
// TODO: Add other parameters from DiMP Python code as needed |
|||
// e.g. feature_stride, kernel_size (these might be derived from network) |
|||
}; |
|||
|
|||
class DiMPTracker { |
|||
public: |
|||
DiMPTracker(const DiMPTrackerParams& params, |
|||
const std::string& resnet_weights_dir, |
|||
const std::string& classifier_weights_dir, |
|||
const std::string& bbregressor_weights_dir, |
|||
torch::Device device); |
|||
|
|||
// Initialize the tracker with the first frame and bounding box |
|||
// image: HWC, uint8 tensor or cv::Mat (needs conversion) |
|||
// initial_bbox_xywh: [x, y, w, h] tensor for the target in the first frame |
|||
void initialize(const torch::Tensor& image_tensor_hwc_uchar, const torch::Tensor& initial_bbox_xywh); |
|||
|
|||
// Track the target in subsequent frames |
|||
// image: HWC, uint8 tensor or cv::Mat |
|||
// Returns: [x, y, w, h] tensor for the predicted bounding box |
|||
torch::Tensor track(const torch::Tensor& image_tensor_hwc_uchar); |
|||
|
|||
private: |
|||
// --- Core Models --- |
|||
cimp::resnet::ResNet resnet_model_; |
|||
Classifier classifier_model_; // Classifier is in global namespace |
|||
BBRegressor bbregressor_model_; // BBRegressor is in global namespace |
|||
|
|||
// --- Parameters & Device --- |
|||
DiMPTrackerParams params_; |
|||
torch::Device device_; |
|||
|
|||
// --- Tracker State --- |
|||
torch::Tensor pos_; // Target position (y_center, x_center) in image coordinates |
|||
torch::Tensor target_sz_; // Target size (height, width) in image coordinates |
|||
torch::Tensor image_sz_; // Current image size (height, width) |
|||
double target_scale_; // Current scale factor of the target |
|||
torch::Tensor base_target_sz_; // Target size at scale 1.0 |
|||
torch::Tensor img_sample_sz_; // Size of the image sample patch (e.g., {288, 288}) |
|||
torch::Tensor img_support_sz_; // Usually same as img_sample_sz_ |
|||
|
|||
torch::Tensor init_sample_pos_; // Position used for generating initial samples |
|||
double init_sample_scale_; // Scale used for generating initial samples |
|||
|
|||
// Learned components |
|||
torch::Tensor target_filter_; // Learned DiMP classification filter: [num_filters, C, H, W] |
|||
std::vector<torch::Tensor> iou_modulation_; // Learned IoU modulation vectors: list of [1, C, 1, 1] |
|||
|
|||
// Feature/Kernel sizes (often derived during initialization) |
|||
torch::Tensor feature_sz_; // Size of the classification feature map (e.g., {18, 18}) |
|||
torch::Tensor kernel_size_; // Size of the classification filter (e.g., {4, 4}) |
|||
// torch::Tensor output_sz_; // output_sz = feature_sz + (kernel_size + 1)%2 |
|||
|
|||
// Augmentation transforms (might be more complex in C++) |
|||
// For now, logic will be in generate_init_samples |
|||
// std::vector<std::function<torch::Tensor(torch::Tensor)>> transforms_; |
|||
|
|||
// Stored target boxes for classifier training |
|||
torch::Tensor stored_target_boxes_; // [memory_size, 4] |
|||
|
|||
// --- Helper Methods (to be implemented in .cpp) --- |
|||
torch::Tensor convert_image_to_tensor_chw_float(const torch::Tensor& image_hwc_uchar); |
|||
|
|||
std::pair<std::vector<torch::Tensor>, torch::Tensor> generate_init_samples(const torch::Tensor& image_chw_float); |
|||
|
|||
void init_classifier_internal(const std::vector<torch::Tensor>& init_backbone_feat_list, const torch::Tensor& init_target_boxes_aug); |
|||
void init_iou_net_internal(const std::vector<torch::Tensor>& init_backbone_feat_list, const torch::Tensor& initial_bbox_for_iou); |
|||
|
|||
std::pair<std::map<std::string, torch::Tensor>, torch::Tensor> extract_backbone_features( |
|||
const torch::Tensor& image_chw_float, |
|||
const torch::Tensor& pos, |
|||
const torch::Tensor& scales, // vector of scales |
|||
const torch::IntArrayRef& sample_sz); |
|||
|
|||
torch::Tensor get_classification_features(const std::map<std::string, torch::Tensor>& backbone_feat); |
|||
std::vector<torch::Tensor> get_iou_backbone_features(const std::map<std::string, torch::Tensor>& backbone_feat); |
|||
std::vector<torch::Tensor> get_iou_features(const std::map<std::string, torch::Tensor>& backbone_feat); |
|||
|
|||
|
|||
std::pair<torch::Tensor, torch::Tensor> get_sample_location(const torch::Tensor& sample_coords_xyxy); |
|||
torch::Tensor get_centered_sample_pos(); |
|||
|
|||
torch::Tensor classify_target(const torch::Tensor& test_x_clf_feat); |
|||
|
|||
struct LocalizationResult { |
|||
torch::Tensor translation_vec_yx; // y, x displacement |
|||
int64_t scale_idx; |
|||
torch::Tensor scores_peak_map; // The score map from the peak scale |
|||
std::string flag; // "normal", "not_found", "uncertain" |
|||
}; |
|||
LocalizationResult localize_target(const torch::Tensor& scores_raw, |
|||
const torch::Tensor& sample_pos_yx, |
|||
const torch::Tensor& sample_scales); |
|||
LocalizationResult localize_advanced(const torch::Tensor& scores_scaled, |
|||
const torch::Tensor& sample_pos_yx, |
|||
const torch::Tensor& sample_scales); |
|||
|
|||
|
|||
void update_state(const torch::Tensor& new_pos_yx); |
|||
torch::Tensor get_iounet_box(const torch::Tensor& pos_yx, const torch::Tensor& sz_hw, |
|||
const torch::Tensor& sample_pos_yx, double sample_scale); |
|||
|
|||
void refine_target_box(const std::map<std::string, torch::Tensor>& backbone_feat, |
|||
const torch::Tensor& sample_pos_yx, |
|||
double sample_scale, |
|||
int64_t scale_idx, |
|||
bool update_scale_flag); |
|||
|
|||
std::pair<torch::Tensor, torch::Tensor> optimize_boxes_default( |
|||
const std::vector<torch::Tensor>& iou_features, |
|||
const torch::Tensor& init_boxes_xywh); // proposals_xywh |
|||
|
|||
// Image processing / patch sampling helpers |
|||
std::pair<torch::Tensor, torch::Tensor> sample_patch_multiscale_affine( |
|||
const torch::Tensor& im_chw_float, |
|||
const torch::Tensor& pos_yx, |
|||
const torch::Tensor& scales, // 1D tensor of scales |
|||
const torch::IntArrayRef& output_sz_hw, |
|||
const std::string& border_mode = "replicate", |
|||
std::optional<double> max_scale_change = std::nullopt); |
|||
|
|||
std::pair<torch::Tensor, torch::Tensor> sample_patch_transformed_affine( |
|||
const torch::Tensor& im_chw_float, |
|||
const torch::Tensor& pos_yx, |
|||
double scale, |
|||
const torch::IntArrayRef&aug_expansion_sz_hw, // Size of patch to extract before transform |
|||
const std::vector<torch::Tensor>& affine_matrices, // One 2x3 affine matrix per transform |
|||
const torch::IntArrayRef& out_sz_hw // Final output size after transform |
|||
); |
|||
|
|||
// Augmentation helpers |
|||
// ... |
|||
}; |
|||
|
|||
} // namespace cimp |
Write
Preview
Loading…
Cancel
Save
Reference in new issue