a stable stage

Refactor: Remove double precision in BBRegressor LinearBlock
42 changed files with 1384 additions and 20149 deletions
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/vcpkg" vcs="Git" />
  </component>
 </project>
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -12,19 +12,13 @@ find_package(Torch REQUIRED)

 message(STATUS "Found LibTorch: ${TORCH_LIBRARIES}")

-# Determine whether to use CPU-only or CUDA implementation
-option(CPU_ONLY "Build without CUDA support" TRUE)
-
-if(CPU_ONLY)
-    message(STATUS "Building in CPU-only mode")
-    add_definitions(-DCPU_ONLY)
-else()
-    message(STATUS "Building with CUDA support")
-endif()
+# Always use CUDA implementation (no CPU fallback)
+message(STATUS "Building with CUDA support")

 # Define source files for the libraries
 set(BB_REGRESSOR_SOURCES
    cimp/bb_regressor/bb_regressor.cpp
+    cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp
 )

 set(CLASSIFIER_SOURCES
@ -53,13 +47,23 @@ add_executable(tracking_demo cimp/demo.cpp)
 # Link the demo with the libraries
 target_link_libraries(tracking_demo PRIVATE bb_regressor classifier ${TORCH_LIBRARIES})

+# Create the test models executable
+add_executable(test_models test/test_models.cpp)
+
+# Link the test with the libraries
+target_link_libraries(test_models PRIVATE bb_regressor classifier ${TORCH_LIBRARIES})
+
+# Create the test sample generator executable (without dependencies on our libraries)
+add_executable(generate_test_samples test/generate_test_samples.cpp)
+
+# Link the test sample generator only with LibTorch
+target_link_libraries(generate_test_samples PRIVATE ${TORCH_LIBRARIES})
+
 # Copy the executable to the binary directory
 install(TARGETS tracking_demo DESTINATION bin)
+install(TARGETS test_models DESTINATION bin)
+install(TARGETS generate_test_samples DESTINATION bin)

 # Print some info during the build
 message(STATUS "LibTorch found at: ${TORCH_INCLUDE_DIRS}")
-if(CPU_ONLY)
-    message(STATUS "Using CPU-only build")
-else()
-    message(STATUS "Using CUDA-enabled build")
-endif() 
+message(STATUS "Using CUDA-enabled build") 
--- a/bb_regressor_stats.txt
+++ b/bb_regressor_stats.txt
@ -1,45 +1,45 @@
 Output 0:
  Shape: [1, 256, 18, 18]
-  Mean: 0.261343
-  Std: 0.373308
+  Mean: 0.260712
+  Std: 0.375281
  Min: 0
-  Max: 2.83079
-  Sum: 21676.9
-  Sample values: [0.273305, 0.153942, 0.0177385]
+  Max: 2.83518
+  Sum: 21624.5
+  Sample values: [0.270668, 0.159451, 0]

 Output 1:
  Shape: [1, 256, 9, 9]
-  Mean: 0.334883
-  Std: 0.854804
+  Mean: 0.338533
+  Std: 0.864997
  Min: 0
-  Max: 7.18794
-  Sum: 6944.14
-  Sample values: [6.22439, 0, 0]
+  Max: 7.52249
+  Sum: 7019.83
+  Sample values: [6.37661, 0, 0]

 Output 2:
-  Shape: [1, 256, 1, 1]
-  Mean: 0.412101
-  Std: 0.568328
+  Shape: [1, 256]
+  Mean: 0.0984123
+  Std: 0.273788
  Min: 0
-  Max: 2.80441
-  Sum: 105.498
+  Max: 1.96461
+  Sum: 25.1935
  Sample values: [0]

 Output 3:
-  Shape: [1, 256, 1, 1]
-  Mean: 0.413812
-  Std: 0.62911
+  Shape: [1, 256]
+  Mean: 0.651337
+  Std: 1.65163
  Min: 0
-  Max: 3.27012
-  Sum: 105.936
+  Max: 10.0534
+  Sum: 166.742
  Sample values: [0]

 Output 4:
  Shape: [1, 5]
-  Mean: 0.441597
-  Std: 0.430799
-  Min: 0.0278997
-  Max: 0.899234
-  Sum: 2.20798
-  Sample values: [0.0278997]
+  Mean: -2.65813
+  Std: 8.42937e-08
+  Min: -2.65813
+  Max: -2.65813
+  Sum: -13.2907
+  Sample values: [-2.65813]

--- a/bin/tracking_demo
+++ b/bin/tracking_demo
--- a/build.sh
+++ b/build.sh
@ -28,7 +28,9 @@ if [ $CUDA_AVAILABLE -eq 1 ]; then
    CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -c2- | cut -d'.' -f1-2)
    echo "Detected CUDA version: $CUDA_VERSION"
 else
-    echo "CUDA not found, building in CPU-only mode"
+    echo "CUDA not found. The project requires CUDA to build."
+    echo "Please install CUDA and try again."
+    exit 1
 fi

 # Download and extract LibTorch with appropriate CUDA support if not already present
@ -36,18 +38,13 @@ LIBTORCH_DIR="$HOME/libtorch"
 if [ ! -d "$LIBTORCH_DIR" ]; then
    echo "Downloading LibTorch..."
    # Use a compatible version based on detected CUDA
-    if [ $CUDA_AVAILABLE -eq 1 ]; then
-        echo "Downloading CUDA-enabled LibTorch"
-        if [[ "$CUDA_VERSION" == "11.5" || "$CUDA_VERSION" == "11.6" || "$CUDA_VERSION" == "11.7" ]]; then
-            LIBTORCH_URL="https://download.pytorch.org/libtorch/cu116/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcu116.zip"
-        elif [[ "$CUDA_VERSION" == "11.3" || "$CUDA_VERSION" == "11.4" ]]; then
-            LIBTORCH_URL="https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.1%2Bcu113.zip"
-        else
-            LIBTORCH_URL="https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"
-        fi
+    echo "Downloading CUDA-enabled LibTorch"
+    if [[ "$CUDA_VERSION" == "11.5" || "$CUDA_VERSION" == "11.6" || "$CUDA_VERSION" == "11.7" ]]; then
+        LIBTORCH_URL="https://download.pytorch.org/libtorch/cu116/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcu116.zip"
+    elif [[ "$CUDA_VERSION" == "11.3" || "$CUDA_VERSION" == "11.4" ]]; then
+        LIBTORCH_URL="https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.12.1%2Bcu113.zip"
    else
-        echo "Downloading CPU-only LibTorch"
-        LIBTORCH_URL="https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip"
+        LIBTORCH_URL="https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"
    fi
    
    wget $LIBTORCH_URL -O libtorch.zip
@ -67,13 +64,9 @@ cd build
 # Create local bin directory
 mkdir -p ../bin

-# Configure with CMake
+# Configure with CMake (CUDA build only; CMakeLists.txt no longer defines CPU_ONLY)
 echo "Configuring with CMake..."
-if [ $CUDA_AVAILABLE -eq 1 ]; then
-    cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=.. -DCPU_ONLY=OFF
-else
-    cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=.. -DCPU_ONLY=ON
-fi
+cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=..

 # Build the project
 echo "Building the project..."
--- a/build/CMakeCache.txt
+++ b/build/CMakeCache.txt
@ -207,8 +207,8 @@ CMAKE_STRIP:FILEPATH=/usr/bin/strip
 // Studio IDE projects all commands are done without /nologo.
 CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE

-//Build without CUDA support
-CPU_ONLY:BOOL=OFF
+//No help, variable specified on the command line.
+CPU_ONLY:UNINITIALIZED=OFF

 //Compile device code in 64 bit mode
 CUDA_64_BIT_DEVICE_CODE:BOOL=ON
@ -373,9 +373,6 @@ TORCH_LIBRARY:FILEPATH=/home/mht/libtorch/lib/libtorch.so
 //The directory containing a CMake configuration file for Torch.
 Torch_DIR:PATH=/home/mht/libtorch/share/cmake/Torch

-//Use ROI Align from torchvision
-USE_ROI_ALIGN:BOOL=OFF
-
 //Path to a library.
 c10_LIBRARY:FILEPATH=/home/mht/libtorch/lib/libc10.so

@ -388,15 +385,6 @@ cpp_tracker_IS_TOP_LEVEL:STATIC=ON
 //Value Computed by CMake
 cpp_tracker_SOURCE_DIR:STATIC=/media/mht/ADATA/repos/cpp_tracker

-//Value Computed by CMake
-cpp_tracker_tests_BINARY_DIR:STATIC=/media/mht/ADATA/repos/cpp_tracker/build/test
-
-//Value Computed by CMake
-cpp_tracker_tests_IS_TOP_LEVEL:STATIC=OFF
-
-//Value Computed by CMake
-cpp_tracker_tests_SOURCE_DIR:STATIC=/media/mht/ADATA/repos/cpp_tracker/test
-
 //Path to a library.
 kineto_LIBRARY:FILEPATH=/home/mht/libtorch/lib/libkineto.a

--- a/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CUDA.bin
+++ b/build/CMakeFiles/3.22.1/CMakeDetermineCompilerABI_CUDA.bin
--- a/build/CMakeFiles/CMakeOutput.log
+++ b/build/CMakeFiles/CMakeOutput.log
@ -12,13 +12,150 @@ Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" p

 The CXX compiler identification is GNU, found in "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCXX/a.out"

+Checking whether the CUDA compiler is NVIDIA using "" matched "nvcc: NVIDIA \(R\) Cuda compiler driver":
+nvcc: NVIDIA (R) Cuda compiler driver
+Copyright (c) 2005-2021 NVIDIA Corporation
+Built on Thu_Nov_18_09:45:30_PST_2021
+Cuda compilation tools, release 11.5, V11.5.119
+Build cuda_11.5.r11.5/compiler.30672275_0
+Compiling the CUDA compiler identification source file "CMakeCUDACompilerId.cu" succeeded.
+Compiler: /usr/bin/nvcc 
+Build flags: 
+Id flags: --keep;--keep-dir;tmp -v
+
+The output was:
+0
+#$ _NVVM_BRANCH_=nvvm
+#$ _SPACE_= 
+#$ _CUDART_=cudart
+#$ _HERE_=/usr/lib/nvidia-cuda-toolkit/bin
+#$ _THERE_=/usr/lib/nvidia-cuda-toolkit/bin
+#$ _TARGET_SIZE_=
+#$ _TARGET_DIR_=
+#$ _TARGET_SIZE_=64
+#$ NVVMIR_LIBRARY_DIR=/usr/lib/nvidia-cuda-toolkit/libdevice
+#$ PATH=/usr/lib/nvidia-cuda-toolkit/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorF6bS95/usr/bin:/tmp/.mount_cursorF6bS95/usr/sbin:/tmp/.mount_cursorF6bS95/usr/games:/tmp/.mount_cursorF6bS95/bin:/tmp/.mount_cursorF6bS95/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts
+#$ LIBRARIES=  -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu
+#$ rm tmp/a_dlink.reg.c
+#$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" 
+#$ cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"
+#$ ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" 
+#$ fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" 
+#$ gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" 
+#$ cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" 
+#$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" 
+#$ nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"
+#$ fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" 
+#$ gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" 
+#$ g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" 
+
+
+Compilation of the CUDA compiler identification source "CMakeCUDACompilerId.cu" produced "a.out"
+
+The CUDA compiler identification is NVIDIA, found in "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/a.out"
+
+Parsed CUDA nvcc implicit link information from above output:
+  found 'PATH=' string: [/usr/lib/nvidia-cuda-toolkit/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorF6bS95/usr/bin:/tmp/.mount_cursorF6bS95/usr/sbin:/tmp/.mount_cursorF6bS95/usr/games:/tmp/.mount_cursorF6bS95/bin:/tmp/.mount_cursorF6bS95/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts]
+  found 'LIBRARIES=' string: [-L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu]
+  considering line: [#$ rm tmp/a_dlink.reg.c]
+  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" ]
+  considering line: [cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"]
+  considering line: [ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" ]
+  considering line: [fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" ]
+  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" ]
+  considering line: [cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" ]
+  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" ]
+  considering line: [nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"]
+    ignoring nvlink line
+  considering line: [fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" ]
+  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" ]
+  considering line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
+    extracted link line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
+  considering line: []
+  extracted link launcher name: [g++]
+  found link launcher absolute path: [/usr/lib/nvidia-cuda-toolkit/bin/g++]
+
+  link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)]
+  link line: [cuda-fake-ld g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
+    arg [cuda-fake-ld] ==> ignore
+    arg [g++] ==> ignore
+    arg [-D__CUDA_ARCH_LIST__=520] ==> ignore
+    arg [-m64] ==> ignore
+    arg [-Wl,--start-group] ==> ignore
+    arg [tmp/a_dlink.o] ==> ignore
+    arg [tmp/CMakeCUDACompilerId.o] ==> ignore
+    arg [-L/usr/lib/x86_64-linux-gnu/stubs] ==> dir [/usr/lib/x86_64-linux-gnu/stubs]
+    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
+    arg [-lcudadevrt] ==> lib [cudadevrt]
+    arg [-lcudart_static] ==> lib [cudart_static]
+    arg [-lrt] ==> lib [rt]
+    arg [-lpthread] ==> lib [pthread]
+    arg [-ldl] ==> lib [dl]
+    arg [-Wl,--end-group] ==> ignore
+    arg [-o] ==> ignore
+    arg [a.out] ==> ignore
+  collapse library dir [/usr/lib/x86_64-linux-gnu/stubs] ==> [/usr/lib/x86_64-linux-gnu/stubs]
+  collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
+  implicit libs: [cudadevrt;cudart_static;rt;pthread;dl]
+  implicit objs: []
+  implicit dirs: [/usr/lib/x86_64-linux-gnu/stubs;/usr/lib/x86_64-linux-gnu]
+  implicit fwks: []
+
+
+Failed to detect CUDA nvcc include information:
+  found 'PATH=' string: [/usr/lib/nvidia-cuda-toolkit/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorF6bS95/usr/bin:/tmp/.mount_cursorF6bS95/usr/sbin:/tmp/.mount_cursorF6bS95/usr/games:/tmp/.mount_cursorF6bS95/bin:/tmp/.mount_cursorF6bS95/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts]
+  found 'LIBRARIES=' string: [-L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu]
+  considering line: [#$ rm tmp/a_dlink.reg.c]
+  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" ]
+  considering line: [cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"]
+  considering line: [ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" ]
+  considering line: [fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" ]
+  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" ]
+  considering line: [cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" ]
+  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" ]
+  considering line: [nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"]
+    ignoring nvlink line
+  considering line: [fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" ]
+  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" ]
+  considering line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
+    extracted link line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
+  considering line: []
+  extracted link launcher name: [g++]
+  found link launcher absolute path: [/usr/lib/nvidia-cuda-toolkit/bin/g++]
+  no 'INCLUDES=' string found in nvcc output:
+    #$ _NVVM_BRANCH_=nvvm
+    #$ _SPACE_= 
+    #$ _CUDART_=cudart
+    #$ _HERE_=/usr/lib/nvidia-cuda-toolkit/bin
+    #$ _THERE_=/usr/lib/nvidia-cuda-toolkit/bin
+    #$ _TARGET_SIZE_=
+    #$ _TARGET_DIR_=
+    #$ _TARGET_SIZE_=64
+    #$ NVVMIR_LIBRARY_DIR=/usr/lib/nvidia-cuda-toolkit/libdevice
+    #$ PATH=/usr/lib/nvidia-cuda-toolkit/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorF6bS95/usr/bin:/tmp/.mount_cursorF6bS95/usr/sbin:/tmp/.mount_cursorF6bS95/usr/games:/tmp/.mount_cursorF6bS95/bin:/tmp/.mount_cursorF6bS95/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts
+    #$ LIBRARIES=  -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu
+    #$ rm tmp/a_dlink.reg.c
+    #$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" 
+    #$ cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"
+    #$ ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" 
+    #$ fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" 
+    #$ gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" 
+    #$ cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" 
+    #$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" 
+    #$ nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"
+    #$ fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" 
+    #$ gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" 
+    #$ g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" 
+    
+
+
 Detecting CXX compiler ABI info compiled with the following output:
 Change Dir: /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp

-Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_51a9f/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_51a9f.dir/build.make CMakeFiles/cmTC_51a9f.dir/build
+Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_50e53/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_50e53.dir/build.make CMakeFiles/cmTC_50e53.dir/build
 gmake[1]: Entering directory '/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp'
-Building CXX object CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o
-/usr/bin/c++   -v -o CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp
+Building CXX object CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o
+/usr/bin/c++   -v -o CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp
 Using built-in specs.
 COLLECT_GCC=/usr/bin/c++
 OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
@ -28,8 +165,8 @@ Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~2
 Thread model: posix
 Supported LTO compression algorithms: zlib zstd
 gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
-COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_51a9f.dir/'
- /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_51a9f.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccZejfDs.s
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_50e53.dir/'
+ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_50e53.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/cc8sAEZe.s
 GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)
 	compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP

@ -53,15 +190,15 @@ GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)

 GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
 Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f
-COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_51a9f.dir/'
- as -v --64 -o CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o /tmp/ccZejfDs.s
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_50e53.dir/'
+ as -v --64 -o CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o /tmp/cc8sAEZe.s
 GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38
 COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
 LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
-COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.'
-Linking CXX executable cmTC_51a9f
-/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_51a9f.dir/link.txt --verbose=1
-/usr/bin/c++  -v CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_51a9f 
+COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.'
+Linking CXX executable cmTC_50e53
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_50e53.dir/link.txt --verbose=1
+/usr/bin/c++  -v CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_50e53 
 Using built-in specs.
 COLLECT_GCC=/usr/bin/c++
 COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper
@ -74,9 +211,9 @@ Supported LTO compression algorithms: zlib zstd
 gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
 COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
 LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
-COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_51a9f' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_51a9f.'
- /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccJLlbFI.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_51a9f /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o
-COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_51a9f' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_51a9f.'
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_50e53' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_50e53.'
+ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/cctwOU9y.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_50e53 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_50e53' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_50e53.'
 gmake[1]: Leaving directory '/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp'


@ -106,10 +243,10 @@ Parsed CXX implicit link information from above output:
  link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)]
  ignore line: [Change Dir: /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp]
  ignore line: []
-  ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_51a9f/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_51a9f.dir/build.make CMakeFiles/cmTC_51a9f.dir/build]
+  ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_50e53/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_50e53.dir/build.make CMakeFiles/cmTC_50e53.dir/build]
  ignore line: [gmake[1]: Entering directory '/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp']
-  ignore line: [Building CXX object CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o]
-  ignore line: [/usr/bin/c++   -v -o CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp]
+  ignore line: [Building CXX object CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o]
+  ignore line: [/usr/bin/c++   -v -o CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp]
  ignore line: [Using built-in specs.]
  ignore line: [COLLECT_GCC=/usr/bin/c++]
  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
@ -119,8 +256,8 @@ Parsed CXX implicit link information from above output:
  ignore line: [Thread model: posix]
  ignore line: [Supported LTO compression algorithms: zlib zstd]
  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
-  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_51a9f.dir/']
-  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_51a9f.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccZejfDs.s]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_50e53.dir/']
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE /usr/share/cmake-3.22/Modules/CMakeCXXCompilerABI.cpp -quiet -dumpdir CMakeFiles/cmTC_50e53.dir/ -dumpbase CMakeCXXCompilerABI.cpp.cpp -dumpbase-ext .cpp -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/cc8sAEZe.s]
  ignore line: [GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)]
  ignore line: [	compiled by GNU C version 11.4.0  GMP version 6.2.1  MPFR version 4.1.0  MPC version 1.2.1  isl version isl-0.24-GMP]
  ignore line: []
@ -144,15 +281,15 @@ Parsed CXX implicit link information from above output:
  ignore line: []
  ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072]
  ignore line: [Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f]
-  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_51a9f.dir/']
-  ignore line: [ as -v --64 -o CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o /tmp/ccZejfDs.s]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_50e53.dir/']
+  ignore line: [ as -v --64 -o CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o /tmp/cc8sAEZe.s]
  ignore line: [GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38]
  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
-  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.']
-  ignore line: [Linking CXX executable cmTC_51a9f]
-  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_51a9f.dir/link.txt --verbose=1]
-  ignore line: [/usr/bin/c++  -v CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_51a9f ]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o' '-c' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.']
+  ignore line: [Linking CXX executable cmTC_50e53]
+  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_50e53.dir/link.txt --verbose=1]
+  ignore line: [/usr/bin/c++  -v CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o -o cmTC_50e53 ]
  ignore line: [Using built-in specs.]
  ignore line: [COLLECT_GCC=/usr/bin/c++]
  ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper]
@ -165,13 +302,13 @@ Parsed CXX implicit link information from above output:
  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
-  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_51a9f' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_51a9f.']
-  link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccJLlbFI.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_51a9f /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_50e53' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_50e53.']
+  link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/cctwOU9y.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_50e53 /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/collect2] ==> ignore
    arg [-plugin] ==> ignore
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so] ==> ignore
    arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] ==> ignore
-    arg [-plugin-opt=-fresolution=/tmp/ccJLlbFI.res] ==> ignore
+    arg [-plugin-opt=-fresolution=/tmp/cctwOU9y.res] ==> ignore
    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
    arg [-plugin-opt=-pass-through=-lc] ==> ignore
@ -189,7 +326,7 @@ Parsed CXX implicit link information from above output:
    arg [-znow] ==> ignore
    arg [-zrelro] ==> ignore
    arg [-o] ==> ignore
-    arg [cmTC_51a9f] ==> ignore
+    arg [cmTC_50e53] ==> ignore
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o]
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o]
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o]
@ -201,7 +338,7 @@ Parsed CXX implicit link information from above output:
    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
    arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib]
    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..]
-    arg [CMakeFiles/cmTC_51a9f.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore
+    arg [CMakeFiles/cmTC_50e53.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore
    arg [-lstdc++] ==> lib [stdc++]
    arg [-lm] ==> lib [m]
    arg [-lgcc_s] ==> lib [gcc_s]
@ -228,150 +365,13 @@ Parsed CXX implicit link information from above output:
  implicit fwks: []


-Checking whether the CUDA compiler is NVIDIA using "" matched "nvcc: NVIDIA \(R\) Cuda compiler driver":
-nvcc: NVIDIA (R) Cuda compiler driver
-Copyright (c) 2005-2021 NVIDIA Corporation
-Built on Thu_Nov_18_09:45:30_PST_2021
-Cuda compilation tools, release 11.5, V11.5.119
-Build cuda_11.5.r11.5/compiler.30672275_0
-Compiling the CUDA compiler identification source file "CMakeCUDACompilerId.cu" succeeded.
-Compiler: /usr/bin/nvcc 
-Build flags: 
-Id flags: --keep;--keep-dir;tmp -v
-
-The output was:
-0
-#$ _NVVM_BRANCH_=nvvm
-#$ _SPACE_= 
-#$ _CUDART_=cudart
-#$ _HERE_=/usr/lib/nvidia-cuda-toolkit/bin
-#$ _THERE_=/usr/lib/nvidia-cuda-toolkit/bin
-#$ _TARGET_SIZE_=
-#$ _TARGET_DIR_=
-#$ _TARGET_SIZE_=64
-#$ NVVMIR_LIBRARY_DIR=/usr/lib/nvidia-cuda-toolkit/libdevice
-#$ PATH=/usr/lib/nvidia-cuda-toolkit/bin:/usr/lib/cuda/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorUdwtnC/usr/bin:/tmp/.mount_cursorUdwtnC/usr/sbin:/tmp/.mount_cursorUdwtnC/usr/games:/tmp/.mount_cursorUdwtnC/bin:/tmp/.mount_cursorUdwtnC/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts
-#$ LIBRARIES=  -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu
-#$ rm tmp/a_dlink.reg.c
-#$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" 
-#$ cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"
-#$ ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" 
-#$ fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" 
-#$ gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" 
-#$ cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" 
-#$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" 
-#$ nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"
-#$ fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" 
-#$ gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" 
-#$ g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" 
-
-
-Compilation of the CUDA compiler identification source "CMakeCUDACompilerId.cu" produced "a.out"
-
-The CUDA compiler identification is NVIDIA, found in "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/a.out"
-
-Parsed CUDA nvcc implicit link information from above output:
-  found 'PATH=' string: [/usr/lib/nvidia-cuda-toolkit/bin:/usr/lib/cuda/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorUdwtnC/usr/bin:/tmp/.mount_cursorUdwtnC/usr/sbin:/tmp/.mount_cursorUdwtnC/usr/games:/tmp/.mount_cursorUdwtnC/bin:/tmp/.mount_cursorUdwtnC/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts]
-  found 'LIBRARIES=' string: [-L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu]
-  considering line: [#$ rm tmp/a_dlink.reg.c]
-  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" ]
-  considering line: [cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"]
-  considering line: [ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" ]
-  considering line: [fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" ]
-  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" ]
-  considering line: [cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" ]
-  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" ]
-  considering line: [nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"]
-    ignoring nvlink line
-  considering line: [fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" ]
-  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" ]
-  considering line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
-    extracted link line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
-  considering line: []
-  extracted link launcher name: [g++]
-  found link launcher absolute path: [/usr/lib/nvidia-cuda-toolkit/bin/g++]
-
-  link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)]
-  link line: [cuda-fake-ld g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
-    arg [cuda-fake-ld] ==> ignore
-    arg [g++] ==> ignore
-    arg [-D__CUDA_ARCH_LIST__=520] ==> ignore
-    arg [-m64] ==> ignore
-    arg [-Wl,--start-group] ==> ignore
-    arg [tmp/a_dlink.o] ==> ignore
-    arg [tmp/CMakeCUDACompilerId.o] ==> ignore
-    arg [-L/usr/lib/x86_64-linux-gnu/stubs] ==> dir [/usr/lib/x86_64-linux-gnu/stubs]
-    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
-    arg [-lcudadevrt] ==> lib [cudadevrt]
-    arg [-lcudart_static] ==> lib [cudart_static]
-    arg [-lrt] ==> lib [rt]
-    arg [-lpthread] ==> lib [pthread]
-    arg [-ldl] ==> lib [dl]
-    arg [-Wl,--end-group] ==> ignore
-    arg [-o] ==> ignore
-    arg [a.out] ==> ignore
-  collapse library dir [/usr/lib/x86_64-linux-gnu/stubs] ==> [/usr/lib/x86_64-linux-gnu/stubs]
-  collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu]
-  implicit libs: [cudadevrt;cudart_static;rt;pthread;dl]
-  implicit objs: []
-  implicit dirs: [/usr/lib/x86_64-linux-gnu/stubs;/usr/lib/x86_64-linux-gnu]
-  implicit fwks: []
-
-
-Failed to detect CUDA nvcc include information:
-  found 'PATH=' string: [/usr/lib/nvidia-cuda-toolkit/bin:/usr/lib/cuda/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorUdwtnC/usr/bin:/tmp/.mount_cursorUdwtnC/usr/sbin:/tmp/.mount_cursorUdwtnC/usr/games:/tmp/.mount_cursorUdwtnC/bin:/tmp/.mount_cursorUdwtnC/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts]
-  found 'LIBRARIES=' string: [-L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu]
-  considering line: [#$ rm tmp/a_dlink.reg.c]
-  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" ]
-  considering line: [cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"]
-  considering line: [ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" ]
-  considering line: [fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" ]
-  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" ]
-  considering line: [cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" ]
-  considering line: [gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" ]
-  considering line: [nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"]
-    ignoring nvlink line
-  considering line: [fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" ]
-  considering line: [gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" ]
-  considering line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
-    extracted link line: [g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" ]
-  considering line: []
-  extracted link launcher name: [g++]
-  found link launcher absolute path: [/usr/lib/nvidia-cuda-toolkit/bin/g++]
-  no 'INCLUDES=' string found in nvcc output:
-    #$ _NVVM_BRANCH_=nvvm
-    #$ _SPACE_= 
-    #$ _CUDART_=cudart
-    #$ _HERE_=/usr/lib/nvidia-cuda-toolkit/bin
-    #$ _THERE_=/usr/lib/nvidia-cuda-toolkit/bin
-    #$ _TARGET_SIZE_=
-    #$ _TARGET_DIR_=
-    #$ _TARGET_SIZE_=64
-    #$ NVVMIR_LIBRARY_DIR=/usr/lib/nvidia-cuda-toolkit/libdevice
-    #$ PATH=/usr/lib/nvidia-cuda-toolkit/bin:/usr/lib/cuda/bin:/home/mht/anaconda3/bin:/home/mht/anaconda3/condabin:/tmp/.mount_cursorUdwtnC/usr/bin:/tmp/.mount_cursorUdwtnC/usr/sbin:/tmp/.mount_cursorUdwtnC/usr/games:/tmp/.mount_cursorUdwtnC/bin:/tmp/.mount_cursorUdwtnC/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts:/home/mht/.local/share/JetBrains/Toolbox/scripts
-    #$ LIBRARIES=  -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu
-    #$ rm tmp/a_dlink.reg.c
-    #$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp1.ii" 
-    #$ cicc --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed  -arch compute_52 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "CMakeCUDACompilerId.fatbin.c" -tused --gen_module_id_file --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.c" --stub_file_name "tmp/CMakeCUDACompilerId.cudafe1.stub.c" --gen_device_file_name "tmp/CMakeCUDACompilerId.cudafe1.gpu"  "tmp/CMakeCUDACompilerId.cpp1.ii" -o "tmp/CMakeCUDACompilerId.ptx"
-    #$ ptxas -arch=sm_52 -m64 "tmp/CMakeCUDACompilerId.ptx"  -o "tmp/CMakeCUDACompilerId.sm_52.cubin" 
-    #$ fatbinary --create="tmp/CMakeCUDACompilerId.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=52,file=tmp/CMakeCUDACompilerId.sm_52.cubin" "--image3=kind=ptx,sm=52,file=tmp/CMakeCUDACompilerId.ptx" --embedded-fatbin="tmp/CMakeCUDACompilerId.fatbin.c" 
-    #$ gcc -D__CUDA_ARCH_LIST__=520 -E -x c++ -D__CUDACC__ -D__NVCC__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 "CMakeCUDACompilerId.cu" -o "tmp/CMakeCUDACompilerId.cpp4.ii" 
-    #$ cudafe++ --c++17 --gnu_version=110400 --display_error_number --orig_src_file_name "CMakeCUDACompilerId.cu" --orig_src_path_name "/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/3.22.1/CompilerIdCUDA/CMakeCUDACompilerId.cu" --allow_managed --m64 --parse_templates --gen_c_file_name "tmp/CMakeCUDACompilerId.cudafe1.cpp" --stub_file_name "CMakeCUDACompilerId.cudafe1.stub.c" --module_id_file_name "tmp/CMakeCUDACompilerId.module_id" "tmp/CMakeCUDACompilerId.cpp4.ii" 
-    #$ gcc -D__CUDA_ARCH__=520 -D__CUDA_ARCH_LIST__=520 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -m64 "tmp/CMakeCUDACompilerId.cudafe1.cpp" -o "tmp/CMakeCUDACompilerId.o" 
-    #$ nvlink -m64 --arch=sm_52 --register-link-binaries="tmp/a_dlink.reg.c"    -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -cpu-arch=X86_64 "tmp/CMakeCUDACompilerId.o"  -lcudadevrt  -o "tmp/a_dlink.sm_52.cubin"
-    #$ fatbinary --create="tmp/a_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=52,file=tmp/a_dlink.sm_52.cubin" --embedded-fatbin="tmp/a_dlink.fatbin.c" 
-    #$ gcc -D__CUDA_ARCH_LIST__=520 -c -x c++ -DFATBINFILE="\"tmp/a_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"tmp/a_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__   -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=5 -D__CUDACC_VER_BUILD__=119 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=5 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 "/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub" -o "tmp/a_dlink.o" 
-    #$ g++ -D__CUDA_ARCH_LIST__=520 -m64 -Wl,--start-group "tmp/a_dlink.o" "tmp/CMakeCUDACompilerId.o"   -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu  -lcudadevrt  -lcudart_static  -lrt -lpthread  -ldl  -Wl,--end-group -o "a.out" 
-    
-
-
 Detecting CUDA compiler ABI info compiled with the following output:
 Change Dir: /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp

-Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_c6bab/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_c6bab.dir/build.make CMakeFiles/cmTC_c6bab.dir/build
+Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_4b2ab/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_4b2ab.dir/build.make CMakeFiles/cmTC_4b2ab.dir/build
 gmake[1]: Entering directory '/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp'
-Building CUDA object CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o
-/usr/bin/nvcc -forward-unknown-to-host-compiler   --generate-code=arch=compute_52,code=[compute_52,sm_52]   -Xcompiler=-v -MD -MT CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o -MF CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o.d -x cu -c /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o
+Building CUDA object CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o
+/usr/bin/nvcc -forward-unknown-to-host-compiler   --generate-code=arch=compute_52,code=[compute_52,sm_52]   -Xcompiler=-v -MD -MT CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o -MF CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o.d -x cu -c /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o
 Using built-in specs.
 COLLECT_GCC=gcc-11
 OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
@ -381,8 +381,8 @@ Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~2
 Thread model: posix
 Supported LTO compression algorithms: zlib zstd
 gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
-COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/'
- /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.cu -dumpbase-ext .cu
+COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/'
+ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.cu -dumpbase-ext .cu
 ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11"
 ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
 ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"
@ -399,7 +399,7 @@ ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86
 End of search list.
 COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
 LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
-COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.'
+COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.'
 Using built-in specs.
 COLLECT_GCC=gcc-11
 OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
@ -409,8 +409,8 @@ Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~2
 Thread model: posix
 Supported LTO compression algorithms: zlib zstd
 gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
-COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/'
- /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH_LIST__=520 -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.cu -dumpbase-ext .cu
+COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/'
+ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH_LIST__=520 -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.cu -dumpbase-ext .cu
 ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11"
 ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"
 ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"
@ -427,7 +427,7 @@ ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/../../../../x86
 End of search list.
 COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
 LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
-COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.'
+COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.'
 Using built-in specs.
 COLLECT_GCC=gcc-11
 OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
@ -437,8 +437,8 @@ Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-1ubuntu1~2
 Thread model: posix
 Supported LTO compression algorithms: zlib zstd
 gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
-COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6bab.dir/'
- /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS /tmp/tmpxft_0000dbca_00000000-6_CMakeCUDACompilerABI.cudafe1.cpp -quiet -dumpdir CMakeFiles/cmTC_c6bab.dir/ -dumpbase CMakeCUDACompilerABI.cu.cpp -dumpbase-ext .cpp -m64 -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccnCGoqW.s
+COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_4b2ab.dir/'
+ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS /tmp/tmpxft_000229d3_00000000-6_CMakeCUDACompilerABI.cudafe1.cpp -quiet -dumpdir CMakeFiles/cmTC_4b2ab.dir/ -dumpbase CMakeCUDACompilerABI.cu.cpp -dumpbase-ext .cpp -m64 -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccueYgmA.s
 GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)
 	compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP

@ -462,15 +462,15 @@ GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)

 GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
 Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f
-COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6bab.dir/'
- as -v --64 -o CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o /tmp/ccnCGoqW.s
+COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_4b2ab.dir/'
+ as -v --64 -o CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o /tmp/ccueYgmA.s
 GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38
 COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
 LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
-COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.'
-Linking CUDA executable cmTC_c6bab
-/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c6bab.dir/link.txt --verbose=1
-/usr/lib/nvidia-cuda-toolkit/bin/g++  -v CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o -o cmTC_c6bab  -lcudadevrt -lcudart_static -lrt -lpthread -ldl  -L"/usr/lib/x86_64-linux-gnu/stubs" -L"/usr/lib/x86_64-linux-gnu"
+COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.'
+Linking CUDA executable cmTC_4b2ab
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_4b2ab.dir/link.txt --verbose=1
+/usr/lib/nvidia-cuda-toolkit/bin/g++  -v CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o -o cmTC_4b2ab  -lcudadevrt -lcudart_static -lrt -lpthread -ldl  -L"/usr/lib/x86_64-linux-gnu/stubs" -L"/usr/lib/x86_64-linux-gnu"
 Using built-in specs.
 COLLECT_GCC=g++-11
 COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper
@ -483,9 +483,9 @@ Supported LTO compression algorithms: zlib zstd
 gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) 
 COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/
 LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/
-COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6bab' '-L/usr/lib/x86_64-linux-gnu/stubs' '-L/usr/lib/x86_64-linux-gnu' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6bab.'
- /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccfVs73h.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_c6bab /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o -lcudadevrt -lcudart_static -lrt -lpthread -ldl -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o
-COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6bab' '-L/usr/lib/x86_64-linux-gnu/stubs' '-L/usr/lib/x86_64-linux-gnu' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6bab.'
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_4b2ab' '-L/usr/lib/x86_64-linux-gnu/stubs' '-L/usr/lib/x86_64-linux-gnu' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_4b2ab.'
+ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccak7FDB.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_4b2ab /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o -lcudadevrt -lcudart_static -lrt -lpthread -ldl -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o
+COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_4b2ab' '-L/usr/lib/x86_64-linux-gnu/stubs' '-L/usr/lib/x86_64-linux-gnu' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_4b2ab.'
 gmake[1]: Leaving directory '/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp'


@ -515,10 +515,10 @@ Parsed CUDA implicit link information from above output:
  link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)]
  ignore line: [Change Dir: /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp]
  ignore line: []
-  ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_c6bab/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_c6bab.dir/build.make CMakeFiles/cmTC_c6bab.dir/build]
+  ignore line: [Run Build Command(s):/usr/bin/gmake -f Makefile cmTC_4b2ab/fast && /usr/bin/gmake  -f CMakeFiles/cmTC_4b2ab.dir/build.make CMakeFiles/cmTC_4b2ab.dir/build]
  ignore line: [gmake[1]: Entering directory '/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/CMakeTmp']
-  ignore line: [Building CUDA object CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o]
-  ignore line: [/usr/bin/nvcc -forward-unknown-to-host-compiler   --generate-code=arch=compute_52 code=[compute_52 sm_52]   -Xcompiler=-v -MD -MT CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o -MF CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o.d -x cu -c /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o]
+  ignore line: [Building CUDA object CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o]
+  ignore line: [/usr/bin/nvcc -forward-unknown-to-host-compiler   --generate-code=arch=compute_52 code=[compute_52 sm_52]   -Xcompiler=-v -MD -MT CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o -MF CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o.d -x cu -c /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o]
  ignore line: [Using built-in specs.]
  ignore line: [COLLECT_GCC=gcc-11]
  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
@ -528,8 +528,8 @@ Parsed CUDA implicit link information from above output:
  ignore line: [Thread model: posix]
  ignore line: [Supported LTO compression algorithms: zlib zstd]
  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
-  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/']
-  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.cu -dumpbase-ext .cu]
+  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/']
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.cu -dumpbase-ext .cu]
  ignore line: [ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11"]
  ignore line: [ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"]
  ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"]
@ -546,7 +546,7 @@ Parsed CUDA implicit link information from above output:
  ignore line: [End of search list.]
  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
-  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_0000dbca_00000000-7_CMakeCUDACompilerABI.cpp1.']
+  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_000229d3_00000000-7_CMakeCUDACompilerABI.cpp1.']
  ignore line: [Using built-in specs.]
  ignore line: [COLLECT_GCC=gcc-11]
  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
@ -556,8 +556,8 @@ Parsed CUDA implicit link information from above output:
  ignore line: [Thread model: posix]
  ignore line: [Supported LTO compression algorithms: zlib zstd]
  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
-  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/']
-  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH_LIST__=520 -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.cu -dumpbase-ext .cu]
+  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/']
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -E -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH_LIST__=520 -D __CUDACC__ -D __NVCC__ -D __CUDACC_VER_MAJOR__=11 -D __CUDACC_VER_MINOR__=5 -D __CUDACC_VER_BUILD__=119 -D __CUDA_API_VER_MAJOR__=11 -D __CUDA_API_VER_MINOR__=5 -D __NVCC_DIAG_PRAGMA_SUPPORT__=1 -include cuda_runtime.h /usr/share/cmake-3.22/Modules/CMakeCUDACompilerABI.cu -o /tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.ii -m64 -mtune=generic -march=x86-64 -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -dumpdir /tmp/ -dumpbase tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.cu -dumpbase-ext .cu]
  ignore line: [ignoring duplicate directory "/usr/include/x86_64-linux-gnu/c++/11"]
  ignore line: [ignoring nonexistent directory "/usr/local/include/x86_64-linux-gnu"]
  ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/11/include-fixed"]
@ -574,7 +574,7 @@ Parsed CUDA implicit link information from above output:
  ignore line: [End of search list.]
  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
-  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_0000dbca_00000000-5_CMakeCUDACompilerABI.cpp4.']
+  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH_LIST__=520' '-E' '-D' '__CUDACC__' '-D' '__NVCC__' '-v' '-D' '__CUDACC_VER_MAJOR__=11' '-D' '__CUDACC_VER_MINOR__=5' '-D' '__CUDACC_VER_BUILD__=119' '-D' '__CUDA_API_VER_MAJOR__=11' '-D' '__CUDA_API_VER_MINOR__=5' '-D' '__NVCC_DIAG_PRAGMA_SUPPORT__=1' '-include' 'cuda_runtime.h' '-m64' '-o' '/tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.ii' '-mtune=generic' '-march=x86-64' '-dumpdir' '/tmp/tmpxft_000229d3_00000000-5_CMakeCUDACompilerABI.cpp4.']
  ignore line: [Using built-in specs.]
  ignore line: [COLLECT_GCC=gcc-11]
  ignore line: [OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa]
@ -584,8 +584,8 @@ Parsed CUDA implicit link information from above output:
  ignore line: [Thread model: posix]
  ignore line: [Supported LTO compression algorithms: zlib zstd]
  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
-  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6bab.dir/']
-  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS /tmp/tmpxft_0000dbca_00000000-6_CMakeCUDACompilerABI.cudafe1.cpp -quiet -dumpdir CMakeFiles/cmTC_c6bab.dir/ -dumpbase CMakeCUDACompilerABI.cu.cpp -dumpbase-ext .cpp -m64 -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccnCGoqW.s]
+  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_4b2ab.dir/']
+  ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/11/cc1plus -quiet -v -imultiarch x86_64-linux-gnu -D_GNU_SOURCE -D __CUDA_ARCH__=520 -D __CUDA_ARCH_LIST__=520 -D CUDA_DOUBLE_MATH_FUNCTIONS /tmp/tmpxft_000229d3_00000000-6_CMakeCUDACompilerABI.cudafe1.cpp -quiet -dumpdir CMakeFiles/cmTC_4b2ab.dir/ -dumpbase CMakeCUDACompilerABI.cu.cpp -dumpbase-ext .cpp -m64 -mtune=generic -march=x86-64 -version -fasynchronous-unwind-tables -fstack-protector-strong -Wformat -Wformat-security -fstack-clash-protection -fcf-protection -o /tmp/ccueYgmA.s]
  ignore line: [GNU C++17 (Ubuntu 11.4.0-1ubuntu1~22.04) version 11.4.0 (x86_64-linux-gnu)]
  ignore line: [	compiled by GNU C version 11.4.0  GMP version 6.2.1  MPFR version 4.1.0  MPC version 1.2.1  isl version isl-0.24-GMP]
  ignore line: []
@ -609,15 +609,15 @@ Parsed CUDA implicit link information from above output:
  ignore line: []
  ignore line: [GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072]
  ignore line: [Compiler executable checksum: d591828bb4d392ae8b7b160e5bb0b95f]
-  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6bab.dir/']
-  ignore line: [ as -v --64 -o CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o /tmp/ccnCGoqW.s]
+  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_4b2ab.dir/']
+  ignore line: [ as -v --64 -o CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o /tmp/ccueYgmA.s]
  ignore line: [GNU assembler version 2.38 (x86_64-linux-gnu) using BFD version (GNU Binutils for Ubuntu) 2.38]
  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
-  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.']
-  ignore line: [Linking CUDA executable cmTC_c6bab]
-  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_c6bab.dir/link.txt --verbose=1]
-  ignore line: [/usr/lib/nvidia-cuda-toolkit/bin/g++  -v CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o -o cmTC_c6bab  -lcudadevrt -lcudart_static -lrt -lpthread -ldl  -L"/usr/lib/x86_64-linux-gnu/stubs" -L"/usr/lib/x86_64-linux-gnu"]
+  ignore line: [COLLECT_GCC_OPTIONS='-D' '__CUDA_ARCH__=520' '-D' '__CUDA_ARCH_LIST__=520' '-c' '-D' 'CUDA_DOUBLE_MATH_FUNCTIONS' '-v' '-m64' '-o' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o' '-mtune=generic' '-march=x86-64' '-dumpdir' 'CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.']
+  ignore line: [Linking CUDA executable cmTC_4b2ab]
+  ignore line: [/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_4b2ab.dir/link.txt --verbose=1]
+  ignore line: [/usr/lib/nvidia-cuda-toolkit/bin/g++  -v CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o -o cmTC_4b2ab  -lcudadevrt -lcudart_static -lrt -lpthread -ldl  -L"/usr/lib/x86_64-linux-gnu/stubs" -L"/usr/lib/x86_64-linux-gnu"]
  ignore line: [Using built-in specs.]
  ignore line: [COLLECT_GCC=g++-11]
  ignore line: [COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper]
@ -630,13 +630,13 @@ Parsed CUDA implicit link information from above output:
  ignore line: [gcc version 11.4.0 (Ubuntu 11.4.0-1ubuntu1~22.04) ]
  ignore line: [COMPILER_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/]
  ignore line: [LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/11/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib/:/lib/x86_64-linux-gnu/:/lib/../lib/:/usr/lib/x86_64-linux-gnu/:/usr/lib/../lib/:/usr/lib/gcc/x86_64-linux-gnu/11/../../../:/lib/:/usr/lib/]
-  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_c6bab' '-L/usr/lib/x86_64-linux-gnu/stubs' '-L/usr/lib/x86_64-linux-gnu' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_c6bab.']
-  link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccfVs73h.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_c6bab /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o -lcudadevrt -lcudart_static -lrt -lpthread -ldl -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
+  ignore line: [COLLECT_GCC_OPTIONS='-v' '-o' 'cmTC_4b2ab' '-L/usr/lib/x86_64-linux-gnu/stubs' '-L/usr/lib/x86_64-linux-gnu' '-shared-libgcc' '-mtune=generic' '-march=x86-64' '-dumpdir' 'cmTC_4b2ab.']
+  link line: [ /usr/lib/gcc/x86_64-linux-gnu/11/collect2 -plugin /usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so -plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper -plugin-opt=-fresolution=/tmp/ccak7FDB.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --build-id --eh-frame-hdr -m elf_x86_64 --hash-style=gnu --as-needed -dynamic-linker /lib64/ld-linux-x86-64.so.2 -pie -z now -z relro -o cmTC_4b2ab /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o -L/usr/lib/x86_64-linux-gnu/stubs -L/usr/lib/x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11 -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/11/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/11/../../.. CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o -lcudadevrt -lcudart_static -lrt -lpthread -ldl -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/11/crtendS.o /usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crtn.o]
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/collect2] ==> ignore
    arg [-plugin] ==> ignore
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/liblto_plugin.so] ==> ignore
    arg [-plugin-opt=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper] ==> ignore
-    arg [-plugin-opt=-fresolution=/tmp/ccfVs73h.res] ==> ignore
+    arg [-plugin-opt=-fresolution=/tmp/ccak7FDB.res] ==> ignore
    arg [-plugin-opt=-pass-through=-lgcc_s] ==> ignore
    arg [-plugin-opt=-pass-through=-lgcc] ==> ignore
    arg [-plugin-opt=-pass-through=-lc] ==> ignore
@ -654,7 +654,7 @@ Parsed CUDA implicit link information from above output:
    arg [-znow] ==> ignore
    arg [-zrelro] ==> ignore
    arg [-o] ==> ignore
-    arg [cmTC_c6bab] ==> ignore
+    arg [cmTC_4b2ab] ==> ignore
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/Scrt1.o]
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/../../../x86_64-linux-gnu/crti.o]
    arg [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o] ==> obj [/usr/lib/gcc/x86_64-linux-gnu/11/crtbeginS.o]
@ -668,7 +668,7 @@ Parsed CUDA implicit link information from above output:
    arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu]
    arg [-L/usr/lib/../lib] ==> dir [/usr/lib/../lib]
    arg [-L/usr/lib/gcc/x86_64-linux-gnu/11/../../..] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/11/../../..]
-    arg [CMakeFiles/cmTC_c6bab.dir/CMakeCUDACompilerABI.cu.o] ==> ignore
+    arg [CMakeFiles/cmTC_4b2ab.dir/CMakeCUDACompilerABI.cu.o] ==> ignore
    arg [-lcudadevrt] ==> lib [cudadevrt]
    arg [-lcudart_static] ==> lib [cudart_static]
    arg [-lrt] ==> lib [rt]
--- a/build/CMakeFiles/Makefile.cmake
+++ b/build/CMakeFiles/Makefile.cmake
@ -65,4 +65,6 @@ set(CMAKE_DEPEND_INFO_FILES
  "CMakeFiles/bb_regressor.dir/DependInfo.cmake"
  "CMakeFiles/classifier.dir/DependInfo.cmake"
  "CMakeFiles/tracking_demo.dir/DependInfo.cmake"
+  "CMakeFiles/test_models.dir/DependInfo.cmake"
+  "CMakeFiles/generate_test_samples.dir/DependInfo.cmake"
  )
--- a/build/CMakeFiles/Makefile2
+++ b/build/CMakeFiles/Makefile2
@ -66,6 +66,8 @@ CMAKE_BINARY_DIR = /media/mht/ADATA/repos/cpp_tracker/build
 all: CMakeFiles/bb_regressor.dir/all
 all: CMakeFiles/classifier.dir/all
 all: CMakeFiles/tracking_demo.dir/all
+all: CMakeFiles/test_models.dir/all
+all: CMakeFiles/generate_test_samples.dir/all
 .PHONY : all

 # The main recursive "preinstall" target.
@ -76,6 +78,8 @@ preinstall:
 clean: CMakeFiles/bb_regressor.dir/clean
 clean: CMakeFiles/classifier.dir/clean
 clean: CMakeFiles/tracking_demo.dir/clean
+clean: CMakeFiles/test_models.dir/clean
+clean: CMakeFiles/generate_test_samples.dir/clean
 .PHONY : clean

 #=============================================================================
@ -85,12 +89,12 @@ clean: CMakeFiles/tracking_demo.dir/clean
 CMakeFiles/bb_regressor.dir/all:
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/bb_regressor.dir/build.make CMakeFiles/bb_regressor.dir/depend
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/bb_regressor.dir/build.make CMakeFiles/bb_regressor.dir/build
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=1,2 "Built target bb_regressor"
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=1,2,3 "Built target bb_regressor"
 .PHONY : CMakeFiles/bb_regressor.dir/all

 # Build rule for subdir invocation for target.
 CMakeFiles/bb_regressor.dir/rule: cmake_check_build_system
-	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 2
+	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 3
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/bb_regressor.dir/all
 	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 0
 .PHONY : CMakeFiles/bb_regressor.dir/rule
@ -111,7 +115,7 @@ CMakeFiles/bb_regressor.dir/clean:
 CMakeFiles/classifier.dir/all:
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/classifier.dir/build.make CMakeFiles/classifier.dir/depend
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/classifier.dir/build.make CMakeFiles/classifier.dir/build
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=3,4 "Built target classifier"
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=4,5 "Built target classifier"
 .PHONY : CMakeFiles/classifier.dir/all

 # Build rule for subdir invocation for target.
@ -138,12 +142,12 @@ CMakeFiles/tracking_demo.dir/all: CMakeFiles/bb_regressor.dir/all
 CMakeFiles/tracking_demo.dir/all: CMakeFiles/classifier.dir/all
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/tracking_demo.dir/build.make CMakeFiles/tracking_demo.dir/depend
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/tracking_demo.dir/build.make CMakeFiles/tracking_demo.dir/build
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=5,6 "Built target tracking_demo"
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=10,11 "Built target tracking_demo"
 .PHONY : CMakeFiles/tracking_demo.dir/all

 # Build rule for subdir invocation for target.
 CMakeFiles/tracking_demo.dir/rule: cmake_check_build_system
-	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 6
+	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 7
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/tracking_demo.dir/all
 	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 0
 .PHONY : CMakeFiles/tracking_demo.dir/rule
@ -157,6 +161,59 @@ CMakeFiles/tracking_demo.dir/clean:
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/tracking_demo.dir/build.make CMakeFiles/tracking_demo.dir/clean
 .PHONY : CMakeFiles/tracking_demo.dir/clean

+#=============================================================================
+# Target rules for target CMakeFiles/test_models.dir
+
+# All Build rule for target.
+CMakeFiles/test_models.dir/all: CMakeFiles/bb_regressor.dir/all
+CMakeFiles/test_models.dir/all: CMakeFiles/classifier.dir/all
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/test_models.dir/build.make CMakeFiles/test_models.dir/depend
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/test_models.dir/build.make CMakeFiles/test_models.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=8,9 "Built target test_models"
+.PHONY : CMakeFiles/test_models.dir/all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/test_models.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 7
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/test_models.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 0
+.PHONY : CMakeFiles/test_models.dir/rule
+
+# Convenience name for target.
+test_models: CMakeFiles/test_models.dir/rule
+.PHONY : test_models
+
+# clean rule for target.
+CMakeFiles/test_models.dir/clean:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/test_models.dir/build.make CMakeFiles/test_models.dir/clean
+.PHONY : CMakeFiles/test_models.dir/clean
+
+#=============================================================================
+# Target rules for target CMakeFiles/generate_test_samples.dir
+
+# All Build rule for target.
+CMakeFiles/generate_test_samples.dir/all:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/generate_test_samples.dir/build.make CMakeFiles/generate_test_samples.dir/depend
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/generate_test_samples.dir/build.make CMakeFiles/generate_test_samples.dir/build
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=6,7 "Built target generate_test_samples"
+.PHONY : CMakeFiles/generate_test_samples.dir/all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/generate_test_samples.dir/rule: cmake_check_build_system
+	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 2
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/generate_test_samples.dir/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles 0
+.PHONY : CMakeFiles/generate_test_samples.dir/rule
+
+# Convenience name for target.
+generate_test_samples: CMakeFiles/generate_test_samples.dir/rule
+.PHONY : generate_test_samples
+
+# clean rule for target.
+CMakeFiles/generate_test_samples.dir/clean:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/generate_test_samples.dir/build.make CMakeFiles/generate_test_samples.dir/clean
+.PHONY : CMakeFiles/generate_test_samples.dir/clean
+
 #=============================================================================
 # Special targets to cleanup operation of make.

--- a/build/CMakeFiles/TargetDirectories.txt
+++ b/build/CMakeFiles/TargetDirectories.txt
@ -1,6 +1,8 @@
 /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/bb_regressor.dir
 /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/classifier.dir
 /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/tracking_demo.dir
+/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/test_models.dir
+/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/generate_test_samples.dir
 /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/edit_cache.dir
 /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/rebuild_cache.dir
 /media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles/list_install_components.dir
--- a/build/CMakeFiles/bb_regressor.dir/DependInfo.cmake
+++ b/build/CMakeFiles/bb_regressor.dir/DependInfo.cmake
@ -9,6 +9,7 @@ set(CMAKE_DEPENDS_LANGUAGES
 # The set of dependency files which are needed:
 set(CMAKE_DEPENDS_DEPENDENCY_FILES
  "/media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/bb_regressor.cpp" "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o" "gcc" "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o.d"
+  "/media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp" "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o" "gcc" "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o.d"
  )

 # Targets to which this target links.
--- a/build/CMakeFiles/bb_regressor.dir/build.make
+++ b/build/CMakeFiles/bb_regressor.dir/build.make
@ -83,17 +83,33 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.s: cmake_force
 	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.s"
 	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/bb_regressor.cpp -o CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.s

+CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o: CMakeFiles/bb_regressor.dir/flags.make
+CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o: ../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp
+CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o: CMakeFiles/bb_regressor.dir/compiler_depend.ts
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Building CXX object CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -MD -MT CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o -MF CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o.d -o CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o -c /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp
+
+CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.i: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.i"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp > CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.i
+
+CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.s: cmake_force
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.s"
+	/usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp -o CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.s
+
 # Object files for target bb_regressor
 bb_regressor_OBJECTS = \
-"CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o"
+"CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o" \
+"CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o"

 # External object files for target bb_regressor
 bb_regressor_EXTERNAL_OBJECTS =

 libbb_regressor.a: CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o
+libbb_regressor.a: CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o
 libbb_regressor.a: CMakeFiles/bb_regressor.dir/build.make
 libbb_regressor.a: CMakeFiles/bb_regressor.dir/link.txt
-	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX static library libbb_regressor.a"
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/media/mht/ADATA/repos/cpp_tracker/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_3) "Linking CXX static library libbb_regressor.a"
 	$(CMAKE_COMMAND) -P CMakeFiles/bb_regressor.dir/cmake_clean_target.cmake
 	$(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/bb_regressor.dir/link.txt --verbose=$(VERBOSE)

--- a/build/CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o
+++ b/build/CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o
--- a/build/CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o.d
+++ b/build/CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o.d
@ -4795,4 +4795,4 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: \
 /usr/include/cuda.h /home/mht/libtorch/include/ATen/cuda/Exceptions.h \
 /usr/include/cusolver_common.h \
 /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.h \
- /media/mht/ADATA/repos/cpp_tracker/ltr/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh
+ /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu_impl.cuh
--- a/build/CMakeFiles/bb_regressor.dir/cmake_clean.cmake
+++ b/build/CMakeFiles/bb_regressor.dir/cmake_clean.cmake
@ -1,6 +1,8 @@
 file(REMOVE_RECURSE
  "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o"
  "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o.d"
+  "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o"
+  "CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o.d"
  "libbb_regressor.a"
  "libbb_regressor.pdb"
 )
--- a/build/CMakeFiles/bb_regressor.dir/compiler_depend.internal
+++ b/build/CMakeFiles/bb_regressor.dir/compiler_depend.internal
@ -4844,4 +4844,167 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o
 /home/mht/libtorch/include/torch/csrc/jit/frontend/tree_views.h
 /home/mht/libtorch/include/torch/csrc/jit/serialization/pickler.h
 /home/mht/libtorch/include/torch/csrc/jit/serialization/pickle.h
+ /usr/include/cuda_runtime.h
+ /usr/include/crt/host_config.h
+ /usr/include/builtin_types.h
+ /usr/include/device_types.h
+ /usr/include/crt/host_defines.h
+ /usr/include/driver_types.h
+ /usr/include/vector_types.h
+ /usr/include/surface_types.h
+ /usr/include/texture_types.h
+ /usr/include/library_types.h
+ /usr/include/channel_descriptor.h
+ /usr/include/cuda_runtime_api.h
+ /usr/include/cuda_device_runtime_api.h
+ /usr/include/driver_functions.h
+ /usr/include/vector_functions.h
+ /usr/include/vector_functions.hpp
+ /home/mht/libtorch/include/ATen/cuda/CUDAContext.h
+ /usr/include/cuda_runtime_api.h
+ /usr/include/cusparse.h
+ /usr/include/cuComplex.h
+ /usr/include/c++/11/math.h
+ /usr/include/cuda_fp16.h
+ /usr/include/cuda_fp16.hpp
+ /usr/include/driver_types.h
+ /usr/include/library_types.h
+ /usr/include/cublas_v2.h
+ /usr/include/cublas_api.h
+ /usr/include/cuComplex.h
+ /usr/include/cuda_bf16.h
+ /usr/include/cuda_bf16.hpp
+ /usr/include/cusolverDn.h
+ /usr/include/cublas_v2.h
+ /usr/include/cusolver_common.h
+ /home/mht/libtorch/include/c10/cuda/CUDAStream.h
+ /home/mht/libtorch/include/c10/cuda/CUDAFunctions.h
+ /home/mht/libtorch/include/c10/core/impl/GPUTrace.h
+ /home/mht/libtorch/include/c10/cuda/CUDAException.h
+ /home/mht/libtorch/include/c10/cuda/CUDADeviceAssertionHost.h
+ /home/mht/libtorch/include/c10/cuda/CUDAMacros.h
+ /home/mht/libtorch/include/c10/cuda/impl/cuda_cmake_macros.h
+ /home/mht/libtorch/include/c10/cuda/CUDAMiscFunctions.h
+ /usr/include/cuda.h
+ /home/mht/libtorch/include/ATen/cuda/Exceptions.h
+ /usr/include/cusolver_common.h
+ /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.h
+ /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu_impl.cuh
+
+CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o
+ /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp
+ /usr/include/stdc-predef.h
+ /media/mht/ADATA/repos/cpp_tracker/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.h
+ /usr/include/stdio.h
+ /usr/include/x86_64-linux-gnu/bits/libc-header-start.h
+ /usr/include/features.h
+ /usr/include/features-time64.h
+ /usr/include/x86_64-linux-gnu/bits/wordsize.h
+ /usr/include/x86_64-linux-gnu/bits/timesize.h
+ /usr/include/x86_64-linux-gnu/sys/cdefs.h
+ /usr/include/x86_64-linux-gnu/bits/long-double.h
+ /usr/include/x86_64-linux-gnu/gnu/stubs.h
+ /usr/include/x86_64-linux-gnu/gnu/stubs-64.h
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h
+ /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h
+ /usr/include/x86_64-linux-gnu/bits/types.h
+ /usr/include/x86_64-linux-gnu/bits/typesizes.h
+ /usr/include/x86_64-linux-gnu/bits/time64.h
+ /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/__FILE.h
+ /usr/include/x86_64-linux-gnu/bits/types/FILE.h
+ /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h
+ /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h
+ /usr/include/x86_64-linux-gnu/bits/stdio_lim.h
+ /usr/include/x86_64-linux-gnu/bits/floatn.h
+ /usr/include/x86_64-linux-gnu/bits/floatn-common.h
+ /usr/include/x86_64-linux-gnu/bits/stdio.h
+ /usr/include/x86_64-linux-gnu/bits/stdio2.h
+ /usr/include/c++/11/math.h
+ /usr/include/c++/11/cmath
+ /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h
+ /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h
+ /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h
+ /usr/include/c++/11/pstl/pstl_config.h
+ /usr/include/c++/11/bits/cpp_type_traits.h
+ /usr/include/c++/11/ext/type_traits.h
+ /usr/include/math.h
+ /usr/include/x86_64-linux-gnu/bits/math-vector.h
+ /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h
+ /usr/include/x86_64-linux-gnu/bits/flt-eval-method.h
+ /usr/include/x86_64-linux-gnu/bits/fp-logb.h
+ /usr/include/x86_64-linux-gnu/bits/fp-fast.h
+ /usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h
+ /usr/include/x86_64-linux-gnu/bits/mathcalls.h
+ /usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h
+ /usr/include/x86_64-linux-gnu/bits/iscanonical.h
+ /usr/include/c++/11/bits/std_abs.h
+ /usr/include/stdlib.h
+ /usr/include/x86_64-linux-gnu/bits/waitflags.h
+ /usr/include/x86_64-linux-gnu/bits/waitstatus.h
+ /usr/include/x86_64-linux-gnu/bits/types/locale_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h
+ /usr/include/x86_64-linux-gnu/sys/types.h
+ /usr/include/x86_64-linux-gnu/bits/types/clock_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/time_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/timer_t.h
+ /usr/include/x86_64-linux-gnu/bits/stdint-intn.h
+ /usr/include/endian.h
+ /usr/include/x86_64-linux-gnu/bits/endian.h
+ /usr/include/x86_64-linux-gnu/bits/endianness.h
+ /usr/include/x86_64-linux-gnu/bits/byteswap.h
+ /usr/include/x86_64-linux-gnu/bits/uintn-identity.h
+ /usr/include/x86_64-linux-gnu/sys/select.h
+ /usr/include/x86_64-linux-gnu/bits/select.h
+ /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h
+ /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h
+ /usr/include/x86_64-linux-gnu/bits/select2.h
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h
+ /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h
+ /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h
+ /usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h
+ /usr/include/x86_64-linux-gnu/bits/struct_mutex.h
+ /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h
+ /usr/include/alloca.h
+ /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h
+ /usr/include/x86_64-linux-gnu/bits/stdlib-float.h
+ /usr/include/x86_64-linux-gnu/bits/stdlib.h
+ /usr/include/c++/11/bits/specfun.h
+ /usr/include/c++/11/bits/stl_algobase.h
+ /usr/include/c++/11/bits/functexcept.h
+ /usr/include/c++/11/bits/exception_defines.h
+ /usr/include/c++/11/ext/numeric_traits.h
+ /usr/include/c++/11/bits/stl_pair.h
+ /usr/include/c++/11/bits/move.h
+ /usr/include/c++/11/type_traits
+ /usr/include/c++/11/bits/stl_iterator_base_types.h
+ /usr/include/c++/11/bits/stl_iterator_base_funcs.h
+ /usr/include/c++/11/bits/concept_check.h
+ /usr/include/c++/11/debug/assertions.h
+ /usr/include/c++/11/bits/stl_iterator.h
+ /usr/include/c++/11/bits/ptr_traits.h
+ /usr/include/c++/11/debug/debug.h
+ /usr/include/c++/11/bits/predefined_ops.h
+ /usr/include/c++/11/limits
+ /usr/include/c++/11/tr1/gamma.tcc
+ /usr/include/c++/11/tr1/special_function_util.h
+ /usr/include/c++/11/tr1/bessel_function.tcc
+ /usr/include/c++/11/tr1/beta_function.tcc
+ /usr/include/c++/11/tr1/ell_integral.tcc
+ /usr/include/c++/11/tr1/exp_integral.tcc
+ /usr/include/c++/11/tr1/hypergeometric.tcc
+ /usr/include/c++/11/tr1/legendre_function.tcc
+ /usr/include/c++/11/tr1/modified_bessel_func.tcc
+ /usr/include/c++/11/tr1/poly_hermite.tcc
+ /usr/include/c++/11/tr1/poly_laguerre.tcc
+ /usr/include/c++/11/tr1/riemann_zeta.tcc
+ /usr/include/string.h
+ /usr/include/strings.h
+ /usr/include/x86_64-linux-gnu/bits/strings_fortified.h
+ /usr/include/x86_64-linux-gnu/bits/string_fortified.h

--- a/build/CMakeFiles/bb_regressor.dir/compiler_depend.make
+++ b/build/CMakeFiles/bb_regressor.dir/compiler_depend.make
@ -4842,8 +4842,202 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg
  /home/mht/libtorch/include/torch/csrc/jit/frontend/versioned_symbols.h \
  /home/mht/libtorch/include/torch/csrc/jit/frontend/tree_views.h \
  /home/mht/libtorch/include/torch/csrc/jit/serialization/pickler.h \
-  /home/mht/libtorch/include/torch/csrc/jit/serialization/pickle.h
+  /home/mht/libtorch/include/torch/csrc/jit/serialization/pickle.h \
+  /usr/include/cuda_runtime.h \
+  /usr/include/crt/host_config.h \
+  /usr/include/builtin_types.h \
+  /usr/include/device_types.h \
+  /usr/include/crt/host_defines.h \
+  /usr/include/driver_types.h \
+  /usr/include/vector_types.h \
+  /usr/include/surface_types.h \
+  /usr/include/texture_types.h \
+  /usr/include/library_types.h \
+  /usr/include/channel_descriptor.h \
+  /usr/include/cuda_runtime_api.h \
+  /usr/include/cuda_device_runtime_api.h \
+  /usr/include/driver_functions.h \
+  /usr/include/vector_functions.h \
+  /usr/include/vector_functions.hpp \
+  /home/mht/libtorch/include/ATen/cuda/CUDAContext.h \
+  /usr/include/cuda_runtime_api.h \
+  /usr/include/cusparse.h \
+  /usr/include/cuComplex.h \
+  /usr/include/c++/11/math.h \
+  /usr/include/cuda_fp16.h \
+  /usr/include/cuda_fp16.hpp \
+  /usr/include/driver_types.h \
+  /usr/include/library_types.h \
+  /usr/include/cublas_v2.h \
+  /usr/include/cublas_api.h \
+  /usr/include/cuComplex.h \
+  /usr/include/cuda_bf16.h \
+  /usr/include/cuda_bf16.hpp \
+  /usr/include/cusolverDn.h \
+  /usr/include/cublas_v2.h \
+  /usr/include/cusolver_common.h \
+  /home/mht/libtorch/include/c10/cuda/CUDAStream.h \
+  /home/mht/libtorch/include/c10/cuda/CUDAFunctions.h \
+  /home/mht/libtorch/include/c10/core/impl/GPUTrace.h \
+  /home/mht/libtorch/include/c10/cuda/CUDAException.h \
+  /home/mht/libtorch/include/c10/cuda/CUDADeviceAssertionHost.h \
+  /home/mht/libtorch/include/c10/cuda/CUDAMacros.h \
+  /home/mht/libtorch/include/c10/cuda/impl/cuda_cmake_macros.h \
+  /home/mht/libtorch/include/c10/cuda/CUDAMiscFunctions.h \
+  /usr/include/cuda.h \
+  /home/mht/libtorch/include/ATen/cuda/Exceptions.h \
+  /usr/include/cusolver_common.h \
+  ../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.h \
+  ../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu_impl.cuh
+
+CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o: ../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp \
+  /usr/include/stdc-predef.h \
+  ../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.h \
+  /usr/include/stdio.h \
+  /usr/include/x86_64-linux-gnu/bits/libc-header-start.h \
+  /usr/include/features.h \
+  /usr/include/features-time64.h \
+  /usr/include/x86_64-linux-gnu/bits/wordsize.h \
+  /usr/include/x86_64-linux-gnu/bits/timesize.h \
+  /usr/include/x86_64-linux-gnu/sys/cdefs.h \
+  /usr/include/x86_64-linux-gnu/bits/long-double.h \
+  /usr/include/x86_64-linux-gnu/gnu/stubs.h \
+  /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
+  /usr/lib/gcc/x86_64-linux-gnu/11/include/stddef.h \
+  /usr/lib/gcc/x86_64-linux-gnu/11/include/stdarg.h \
+  /usr/include/x86_64-linux-gnu/bits/types.h \
+  /usr/include/x86_64-linux-gnu/bits/typesizes.h \
+  /usr/include/x86_64-linux-gnu/bits/time64.h \
+  /usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/__FILE.h \
+  /usr/include/x86_64-linux-gnu/bits/types/FILE.h \
+  /usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h \
+  /usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h \
+  /usr/include/x86_64-linux-gnu/bits/stdio_lim.h \
+  /usr/include/x86_64-linux-gnu/bits/floatn.h \
+  /usr/include/x86_64-linux-gnu/bits/floatn-common.h \
+  /usr/include/x86_64-linux-gnu/bits/stdio.h \
+  /usr/include/x86_64-linux-gnu/bits/stdio2.h \
+  /usr/include/c++/11/math.h \
+  /usr/include/c++/11/cmath \
+  /usr/include/x86_64-linux-gnu/c++/11/bits/c++config.h \
+  /usr/include/x86_64-linux-gnu/c++/11/bits/os_defines.h \
+  /usr/include/x86_64-linux-gnu/c++/11/bits/cpu_defines.h \
+  /usr/include/c++/11/pstl/pstl_config.h \
+  /usr/include/c++/11/bits/cpp_type_traits.h \
+  /usr/include/c++/11/ext/type_traits.h \
+  /usr/include/math.h \
+  /usr/include/x86_64-linux-gnu/bits/math-vector.h \
+  /usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h \
+  /usr/include/x86_64-linux-gnu/bits/flt-eval-method.h \
+  /usr/include/x86_64-linux-gnu/bits/fp-logb.h \
+  /usr/include/x86_64-linux-gnu/bits/fp-fast.h \
+  /usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h \
+  /usr/include/x86_64-linux-gnu/bits/mathcalls.h \
+  /usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h \
+  /usr/include/x86_64-linux-gnu/bits/iscanonical.h \
+  /usr/include/c++/11/bits/std_abs.h \
+  /usr/include/stdlib.h \
+  /usr/include/x86_64-linux-gnu/bits/waitflags.h \
+  /usr/include/x86_64-linux-gnu/bits/waitstatus.h \
+  /usr/include/x86_64-linux-gnu/bits/types/locale_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/__locale_t.h \
+  /usr/include/x86_64-linux-gnu/sys/types.h \
+  /usr/include/x86_64-linux-gnu/bits/types/clock_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/time_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/timer_t.h \
+  /usr/include/x86_64-linux-gnu/bits/stdint-intn.h \
+  /usr/include/endian.h \
+  /usr/include/x86_64-linux-gnu/bits/endian.h \
+  /usr/include/x86_64-linux-gnu/bits/endianness.h \
+  /usr/include/x86_64-linux-gnu/bits/byteswap.h \
+  /usr/include/x86_64-linux-gnu/bits/uintn-identity.h \
+  /usr/include/x86_64-linux-gnu/sys/select.h \
+  /usr/include/x86_64-linux-gnu/bits/select.h \
+  /usr/include/x86_64-linux-gnu/bits/types/sigset_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h \
+  /usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h \
+  /usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h \
+  /usr/include/x86_64-linux-gnu/bits/select2.h \
+  /usr/include/x86_64-linux-gnu/bits/pthreadtypes.h \
+  /usr/include/x86_64-linux-gnu/bits/thread-shared-types.h \
+  /usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h \
+  /usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h \
+  /usr/include/x86_64-linux-gnu/bits/struct_mutex.h \
+  /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h \
+  /usr/include/alloca.h \
+  /usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h \
+  /usr/include/x86_64-linux-gnu/bits/stdlib-float.h \
+  /usr/include/x86_64-linux-gnu/bits/stdlib.h \
+  /usr/include/c++/11/bits/specfun.h \
+  /usr/include/c++/11/bits/stl_algobase.h \
+  /usr/include/c++/11/bits/functexcept.h \
+  /usr/include/c++/11/bits/exception_defines.h \
+  /usr/include/c++/11/ext/numeric_traits.h \
+  /usr/include/c++/11/bits/stl_pair.h \
+  /usr/include/c++/11/bits/move.h \
+  /usr/include/c++/11/type_traits \
+  /usr/include/c++/11/bits/stl_iterator_base_types.h \
+  /usr/include/c++/11/bits/stl_iterator_base_funcs.h \
+  /usr/include/c++/11/bits/concept_check.h \
+  /usr/include/c++/11/debug/assertions.h \
+  /usr/include/c++/11/bits/stl_iterator.h \
+  /usr/include/c++/11/bits/ptr_traits.h \
+  /usr/include/c++/11/debug/debug.h \
+  /usr/include/c++/11/bits/predefined_ops.h \
+  /usr/include/c++/11/limits \
+  /usr/include/c++/11/tr1/gamma.tcc \
+  /usr/include/c++/11/tr1/special_function_util.h \
+  /usr/include/c++/11/tr1/bessel_function.tcc \
+  /usr/include/c++/11/tr1/beta_function.tcc \
+  /usr/include/c++/11/tr1/ell_integral.tcc \
+  /usr/include/c++/11/tr1/exp_integral.tcc \
+  /usr/include/c++/11/tr1/hypergeometric.tcc \
+  /usr/include/c++/11/tr1/legendre_function.tcc \
+  /usr/include/c++/11/tr1/modified_bessel_func.tcc \
+  /usr/include/c++/11/tr1/poly_hermite.tcc \
+  /usr/include/c++/11/tr1/poly_laguerre.tcc \
+  /usr/include/c++/11/tr1/riemann_zeta.tcc \
+  /usr/include/string.h \
+  /usr/include/strings.h \
+  /usr/include/x86_64-linux-gnu/bits/strings_fortified.h \
+  /usr/include/x86_64-linux-gnu/bits/string_fortified.h
+
+
+/home/mht/libtorch/include/c10/cuda/impl/cuda_cmake_macros.h:
+
+/home/mht/libtorch/include/c10/core/impl/GPUTrace.h:

+/usr/include/cusolver_common.h:
+
+/usr/include/cuda_bf16.hpp:
+
+/usr/include/cuda_fp16.h:
+
+/usr/include/c++/11/math.h:
+
+/home/mht/libtorch/include/ATen/cuda/CUDAContext.h:
+
+/usr/include/vector_functions.h:
+
+/usr/include/driver_functions.h:
+
+/usr/include/cuda_runtime_api.h:
+
+/usr/include/channel_descriptor.h:
+
+/usr/include/library_types.h:
+
+/usr/include/driver_types.h:
+
+/usr/include/crt/host_defines.h:
+
+/usr/include/device_types.h:
+
+/usr/include/builtin_types.h:

 /home/mht/libtorch/include/torch/csrc/jit/serialization/pickle.h:

@ -5025,6 +5219,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/torch/csrc/api/include/torch/data/datasets/map.h:

+../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.h:
+
 /home/mht/libtorch/include/torch/csrc/jit/api/compilation_unit.h:

 /home/mht/libtorch/include/torch/csrc/jit/runtime/variable_tensor_list.h:
@ -6147,6 +6343,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/ccol_indices_copy_native.h:

+/home/mht/libtorch/include/c10/cuda/CUDAFunctions.h:
+
 /home/mht/libtorch/include/ATen/ops/ccol_indices_native.h:

 /home/mht/libtorch/include/torch/csrc/jit/runtime/interpreter.h:
@ -6161,6 +6359,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/bmm_meta.h:

+/home/mht/libtorch/include/c10/cuda/CUDAStream.h:
+
 /home/mht/libtorch/include/ATen/ops/block_diag_native.h:

 /usr/include/c++/11/bits/forward_list.tcc:
@ -6519,6 +6719,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/_pad_circular_native.h:

+/usr/include/cuda_bf16.h:
+
 /home/mht/libtorch/include/ATen/ops/_pack_padded_sequence_native.h:

 /home/mht/libtorch/include/ATen/ops/_nnpack_spatial_convolution_native.h:
@ -6959,6 +7161,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/unbind.h:

+/usr/include/texture_types.h:
+
 /home/mht/libtorch/include/ATen/ops/type_as.h:

 /home/mht/libtorch/include/ATen/ops/special_scaled_modified_bessel_k1_native.h:
@ -6977,6 +7181,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/tril.h:

+/usr/include/cuComplex.h:
+
 /home/mht/libtorch/include/ATen/ops/resize_as_native.h:

 /home/mht/libtorch/include/ATen/ops/triangular_solve.h:
@ -7093,6 +7299,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/split_with_sizes_copy.h:

+/usr/include/vector_types.h:
+
 /home/mht/libtorch/include/ATen/ops/split.h:

 /home/mht/libtorch/include/ATen/ops/special_xlogy_ops.h:
@ -7189,6 +7397,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/special_i1.h:

+../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp:
+
 /home/mht/libtorch/include/ATen/ops/special_i0e_ops.h:

 /home/mht/libtorch/include/ATen/ops/special_i0e.h:
@ -7741,6 +7951,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/nuclear_norm.h:

+/usr/include/cuda.h:
+
 /home/mht/libtorch/include/ATen/ops/not_equal.h:

 /home/mht/libtorch/include/ATen/ops/embedding_renorm_native.h:
@ -8147,6 +8359,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/_aminmax.h:

+/usr/include/cuda_fp16.hpp:
+
 /usr/lib/gcc/x86_64-linux-gnu/11/include/avx512bf16intrin.h:

 /home/mht/libtorch/include/ATen/ops/_addmm_activation.h:
@ -8579,6 +8793,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/resize_ops.h:

+/home/mht/libtorch/include/c10/cuda/CUDAException.h:
+
 /home/mht/libtorch/include/ATen/ops/logcumsumexp.h:

 /home/mht/libtorch/include/ATen/ops/zeros_native.h:
@ -9619,6 +9835,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/masked_select_backward.h:

+/home/mht/libtorch/include/c10/cuda/CUDAMiscFunctions.h:
+
 /home/mht/libtorch/include/ATen/ops/_convolution_native.h:

 /usr/include/x86_64-linux-gnu/bits/types/struct_itimerspec.h:
@ -9711,6 +9929,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/clamp_min_ops.h:

+/usr/include/cusolverDn.h:
+
 /home/mht/libtorch/include/ATen/ops/index_copy_ops.h:

 /home/mht/libtorch/include/ATen/ops/_foreach_log1p.h:
@ -10323,6 +10543,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/le_ops.h:

+/usr/include/cublas_v2.h:
+
 /home/mht/libtorch/include/ATen/ops/bitwise_and.h:

 /home/mht/libtorch/include/ATen/ops/from_file.h:
@ -10793,6 +11015,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/c10/util/Exception.h:

+/usr/include/cusparse.h:
+
 /usr/include/x86_64-linux-gnu/bits/waitflags.h:

 /home/mht/libtorch/include/ATen/ops/_slow_conv2d_forward_native.h:
@ -10927,6 +11151,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /usr/include/x86_64-linux-gnu/bits/endian.h:

+/usr/include/cuda_runtime.h:
+
 /home/mht/libtorch/include/ATen/ops/argwhere_ops.h:

 /home/mht/libtorch/include/ATen/ops/real.h:
@ -12325,6 +12551,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/_mps_convolution_transpose_ops.h:

+/home/mht/libtorch/include/ATen/cuda/Exceptions.h:
+
 /home/mht/libtorch/include/ATen/ops/lstm_cell.h:

 /home/mht/libtorch/include/ATen/ops/log_sigmoid_forward_ops.h:
@ -12953,6 +13181,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/_upsample_nearest_exact1d.h:

+/usr/include/cuda_device_runtime_api.h:
+
 /home/mht/libtorch/include/ATen/ops/smm_ops.h:

 /usr/include/c++/11/bits/locale_facets_nonio.tcc:
@ -13113,6 +13343,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/conv_tbc_backward.h:

+../cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu_impl.cuh:
+
 /home/mht/libtorch/include/ATen/ops/cudnn_batch_norm_backward.h:

 /home/mht/libtorch/include/ATen/ops/_grid_sampler_2d_cpu_fallback_backward_native.h:
@ -13179,6 +13411,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/index_copy.h:

+/usr/include/vector_functions.hpp:
+
 /home/mht/libtorch/include/ATen/ops/amin_ops.h:

 /usr/include/x86_64-linux-gnu/sys/cdefs.h:
@ -13191,6 +13425,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/atan2.h:

+/usr/include/crt/host_config.h:
+
 /home/mht/libtorch/include/ATen/ops/upsample_bicubic2d_backward_native.h:

 /home/mht/libtorch/include/ATen/ops/quantized_max_pool2d_native.h:
@ -13431,6 +13667,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/is_leaf.h:

+/usr/include/surface_types.h:
+
 /home/mht/libtorch/include/ATen/ops/acosh_native.h:

 /home/mht/libtorch/include/ATen/ops/constant_pad_nd.h:
@ -13489,6 +13727,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/copy_sparse_to_sparse_ops.h:

+/usr/include/cublas_api.h:
+
 /home/mht/libtorch/include/ATen/ops/_fused_adamw_ops.h:

 /home/mht/libtorch/include/ATen/ops/layer_norm_ops.h:
@ -14347,6 +14587,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/linalg_svd_ops.h:

+/home/mht/libtorch/include/c10/cuda/CUDADeviceAssertionHost.h:
+
 /home/mht/libtorch/include/torch/csrc/api/include/torch/nn/modules/normalization.h:

 /home/mht/libtorch/include/ATen/ops/index_copy_native.h:
@ -14517,6 +14759,8 @@ CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o: ../cimp/bb_reg

 /home/mht/libtorch/include/ATen/ops/miopen_batch_norm.h:

+/home/mht/libtorch/include/c10/cuda/CUDAMacros.h:
+
 /home/mht/libtorch/include/ATen/ops/fake_quantize_per_tensor_affine_cachemask_backward_ops.h:

 /home/mht/libtorch/include/ATen/ops/miopen_batch_norm_backward.h:
--- a/build/CMakeFiles/bb_regressor.dir/link.txt
+++ b/build/CMakeFiles/bb_regressor.dir/link.txt
@ -1,2 +1,2 @@
-/usr/bin/ar qc libbb_regressor.a CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o
+/usr/bin/ar qc libbb_regressor.a CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.o CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o
 /usr/bin/ranlib libbb_regressor.a
--- a/build/CMakeFiles/bb_regressor.dir/progress.make
+++ b/build/CMakeFiles/bb_regressor.dir/progress.make
@ -1,3 +1,4 @@
 CMAKE_PROGRESS_1 = 1
 CMAKE_PROGRESS_2 = 2
+CMAKE_PROGRESS_3 = 3

--- a/build/CMakeFiles/classifier.dir/cimp/classifier/classifier.cpp.o
+++ b/build/CMakeFiles/classifier.dir/cimp/classifier/classifier.cpp.o
--- a/build/CMakeFiles/classifier.dir/progress.make
+++ b/build/CMakeFiles/classifier.dir/progress.make
@ -1,3 +1,3 @@
-CMAKE_PROGRESS_1 = 3
-CMAKE_PROGRESS_2 = 4
+CMAKE_PROGRESS_1 = 4
+CMAKE_PROGRESS_2 = 5

--- a/build/CMakeFiles/progress.marks
+++ b/build/CMakeFiles/progress.marks
@ -1 +1 @@
-6
+11
--- a/build/CMakeFiles/tracking_demo.dir/cimp/demo.cpp.o
+++ b/build/CMakeFiles/tracking_demo.dir/cimp/demo.cpp.o
--- a/build/CMakeFiles/tracking_demo.dir/compiler_depend.internal
+++ b/build/CMakeFiles/tracking_demo.dir/compiler_depend.internal
--- a/build/CMakeFiles/tracking_demo.dir/compiler_depend.make
+++ b/build/CMakeFiles/tracking_demo.dir/compiler_depend.make
--- a/build/CMakeFiles/tracking_demo.dir/progress.make
+++ b/build/CMakeFiles/tracking_demo.dir/progress.make
@ -1,3 +1,3 @@
-CMAKE_PROGRESS_1 = 5
-CMAKE_PROGRESS_2 = 6
+CMAKE_PROGRESS_1 = 10
+CMAKE_PROGRESS_2 = 11

--- a/build/Makefile
+++ b/build/Makefile
@ -200,6 +200,32 @@ tracking_demo/fast:
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/tracking_demo.dir/build.make CMakeFiles/tracking_demo.dir/build
 .PHONY : tracking_demo/fast

+#=============================================================================
+# Target rules for targets named test_models
+
+# Build rule for target.
+test_models: cmake_check_build_system
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 test_models
+.PHONY : test_models
+
+# fast build rule for target.
+test_models/fast:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/test_models.dir/build.make CMakeFiles/test_models.dir/build
+.PHONY : test_models/fast
+
+#=============================================================================
+# Target rules for targets named generate_test_samples
+
+# Build rule for target.
+generate_test_samples: cmake_check_build_system
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 generate_test_samples
+.PHONY : generate_test_samples
+
+# fast build rule for target.
+generate_test_samples/fast:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/generate_test_samples.dir/build.make CMakeFiles/generate_test_samples.dir/build
+.PHONY : generate_test_samples/fast
+
 cimp/bb_regressor/bb_regressor.o: cimp/bb_regressor/bb_regressor.cpp.o
 .PHONY : cimp/bb_regressor/bb_regressor.o

@ -224,6 +250,30 @@ cimp/bb_regressor/bb_regressor.cpp.s:
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/bb_regressor.dir/build.make CMakeFiles/bb_regressor.dir/cimp/bb_regressor/bb_regressor.cpp.s
 .PHONY : cimp/bb_regressor/bb_regressor.cpp.s

+cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.o: cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o
+.PHONY : cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.o
+
+# target to build an object file
+cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/bb_regressor.dir/build.make CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o
+.PHONY : cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.o
+
+cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.i: cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.i
+.PHONY : cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.i
+
+# target to preprocess a source file
+cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.i:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/bb_regressor.dir/build.make CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.i
+.PHONY : cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.i
+
+cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.s: cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.s
+.PHONY : cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.s
+
+# target to generate assembly for a file
+cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.s:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/bb_regressor.dir/build.make CMakeFiles/bb_regressor.dir/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.s
+.PHONY : cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.cpp.s
+
 cimp/classifier/classifier.o: cimp/classifier/classifier.cpp.o
 .PHONY : cimp/classifier/classifier.o

@ -272,6 +322,54 @@ cimp/demo.cpp.s:
 	$(MAKE) $(MAKESILENT) -f CMakeFiles/tracking_demo.dir/build.make CMakeFiles/tracking_demo.dir/cimp/demo.cpp.s
 .PHONY : cimp/demo.cpp.s

+test/generate_test_samples.o: test/generate_test_samples.cpp.o
+.PHONY : test/generate_test_samples.o
+
+# target to build an object file
+test/generate_test_samples.cpp.o:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/generate_test_samples.dir/build.make CMakeFiles/generate_test_samples.dir/test/generate_test_samples.cpp.o
+.PHONY : test/generate_test_samples.cpp.o
+
+test/generate_test_samples.i: test/generate_test_samples.cpp.i
+.PHONY : test/generate_test_samples.i
+
+# target to preprocess a source file
+test/generate_test_samples.cpp.i:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/generate_test_samples.dir/build.make CMakeFiles/generate_test_samples.dir/test/generate_test_samples.cpp.i
+.PHONY : test/generate_test_samples.cpp.i
+
+test/generate_test_samples.s: test/generate_test_samples.cpp.s
+.PHONY : test/generate_test_samples.s
+
+# target to generate assembly for a file
+test/generate_test_samples.cpp.s:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/generate_test_samples.dir/build.make CMakeFiles/generate_test_samples.dir/test/generate_test_samples.cpp.s
+.PHONY : test/generate_test_samples.cpp.s
+
+test/test_models.o: test/test_models.cpp.o
+.PHONY : test/test_models.o
+
+# target to build an object file
+test/test_models.cpp.o:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/test_models.dir/build.make CMakeFiles/test_models.dir/test/test_models.cpp.o
+.PHONY : test/test_models.cpp.o
+
+test/test_models.i: test/test_models.cpp.i
+.PHONY : test/test_models.i
+
+# target to preprocess a source file
+test/test_models.cpp.i:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/test_models.dir/build.make CMakeFiles/test_models.dir/test/test_models.cpp.i
+.PHONY : test/test_models.cpp.i
+
+test/test_models.s: test/test_models.cpp.s
+.PHONY : test/test_models.s
+
+# target to generate assembly for a file
+test/test_models.cpp.s:
+	$(MAKE) $(MAKESILENT) -f CMakeFiles/test_models.dir/build.make CMakeFiles/test_models.dir/test/test_models.cpp.s
+.PHONY : test/test_models.cpp.s
+
 # Help Target
 help:
 	@echo "The following are some of the valid targets for this Makefile:"
@ -286,16 +384,27 @@ help:
 	@echo "... rebuild_cache"
 	@echo "... bb_regressor"
 	@echo "... classifier"
+	@echo "... generate_test_samples"
+	@echo "... test_models"
 	@echo "... tracking_demo"
 	@echo "... cimp/bb_regressor/bb_regressor.o"
 	@echo "... cimp/bb_regressor/bb_regressor.i"
 	@echo "... cimp/bb_regressor/bb_regressor.s"
+	@echo "... cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.o"
+	@echo "... cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.i"
+	@echo "... cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.s"
 	@echo "... cimp/classifier/classifier.o"
 	@echo "... cimp/classifier/classifier.i"
 	@echo "... cimp/classifier/classifier.s"
 	@echo "... cimp/demo.o"
 	@echo "... cimp/demo.i"
 	@echo "... cimp/demo.s"
+	@echo "... test/generate_test_samples.o"
+	@echo "... test/generate_test_samples.i"
+	@echo "... test/generate_test_samples.s"
+	@echo "... test/test_models.o"
+	@echo "... test/test_models.i"
+	@echo "... test/test_models.s"
 .PHONY : help


--- a/build/cmake_install.cmake
+++ b/build/cmake_install.cmake
@ -62,6 +62,46 @@ if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_
  endif()
 endif()

+if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT)
+  if(EXISTS "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/test_models" AND
+     NOT IS_SYMLINK "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/test_models")
+    file(RPATH_CHECK
+         FILE "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/test_models"
+         RPATH "")
+  endif()
+  file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/bin" TYPE EXECUTABLE FILES "/media/mht/ADATA/repos/cpp_tracker/build/test_models")
+  if(EXISTS "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/test_models" AND
+     NOT IS_SYMLINK "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/test_models")
+    file(RPATH_CHANGE
+         FILE "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/test_models"
+         OLD_RPATH "/home/mht/libtorch/lib:"
+         NEW_RPATH "")
+    if(CMAKE_INSTALL_DO_STRIP)
+      execute_process(COMMAND "/usr/bin/strip" "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/test_models")
+    endif()
+  endif()
+endif()
+
+if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xUnspecifiedx" OR NOT CMAKE_INSTALL_COMPONENT)
+  if(EXISTS "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/generate_test_samples" AND
+     NOT IS_SYMLINK "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/generate_test_samples")
+    file(RPATH_CHECK
+         FILE "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/generate_test_samples"
+         RPATH "")
+  endif()
+  file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/bin" TYPE EXECUTABLE FILES "/media/mht/ADATA/repos/cpp_tracker/build/generate_test_samples")
+  if(EXISTS "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/generate_test_samples" AND
+     NOT IS_SYMLINK "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/generate_test_samples")
+    file(RPATH_CHANGE
+         FILE "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/generate_test_samples"
+         OLD_RPATH "/home/mht/libtorch/lib:"
+         NEW_RPATH "")
+    if(CMAKE_INSTALL_DO_STRIP)
+      execute_process(COMMAND "/usr/bin/strip" "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/bin/generate_test_samples")
+    endif()
+  endif()
+endif()
+
 if(CMAKE_INSTALL_COMPONENT)
  set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
 else()
--- a/build/install_manifest.txt
+++ b/build/install_manifest.txt
@ -1 +1,3 @@
-/media/mht/ADATA/repos/cpp_tracker/bin/tracking_demo
+/media/mht/ADATA/repos/cpp_tracker/bin/tracking_demo
+/media/mht/ADATA/repos/cpp_tracker/bin/test_models
+/media/mht/ADATA/repos/cpp_tracker/bin/generate_test_samples
--- a/build/libbb_regressor.a
+++ b/build/libbb_regressor.a
--- a/build/libclassifier.a
+++ b/build/libclassifier.a
--- a/build/tracking_demo
+++ b/build/tracking_demo
--- a/cimp/bb_regressor/bb_regressor.cpp
+++ b/cimp/bb_regressor/bb_regressor.cpp
@ -221,16 +221,11 @@ torch::Tensor BBRegressor::load_tensor(const std::string& file_path) {
 }

 // Constructor
-BBRegressor::BBRegressor(const std::string& base_dir, torch::Device dev) 
-    : device(dev), model_dir(base_dir + "/exported_weights/bb_regressor"), 
+BBRegressor::BBRegressor(const std::string& model_weights_dir, torch::Device dev) 
+    : device(dev), model_dir(model_weights_dir), 
      fc3_rt(256, 256, 5, true, true, true),
      fc4_rt(256, 256, 3, true, true, true) {
    
-    // Check if base directory exists
-    if (!fs::exists(base_dir)) {
-        throw std::runtime_error("Base directory does not exist: " + base_dir);
-    }
-    
    // Check if model directory exists
    if (!fs::exists(model_dir)) {
        throw std::runtime_error("Model directory does not exist: " + model_dir);
@ -586,307 +581,213 @@ std::vector<torch::Tensor> BBRegressor::get_iou_feat(std::vector<torch::Tensor>

 // Get modulation vectors for the target
 std::vector<torch::Tensor> BBRegressor::get_modulation(std::vector<torch::Tensor> feat, torch::Tensor bb) {
-    // Apply target branch to get modulation vectors
-    std::cout << "  get_modulation input bb: " << bb.sizes() << std::endl;
-    
-    // Convert bounding box from [x, y, w, h] to [batch_idx, x1, y1, x2, y2] format for ROI pooling
-    auto roi = torch::zeros({bb.size(0), 5}, bb.options());
-    
-    // Set batch index to 0 (first element)
-    roi.index_put_({torch::indexing::Slice(), 0}, 0);
-    
-    // Copy x, y coordinates
-    roi.index_put_({torch::indexing::Slice(), 1}, bb.index({torch::indexing::Slice(), 0}));
-    roi.index_put_({torch::indexing::Slice(), 2}, bb.index({torch::indexing::Slice(), 1}));
-    
-    // Calculate x2, y2 from width and height
-    auto x2 = bb.index({torch::indexing::Slice(), 0}) + bb.index({torch::indexing::Slice(), 2});
-    auto y2 = bb.index({torch::indexing::Slice(), 1}) + bb.index({torch::indexing::Slice(), 3});
-    roi.index_put_({torch::indexing::Slice(), 3}, x2);
-    roi.index_put_({torch::indexing::Slice(), 4}, y2);
-    
-    std::cout << "  Converted ROI: [";
-    for (int i = 0; i < roi.size(1); i++) {
-        std::cout << roi[0][i].item<float>();
-        if (i < roi.size(1) - 1) std::cout << ", ";
+    // Computes the two modulation vectors from the reference-branch layers.
+    //   feat: feature maps; feat[0] is fed to conv3_1r and feat[1] to conv4_1r.
+    //   bb:   initial box as (x, y, w, h), shaped [batch, 4] or [batch, 1, 4].
+    // Returns {fc34_3r output, fc34_4r output}.
+    // Throws std::runtime_error when feat holds fewer than two tensors or bb
+    // does not have one of the two accepted shapes.
+    torch::NoGradGuard no_grad; // Ensure no gradients are computed
+
+    // Validate before indexing: feat[1] on a shorter vector is undefined behaviour.
+    if (feat.size() < 2) {
+        throw std::runtime_error("BBRegressor::get_modulation: feat must contain two feature maps");
+    }
+
+    // Ensure inputs are on the correct device
+    auto feat3_r = feat[0].to(device);
+    auto feat4_r = feat[1].to(device);
+    auto current_bb = bb.to(device);
+
+    // Reshape bb if it's [batch, 1, 4] to [batch, 4]
+    if (current_bb.dim() == 3 && current_bb.size(1) == 1) {
+        current_bb = current_bb.squeeze(1);
+    }
+    if (current_bb.dim() != 2 || current_bb.size(1) != 4) {
+        throw std::runtime_error("BBRegressor::get_modulation: bb must be [batch, 4] or [batch, 1, 4]");
+    }
+
+    // Pass through early conv layers (reference branch)
+    // Python: c3_r = self.conv3_1r(feat3_r)
+    auto c3_r = conv3_1r->forward(feat3_r);
+
+    // Prepare ROIs: convert bb from [x,y,w,h] to [batch_idx, x1,y1,x2,y2].
+    // Every piece is created from current_bb (already on `device`), so the
+    // concatenated result needs no further dtype or device conversion.
+    const int64_t batch_size = current_bb.size(0);
+    auto batch_idx = torch::arange(batch_size, current_bb.options()).unsqueeze(1);
+    auto top_left = current_bb.slice(/*dim=*/1, 0, 2);                 // x1, y1
+    auto bottom_right = top_left + current_bb.slice(/*dim=*/1, 2, 4);  // x2 = x1 + w, y2 = y1 + h
+    auto rois = torch::cat(std::vector<torch::Tensor>{batch_idx, top_left, bottom_right}, 1);
+
+    std::cout << "  BBRegressor::get_modulation: Converted ROIs (first item): [";
+    if (batch_size > 0) {
+        for (int64_t j = 0; j < rois.size(1); j++) {
+            std::cout << rois[0][j].item<float>();
+            if (j < rois.size(1) - 1) std::cout << ", ";
+        }
     }
     std::cout << "]" << std::endl;
+    std::cout << "  BBRegressor::get_modulation: c3_r shape: " << c3_r.sizes() << ", device: " << c3_r.device() << std::endl;
+
+    // Python: roi3r = self.prroi_pool3r(c3_r, roi1)
+    auto roi3r = prroi_pool3r->forward(c3_r, rois);
+    std::cout << "  BBRegressor::get_modulation: roi3r shape: " << roi3r.sizes() << std::endl;
+
+    // Python: c4_r = self.conv4_1r(feat4_r)
+    auto c4_r = conv4_1r->forward(feat4_r);
+    std::cout << "  BBRegressor::get_modulation: c4_r shape: " << c4_r.sizes() << ", device: " << c4_r.device() << std::endl;
+
+    // Python: roi4r = self.prroi_pool4r(c4_r, roi1)
+    auto roi4r = prroi_pool4r->forward(c4_r, rois);
+    std::cout << "  BBRegressor::get_modulation: roi4r shape: " << roi4r.sizes() << std::endl;
+
+    // Python: fc3_r = self.fc3_1r(roi3r)
+    // fc3_1r is a conv block: conv(128, 256, kernel_size=3, stride=1, padding=0)
+    // Input roi3r is (batch, 128, 3, 3) -> Output fc3_r is (batch, 256, 1, 1)
+    auto fc3_r = fc3_1r->forward(roi3r);
+    std::cout << "  BBRegressor::get_modulation: fc3_r shape: " << fc3_r.sizes() << std::endl;
+
+    // Python: fc34_r = torch.cat((fc3_r, roi4r), dim=1)
+    // fc3_r is (batch, 256, 1, 1), roi4r is (batch, 256, 1, 1)
+    // Result fc34_r is (batch, 512, 1, 1)
+    auto fc34_r = torch::cat(std::vector<torch::Tensor>{fc3_r, roi4r}, 1);
+    std::cout << "  BBRegressor::get_modulation: fc34_r shape: " << fc34_r.sizes() << std::endl;
+
+    // Python: fc34_3_r = self.fc34_3r(fc34_r)
+    // fc34_3r is conv(512, 256, kernel_size=1, stride=1, padding=0)
+    // Output fc34_3_r is (batch, 256, 1, 1)
+    auto mod_vec1 = fc34_3r->forward(fc34_r);
+    std::cout << "  BBRegressor::get_modulation: mod_vec1 (fc34_3_r) shape: " << mod_vec1.sizes() << std::endl;
+
+    // Python: fc34_4_r = self.fc34_4r(fc34_r)
+    // fc34_4r is conv(512, 256, kernel_size=1, stride=1, padding=0)
+    // Output fc34_4_r is (batch, 256, 1, 1)
+    auto mod_vec2 = fc34_4r->forward(fc34_r);
+    std::cout << "  BBRegressor::get_modulation: mod_vec2 (fc34_4_r) shape: " << mod_vec2.sizes() << std::endl;
     
-    // Apply target branch to get modulation vectors
-    auto feat1 = conv3_1t->forward(feat[0]);
-    auto feat2 = conv3_2t->forward(feat1);
-    
-    // Apply target branch to get modulation vectors for second feature map
-    auto feat3 = conv4_1t->forward(feat[1]);
-    auto feat4 = conv4_2t->forward(feat3);
-    
-    // ROI pool the features - use the same ROI for both feature maps
-    std::cout << "  Applying ROI pooling to layer 3..." << std::endl;
-    auto pooled_feat1 = prroi_pool3t->forward(feat2, roi);
-    std::cout << "  Applying ROI pooling to layer 4..." << std::endl;
-    auto pooled_feat2 = prroi_pool4t->forward(feat4, roi);
-    
-    // Flatten and concatenate the pooled features
-    auto vec1 = pooled_feat1.reshape({pooled_feat1.size(0), -1});
-    auto vec2 = pooled_feat2.reshape({pooled_feat2.size(0), -1});
-    
-    // Apply fully connected layer to get modulation vectors
-    auto modulation1 = fc3_rt.forward(vec1);
-    auto modulation2 = fc4_rt.forward(vec2);
-    
-    // Return modulation vectors
-    return {modulation1, modulation2};
+    return {mod_vec1, mod_vec2};
 }

 // Predict IoU for proposals
 torch::Tensor BBRegressor::predict_iou(std::vector<torch::Tensor> modulation, 
                                     std::vector<torch::Tensor> feat, 
                                     torch::Tensor proposals) {
-    // Debug dimensions
-    std::cout << "Input dimensions:" << std::endl;
-    std::cout << "  modulation[0]: [" << modulation[0].size(0) << ", " << modulation[0].size(1) << "]" << std::endl;
-    std::cout << "  modulation[1]: [" << modulation[1].size(0) << ", " << modulation[1].size(1) << "]" << std::endl;
-    std::cout << "  feat[0]: [" << feat[0].size(0) << ", " << feat[0].size(1) << ", " 
-              << feat[0].size(2) << ", " << feat[0].size(3) << "]" << std::endl;
-    std::cout << "  feat[1]: [" << feat[1].size(0) << ", " << feat[1].size(1) << ", " 
-              << feat[1].size(2) << ", " << feat[1].size(3) << "]" << std::endl;
-    std::cout << "  proposals: [" << proposals.size(0) << ", " << proposals.size(1) << ", " << proposals.size(2) << "]" << std::endl;
-    
-    // Convert proposals from [batch, num_proposals, 4] to [num_proposals, 5] format
-    // with batch index as the first element
-    auto batch_size = proposals.size(0);
-    auto num_proposals = proposals.size(1);
-    
-    // Reshape proposals to [num_proposals, 4]
-    auto proposals_view = proposals.reshape({-1, 4});
-    
-    // Create batch indices tensor [0, 0, 0, ...] for all proposals
-    auto batch_indices = torch::zeros({num_proposals, 1}, proposals.options());
-    
-    // Convert proposals from [x, y, w, h] to [batch_idx, x1, y1, x2, y2] format
-    auto roi = torch::zeros({num_proposals, 5}, proposals.options());
-    roi.index_put_({torch::indexing::Slice(), 0}, batch_indices.squeeze());
-    roi.index_put_({torch::indexing::Slice(), 1}, proposals_view.index({torch::indexing::Slice(), 0}));
-    roi.index_put_({torch::indexing::Slice(), 2}, proposals_view.index({torch::indexing::Slice(), 1}));
-    
-    // Calculate x2, y2 from width and height
-    auto x2 = proposals_view.index({torch::indexing::Slice(), 0}) + proposals_view.index({torch::indexing::Slice(), 2});
-    auto y2 = proposals_view.index({torch::indexing::Slice(), 1}) + proposals_view.index({torch::indexing::Slice(), 3});
-    roi.index_put_({torch::indexing::Slice(), 3}, x2);
-    roi.index_put_({torch::indexing::Slice(), 4}, y2);
-    
-    // Make sure ROI is on the same device as features
-    torch::Device feat_device = feat[0].device();
+    // Scores every proposal box using the pooled features and the modulation vectors.
+    //   modulation: two modulation tensors; 2-D inputs are viewed as [N, C, 1, 1].
+    //   feat:       two feature maps, pooled by prroi_pool3t and prroi_pool4t.
+    //   proposals:  [batch, num_proposals, 4] boxes given as (x, y, w, h).
+    // Returns a [batch, num_proposals] tensor of scores on `device`.
+    // Throws std::runtime_error on malformed inputs; any other failure is
+    // logged and rethrown.
+    if (modulation.size() < 2 || feat.size() < 2) {
+        throw std::runtime_error("BBRegressor::predict_iou: modulation and feat must each contain two tensors");
+    }
+    if (proposals.dim() != 3 || proposals.size(2) != 4) {
+        throw std::runtime_error("BBRegressor::predict_iou: proposals must be [batch, num_proposals, 4]");
+    }
+
+    // Ensure all inputs are on the correct device
+    auto target_device = device;
+    for (auto& t : feat) { t = t.to(target_device); }
+    for (auto& m : modulation) { m = m.to(target_device); }
+    proposals = proposals.to(target_device);
+
+    // Get batch size and number of proposals
+    const int64_t batch_size = proposals.size(0);
+    const int64_t num_proposals = proposals.size(1);
+
+    // Flatten proposals to [batch_size * num_proposals, 4] and build ROIs as
+    // [batch_idx, x1, y1, x2, y2]. The pooling layers take corner coordinates
+    // (the same layout get_modulation builds), so (w, h) must be turned into
+    // (x2, y2) instead of being passed through unchanged.
+    auto proposals_view = proposals.reshape({batch_size * num_proposals, 4});
+    auto roi_batch_index = torch::arange(batch_size, proposals_view.options())
+                               .unsqueeze(1)
+                               .repeat_interleave(num_proposals, 0); // batch-major, one row per proposal
+    auto roi_xy1 = proposals_view.slice(/*dim=*/1, 0, 2);            // x1, y1
+    auto roi_xy2 = roi_xy1 + proposals_view.slice(/*dim=*/1, 2, 4);  // x2 = x1 + w, y2 = y1 + h
+    auto roi = torch::cat(std::vector<torch::Tensor>{roi_batch_index, roi_xy1, roi_xy2}, 1);
+    
+    // Ensure ROI is on the correct device, matching features
+    auto feat_device = feat[0].device();
     roi = roi.to(feat_device);
     
     // Apply ROI pooling to get features for each proposal
-    auto pooled_feat1 = prroi_pool3r->forward(feat[0], roi);
-    auto pooled_feat2 = prroi_pool4r->forward(feat[1], roi);
-    
-    // Make sure all tensors are on the same device (GPU)
-    torch::Device target_device = modulation[0].device();
-    pooled_feat1 = pooled_feat1.to(target_device);
-    pooled_feat2 = pooled_feat2.to(target_device);
+    auto pooled_feat1 = prroi_pool3t->forward(feat[0], roi); // Output: [batch_size * num_proposals, C, 5, 5]
+    auto pooled_feat2 = prroi_pool4t->forward(feat[1], roi); // Output: [batch_size * num_proposals, C, 3, 3]
     
-    // Print intermediate tensor shapes
     std::cout << "  Pooled shapes:" << std::endl;
-    std::cout << "    pooled_feat1: [" << pooled_feat1.size(0) << ", " << pooled_feat1.size(1) << ", " 
-              << pooled_feat1.size(2) << ", " << pooled_feat1.size(3) << "]" << std::endl;
-    std::cout << "    pooled_feat2: [" << pooled_feat2.size(0) << ", " << pooled_feat2.size(1) << ", " 
-              << pooled_feat2.size(2) << ", " << pooled_feat2.size(3) << "]" << std::endl;
+    std::cout << "    pooled_feat1 (from prroi_pool3t on feat[0]): [" << pooled_feat1.sizes() << "] dev: " << pooled_feat1.device() << std::endl;
+    std::cout << "    pooled_feat2 (from prroi_pool4t on feat[1]): [" << pooled_feat2.sizes() << "] dev: " << pooled_feat2.device() << std::endl;
     
-    // Inspect the IoU predictor dimensions
     std::cout << "  IoU predictor dimensions:" << std::endl;
-    std::cout << "    weight: [" << iou_predictor->weight.size(0) << ", " << iou_predictor->weight.size(1) << "]" << std::endl;
-    std::cout << "    bias: [" << iou_predictor->bias.size(0) << "]" << std::endl;
+    std::cout << "    weight: [" << iou_predictor->weight.sizes() << "]" << std::endl;
+    std::cout << "    bias: [" << iou_predictor->bias.sizes() << "]" << std::endl;
     
     try {
-        // Flatten pooled features
-        auto vec1 = pooled_feat1.reshape({pooled_feat1.size(0), -1});
-        auto vec2 = pooled_feat2.reshape({pooled_feat2.size(0), -1});
-        
-        // Print flattened shapes
-        std::cout << "  Flattened shapes:" << std::endl;
-        std::cout << "    vec1: [" << vec1.size(0) << ", " << vec1.size(1) << "]" << std::endl;
-        std::cout << "    vec2: [" << vec2.size(0) << ", " << vec2.size(1) << "]" << std::endl;
-        
-        // We need to adapt the input to match what the IoU predictor expects
-        // The IoU predictor has a weight matrix of size 512x1, so input should have 512 features
-        
-        // Instead of concatenating the full features, we need to first reduce them to match expected size
-        // This is based on the original Python implementation
-        
-        // Get modulation shapes
-        std::cout << "  Modulation vector shapes:" << std::endl;
-        std::cout << "    mod1: [" << modulation[0].size(0) << ", " << modulation[0].size(1) << "]" << std::endl;
-        std::cout << "    mod2: [" << modulation[1].size(0) << ", " << modulation[1].size(1) << "]" << std::endl;
-        
-        // Calculate expected dimensions
-        int mod1_dim = modulation[0].size(1);  // Should be 256
-        int mod2_dim = modulation[1].size(1);  // Should be 256
-        int total_mod_dim = mod1_dim + mod2_dim;  // Should be 512, matching iou_predictor weight row count
-        
-        std::cout << "  Using correct input dimensions for IoU predictor (total_dim=" << total_mod_dim << ")" << std::endl;
-        
-        // Create processed features with correct dimensions
-        auto processed_feat1 = torch::zeros({num_proposals, mod1_dim}, vec1.options());
-        auto processed_feat2 = torch::zeros({num_proposals, mod2_dim}, vec2.options());
-        
-        // We need to reduce the dimensionality of vec1 and vec2 to match mod1_dim and mod2_dim
-        // We'll use average pooling across spatial dimensions
-        if (vec1.size(1) > mod1_dim) {
-            // Average every N values to reduce dimension
-            int pool_size = vec1.size(1) / mod1_dim;
-            std::cout << "  Reducing vec1 features with pool_size=" << pool_size << std::endl;
-            
-            for (int i = 0; i < num_proposals; i++) {
-                for (int j = 0; j < mod1_dim; j++) {
-                    float sum = 0.0f;
-                    for (int k = 0; k < pool_size; k++) {
-                        int idx = j * pool_size + k;
-                        if (idx < vec1.size(1)) {
-                            sum += vec1[i][idx].item<float>();
-                        }
-                    }
-                    processed_feat1[i][j] = sum / pool_size;
-                }
-            }
-        } else {
-            // Just copy directly if dimensions already match
-            processed_feat1 = vec1;
+        auto mod0_4d = modulation[0].to(target_device); 
+        auto mod1_4d = modulation[1].to(target_device); 
+
+        if (mod0_4d.dim() == 2) {
+            mod0_4d = mod0_4d.reshape({mod0_4d.size(0), mod0_4d.size(1), 1, 1});
         }
-        
-        if (vec2.size(1) > mod2_dim) {
-            // Similar reduction for vec2
-            int pool_size = vec2.size(1) / mod2_dim;
-            std::cout << "  Reducing vec2 features with pool_size=" << pool_size << std::endl;
-            
-            for (int i = 0; i < num_proposals; i++) {
-                for (int j = 0; j < mod2_dim; j++) {
-                    float sum = 0.0f;
-                    for (int k = 0; k < pool_size; k++) {
-                        int idx = j * pool_size + k;
-                        if (idx < vec2.size(1)) {
-                            sum += vec2[i][idx].item<float>();
-                        }
-                    }
-                    processed_feat2[i][j] = sum / pool_size;
-                }
-            }
-        } else {
-            // Just copy directly if dimensions already match
-            processed_feat2 = vec2;
+        if (mod1_4d.dim() == 2) {
+            mod1_4d = mod1_4d.reshape({mod1_4d.size(0), mod1_4d.size(1), 1, 1});
         }
         
-        // Prepare modulation vectors for each proposal
-        auto mod1 = modulation[0].repeat({num_proposals, 1});
-        auto mod2 = modulation[1].repeat({num_proposals, 1});
-        
-        std::cout << "  Final feature shapes:" << std::endl;
-        std::cout << "    processed_feat1: [" << processed_feat1.size(0) << ", " << processed_feat1.size(1) << "]" << std::endl;
-        std::cout << "    processed_feat2: [" << processed_feat2.size(0) << ", " << processed_feat2.size(1) << "]" << std::endl;
-        std::cout << "    mod1: [" << mod1.size(0) << ", " << mod1.size(1) << "]" << std::endl;
-        std::cout << "    mod2: [" << mod2.size(0) << ", " << mod2.size(1) << "]" << std::endl;
-        
-        // Element-wise multiply features with modulation vectors
-        auto mod_feat1 = processed_feat1 * mod1;
-        auto mod_feat2 = processed_feat2 * mod2;
-        
-        // Concatenate to get final features for IoU prediction
-        auto ioufeat = torch::cat({mod_feat1, mod_feat2}, /*dim=*/1);
-        std::cout << "  ioufeat shape: [" << ioufeat.size(0) << ", " << ioufeat.size(1) << "]" << std::endl;
-        
-        // Try GPU implementation first
+        // Give every pooled ROI its own modulation row.
+        //   one row            -> shared by all ROIs
+        //   one row per image  -> repeated for that image's proposals, which
+        //                         matches the batch-major ROI order built above
+        auto match_rois = [&](const torch::Tensor& mod, int64_t num_rois) -> torch::Tensor {
+            if (mod.size(0) == num_rois) {
+                return mod;
+            }
+            if (mod.size(0) == 1) {
+                return mod.repeat({num_rois, 1, 1, 1});
+            }
+            if (mod.size(0) == batch_size) {
+                return mod.repeat_interleave(num_proposals, 0);
+            }
+            throw std::runtime_error("BBRegressor::predict_iou: modulation batch size does not match proposals");
+        };
+        mod0_4d = match_rois(mod0_4d, pooled_feat1.size(0));
+        mod1_4d = match_rois(mod1_4d, pooled_feat2.size(0));
+
+        std::cout << "  Modulation vector shapes (reshaped 4D):" << std::endl;
+        std::cout << "    mod0_4d: [" << mod0_4d.sizes() << "] dev: " << mod0_4d.device() << std::endl;
+        std::cout << "    mod1_4d: [" << mod1_4d.sizes() << "] dev: " << mod1_4d.device() << std::endl;
+        
+        // Channel-wise modulation of the pooled features
+        auto feat_prod_0 = pooled_feat1 * mod0_4d; 
+        auto feat_prod_1 = pooled_feat2 * mod1_4d; 
+        std::cout << "  After element-wise product with modulation:\n    feat_prod_0 (pooled_feat1 * mod0_4d): [" << feat_prod_0.sizes() << "] dev: " << feat_prod_0.device() << "\n    feat_prod_1 (pooled_feat2 * mod1_4d): [" << feat_prod_1.sizes() << "] dev: " << feat_prod_1.device() << std::endl;
+        
+        // fc3_rt / fc4_rt are value members, hence '.' rather than '->'
+        std::cout << "  Applying fc3_rt to feat_prod_0..." << std::endl;
+        auto x0 = fc3_rt.forward(feat_prod_0);
+        std::cout << "  Applying fc4_rt to feat_prod_1..." << std::endl;
+        auto x1 = fc4_rt.forward(feat_prod_1);
+        std::cout << "  After fc_rt blocks:\n    x0 (fc3_rt output): [" << x0.sizes() << "] dev: " << x0.device() << "\n    x1 (fc4_rt output): [" << x1.sizes() << "] dev: " << x1.device() << std::endl;
+
+        // Join both branches along the feature dimension for the final predictor
+        auto ioufeat_final = torch::cat(std::vector<torch::Tensor>{x0, x1}, 1).contiguous();
+        std::cout << "  Concatenated ioufeat_final: [" << ioufeat_final.sizes() << "] dev: " << ioufeat_final.device() << std::endl;
+
         torch::Tensor iou_scores;
         try {
-            // Apply IoU predictor using GPU
-            std::cout << "  Applying IoU predictor on GPU" << std::endl;
-            iou_scores = iou_predictor->forward(ioufeat);
+            std::cout << "  Applying final iou_predictor on GPU" << std::endl;
+            iou_predictor->to(target_device); 
+            iou_scores = iou_predictor->forward(ioufeat_final.to(target_device));
+            std::cout << "  Final iou_predictor on GPU successful. Output scores shape: [" << iou_scores.sizes() << "]" << std::endl;
+
         } catch (const std::exception& cuda_error) {
-            // If GPU implementation fails, use CPU implementation
-            std::cout << "  GPU implementation failed: " << cuda_error.what() << std::endl;
-            std::cout << "  Falling back to CPU implementation" << std::endl;
-            
-            // Move tensors to CPU
-            auto ioufeat_cpu = ioufeat.to(torch::kCPU);
-            auto weight_cpu = iou_predictor->weight.to(torch::kCPU);
-            auto bias_cpu = iou_predictor->bias.to(torch::kCPU);
+            std::cout << "  GPU iou_predictor->forward() failed: " << cuda_error.what() << std::endl;
+            std::cout << "  Falling back to CPU for final iou_predictor" << std::endl;
             
-            // Implement the linear layer manually
-            // For each proposal, compute: score = bias + ioufeat * weight
-            auto scores_cpu = torch::zeros({num_proposals, 1}, torch::kCPU);
+            // Work on CPU copies so the module's own parameters stay where they are
+            auto ioufeat_final_cpu = ioufeat_final.to(torch::kCPU).contiguous();
+            auto weight_cpu = iou_predictor->weight.to(torch::kCPU).contiguous();
+            auto bias_cpu = torch::Tensor(); 
+            if (iou_predictor->bias.defined()) {
+                bias_cpu = iou_predictor->bias.to(torch::kCPU).contiguous();
+            }
+
+            std::cout << "    DEBUG CPU Fallback: ioufeat_final_cpu device: " << ioufeat_final_cpu.device() << std::endl;
+            std::cout << "    DEBUG CPU Fallback: weight_cpu device: " << weight_cpu.device() << std::endl;
+            if (bias_cpu.defined()) {
+                 std::cout << "    DEBUG CPU Fallback: bias_cpu device: " << bias_cpu.device() << std::endl;
+            } else {
+                 std::cout << "    DEBUG CPU Fallback: bias_cpu is undefined." << std::endl;
+            }
             
-            for (int i = 0; i < num_proposals; i++) {
-                // Start with bias
-                float score = bias_cpu[0].item<float>();
-                
-                // Add weighted sum of features
-                for (int j = 0; j < ioufeat_cpu.size(1); j++) {
-                    score += ioufeat_cpu[i][j].item<float>() * weight_cpu[0][j].item<float>();
-                }
-                
-                scores_cpu[i][0] = score;
+            try {
+                iou_scores = torch::nn::functional::linear(ioufeat_final_cpu, weight_cpu, bias_cpu);
+                std::cout << "  CPU fallback torch::nn::functional::linear() successful. Output device: " << iou_scores.device() << std::endl;
+            } catch (const std::exception& cpu_fwd_error) {
+                std::cerr << "ERROR during CPU torch::nn::functional::linear(): " << cpu_fwd_error.what() << std::endl;
+                iou_predictor->to(target_device); 
+                throw; 
             }
             
-            // Move results back to original device
-            iou_scores = scores_cpu.to(target_device);
+            iou_predictor->to(target_device); 
+            iou_scores = iou_scores.to(target_device);
         }
         
         std::cout << "  iou_scores raw shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl;
         
-        // Reshape back to [batch_size, num_proposals]
         iou_scores = iou_scores.reshape({batch_size, num_proposals});
         std::cout << "  Final iou_scores shape: [" << iou_scores.size(0) << ", " << iou_scores.size(1) << "]" << std::endl;
         
         return iou_scores;
         
     } catch (const std::exception& e) {
-        // This should never happen with our robust implementation
         std::cerr << "CRITICAL: Unexpected error in predict_iou: " << e.what() << std::endl;
-        
-        // We'll implement direct box overlaps as a true fallback that doesn't use "magic numbers"
-        std::cout << "  Implementing direct IoU calculation using box overlaps" << std::endl;
-        
-        // Move tensors to CPU for direct calculation
-        auto proposals_cpu = proposals.to(torch::kCPU);
-        auto bb_cpu = modulation[0].to(torch::kCPU); // Using modulation[0] to get the original target box
-        
-        // Create output tensor on CPU
-        auto iou_scores = torch::zeros({batch_size, num_proposals}, torch::kCPU);
-        
-        // Calculate IoU geometrically for each proposal
-        // This is a direct, mathematical implementation that doesn't rely on neural networks
-        for (int i = 0; i < num_proposals; i++) {
-            float target_x1 = proposals_view[i][0].item<float>();
-            float target_y1 = proposals_view[i][1].item<float>();
-            float target_x2 = target_x1 + proposals_view[i][2].item<float>();
-            float target_y2 = target_y1 + proposals_view[i][3].item<float>();
-            
-            float box_x1 = bb_cpu[0][0].item<float>();
-            float box_y1 = bb_cpu[0][1].item<float>();
-            float box_x2 = box_x1 + bb_cpu[0][2].item<float>();
-            float box_y2 = box_y1 + bb_cpu[0][3].item<float>();
-            
-            // Calculate intersection area
-            float x_left = std::max(target_x1, box_x1);
-            float y_top = std::max(target_y1, box_y1);
-            float x_right = std::min(target_x2, box_x2);
-            float y_bottom = std::min(target_y2, box_y2);
-            
-            float intersection_area = std::max(0.0f, x_right - x_left) * std::max(0.0f, y_bottom - y_top);
-            
-            // Calculate union area
-            float target_area = (target_x2 - target_x1) * (target_y2 - target_y1);
-            float box_area = (box_x2 - box_x1) * (box_y2 - box_y1);
-            float union_area = target_area + box_area - intersection_area;
-            
-            // IoU = intersection / union
-            float iou = union_area > 0 ? intersection_area / union_area : 0;
-            iou_scores[0][i] = iou;
-        }
-        
-        // Move back to original device
-        return iou_scores.to(target_device);
+        std::cout << "  Propagating critical error. No fallback available for this stage." << std::endl;
+        throw; 
     }
 }

--- a/cimp/bb_regressor/bb_regressor.h
+++ b/cimp/bb_regressor/bb_regressor.h
@ -44,19 +44,11 @@ public:
    bool use_relu;
 };

-// PrRoIPool2D implementation
+// PrRoIPool2D implementation (requires CUDA)
 class PrRoIPool2D {
 public:
    PrRoIPool2D(int pooled_height, int pooled_width, float spatial_scale);
    torch::Tensor forward(torch::Tensor feat, torch::Tensor rois);
-
-    // CPU-based fallback implementation
-    torch::Tensor forward_cpu(torch::Tensor feat, torch::Tensor rois) {
-        // Simple implementation that returns zeros (for fallback only)
-        int channels = feat.size(1);
-        int num_rois = rois.size(0);
-        return torch::zeros({num_rois, channels, pooled_height_, pooled_width_}, feat.options());
-    }
    
 private:
    int pooled_height_;
--- a/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.c
+++ b/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu.c
@ -1,135 +0,0 @@
-/*
- * File   : prroi_pooling_gpu.c
- * Simplified version for C++ compatibility
- */
-
-#include "prroi_pooling_gpu.h"
-#include "prroi_pooling_gpu_impl.cuh"
-#include <cuda_runtime.h>
-
-// C wrapper function for the CUDA forward implementation
-int prroi_pooling_forward_cuda(
-    const float *features_data,
-    const float *rois_data,
-    float *output_data,
-    int channels,
-    int height, 
-    int width,
-    int num_rois,
-    int pooled_height,
-    int pooled_width,
-    float spatial_scale
-) {
-    int top_count = num_rois * channels * pooled_height * pooled_width;
-
-    // Get current CUDA stream
-    cudaStream_t stream = 0;  // Use default stream if no stream is available
-    cudaError_t err = cudaStreamSynchronize(stream);
-    if (err != cudaSuccess) {
-        fprintf(stderr, "CUDA stream error: %s\n", cudaGetErrorString(err));
-        return -1;
-    }
-
-    // Call the implementation
-    PrRoIPoolingForwardGpu(
-        stream,
-        features_data,
-        rois_data,
-        output_data,
-        channels,
-        height,
-        width,
-        pooled_height,
-        pooled_width,
-        spatial_scale,
-        top_count
-    );
-
-    return 0;
-}
-
-// Simplified wrapper for backward pass
-int prroi_pooling_backward_cuda(
-    const float *features_data,
-    const float *rois_data,
-    const float *output_data,
-    const float *output_diff_data,
-    float *features_diff_data,
-    int channels,
-    int height,
-    int width,
-    int num_rois,
-    int pooled_height,
-    int pooled_width,
-    float spatial_scale
-) {
-    // Calculate counts for features and output
-    int top_count = num_rois * channels * pooled_height * pooled_width;
-    int bottom_count = 1 * channels * height * width;  // Assume batch_size = 1
-    
-    // Get current CUDA stream
-    cudaStream_t stream = 0;  // Use default stream
-    
-    // Call the implementation
-    PrRoIPoolingBackwardGpu(
-        stream,
-        features_data,
-        rois_data,
-        output_data,
-        output_diff_data,
-        features_diff_data,
-        channels,
-        height,
-        width,
-        pooled_height,
-        pooled_width,
-        spatial_scale,
-        top_count,
-        bottom_count
-    );
-
-    return 0;
-}
-
-// Simplified wrapper for coordinate backward pass
-int prroi_pooling_coor_backward_cuda(
-    const float *features_data,
-    const float *rois_data,
-    const float *output_data,
-    const float *output_diff_data,
-    float *rois_diff_data,
-    int channels,
-    int height,
-    int width,
-    int num_rois,
-    int pooled_height,
-    int pooled_width,
-    float spatial_scale
-) {
-    // Calculate counts
-    int top_count = num_rois * channels * pooled_height * pooled_width;
-    int bottom_count = num_rois * 5;  // ROIs are 5D (batch_idx, x1, y1, x2, y2)
-    
-    // Get current CUDA stream
-    cudaStream_t stream = 0;  // Use default stream
-    
-    // Call the implementation
-    PrRoIPoolingCoorBackwardGpu(
-        stream,
-        features_data,
-        rois_data,
-        output_data,
-        output_diff_data,
-        rois_diff_data,
-        channels,
-        height,
-        width,
-        pooled_height,
-        pooled_width,
-        spatial_scale,
-        top_count,
-        bottom_count
-    );
-
-    return 0;
-}
--- a/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu_impl.cu
+++ b/cimp/bb_regressor/prroi_pooling/prroi_pooling_gpu_impl.cu
@ -3,19 +3,28 @@
 * Simplified version for C++ compatibility
 */

-#include "prroi_pooling_gpu_impl.cuh"
-
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
 #include <cstdio>
 #include <cfloat>
 #include <cmath>

-#define CUDA_NUM_THREADS 512
+// Forward declaration of the PrRoI pooling forward CUDA kernel
+__global__ void PRROIPoolingForwardKernel(
+    const int nthreads,
+    const float *bottom_data,
+    const float *bottom_rois,
+    float *top_data,
+    const int channels,
+    const int height,
+    const int width,
+    const int pooled_height,
+    const int pooled_width,
+    const float spatial_scale
+);

 // Helper macros for CUDA kernel execution
-#define CUDA_KERNEL_LOOP(i, n) \
-    for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
-        i < (n); \
-        i += blockDim.x * gridDim.x)
+#define CUDA_NUM_THREADS 512

 // Helper function to check CUDA errors
 inline void checkCudaErrors(cudaError_t err) {
@ -55,7 +64,9 @@ __global__ void PRROIPoolingForwardKernel(
    const int pooled_width,
    const float spatial_scale
 ) {
-    CUDA_KERNEL_LOOP(index, nthreads) {
+    for (int index = blockIdx.x * blockDim.x + threadIdx.x; 
+         index < nthreads; 
+         index += blockDim.x * gridDim.x) {
        // (n, c, ph, pw) is the index in output
        int pw = index % pooled_width;
        int ph = (index / pooled_width) % pooled_height;
@ -137,7 +148,7 @@ __global__ void PRROIPoolingForwardKernel(
 }

 // C API wrapper for Forward pass
-void PrRoIPoolingForwardGpu(
+extern "C" void PrRoIPoolingForwardGpu(
    cudaStream_t stream,
    const float *bottom_data,
    const float *bottom_rois,
@ -170,7 +181,7 @@ void PrRoIPoolingForwardGpu(
 }

 // Simplified dummy implementations of backward passes
-void PrRoIPoolingBackwardGpu(
+extern "C" void PrRoIPoolingBackwardGpu(
    cudaStream_t stream,
    const float *bottom_data,
    const float *bottom_rois,
@ -190,7 +201,7 @@ void PrRoIPoolingBackwardGpu(
    cudaMemsetAsync(bottom_diff, 0, bottom_count * sizeof(float), stream);
 }

-void PrRoIPoolingCoorBackwardGpu(
+extern "C" void PrRoIPoolingCoorBackwardGpu(
    cudaStream_t stream,
    const float *bottom_data,
    const float *bottom_rois,
--- a/cimp/demo.cpp
+++ b/cimp/demo.cpp
@ -135,20 +135,20 @@ int main(int argc, char* argv[]) {
    try {
        std::cout << "=== Object Tracking Demo with BBRegressor and Classifier ===" << std::endl;
        
-        // Determine which device to use
-        torch::Device device(torch::kCPU);
-
-        // Add more detailed CUDA debugging
+        // Check CUDA availability - required for this application
        std::cout << "Checking CUDA availability..." << std::endl;
        std::cout << "torch::cuda::is_available(): " << (torch::cuda::is_available() ? "true" : "false") << std::endl;

-        if (torch::cuda::is_available()) {
-            device = torch::Device(torch::kCUDA, 0);
-            std::cout << "Using CUDA device: " << device << std::endl;
-            std::cout << "CUDA Device Count: " << torch::cuda::device_count() << std::endl;
-        } else {
-            std::cout << "CUDA is not available, using CPU" << std::endl;
+        if (!torch::cuda::is_available()) {
+            std::cerr << "ERROR: CUDA is not available. This application requires CUDA." << std::endl;
+            std::cerr << "Please ensure you have a CUDA-capable GPU and the proper drivers installed." << std::endl;
+            return 1;
        }
+
+        // Always use CUDA device
+        torch::Device device(torch::kCUDA, 0);
+        std::cout << "Using CUDA device: " << device << std::endl;
+        std::cout << "CUDA Device Count: " << torch::cuda::device_count() << std::endl;
        std::cout << std::endl;
        
        // Find the base directory containing exported weights
--- a/classifier_stats.txt
+++ b/classifier_stats.txt
@ -1,9 +1,9 @@
 Output 0:
  Shape: [1, 512, 9, 9]
-  Mean: -0.000479055
-  Std: 0.0110019
-  Min: -0.0458832
-  Max: 0.0493125
-  Sum: -19.8674
-  Sample values: [-0.0125058, 0.00186453, 0.0141787]
+  Mean: 0.000174798
+  Std: 0.0110712
+  Min: -0.0531409
+  Max: 0.0471968
+  Sum: 7.24922
+  Sample values: [-0.00536738, 0.00499956, 0.00910664]

--- a/cmake-build-debug/CMakeCache.txt
+++ b/cmake-build-debug/CMakeCache.txt
@ -100,6 +100,15 @@ Z_VCPKG_CL:FILEPATH=Z_VCPKG_CL-NOTFOUND
 // triplet
 _VCPKG_INSTALLED_DIR:PATH=/media/mht/ADATA/repos/cpp_tracker/cmake-build-debug/vcpkg_installed

+//Value Computed by CMake
+cimp_BINARY_DIR:STATIC=/media/mht/ADATA/repos/cpp_tracker/cmake-build-debug
+
+//Value Computed by CMake
+cimp_IS_TOP_LEVEL:STATIC=ON
+
+//Value Computed by CMake
+cimp_SOURCE_DIR:STATIC=/media/mht/ADATA/repos/cpp_tracker
+
 //Value Computed by CMake
 cpp_tracker_BINARY_DIR:STATIC=/media/mht/ADATA/repos/cpp_tracker/cmake-build-debug

--- a/run_demo.sh
+++ b/run_demo.sh
@ -8,7 +8,9 @@ elif [ -d "/usr/lib/cuda" ]; then
    export CUDA_HOME=/usr/lib/cuda
    echo "CUDA environment set to $CUDA_HOME"
 else
-    echo "CUDA not found. Running in CPU-only mode."
+    echo "CUDA not found. The project requires CUDA to run."
+    echo "Please install CUDA and try again."
+    exit 1
 fi

 # Set LibTorch library path
--- a/test/compare_models.py
+++ b/test/compare_models.py
@ -15,33 +15,59 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

 # Import model wrappers
 from pytracking.features.net_wrappers import DiMPTorchScriptWrapper
+# For loading AtomIoUNet from source
+from ltr.models.bbreg.atom_iou_net import AtomIoUNet

 class ModelComparison:
    def __init__(self, model_dir='exported_weights', num_samples=1000):
-        self.model_dir = model_dir
+        self.model_dir_name = model_dir # Store the string name for paths
        self.num_samples = num_samples
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        
+        # Base path for models, inputs, and outputs
+        self.project_root = Path(__file__).resolve().parent.parent
+        self.model_base_dir = self.project_root / self.model_dir_name
+        self.input_dir_base = self.project_root / 'test' / 'input_samples'
+        self.cpp_output_dir_base = self.project_root / 'test' / 'output'
+        
        # Initialize comparison results
-        self.comparison_dir = Path('test') / 'comparison'
+        self.comparison_dir = self.project_root / 'test' / 'comparison'
        self.comparison_dir.mkdir(parents=True, exist_ok=True)
-        self.plots_dir = self.comparison_dir / 'plots' # plots_dir initialized here
+        self.plots_dir = self.comparison_dir / 'plots'
+        self.plots_dir.mkdir(parents=True, exist_ok=True)
+        
+        self.all_errors_stats = {}
        
-        # Initialize models
+        # Initialize models (TorchScript and from-source)
        self._init_models()
        
    def _init_models(self):
        """Initialize Python models"""
        print("Loading Python models...")
        
-        # Load DiMP components
+        # Load TorchScript models using _sd arguments for directories of tensors
        self.models = DiMPTorchScriptWrapper(
-            model_dir=self.model_dir,
+            model_dir=str(self.model_base_dir), # Expects string path
            device=self.device,
-            backbone_sd='backbone',  # Directory with backbone weights
-            classifier_sd='classifier',  # Directory with classifier weights
-            bbregressor_sd='bb_regressor'  # Directory with bbox regressor weights
+            backbone_sd='backbone',       # Directory name for backbone weights
+            classifier_sd='classifier',   # Directory name for classifier weights
+            bbregressor_sd='bb_regressor'  # Directory name for bb_regressor weights
+        )
+        
+        # Initialize BBRegressor from source (used for get_iou_feat and as a fallback for get_modulation / predict_iou)
+        self.bb_regressor_from_source = AtomIoUNet(
+            input_dim=(512, 1024),
+            pred_input_dim=(256, 256),
+            pred_inter_dim=(256, 256)
+        )
+        ModelComparison.load_weights_for_custom_model(
+            self.bb_regressor_from_source, 
+            'bb_regressor', # model_name for path and doc file
+            self.model_base_dir, 
+            self.device
        )
+        self.bb_regressor_from_source.eval().to(self.device)
+        print("Python models loaded.")
    
    def compare_classifier(self):
        """Compare classifier model outputs between Python and C++"""
@ -143,115 +169,159 @@ class ModelComparison:
    def compare_bb_regressor(self):
        """Compare bb_regressor model outputs between Python and C++"""
        print("\nComparing bb_regressor outputs...")
-        input_dir = Path('test') / 'input_samples' / 'bb_regressor'
-        cpp_output_dir = Path('test') / 'output' / 'bb_regressor'
+        input_dir_path = self.input_dir_base / 'bb_regressor'
+        cpp_output_dir_path = self.cpp_output_dir_base / 'bb_regressor'

-        if not input_dir.exists() or not cpp_output_dir.exists():
-            print(f"BB Regressor input or C++ output directory not found ({input_dir}, {cpp_output_dir}). Skipping.")
+        if not input_dir_path.exists() or not cpp_output_dir_path.exists():
+            print(f"BB Regressor input or C++ output directory not found ({input_dir_path}, {cpp_output_dir_path}). Skipping BB Regressor comparison.")
+            # Populate NaN for all expected BB Regressor comparisons if dirs are missing
+            for i in range(self.num_samples):
+                sample_key_base = f"BBReg_Sample_{i}"
+                current_errors = {}
+                self._compare_tensor_data(None, None, "BBReg PyIoUFeat0 vs CppIoUFeat0", i, current_errors)
+                self._compare_tensor_data(None, None, "BBReg PyIoUFeat1 vs CppIoUFeat1", i, current_errors)
+                self._compare_tensor_data(None, None, "BBReg PyMod0 vs CppMod0", i, current_errors)
+                self._compare_tensor_data(None, None, "BBReg PyMod1 vs CppMod1", i, current_errors)
+                self._compare_tensor_data(None, None, "BBReg IoUPred", i, current_errors)
+                self.all_errors_stats[sample_key_base] = current_errors
            return
-        
+
        for i in tqdm(range(self.num_samples), desc="BB Regressor samples"):
-            sample_dir = input_dir / f'sample_{i}'
-            cpp_output_sample_dir = cpp_output_dir / f'sample_{i}'
+            sample_dir = input_dir_path / f'sample_{i}'
+            cpp_out_sample_dir = cpp_output_dir_path / f'sample_{i}'
            
-            # Load input tensors for BB Regressor for this sample
-            feat_layer2_path = sample_dir / 'feat_layer2.pt'
-            feat_layer3_path = sample_dir / 'feat_layer3.pt'
-            init_bbox_path = sample_dir / 'init_bbox.pt'
-            proposals_path = sample_dir / 'proposals.pt'
-
-            feat_layer2 = self.load_cpp_tensor(feat_layer2_path, self.device)
-            feat_layer3 = self.load_cpp_tensor(feat_layer3_path, self.device)
-            init_bbox = self.load_cpp_tensor(init_bbox_path, self.device)
-            proposals = self.load_cpp_tensor(proposals_path, self.device)
-
-            if any(t is None for t in [feat_layer2, feat_layer3, init_bbox, proposals]):
-                print(f"Critical: Failed to load one or more BB Regressor input tensors for sample {i}. Skipping.")
-                continue
+            py_feat_layer2, py_feat_layer3, py_init_bbox, py_proposals = None, None, None, None
            
-            backbone_feat_tuple = (feat_layer2, feat_layer3) # Define the tuple for clarity
-
-            # Get IoU features from Python model
-            # self.models.get_backbone_bbreg_feat calls self.bb_regressor.get_iou_feat
-            with torch.no_grad():
-                py_iou_feat = self.models.get_backbone_bbreg_feat({"layer2": feat_layer2, "layer3": feat_layer3})
-            
-            # Get modulation vectors
-            squeezed_init_bbox = init_bbox
-            if init_bbox is not None and init_bbox.dim() == 3 and init_bbox.shape[1] == 1:
-                squeezed_init_bbox = init_bbox.squeeze(1)
-
-            with torch.no_grad():
-                # Pass original backbone features to get_modulation
-                py_modulation = self.models.bb_regressor.get_modulation(backbone_feat_tuple, squeezed_init_bbox)
+            if not sample_dir.exists() or not cpp_out_sample_dir.exists():
+                print(f"Warning: Skipping BB Regressor sample {i}, files not found at {sample_dir} or {cpp_out_sample_dir}.")
+            else:
+                py_feat_layer2 = self.load_cpp_tensor(sample_dir / 'feat_layer2.pt', self.device)
+                py_feat_layer3 = self.load_cpp_tensor(sample_dir / 'feat_layer3.pt', self.device)
+                py_init_bbox = self.load_cpp_tensor(sample_dir / 'init_bbox.pt', self.device)
+                py_proposals = self.load_cpp_tensor(sample_dir / 'proposals.pt', self.device)
+
+            # Get Python IoU features (these come directly from backbone, so should be reliable)
+            py_iou_feat = None
+            if py_feat_layer2 is not None and py_feat_layer3 is not None:
+                # Use from-source get_iou_feat for consistent 256-channel features
+                try:
+                    py_iou_feat = self.bb_regressor_from_source.get_iou_feat([py_feat_layer2, py_feat_layer3])
+                    if isinstance(py_iou_feat, tuple): py_iou_feat = list(py_iou_feat)
+                    print(f"Sample {i}: Successfully used from-source get_iou_feat.")
+                    # py_iou_feat will be a list of two tensors, e.g., [B, 256, H1, W1], [B, 256, H2, W2]
+                except Exception as e_iou_source:
+                    print(f"Sample {i}: From-source get_iou_feat failed: {e_iou_source}")
+                    py_iou_feat = None # Ensure it's None if failed
            
-            # DEBUG: Print shapes
-            print(f"Sample {i}: py_iou_feat[0] shape: {py_iou_feat[0].shape}, py_modulation[0] shape: {py_modulation[0].shape}")
-            print(f"Sample {i}: py_iou_feat[1] shape: {py_iou_feat[1].shape}, py_modulation[1] shape: {py_modulation[1].shape}")
+            # Get Python modulation vectors
+            py_modulation = None
+            if py_feat_layer2 is not None and py_feat_layer3 is not None and py_init_bbox is not None:
+                py_features_list = [py_feat_layer2, py_feat_layer3] # Pass as a list
+                
+                # Squeeze py_init_bbox if it's [B, 1, 4] to [B, 4]
+                squeezed_init_bbox = py_init_bbox
+                if py_init_bbox.ndim == 3 and py_init_bbox.shape[0] > 0 and py_init_bbox.shape[1] == 1:
+                    squeezed_init_bbox = py_init_bbox.squeeze(1)
+                elif py_init_bbox.ndim == 2:
+                    pass # Already [B,4] or similar, assume correct
+                else:
+                    print(f"Sample {i}: Warning - py_init_bbox has unexpected shape {py_init_bbox.shape}. Expecting 2D or 3D [B,1,4].")

-            # Predict IoU (Python model)
+                try:
+                    # Try TorchScript model first
+                    py_modulation = self.models.bb_regressor.get_modulation(py_features_list, squeezed_init_bbox)
+                    if isinstance(py_modulation, tuple): py_modulation = list(py_modulation)
+                    print(f"Sample {i}: Successfully used TorchScript get_modulation.")
+                except Exception as e_ts:
+                    print(f"Sample {i}: TorchScript get_modulation failed: {e_ts}. Trying from-source model.")
+                    try:
+                        py_modulation = self.bb_regressor_from_source.get_modulation(py_features_list, squeezed_init_bbox)
+                        if isinstance(py_modulation, tuple): py_modulation = list(py_modulation)
+                        print(f"Sample {i}: Successfully used from-source get_modulation.")
+                    except Exception as e_source:
+                        print(f"Sample {i}: From-source get_modulation also failed: {e_source}")
+                        py_modulation = None
+                
+                if py_modulation:
+                    print(f"Sample {i}: py_iou_feat[0] shape: {py_iou_feat[0].shape if py_iou_feat and len(py_iou_feat) > 0 else 'N/A'}, py_modulation[0] shape: {py_modulation[0].shape if py_modulation and len(py_modulation) > 0 else 'N/A'}")
+                    print(f"Sample {i}: py_iou_feat[1] shape: {py_iou_feat[1].shape if py_iou_feat and len(py_iou_feat) > 1 else 'N/A'}, py_modulation[1] shape: {py_modulation[1].shape if py_modulation and len(py_modulation) > 1 else 'N/A'}")
+
+            # Run Python bb_regressor's predict_iou (TorchScript model first, from-source model as fallback)
            py_iou_pred = None
-            try:
-                with torch.no_grad():
-                    py_iou_pred = self.models.bb_regressor.predict_iou(py_modulation, py_iou_feat, proposals)
-            except RuntimeError as e:
-                print(f"WARNING: Python model self.models.bb_regressor.predict_iou failed for sample {i}: {e}")
+            if py_modulation is not None and py_iou_feat is not None and py_proposals is not None:
+                # Ensure modulation vectors are 2D [B, C] for predict_iou
+                py_mod_0_squeezed = py_modulation[0].squeeze(-1).squeeze(-1) if py_modulation[0].ndim == 4 else py_modulation[0]
+                py_mod_1_squeezed = py_modulation[1].squeeze(-1).squeeze(-1) if py_modulation[1].ndim == 4 else py_modulation[1]
+                py_modulation_squeezed_for_pred = [py_mod_0_squeezed, py_mod_1_squeezed]
+
+                try:
+                    # Try TorchScript predict_iou first
+                    print(f"Sample {i}: Attempting TorchScript predict_iou with mod_0: {py_modulation[0].shape}, iou_feat_0: {py_iou_feat[0].shape}")
+                    # Ensure inputs for predict_iou are 2D for modulation and 4D for iou_feat
+                    mod_0_squeezed = py_modulation[0].squeeze(-1).squeeze(-1) if py_modulation[0].dim() == 4 else py_modulation[0]
+                    mod_1_squeezed = py_modulation[1].squeeze(-1).squeeze(-1) if py_modulation[1].dim() == 4 else py_modulation[1]
+                    
+                    # --- BEGIN PYTHON DETAILED LOGGING ---
+                    print(f"    DEBUG PYTHON: py_iou_feat[0] (first 5 rows of first batch) sample values:\n{py_iou_feat[0][0, :, :5, :5] if py_iou_feat[0].numel() > 0 else 'N/A'}") # Print a slice of the feature map
+                    print(f"    DEBUG PYTHON: py_iou_feat[1] (first 5 rows of first batch) sample values:\n{py_iou_feat[1][0, :, :5, :5] if py_iou_feat[1].numel() > 0 else 'N/A'}")
+                    print(f"    DEBUG PYTHON: mod_0_squeezed sample values:\n{mod_0_squeezed[0, :5] if mod_0_squeezed.numel() > 0 else 'N/A'}")
+                    print(f"    DEBUG PYTHON: mod_1_squeezed sample values:\n{mod_1_squeezed[0, :5] if mod_1_squeezed.numel() > 0 else 'N/A'}")
+                    # Construct ioufeat equivalent for Python to log before predict_iou
+                    # This mimics the C++: mod_feat1 = mod_target_0 * mod1_repeated_for_proposals; mod_feat2 = mod_target_1 * mod2_repeated_for_proposals; ioufeat = torch.cat({mod_feat1, mod_feat2}, 1);
+                    # We need py_pooled_feat1, py_pooled_feat2 from PrRoIPool like in C++ before fc3_rt and fc4_rt
+                    # This part is tricky because the Python model does it internally in predict_iou.
+                    # For now, let's log the direct inputs to predict_iou and its weights.
+
+                    # Log weights of the iou_predictor in the TorchScript model
+                    ts_iou_predictor_weight = self.models.bb_regressor.iou_predictor.weight.data
+                    ts_iou_predictor_bias = self.models.bb_regressor.iou_predictor.bias.data
+                    print(f"    DEBUG PYTHON: TorchScript iou_predictor.weight:\n{ts_iou_predictor_weight}")
+                    print(f"    DEBUG PYTHON: TorchScript iou_predictor.bias:\n{ts_iou_predictor_bias}")
+                    # --- END PYTHON DETAILED LOGGING ---
+
+                    py_iou_pred = self.models.bb_regressor.predict_iou(
+                        py_modulation_squeezed_for_pred, 
+                        py_iou_feat,  # These are now from source model (256ch)
+                        py_proposals
+                    )
+                    print(f"Sample {i}: Successfully used TorchScript predict_iou.")
+                except Exception as e_ts_pred:
+                    print(f"WARNING: Python model self.models.bb_regressor.predict_iou failed for sample {i}: {e_ts_pred}")
+                    print(f"Sample {i}: Falling back to from-source predict_iou.")
+                    try:
+                        py_iou_pred = self.bb_regressor_from_source.predict_iou(
+                            py_modulation_squeezed_for_pred, 
+                            py_iou_feat, 
+                            py_proposals
+                        )
+                        print(f"Sample {i}: Successfully used from-source predict_iou.")
+                    except Exception as e_source_pred:
+                        print(f"ERROR: Python model self.bb_regressor_from_source.predict_iou also failed for sample {i}: {e_source_pred}")
+                        py_iou_pred = None # Ensure it's None if fallback also failed
            
            # Load C++ outputs
-            cpp_iou_pred_path = cpp_output_sample_dir / 'iou_pred.pt'
-            cpp_modulation_0_path = cpp_output_sample_dir / 'modulation_0.pt'
-            cpp_modulation_1_path = cpp_output_sample_dir / 'modulation_1.pt'
-            cpp_feat_0_path = cpp_output_sample_dir / 'iou_feat_0.pt'
-            cpp_feat_1_path = cpp_output_sample_dir / 'iou_feat_1.pt'
-
-            cpp_iou_pred = self.load_cpp_tensor(cpp_iou_pred_path, self.device)
-            cpp_modulation_0 = self.load_cpp_tensor(cpp_modulation_0_path, self.device)
-            cpp_modulation_1 = self.load_cpp_tensor(cpp_modulation_1_path, self.device)
-            cpp_feat_0 = self.load_cpp_tensor(cpp_feat_0_path, self.device)
-            cpp_feat_1 = self.load_cpp_tensor(cpp_feat_1_path, self.device)
-            
-            current_errors = {} # Store errors for this sample for the HTML report
-
-            # Compare IoU features (py_iou_feat vs cpp_feat_0/1)
-            # _compare_tensor_data will handle None inputs appropriately
+            cpp_iou_pred, cpp_mod_0, cpp_mod_1, cpp_iou_feat_0, cpp_iou_feat_1 = None, None, None, None, None
+            if cpp_out_sample_dir.exists():
+                cpp_iou_pred = self.load_cpp_tensor(cpp_output_dir_path / f"sample_{i}" / 'iou_pred.pt', self.device)
+                cpp_mod_0 = self.load_cpp_tensor(cpp_output_dir_path / f"sample_{i}" / 'modulation_0.pt', self.device)
+                cpp_mod_1 = self.load_cpp_tensor(cpp_output_dir_path / f"sample_{i}" / 'modulation_1.pt', self.device)
+                cpp_iou_feat_0 = self.load_cpp_tensor(cpp_output_dir_path / f"sample_{i}" / 'iou_feat_0.pt', self.device)
+                cpp_iou_feat_1 = self.load_cpp_tensor(cpp_output_dir_path / f"sample_{i}" / 'iou_feat_1.pt', self.device)
+
+            current_errors = {}
            py_iou_f0 = py_iou_feat[0] if py_iou_feat and len(py_iou_feat) > 0 else None
            py_iou_f1 = py_iou_feat[1] if py_iou_feat and len(py_iou_feat) > 1 else None
-            self._compare_tensor_data(py_iou_f0, cpp_feat_0, "BBReg PyIoUFeat0 vs CppIoUFeat0", i, current_errors)
-            self._compare_tensor_data(py_iou_f1, cpp_feat_1, "BBReg PyIoUFeat1 vs CppIoUFeat1", i, current_errors)
-
-            # Compare modulation vectors (py_modulation vs cpp_modulation_0/1)
+            self._compare_tensor_data(py_iou_f0, cpp_iou_feat_0, "BBReg PyIoUFeat0 vs CppIoUFeat0", i, current_errors)
+            self._compare_tensor_data(py_iou_f1, cpp_iou_feat_1, "BBReg PyIoUFeat1 vs CppIoUFeat1", i, current_errors)
+            
            py_mod_0 = py_modulation[0] if py_modulation and len(py_modulation) > 0 else None
            py_mod_1 = py_modulation[1] if py_modulation and len(py_modulation) > 1 else None
-            self._compare_tensor_data(py_mod_0, cpp_modulation_0, "BBReg PyMod0 vs CppMod0", i, current_errors)
-            self._compare_tensor_data(py_mod_1, cpp_modulation_1, "BBReg PyMod1 vs CppMod1", i, current_errors)
-
-            # Compare final IoU prediction
-            # _compare_tensor_data will handle None for py_iou_pred or cpp_iou_pred
+            self._compare_tensor_data(py_mod_0, cpp_mod_0, "BBReg PyMod0 vs CppMod0", i, current_errors)
+            self._compare_tensor_data(py_mod_1, cpp_mod_1, "BBReg PyMod1 vs CppMod1", i, current_errors)
+            
            self._compare_tensor_data(py_iou_pred, cpp_iou_pred, "BBReg IoUPred", i, current_errors)
            
-            if current_errors: # Add to overall statistics if any comparisons were made/attempted
-                self.all_errors_stats[f"BBReg_Sample_{i}"] = current_errors
-            # Note: MAE accumulation for overall average needs to be selective based on valid comparisons
-            # For simplicity, we'll let the HTML report show NaNs for failed/skipped comparisons.
-
-        if not self.all_errors_stats: # Check if any BB regressor comparisons were made
-            print("No BB Regressor comparisons were performed for this model type.") # Clarified message
-            # No plots or stats if nothing was compared for BB regressor
-            return
-
-        # The following old averaging and plotting is now handled by generate_html_report using all_errors_stats
-        # print("\nBB Regressor Comparison Statistics:")
-        # if iou_pred_errors:
-        #     print(f"  IoU Prediction MAE: Mean={np.mean(iou_pred_errors):.4e}, Std={np.std(iou_pred_errors):.4e}")
-        # if modulation_errors:
-        #     print(f"  Modulation MAE: Mean={np.mean(modulation_errors):.4e}, Std={np.std(modulation_errors):.4e}")
-        # if feat_errors:
-        #     print(f"  IoU Feature MAE: Mean={np.mean(feat_errors):.4e}, Std={np.std(feat_errors):.4e}")
-
-        # # Plots - these would need to be rethought with the new error structure
-        # self._generate_stats_and_plots(iou_pred_errors, "BB Regressor IoU Prediction Error", self.plots_dir / "bbreg_iou_pred_error_hist.png")
-        # self._generate_stats_and_plots(modulation_errors, "BB Regressor Modulation Error", self.plots_dir / "bbreg_modulation_error_hist.png")
-        # self._generate_stats_and_plots(feat_errors, "BB Regressor IoU Feature Error", self.plots_dir / "bbreg_feature_error_hist.png")
+            self.all_errors_stats[f"BBReg_Sample_{i}"] = current_errors

    def generate_html_report(self):
        print("\nGenerating HTML report...")
@ -747,6 +817,73 @@ class ModelComparison:
        # For direct use, if needed, it could return the tuple:
        # return (mae, max_err, diff_arr_for_hist, mean_py_val, std_abs_err, l2_norm_py, l2_norm_cpp, l2_norm_diff, cosine_sim, pearson_corr, mean_rel_err)

+    @staticmethod
+    def load_weights_for_custom_model(model, model_name, base_model_dir, device):
+        """
+        Load individual tensor files into a model, as listed in its weights doc file.
+
+        The doc file ``<base_model_dir>/<model_name>/<model_name>_weights_doc.txt`` is
+        read line by line. A line starting with ``'## '`` opens a section and names a
+        dotted attribute path on ``model``; the first following line of that section
+        containing ``'File:'`` names the tensor file (inside the same directory) whose
+        content is assigned to that attribute. A section ends at the next ``'## '``
+        line, so a section without a ``'File:'`` entry is reported and skipped instead
+        of picking up the file that belongs to the following section.
+
+        Problems with a single entry (missing file, missing attribute, load error) are
+        printed and do not stop the remaining entries from being processed.
+
+        Args:
+            model: the PyTorch nn.Module instance; it is modified in place.
+            model_name: e.g., 'classifier' or 'bb_regressor'.
+            base_model_dir: Path object to the base directory like 'exported_weights'.
+            device: torch device the loaded tensors and the model are moved to.
+
+        Returns:
+            None. If the doc file does not exist, a warning is printed and the model
+            is left untouched (it is neither switched to eval mode nor moved).
+        """
+        tensor_dir = base_model_dir / model_name
+        doc_file = tensor_dir / f"{model_name}_weights_doc.txt"
+
+        if not doc_file.exists():
+            print(f"Warning: Documentation file not found: {doc_file} for {model_name}. Skipping weight loading for source model.")
+            return
+
+        with open(doc_file, 'r') as f:
+            lines = f.readlines()
+
+        # First pass: pair every '## <key>' header with the 'File:' line of its OWN section.
+        entries = []
+        pending_key = None
+        for line in lines:
+            if line.startswith('## '):
+                if pending_key is not None:
+                    # The previous section ended without naming a file.
+                    print(f"Warning: No 'File:' entry found for {pending_key} in {doc_file}. Skipping.")
+                pending_key = line.strip()[3:]
+            elif pending_key is not None and 'File:' in line:
+                entries.append((pending_key, line.split('File:')[1].strip()))
+                pending_key = None
+        if pending_key is not None:
+            print(f"Warning: No 'File:' entry found for {pending_key} in {doc_file}. Skipping.")
+
+        # Second pass: load each tensor and attach it to the addressed attribute.
+        for key, file_name in entries:
+            tensor_path = tensor_dir / file_name
+            if not tensor_path.exists():
+                print(f"Warning: Tensor file not found: {tensor_path} for source {model_name}")
+                continue
+
+            try:
+                # NOTE(review): torch.load may unpickle arbitrary objects; presumably these
+                # files come from the project's own export step - confirm they are trusted.
+                tensor_data = torch.load(str(tensor_path), map_location=device)
+                # For .pt files that might be RecursiveScriptModule, try to extract tensor
+                if isinstance(tensor_data, torch.jit.RecursiveScriptModule):
+                    if hasattr(tensor_data, 'weight'):
+                        tensor = tensor_data.weight
+                    elif hasattr(tensor_data, 'bias'):
+                        tensor = tensor_data.bias
+                    elif len(list(tensor_data.parameters())) > 0:
+                        tensor = list(tensor_data.parameters())[0]
+                    else:
+                        tensor = tensor_data()  # Try calling it
+                else:
+                    tensor = tensor_data
+
+                # Walk the dotted path down to the module that owns the final attribute.
+                parts = key.split('.')
+                module_to_set = model
+                for part in parts[:-1]:
+                    module_to_set = getattr(module_to_set, part)
+
+                param_name = parts[-1]
+                if not hasattr(module_to_set, param_name):
+                    print(f"Warning: Attribute {key} not found in source model {model_name}.")
+                    continue
+
+                moved = tensor.to(device)
+                if param_name in module_to_set._parameters:
+                    # Only floating point / complex tensors may require gradients; wrapping
+                    # an integer or bool tensor with the default requires_grad=True raises.
+                    wants_grad = moved.is_floating_point() or moved.is_complex()
+                    module_to_set._parameters[param_name] = torch.nn.Parameter(moved, requires_grad=wants_grad)
+                elif param_name in module_to_set._buffers:
+                    module_to_set._buffers[param_name] = moved
+                else:  # Direct attribute assignment
+                    setattr(module_to_set, param_name, moved)
+            except Exception as e:
+                print(f"Error loading tensor for {key} from {tensor_path} for source {model_name}: {e}")
+
+        model.eval().to(device)
+

 if __name__ == "__main__":
    # Parse command line arguments
Author	SHA1	Message	Date
mht	ba35c5b500	a stable stage	1 week ago
mht	c4156feccc	Refactor: Remove double precision in BBRegressor LinearBlock	1 week ago