diff --git a/cimp/resnet/resnet.cpp b/cimp/resnet/resnet.cpp index 6304d7f..1ecf121 100644 --- a/cimp/resnet/resnet.cpp +++ b/cimp/resnet/resnet.cpp @@ -71,9 +71,9 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir, conv1->weight = load_named_tensor(base_weights_dir, block_param_prefix + "conv1.weight", device); bn1->weight = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.weight", device); bn1->bias = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.bias", device); - bn1->named_buffers()["running_mean"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_mean", device); - bn1->named_buffers()["running_var"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_var", device); - bn1->named_buffers()["num_batches_tracked"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.num_batches_tracked", device); + bn1->running_mean = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_mean", device); + bn1->running_var = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_var", device); + bn1->num_batches_tracked = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.num_batches_tracked", device); register_module("conv1", conv1); register_module("bn1", bn1); @@ -83,9 +83,9 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir, conv2->weight = load_named_tensor(base_weights_dir, block_param_prefix + "conv2.weight", device); bn2->weight = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.weight", device); bn2->bias = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.bias", device); - bn2->named_buffers()["running_mean"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_mean", device); - bn2->named_buffers()["running_var"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_var", device); - bn2->named_buffers()["num_batches_tracked"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.num_batches_tracked", device); + bn2->running_mean = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_mean", device); + bn2->running_var = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_var", device); + bn2->num_batches_tracked = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.num_batches_tracked", device); register_module("conv2", conv2); register_module("bn2", bn2); @@ -95,9 +95,9 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir, conv3->weight = load_named_tensor(base_weights_dir, block_param_prefix + "conv3.weight", device); bn3->weight = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.weight", device); bn3->bias = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.bias", device); - bn3->named_buffers()["running_mean"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_mean", device); - bn3->named_buffers()["running_var"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_var", device); - bn3->named_buffers()["num_batches_tracked"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.num_batches_tracked", device); + bn3->running_mean = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_mean", device); + bn3->running_var = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_var", device); + bn3->num_batches_tracked = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.num_batches_tracked", device); register_module("conv3", conv3); register_module("bn3", bn3); @@ -118,17 +118,85 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir, // Forward method implementation for BottleneckImpl torch::Tensor BottleneckImpl::forward(torch::Tensor x) { torch::Tensor identity = x; + torch::ScalarType original_dtype = x.scalar_type(); + // conv1 -> bn1 -> relu x = conv1->forward(x); - x = bn1->forward(x); + + if (!this->is_training() && bn1) { + const auto& bn_module = *bn1; + torch::Tensor input_double = x.to(torch::kFloat64); + torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64); + torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64); + double eps_double = bn_module.options.eps(); + + auto c = x.size(1); + running_mean_double = running_mean_double.reshape({1, c, 1, 1}); + running_var_double = running_var_double.reshape({1, c, 1, 1}); + if (weight_double.defined()) weight_double = weight_double.reshape({1, c, 1, 1}); + if (bias_double.defined()) bias_double = bias_double.reshape({1, c, 1, 1}); + + torch::Tensor out_double = (input_double - running_mean_double) / (torch::sqrt(running_var_double + eps_double)); + if (weight_double.defined()) out_double = out_double * weight_double; + if (bias_double.defined()) out_double = out_double + bias_double; + x = out_double.to(original_dtype); + } else if (bn1) { + x = bn1->forward(x); + } x = relu->forward(x); + // conv2 -> bn2 -> relu x = conv2->forward(x); - x = bn2->forward(x); + if (!this->is_training() && bn2) { + const auto& bn_module = *bn2; + torch::Tensor input_double = x.to(torch::kFloat64); + torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64); + torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64); + double eps_double = bn_module.options.eps(); + + auto c = x.size(1); + running_mean_double = running_mean_double.reshape({1, c, 1, 1}); + running_var_double = running_var_double.reshape({1, c, 1, 1}); + if (weight_double.defined()) weight_double = weight_double.reshape({1, c, 1, 1}); + if (bias_double.defined()) bias_double = bias_double.reshape({1, c, 1, 1}); + + torch::Tensor out_double = (input_double - running_mean_double) / (torch::sqrt(running_var_double + eps_double)); + if (weight_double.defined()) out_double = out_double * weight_double; + if (bias_double.defined()) out_double = out_double + bias_double; + x = out_double.to(original_dtype); + } else if (bn2) { + x = bn2->forward(x); + } x = relu->forward(x); + // conv3 -> bn3 x = conv3->forward(x); - x = bn3->forward(x); + if (!this->is_training() && bn3) { + const auto& bn_module = *bn3; + torch::Tensor input_double = x.to(torch::kFloat64); + torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64); + torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64); + double eps_double = bn_module.options.eps(); + + auto c = x.size(1); + running_mean_double = running_mean_double.reshape({1, c, 1, 1}); + running_var_double = running_var_double.reshape({1, c, 1, 1}); + if (weight_double.defined()) weight_double = weight_double.reshape({1, c, 1, 1}); + if (bias_double.defined()) bias_double = bias_double.reshape({1, c, 1, 1}); + + torch::Tensor out_double = (input_double - running_mean_double) / (torch::sqrt(running_var_double + eps_double)); + if (weight_double.defined()) out_double = out_double * weight_double; + if (bias_double.defined()) out_double = out_double + bias_double; + x = out_double.to(original_dtype); + } else if (bn3) { + x = bn3->forward(x); + } if (this->projection_shortcut) { identity = this->projection_shortcut->forward(identity); @@ -150,18 +218,16 @@ ResNetImpl::ResNetImpl(const std::string& base_weights_dir_path, conv1 = torch::nn::Conv2d(torch::nn::Conv2dOptions(3, 64, 7).stride(2).padding(3).bias(false)); bn1 = torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(64).eps(static_cast(1e-5)).momentum(0.1).affine(true).track_running_stats(true)); this->conv1->weight = load_named_tensor(this->_base_weights_dir, "conv1.weight", device); + + // Directly assign to the public member tensors of the bn1 module this->bn1->weight = load_named_tensor(this->_base_weights_dir, "bn1.weight", device); this->bn1->bias = load_named_tensor(this->_base_weights_dir, "bn1.bias", device); - - this->bn1->named_buffers()["running_mean"] = load_named_tensor(this->_base_weights_dir, "bn1.running_mean", device); - this->bn1->named_buffers()["running_var"] = load_named_tensor(this->_base_weights_dir, "bn1.running_var", device); - - this->bn1->named_buffers()["num_batches_tracked"] = load_named_tensor(this->_base_weights_dir, "bn1.num_batches_tracked", device); - register_module("conv1", conv1); - register_module("bn1", bn1); + this->bn1->running_mean = load_named_tensor(this->_base_weights_dir, "bn1.running_mean", device); + this->bn1->running_var = load_named_tensor(this->_base_weights_dir, "bn1.running_var", device); + this->bn1->num_batches_tracked = load_named_tensor(this->_base_weights_dir, "bn1.num_batches_tracked", device); - std::cout << "CPP ResNetImpl::bn1 running_mean sum: " << std::fixed << std::setprecision(10) << this->bn1->running_mean.sum().item() << std::endl; - std::cout << "CPP ResNetImpl::bn1 running_var sum: " << std::fixed << std::setprecision(10) << this->bn1->running_var.sum().item() << std::endl; + register_module("conv1", conv1); + register_module("bn1", bn1); // bn1 is already populated correctly relu = torch::nn::ReLU(torch::nn::ReLUOptions().inplace(true)); maxpool = torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions(3).stride(2).padding(1)); @@ -195,9 +261,9 @@ torch::nn::Sequential ResNetImpl::_make_layer(int64_t planes_for_block, int64_t conv_down->weight = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "0.weight", device); bn_down->weight = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.weight", device); bn_down->bias = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.bias", device); - bn_down->named_buffers()["running_mean"] = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_mean", device); - bn_down->named_buffers()["running_var"] = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_var", device); - bn_down->named_buffers()["num_batches_tracked"] = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.num_batches_tracked", device); + bn_down->running_mean = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_mean", device); + bn_down->running_var = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_var", device); + bn_down->num_batches_tracked = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.num_batches_tracked", device); ds_seq->push_back(conv_down); ds_seq->push_back(bn_down); @@ -229,9 +295,50 @@ std::map ResNetImpl::forward(torch::Tensor x) { }; x = conv1->forward(x); - if (should_output("conv1_output")) outputs["conv1_output"] = x; - - x = bn1->forward(x); + if (should_output("conv1_output")) outputs["conv1_output"] = x; + if (should_output("debug_resnet_conv1_output_for_bn1_input")) { + outputs["debug_resnet_conv1_output_for_bn1_input"] = x.clone(); + } + torch::ScalarType original_dtype_resnet_bn1 = x.scalar_type(); + + // Apply bn1 + if (!this->is_training() && bn1) { + const auto& bn_module = *bn1; + torch::Tensor input_double = x.to(torch::kFloat64); + torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor(); + torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64); + torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64); + double eps_double = bn_module.options.eps(); + + auto c = x.size(1); + torch::Tensor reshaped_running_mean = running_mean_double.reshape({1, c, 1, 1}); + torch::Tensor reshaped_running_var = running_var_double.reshape({1, c, 1, 1}); + torch::Tensor reshaped_weight = weight_double.defined() ? weight_double.reshape({1, c, 1, 1}) : torch::Tensor(); + torch::Tensor reshaped_bias = bias_double.defined() ? bias_double.reshape({1, c, 1, 1}) : torch::Tensor(); + + torch::Tensor centered_x = input_double - reshaped_running_mean; + if (should_output("bn1_centered_x")) outputs["bn1_centered_x"] = centered_x.clone(); + + torch::Tensor variance_plus_eps = reshaped_running_var + eps_double; + if (should_output("bn1_variance_plus_eps")) outputs["bn1_variance_plus_eps"] = variance_plus_eps.clone(); + + torch::Tensor inv_std = torch::rsqrt(variance_plus_eps); // Using rsqrt for potential match + if (should_output("bn1_inv_std")) outputs["bn1_inv_std"] = inv_std.clone(); + + torch::Tensor normalized_x = centered_x * inv_std; + if (should_output("bn1_normalized_x")) outputs["bn1_normalized_x"] = normalized_x.clone(); + + torch::Tensor out_double = normalized_x; + if (reshaped_weight.defined()) out_double = out_double * reshaped_weight; + if (reshaped_bias.defined()) out_double = out_double + reshaped_bias; + + x = out_double.to(original_dtype_resnet_bn1); + } else if (bn1) { // Training mode or if manual is disabled + x = bn1->forward(x); + } + // End apply bn1 + if (should_output("bn1_output")) outputs["bn1_output"] = x; x = relu->forward(x); diff --git a/test/compare_models.py b/test/compare_models.py index cee1182..ae11553 100644 --- a/test/compare_models.py +++ b/test/compare_models.py @@ -37,13 +37,23 @@ def get_model_configs(root_dir_param): return { # ... (existing model_configs definitions) 'ResNet': { - 'python_model_loader': lambda: DiMPTorchScriptWrapper(os.path.join(root_dir_param, 'pytracking_models/dimp50_ Ausdruck_ep0050.pth.tar')), + 'python_model_loader': lambda: DiMPTorchScriptWrapper(os.path.join(root_dir_param, 'pytracking_models/dimp50_ausdruck_ep0050.pth.tar')), 'cpp_output_subdir': 'resnet', - 'python_output_subdir': 'resnet_py', # If Python outputs are saved separately + 'python_output_subdir': 'resnet_py', 'outputs_to_compare': { - 'Conv1': 'conv1_output.pt', # ADDED - 'BN1': 'bn1_output.pt', # ADDED - 'ReLU1': 'relu1_output.pt', # ADDED for completeness before MaxPool + 'Conv1': ('conv1_output.pt', 'conv1'), + 'Debug ResNet Conv1->BN1 Input': ('debug_resnet_conv1_output_for_bn1_input.pt', 'conv1_pre_bn'), + + # BN1 final output (manual C++ vs manual Python pre-ReLU) + 'BN1': ('bn1_output.pt', 'bn1_post_relu_pre'), + + # BN1 Intermediate comparisons + 'BN1 Centered X': ('bn1_centered_x.pt', 'bn1_centered_x_py'), + 'BN1 Var+Eps': ('bn1_variance_plus_eps.pt', 'bn1_variance_plus_eps_py'), + 'BN1 InvStd': ('bn1_inv_std.pt', 'bn1_inv_std_py'), + 'BN1 Normalized X': ('bn1_normalized_x.pt', 'bn1_normalized_x_py'), + + 'ReLU1': ('relu1_output.pt', 'conv1'), 'MaxPool': 'maxpool_output.pt', 'Features': 'features.pt', 'Layer1': 'layer1.pt', @@ -523,6 +533,9 @@ class ComparisonRunner: cpp_mod_vec0_path = cpp_output_bb_reg_dir_path / f'sample_{i}_mod_vec0.pt' cpp_mod_vec1_path = cpp_output_bb_reg_dir_path / f'sample_{i}_mod_vec1.pt' cpp_iou_scores_path = cpp_output_bb_reg_dir_path / f'sample_{i}_iou_scores.pt' + # Paths for debug C++ outputs + cpp_debug_conv3_1t_path = cpp_output_bb_reg_dir_path / f'sample_{i}_debug_conv3_1t_output.pt' + cpp_debug_conv4_1t_path = cpp_output_bb_reg_dir_path / f'sample_{i}_debug_conv4_1t_output.pt' # Load initial inputs for Python model py_image_tensor = self.load_cpp_tensor(py_image_input_path, self.device) @@ -549,6 +562,31 @@ class ComparisonRunner: else: print(f"Warning: Skipping Python BB Regressor for sample {i}, image input not found at {py_image_input_path}") + # ---- Intermediate debug outputs for conv3_1t and conv4_1t ---- + py_debug_conv3_1t_out = None + py_debug_conv4_1t_out = None + + if py_feat_layer2 is not None: + try: + _feat2_for_debug_conv3_1t = py_feat_layer2 + if _feat2_for_debug_conv3_1t.dim() == 5: + _feat2_for_debug_conv3_1t = _feat2_for_debug_conv3_1t.reshape(-1, *_feat2_for_debug_conv3_1t.shape[-3:]) + with torch.no_grad(): # Ensure no_grad context + py_debug_conv3_1t_out = self.bb_regressor_from_source.conv3_1t(_feat2_for_debug_conv3_1t) + except Exception as e: + print(f"ERROR calculating Python Debug_Conv3_1t for sample {i}: {e}") + + if py_feat_layer3 is not None: + try: + _feat3_for_debug_conv4_1t = py_feat_layer3 + if _feat3_for_debug_conv4_1t.dim() == 5: + _feat3_for_debug_conv4_1t = _feat3_for_debug_conv4_1t.reshape(-1, *_feat3_for_debug_conv4_1t.shape[-3:]) + with torch.no_grad(): # Ensure no_grad context + py_debug_conv4_1t_out = self.bb_regressor_from_source.conv4_1t(_feat3_for_debug_conv4_1t) + except Exception as e: + print(f"ERROR calculating Python Debug_Conv4_1t for sample {i}: {e}") + # ---- End intermediate debug outputs ---- + # Get Python IoU features py_iou_feat_list = [None, None] # Initialize as a list of two Nones if py_feat_layer2 is not None and py_feat_layer3 is not None: @@ -622,8 +660,13 @@ class ComparisonRunner: cpp_mod_vec0 = self.load_cpp_tensor(cpp_mod_vec0_path, self.device) cpp_mod_vec1 = self.load_cpp_tensor(cpp_mod_vec1_path, self.device) cpp_iou_scores = self.load_cpp_tensor(cpp_iou_scores_path, self.device) + # Load debug C++ tensors + cpp_debug_conv3_1t_tensor = self.load_cpp_tensor(cpp_debug_conv3_1t_path, self.device) + cpp_debug_conv4_1t_tensor = self.load_cpp_tensor(cpp_debug_conv4_1t_path, self.device) # Comparisons + self._compare_tensor_data(py_debug_conv3_1t_out, cpp_debug_conv3_1t_tensor, "BBReg Debug_Conv3_1t", i, current_errors) + self._compare_tensor_data(py_debug_conv4_1t_out, cpp_debug_conv4_1t_tensor, "BBReg Debug_Conv4_1t", i, current_errors) self._compare_tensor_data(py_iou_feat_list[0], cpp_iou_feat0, "BBReg PyIoUFeat0 vs CppIoUFeat0", i, current_errors) self._compare_tensor_data(py_iou_feat_list[1], cpp_iou_feat1, "BBReg PyIoUFeat1 vs CppIoUFeat1", i, current_errors) self._compare_tensor_data(py_modulation_list[0], cpp_mod_vec0, "BBReg PyMod0 vs CppMod0", i, current_errors) @@ -633,224 +676,311 @@ class ComparisonRunner: if current_errors: self.all_comparison_stats[f"BBReg_Sample_{i}"] = current_errors def compare_resnet_outputs(self): - print("Comparing ResNet outputs...") - print("\n--- Types at START of compare_resnet_outputs: ---") - if 'ResNet' in self.models: print(f" self.models['ResNet'] type: {type(self.models['ResNet'])}") - if 'Classifier' in self.models: print(f" self.models['Classifier'] type: {type(self.models['Classifier'])}") - if 'BBRegressor' in self.models: print(f" self.models['BBRegressor'] type: {type(self.models['BBRegressor'])}") + print("\\n--- Comparing ResNet Outputs ---") + if not self.models.get('ResNet'): + print("PYTHON: ResNet model not loaded, skipping ResNet comparison.") + return - py_input_common_dir = os.path.join(self.root_dir, 'test', 'input_samples', 'common') - cpp_output_resnet_dir = os.path.join(self.cpp_output_dir, 'resnet') - # Ensure self.py_resnet_output_dir is defined, e.g., in __init__ or where other py output dirs are - if not hasattr(self, 'py_resnet_output_dir') or not self.py_resnet_output_dir: - self.py_resnet_output_dir = Path(self.python_output_dir) / 'resnet' - self.py_resnet_output_dir.mkdir(parents=True, exist_ok=True) + resnet_model = self.models['ResNet'] + config = self.model_configs['ResNet'] + cpp_resnet_dir = os.path.join(self.cpp_output_dir, config['cpp_output_subdir']) + + python_resnet_save_dir = os.path.join(self.python_output_dir, config.get('python_output_subdir', config['cpp_output_subdir'])) + if not os.path.exists(python_resnet_save_dir): + os.makedirs(python_resnet_save_dir, exist_ok=True) + + num_samples_to_process = self.num_samples + if num_samples_to_process == -1: # If -1, determine from available C++ output files + # This logic can be complex if C++ output is sparse. For now, let's assume if -1 it means process all *common* inputs. + # A safer way for -1 would be to count common input samples first. + common_input_glob = os.path.join(self.root_dir, "test", "input_samples", "common", "sample_*_image.pt") + num_samples_to_process = len(glob.glob(common_input_glob)) + print(f"INFO: num_samples set to -1, determined {num_samples_to_process} common input samples.") + + processed_samples_count = 0 # Renamed from processed_samples to avoid conflict + + sample_input_base_dir = os.path.join(self.root_dir, "test", "input_samples", "common") + + # Loop exactly self.num_samples times (or detected count if -1) + for sample_idx in tqdm(range(num_samples_to_process), desc="Comparing ResNet samples"): + current_errors = {} # Initialize for each sample + python_intermediate_outputs_cache = {} # Reset for each sample + + # Construct the input file path based on sample_idx + sample_input_file_path = os.path.join(sample_input_base_dir, f"sample_{sample_idx}_image.pt") + + if not os.path.exists(sample_input_file_path): + print(f"Warning: Input sample file {sample_input_file_path} not found for sample index {sample_idx}. Skipping ResNet sample.") + empty_errors_for_skipped_sample = {} + for output_key_config in config['outputs_to_compare'].keys(): + self._compare_tensor_data(None, None, output_key_config, sample_idx, empty_errors_for_skipped_sample) + if empty_errors_for_skipped_sample: + self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = empty_errors_for_skipped_sample + continue + + # --- START REINSTATED INPUT LOADING AND PREPROCESSING --- + input_tensor = self.load_cpp_tensor(sample_input_file_path, self.device, is_image=True) + + if input_tensor is None: + print(f"Warning: Failed to load a valid tensor for ResNet input sample {sample_input_file_path} (sample {sample_idx}) using self.load_cpp_tensor. Skipping.") + # Populate NaNs for all expected outputs for this sample + empty_errors_for_skipped_sample = {} + for output_key_config in config['outputs_to_compare'].keys(): + self._compare_tensor_data(None, None, output_key_config, sample_idx, empty_errors_for_skipped_sample) + if empty_errors_for_skipped_sample: + self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = empty_errors_for_skipped_sample + continue - # Define Path objects for directory checks - py_input_common_dir_path = Path(py_input_common_dir) - cpp_output_resnet_dir_path = Path(cpp_output_resnet_dir) - - comparison_configs = [ - ("ResNet Conv1 Output (Pre-BN)", "_conv1_output_py.pt", "_conv1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Conv1", "_conv1_output.pt", "_conv1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), # Assumes Py also saved conv1 output if it was meant to be same as C++ pre-bn - ("ResNet BN1", "_bn1_output.pt", "_bn1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet ReLU1", "_relu1_output.pt", "_relu1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet MaxPool", "_maxpool_output.pt", "_maxpool_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Layer1.0 Block Output", "_layer1_0_block_output.pt", "_layer1_0_block_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Layer1.0 Shortcut Output", "_layer1_0_shortcut_output.pt", "_layer1_0_shortcut_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Layer1", "_layer1_output.pt", "_layer1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Layer2", "_layer2_output.pt", "_layer2_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Layer3", "_layer3_output.pt", "_layer3_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Layer4", "_layer4_output.pt", "_layer4_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), - ("ResNet Features", "_features_output.pt", "_features_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir) - ] - - if not py_input_common_dir_path.exists() or not cpp_output_resnet_dir_path.exists(): - print(f"ResNet input ({py_input_common_dir_path}) or C++ ResNet output dir ({cpp_output_resnet_dir_path}) not found. Skipping ResNet comparison.") - # Populate NaN for all expected ResNet comparisons if dirs are missing - for i in range(self.num_samples): - sample_key_base = f"ResNet_Sample_{i}" - current_errors = {} - self._compare_tensor_data(None, None, "ResNet Layer1", i, current_errors) - self._compare_tensor_data(None, None, "ResNet Layer2", i, current_errors) - self._compare_tensor_data(None, None, "ResNet Layer3", i, current_errors) - self._compare_tensor_data(None, None, "ResNet Layer4", i, current_errors) - self._compare_tensor_data(None, None, "ResNet Features", i, current_errors) - self.all_comparison_stats[sample_key_base] = current_errors - return + if not isinstance(input_tensor, torch.Tensor): + print(f"Warning: self.load_cpp_tensor for {sample_input_file_path} did not return a Tensor (got {type(input_tensor)}). Skipping sample {sample_idx}.") + # Populate NaNs for all expected outputs for this sample + empty_errors_for_skipped_sample = {} + for output_key_config in config['outputs_to_compare'].keys(): + self._compare_tensor_data(None, None, output_key_config, sample_idx, empty_errors_for_skipped_sample) + if empty_errors_for_skipped_sample: + self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = empty_errors_for_skipped_sample + continue - for i in tqdm(range(self.num_samples), desc="ResNet samples"): - current_errors = {} # For this sample + # Preprocess the input tensor for Python's ResNet + if hasattr(self.python_wrapper, 'preprocess_image'): + processed_input_tensor = self.python_wrapper.preprocess_image(input_tensor.clone()) # Use clone + else: + print("Warning: python_wrapper.preprocess_image not found. Using input_tensor as is.") + processed_input_tensor = input_tensor.to(self.device) # Ensure device + # --- END REINSTATED INPUT LOADING AND PREPROCESSING --- - py_image_input_path = py_input_common_dir_path / f'sample_{i}_image.pt' - py_image_tensor = self.load_cpp_tensor(py_image_input_path, self.device) + # Initialize dictionaries to store Python-side outputs for the current sample + python_outputs = {} # To store outputs from the Python model for this sample - py_conv1_out, py_bn1_out, py_relu1_out, py_maxpool_out, py_layer1_out, py_layer2_out, py_layer3_out, py_layer4_out, py_features_out = None, None, None, None, None, None, None, None, None # ADDED py_conv1_out, py_bn1_out, py_relu1_out - py_layer1_0_shortcut_out = None + try: + # Python ResNet forward pass (assuming it's a JIT model or similar) + # The output of a JIT ResNet model might be a dictionary or a list/tuple of tensors + # We need to ensure we can map these to the 'outputs_to_compare' keys + print(f"PYTHON ResNet forward pass for sample {sample_idx}...") + + # For ResNet, the output is a dictionary from its forward method. + # output_layers = list(config['outputs_to_compare'].keys()) # This might be too broad initially + + # Define the layers we actually need from the Python ResNet forward pass. + # These should match the keys used in the Python ResNet's forward method. + # e.g., ['layer1', 'layer2', 'layer3', 'layer4', 'conv1_output', 'bn1_output', etc.] + # For now, let's define specific layers needed for the comparison. + # The JIT ResNet model we have should output a dictionary. + + py_output_layers_needed = ['conv1', 'layer1', 'layer2', 'layer3', 'layer4'] + # Add 'conv1_pre_bn' if we need to compare the input to BN1 + if 'Debug ResNet Conv1->BN1 Input' in config['outputs_to_compare']: + py_output_layers_needed.append('conv1_pre_bn') + + # If we are comparing the direct C++ BN1 output, we need 'bn1_output' from Python + if 'BN1' in config['outputs_to_compare']: + py_output_layers_needed.append('bn1_output') + + # If we are comparing the C++ ReLU1 output (after BN1 and ReLU), we need 'bn1_post_relu_pre' from Python + if 'ReLU1' in config['outputs_to_compare']: + py_output_layers_needed.append('bn1_post_relu_pre') + + # Add Python-side BN1 intermediate layer names if they are in outputs_to_compare + # The config value (cpp_output_filename_or_tuple) is not directly used here for this part, + # we care about the py_dict_key that will be derived from the C++ key. + bn1_intermediate_py_keys_to_request = [] + if 'BN1 Centered X' in config['outputs_to_compare']: + bn1_intermediate_py_keys_to_request.append('bn1_centered_x_py') + if 'BN1 Var+Eps' in config['outputs_to_compare']: + bn1_intermediate_py_keys_to_request.append('bn1_variance_plus_eps_py') + if 'BN1 InvStd' in config['outputs_to_compare']: + bn1_intermediate_py_keys_to_request.append('bn1_inv_std_py') + if 'BN1 Normalized X' in config['outputs_to_compare']: + bn1_intermediate_py_keys_to_request.append('bn1_normalized_x_py') + + for py_key in bn1_intermediate_py_keys_to_request: + if py_key not in py_output_layers_needed: + py_output_layers_needed.append(py_key) - if py_image_tensor is not None: - # Save Python's preprocessed input to conv1 - # This py_image_tensor is already preprocessed by DiMPTorchScriptWrapper.extract_backbone -> preprocess_image - # which is called before this compare_resnet_outputs function if we follow the logic for py_feat_layer2, py_feat_layer3 in compare_bb_regressor - # However, here in compare_resnet_outputs, py_image_tensor comes from load_cpp_tensor(py_image_input_path, ...) - # which is the RAW image. Preprocessing for python side happens inside self.python_wrapper.extract_backbone - # or when we manually call py_model_resnet.conv1(py_image_tensor) - # Let's get the preprocessed image from the wrapper as that's the true input to Python's ResNet + # Add 'fc' if configured, though not typically used in these comparisons + if 'fc' in config['outputs_to_compare']: + py_output_layers_needed.append('fc') - # The input to python_wrapper.extract_backbone is the raw image tensor - # It then calls self.preprocess_image(im) and then self.net.extract_backbone_features(im, layers) - # So, py_image_tensor IS the raw image. We need to get the preprocessed one. - - preprocessed_py_image_for_conv1 = None - if self.python_wrapper: - # Manually preprocess for saving, mimicking what extract_backbone would do before its first conv - preprocessed_py_image_for_conv1 = self.python_wrapper.preprocess_image(py_image_tensor.clone()) # Clone to avoid in-place modification of py_image_tensor - py_preprocessed_save_path = Path(self.cpp_output_dir) / 'resnet' / f'sample_{i}_image_preprocessed_python.pt' - # Ensure self.cpp_output_dir / resnet exists - (Path(self.cpp_output_dir) / 'resnet').mkdir(parents=True, exist_ok=True) - torch.save(preprocessed_py_image_for_conv1.cpu(), str(py_preprocessed_save_path)) - print(f"Saved Python preprocessed image for sample {i} to {py_preprocessed_save_path}") + # Deduplicate, just in case (though construction above should be fine) + py_output_layers_needed = list(OrderedDict.fromkeys(py_output_layers_needed)) + + print(f"DEBUG: Requesting these layers from Python ResNet: {py_output_layers_needed}") + + # Call the Python ResNet forward + # The `self.models['ResNet']` should be the loaded JIT model + # It expects the output_layers argument. + # The DiMPTorchScriptWrapper's backbone should also support this. + if hasattr(resnet_model, 'forward') and callable(getattr(resnet_model, 'forward')) and 'output_layers' in inspect.signature(resnet_model.forward).parameters: + python_model_outputs_dict = resnet_model.forward(processed_input_tensor, output_layers=py_output_layers_needed) + elif hasattr(self.python_wrapper, 'extract_backbone') and callable(getattr(self.python_wrapper, 'extract_backbone')): + # This is the case if ResNet is accessed via the DiMPTorchScriptWrapper's extract_backbone, + # which internally calls the backbone's forward with output_layers. + python_model_outputs_dict = self.python_wrapper.extract_backbone(input_tensor.clone()) # extract_backbone handles preprocessing + else: + print(f"ERROR: Cannot call forward on Python ResNet model. Type: {type(resnet_model)}") + continue + + # DEBUG: Print keys from Python model output + if isinstance(python_model_outputs_dict, dict): + print(f"DEBUG RN_CMP: Keys from python_model_outputs_dict (sample {sample_idx}): {list(python_model_outputs_dict.keys())}") else: - print("ERROR: self.python_wrapper not available to get preprocessed image for Python.") + print(f"DEBUG RN_CMP: python_model_outputs_dict is not a dict (sample {sample_idx}), type: {type(python_model_outputs_dict)}") + + # Populate python_outputs based on the python_model_outputs_dict + # This maps the Python output names to the keys used in 'outputs_to_compare' + if isinstance(python_model_outputs_dict, dict): + python_outputs = python_model_outputs_dict + # If 'features' is an alias for 'layer4' in Python output + if 'layer4' in python_outputs and 'features' not in python_outputs: + python_outputs['features'] = python_outputs['layer4'] + if 'conv1_output' in python_outputs: + python_intermediate_outputs_cache['conv1_output'] = python_outputs['conv1_output'] + + else: + print(f"ERROR: Python ResNet output is not a dict. Got {type(python_model_outputs_dict)}") + # Handle tuple/list output if necessary, mapping by order or specific logic. + # For now, we assume dict output from our ResNet. + continue + + + except Exception as e: + print(f"Error during Python ResNet forward pass for sample {sample_idx}: {e}") + import traceback + traceback.print_exc() + continue # Skip to next sample + + for output_key, cpp_output_filename_or_tuple in config['outputs_to_compare'].items(): + is_python_specific_name = isinstance(cpp_output_filename_or_tuple, tuple) + cpp_output_filename = cpp_output_filename_or_tuple[0] if is_python_specific_name else cpp_output_filename_or_tuple + + # Corrected path construction for C++ ResNet tensors: + # The sample index is already part of the cpp_output_filename for ResNet outputs from C++. + # (e.g., sample_0_conv1_output.pt) + # So, we join cpp_resnet_dir directly with this filename. + # However, the C++ code actually saves ResNet outputs as sample_X_LAYERNAME.pt directly in cpp_resnet_dir, + # not in a per-sample subdirectory for ResNet outputs. + # Let's check how test_models.cpp saves them. + # test_models.cpp -> save_resnet_outputs -> file_path = resnet_output_dir + "/sample_" + std::to_string(sample_idx) + "_" + output_name; + # This means filenames are like "sample_0_conv1_output.pt" directly in "../test/output/resnet/" + + correct_cpp_tensor_filename = f"sample_{sample_idx}_{cpp_output_filename}" + cpp_tensor_path = os.path.join(cpp_resnet_dir, correct_cpp_tensor_filename) + + # <<< START ADDED DEBUG PRINTS >>> + print(f"DEBUG RN_CMP: Attempting to load C++ tensor for '{output_key}' (sample {sample_idx}) from: {cpp_tensor_path}") + # <<< END ADDED DEBUG PRINTS >>> try: - with torch.no_grad(): - py_model_resnet = self.models.get('ResNet') - if py_model_resnet: - current_features = preprocessed_py_image_for_conv1 - - py_conv1_out = py_model_resnet.conv1(current_features) - # Ensure self.py_resnet_output_dir is defined and is a Path object - if not hasattr(self, 'py_resnet_output_dir') or not self.py_resnet_output_dir: - self.py_resnet_output_dir = Path(self.python_output_dir) / 'resnet' - self.py_resnet_output_dir.mkdir(parents=True, exist_ok=True) - py_conv1_out_path = self.py_resnet_output_dir / f'sample_{i}_conv1_output_py.pt' - torch.save(py_conv1_out.cpu(), str(py_conv1_out_path)) - - # --- BN1 on CPU for debugging (Python) --- - py_bn1_out = py_model_resnet.bn1(py_conv1_out) # Original line - - py_relu1_out = py_model_resnet.relu(py_bn1_out) - py_maxpool_out = py_model_resnet.maxpool(py_relu1_out) - x_for_py_layer1_input = py_maxpool_out - - # Output of the first bottleneck block in layer1 - py_layer1_0_block_out_tensor = None # Initialize to avoid ref before assignment if try fails - if hasattr(py_model_resnet, 'layer1') and len(py_model_resnet.layer1) > 0: - try: - py_layer1_0_block_out_tensor = py_model_resnet.layer1[0](x_for_py_layer1_input) # REMOVED .clone() for consistency with best Layer1.0 result - # Ensure cpp_resnet_sample_dir is defined, if not, use a fallback or define it earlier - # Assuming cpp_resnet_sample_dir is defined like: cpp_resnet_sample_dir = Path(self.cpp_output_dir) / 'resnet' - # Which should be: cpp_resnet_dir = Path(self.cpp_output_dir) / 'resnet' # as per usage elsewhere - # And then: cpp_resnet_sample_dir = cpp_resnet_dir # if sample specific subdirs are not used for this - # For safety, let's use the already established cpp_output_resnet_dir path from later in the code - # cpp_output_resnet_dir = os.path.join(self.cpp_output_dir, 'resnet') - # Need to ensure cpp_output_resnet_dir is a Path object if used with / - # From later code: cpp_output_resnet_dir_path = Path(self.cpp_output_dir) / 'resnet' - - current_cpp_resnet_dir = Path(self.cpp_output_dir) / 'resnet' # Define it based on existing patterns - current_cpp_resnet_dir.mkdir(parents=True, exist_ok=True) # Ensure directory exists - - py_layer1_0_block_save_path = current_cpp_resnet_dir / f'sample_{i}_layer1_0_block_output.pt' - torch.save(py_layer1_0_block_out_tensor.cpu(), str(py_layer1_0_block_save_path)) - # print(f"DEBUG: Saved Python layer1[0] block output for sample {i} to {py_layer1_0_block_save_path}") - except Exception as e_block: - print(f"ERROR: Failed to get/save Python layer1[0] block output for sample {i}: {e_block}") - - # Shortcut for layer1.0 (if exists) - if hasattr(py_model_resnet, 'layer1') and len(py_model_resnet.layer1) > 0 and \ - hasattr(py_model_resnet.layer1[0], 'downsample') and py_model_resnet.layer1[0].downsample is not None: - py_layer1_0_shortcut_out = py_model_resnet.layer1[0].downsample(x_for_py_layer1_input.clone()) - - # Get full backbone outputs using the wrapper (which uses the raw image_tensor and preprocesses internally) - # This ensures layer1, layer2, etc., are from the standard path. - if self.python_wrapper: - py_backbone_outputs = self.python_wrapper.extract_backbone(py_image_tensor) # py_image_tensor is raw - else: - print("ERROR: self.python_wrapper is None, cannot extract backbone features for ResNet outputs.") - py_backbone_outputs = {} - - py_layer1_out = py_backbone_outputs.get('layer1') - py_layer2_out = py_backbone_outputs.get('layer2') - py_layer3_out = py_backbone_outputs.get('layer3') - py_layer4_out = py_backbone_outputs.get('layer4') - py_features_out = py_backbone_outputs.get('layer4') # Typically layer4 is the final feature map - else: - print("ERROR: Python ResNet model not found in self.models") - except Exception as e: - print(f"ERROR: Python ResNet backbone/shortcut processing failed for sample {i}: {e}") - else: - print(f"Warning: Skipping Python ResNet for sample {i}, image input not found at {py_image_input_path}") + cpp_tensor = self.load_cpp_tensor(cpp_tensor_path, self.device) + # <<< START ADDED DEBUG PRINTS >>> + loaded_status = "None" + if cpp_tensor is not None: + loaded_status = f"Tensor with shape {cpp_tensor.shape}, dtype {cpp_tensor.dtype}, device {cpp_tensor.device}" + print(f"DEBUG RN_CMP: Loaded C++ tensor for '{output_key}' (sample {sample_idx}): {loaded_status}") + # <<< END ADDED DEBUG PRINTS >>> + + if cpp_tensor is None: + print(f"Warning: C++ tensor {cpp_output_filename} for sample {sample_idx} ('{output_key}') is None or loading failed. Skipping comparison for this output.") + # _compare_tensor_data will be called with cpp_tensor=None, which handles NaN population + # Fall through to _compare_tensor_data to record NaNs + # continue # This would skip the _compare_tensor_data call entirely + + # Get the corresponding Python tensor + python_tensor = None + python_output_save_path = os.path.join(python_resnet_save_dir, f"sample_{sample_idx}", cpp_output_filename) # Save with same name as C++ for consistency + + # Map the 'output_key' from config to the key used in 'python_outputs' dictionary + # This requires knowing how 'outputs_to_compare' keys map to Python model output dict keys. + # Example: 'Conv1' maps to 'conv1_output', 'Features' to 'features' (which might be 'layer4'), etc. + + py_dict_key = None + if output_key == 'Conv1': + py_dict_key = 'conv1_pre_bn' # Python ResNet outputs combined conv1+bn1+relu as 'conv1' + elif output_key == 'Debug ResNet Conv1->BN1 Input': + py_dict_key = 'conv1_pre_bn' # Our new specific output layer + elif output_key == 'BN1': + py_dict_key = 'bn1_output' # CHANGED to use the new hook + elif output_key == 'BN1 Centered X': + py_dict_key = 'bn1_centered_x_py' + elif output_key == 'BN1 Var+Eps': + py_dict_key = 'bn1_variance_plus_eps_py' + elif output_key == 'BN1 InvStd': + py_dict_key = 'bn1_inv_std_py' + elif output_key == 'BN1 Normalized X': + py_dict_key = 'bn1_normalized_x_py' + elif output_key == 'ReLU1': + py_dict_key = 'bn1_post_relu_pre' # Output of Python's BN1 + ReLU + elif output_key == 'MaxPool': + # MaxPool is applied *after* 'conv1' (conv1+bn1+relu) block in Python ResNet. + # However, the Python ResNet forward doesn't have a separate 'maxpool' output key. + # The output of layer1 is *after* maxpool. + # C++ saves maxpool_output.pt *before* layer1. + # This means we need to save python_outputs['conv1'] (after conv1,bn1,relu) then apply maxpool to it manually for comparison. + # OR, recognize that C++ output for maxpool is input to layer1. + # For now, this is tricky. Let's see if layer1 input in C++ matches python maxpool output. + # The Python output named 'layer1' is after the nn.Sequential that IS layer1. + # The input to C++ layer1 is the output of C++ maxpool. + # The input to Python model.layer1 is the output of model.maxpool(model.relu(model.bn1(model.conv1(x)))). + # So, Python's 'conv1' output, when passed through an nn.MaxPool2d, should match C++ 'maxpool_output.pt'. + print(f"Warning: Direct Python equivalent for C++ 'MaxPool' output is complex. Requires manual maxpool application to Python's 'conv1' output. Skipping {output_key} for now.") + continue # Skip this key for now + elif output_key == 'Layer1': py_dict_key = 'layer1' + elif output_key == 'Layer2': py_dict_key = 'layer2' + elif output_key == 'Layer3': py_dict_key = 'layer3' + elif output_key == 'Layer4': py_dict_key = 'layer4' + elif output_key == 'Features': py_dict_key = 'layer4' # 'Features' is an alias for 'layer4' + elif output_key == 'Layer1.0 Shortcut': + # Shortcut outputs are not available from the Python ResNet forward method. + print(f"Warning: Shortcut output '{output_key}' cannot be directly fetched from Python ResNet. Skipping.") + continue + else: + print(f"Warning: Unknown output_key '{output_key}' in ResNet config for Python tensor mapping. Skipping.") + continue - # Load C++ ResNet outputs + if py_dict_key and py_dict_key in python_outputs: + python_tensor = python_outputs[py_dict_key] + else: + # DEBUG: Print info if key is not found + print(f"DEBUG RN_CMP: py_dict_key '{py_dict_key}' not found in python_outputs (keys: {list(python_outputs.keys())}) for output_key '{output_key}', sample {sample_idx}") + + if python_tensor is None: + print(f"Warning: Python tensor for {output_key} is None for sample {sample_idx}. Skipping.") + continue - # NEW: Debug directory listing - print(f"DEBUG: Listing contents of {cpp_output_resnet_dir_path} before loading tensors for sample {i}:") - try: - if cpp_output_resnet_dir_path.exists() and cpp_output_resnet_dir_path.is_dir(): - for item_path in cpp_output_resnet_dir_path.iterdir(): - print(f" - {item_path.name}") - else: - print(f" Directory {cpp_output_resnet_dir_path} does not exist or is not a directory.") - except Exception as e_list: - print(f" ERROR listing directory: {e_list}") - # END NEW # Removing this marker - - time.sleep(0.5) # INCREASED to 0.5s delay to allow filesystem to sync - - # Debug blocks for directory listing and direct open test were here and are now fully removed. - - cpp_layer1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer1.pt') - cpp_layer2_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer2.pt') - cpp_layer3_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer3.pt') - cpp_layer4_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer4.pt') - cpp_features_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_features.pt') - cpp_layer1_0_shortcut_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer1_0_shortcut_output.pt') - cpp_maxpool_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_maxpool_output.pt') - cpp_conv1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_conv1_output.pt') # ADDED - cpp_bn1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_bn1_output.pt') # ADDED - cpp_relu1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_relu1_output.pt') # ADDED - cpp_layer1_0_block_output_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer1_0_block_output.pt') # ADDED - - cpp_layer1_out = self.load_cpp_tensor(cpp_layer1_path, self.device) - cpp_layer2_out = self.load_cpp_tensor(cpp_layer2_path, self.device) - cpp_layer3_out = self.load_cpp_tensor(cpp_layer3_path, self.device) - cpp_layer4_out = self.load_cpp_tensor(cpp_layer4_path, self.device) - cpp_features_out = self.load_cpp_tensor(cpp_features_path, self.device) - cpp_layer1_0_shortcut_out = self.load_cpp_tensor(cpp_layer1_0_shortcut_path, self.device) - cpp_maxpool_out = self.load_cpp_tensor(cpp_maxpool_path, self.device) - cpp_conv1_out = self.load_cpp_tensor(cpp_conv1_path, self.device) # ADDED - cpp_bn1_out = self.load_cpp_tensor(cpp_bn1_path, self.device) # ADDED - cpp_relu1_out = self.load_cpp_tensor(cpp_relu1_path, self.device) # ADDED - cpp_layer1_0_block_output_tensor = self.load_cpp_tensor(cpp_layer1_0_block_output_path, self.device) # ADDED - - # Load the Python pre-BN conv1 output that was saved earlier - py_conv1_out_pre_bn_tensor = None - # Ensure self.py_resnet_output_dir is defined (it should be if the save operation worked) - if hasattr(self, 'py_resnet_output_dir') and self.py_resnet_output_dir: - py_conv1_out_pre_bn_path = self.py_resnet_output_dir / f'sample_{i}_conv1_output_py.pt' - if py_conv1_out_pre_bn_path.exists(): - try: - py_conv1_out_pre_bn_tensor = torch.load(str(py_conv1_out_pre_bn_path), map_location=self.device) - except Exception as e_load_py_conv1: - print(f"Error loading Python conv1_output_py (pre-BN) for sample {i}: {e_load_py_conv1}") - else: - print(f"Warning: self.py_resnet_output_dir not defined, cannot load py_conv1_output_py.pt for sample {i}") + # Save the Python tensor (always, for record-keeping) + os.makedirs(os.path.dirname(python_output_save_path), exist_ok=True) + torch.save(python_tensor.cpu(), python_output_save_path) + # print(f"Saved Python tensor for {output_key} (sample {sample_idx}) to {python_output_save_path}") - # Comparisons - self._compare_tensor_data(py_conv1_out_pre_bn_tensor, cpp_conv1_out, "ResNet Conv1 Output (Pre-BN)", i, current_errors) - self._compare_tensor_data(py_conv1_out, cpp_conv1_out, "ResNet Conv1", i, current_errors) - self._compare_tensor_data(py_bn1_out, cpp_bn1_out, "ResNet BN1", i, current_errors) - self._compare_tensor_data(py_relu1_out, cpp_relu1_out, "ResNet ReLU1", i, current_errors) - self._compare_tensor_data(py_maxpool_out, cpp_maxpool_out, "ResNet MaxPool", i, current_errors) - self._compare_tensor_data(py_layer1_out, cpp_layer1_out, "ResNet Layer1", i, current_errors) - self._compare_tensor_data(py_layer2_out, cpp_layer2_out, "ResNet Layer2", i, current_errors) - self._compare_tensor_data(py_layer3_out, cpp_layer3_out, "ResNet Layer3", i, current_errors) - self._compare_tensor_data(py_layer4_out, cpp_layer4_out, "ResNet Layer4", i, current_errors) - self._compare_tensor_data(py_features_out, cpp_features_out, "ResNet Features", i, current_errors) - self._compare_tensor_data(py_layer1_0_shortcut_out, cpp_layer1_0_shortcut_out, "ResNet Layer1.0 Shortcut", i, current_errors) - - if current_errors: self.all_comparison_stats[f"ResNet_Sample_{i}"] = current_errors + + # Perform comparison + self._compare_tensor_data(python_tensor.to(self.device) if python_tensor is not None else None, + cpp_tensor, + output_key, + sample_idx, + current_errors) # current_errors is populated in place + + # The line above was changed to handle python_tensor being None before .to(self.device) + # current_errors is populated by _compare_tensor_data directly. + # self.all_comparison_stats is updated after this inner loop completes for the sample. + + except FileNotFoundError: + print(f"Warning: C++ output file not found: {cpp_tensor_path}. Skipping for sample {sample_idx}, output {output_key}.") + # Populate NaNs for this missing C++ file + self._compare_tensor_data(None, None, output_key, sample_idx, current_errors) + except Exception as e: + print(f"Error comparing {output_key} for sample {sample_idx}: {e}") + import traceback + traceback.print_exc() + # Populate NaNs on error + self._compare_tensor_data(None, None, output_key, sample_idx, current_errors) + + # After processing all output_keys for this sample, store the collected current_errors + if current_errors: # If any comparisons were attempted (even if they resulted in NaNs) + self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = current_errors + + # processed_samples += 1 # This variable is no longer used as loop is range-based + print("--- ResNet Output Comparison Complete ---") def generate_html_report(self): print("\nGenerating HTML report...") @@ -1200,73 +1330,247 @@ class ComparisonRunner: print("Preprocessed input comparison: ISSUES FOUND (details above).") def load_cpp_tensor(self, file_path_str, device, is_image=False): - file_path_obj = Path(file_path_str) # Convert to Path object early - - # Removed debug print: print(f"DEBUG: load_cpp_tensor: Checking existence of Path object: '{file_path_obj}' (from string '{file_path_str}')") - - if not file_path_obj.exists(): # Use Path object for exists check - print(f"ERROR: C++ tensor file not found (Path.exists check): {file_path_obj}") + if not os.path.exists(file_path_str): return None + + attempt_jit_extraction = False + loaded_object_from_direct_load = None + try: - # Try loading as a JIT ScriptModule first (common for exported tensors that might have attributes) - # This also handles plain tensors saved with torch.save if they are not ScriptModules - loaded_obj = torch.jit.load(str(file_path_obj), map_location=device) # Convert Path to str for torch.jit.load - actual_tensor = None - - if isinstance(loaded_obj, torch.jit.ScriptModule): - # Attempt to get tensor attribute directly, common for simple JIT-saved tensors - # Check for common weight/tensor attributes first - if hasattr(loaded_obj, 'tensor'): # Explicit "tensor" attribute - if isinstance(loaded_obj.tensor, torch.Tensor): - actual_tensor = loaded_obj.tensor - elif hasattr(loaded_obj, 'weight') and isinstance(loaded_obj.weight, torch.Tensor): # Common for conv/linear - actual_tensor = loaded_obj.weight - # Heuristic: if it has parameters and only one, assume that's the one. - elif len(list(loaded_obj.parameters())) == 1: - actual_tensor = list(loaded_obj.parameters())[0] - # Heuristic: if it has attributes that are tensors, try to find the primary one - else: - tensor_attrs = [getattr(loaded_obj, attr) for attr in dir(loaded_obj) if isinstance(getattr(loaded_obj, attr, None), torch.Tensor)] - if len(tensor_attrs) == 1: - actual_tensor = tensor_attrs[0] - elif len(tensor_attrs) > 1: - # If multiple tensor attributes, try to find one that matches common patterns or is simply 'output' - if hasattr(loaded_obj, 'output') and isinstance(loaded_obj.output, torch.Tensor): - actual_tensor = loaded_obj.output - else: # Heuristic: take the largest tensor if multiple exist and no clear primary one - actual_tensor = max(tensor_attrs, key=lambda t: t.numel()) - # print(f"WARNING: Multiple tensor attributes in ScriptModule from {file_path_obj}, using largest: {actual_tensor.shape}") - - if actual_tensor is None: - print(f"ERROR: C++ tensor from {file_path_obj} is a ScriptModule, but couldn't extract a single tensor. StateDict keys: {list(loaded_obj.state_dict().keys()) if hasattr(loaded_obj, 'state_dict') else 'N/A'}") - return None + # Attempt direct load first + loaded_object_from_direct_load = torch.load(file_path_str, map_location=device, weights_only=False) + + if isinstance(loaded_object_from_direct_load, torch.Tensor): + return loaded_object_from_direct_load.to(device) # Successfully loaded a tensor directly + else: + # Loaded something, but it's not a tensor. It's likely a JIT module. + attempt_jit_extraction = True + print(f"INFO: Initial torch.load of {file_path_str} yielded a non-Tensor (type: {type(loaded_object_from_direct_load)}). Will attempt JIT extraction.") + + except Exception as e_initial_load: + # Initial load failed (e.g., it's a JIT module not readable by plain torch.load, or other error) + attempt_jit_extraction = True + print(f"INFO: Initial torch.load failed for {file_path_str}: {e_initial_load}. Will attempt JIT extraction.") + + # Common JIT tensor extraction logic + def extract_tensor_from_jit_module(module_path, jit_loaded_obj, dev): + print(f"DEBUG JIT EXTRACTION: For {module_path}, loaded_obj type: {type(jit_loaded_obj)}") + print(f"DEBUG JIT EXTRACTION: str(loaded_obj): {str(jit_loaded_obj)}") + # print(f"DEBUG JIT EXTRACTION: dir(loaded_obj): {dir(jit_loaded_obj)}") # Verbose - elif isinstance(loaded_obj, torch.Tensor): - actual_tensor = loaded_obj + extracted_tensor = None + + # 1. Try calling if 'forward' method exists + if hasattr(jit_loaded_obj, 'forward') and callable(getattr(jit_loaded_obj, 'forward')): + print(f"DEBUG JIT EXTRACTION: Attempting jit_loaded_obj.forward()") + try: + extracted_tensor = jit_loaded_obj.forward() + if not isinstance(extracted_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: jit_loaded_obj.forward() did not return a tensor, got {type(extracted_tensor)}. Trying with dummy input.") + extracted_tensor = None # Reset before trying with dummy + try: + print(f"DEBUG JIT EXTRACTION: Attempting jit_loaded_obj.forward(torch.empty(0))") + extracted_tensor = jit_loaded_obj.forward(torch.empty(0, device=dev)) + if not isinstance(extracted_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: jit_loaded_obj.forward(dummy) also did not return a tensor, got {type(extracted_tensor)}") + extracted_tensor = None + except Exception as e_fwd_dummy: + print(f"DEBUG JIT EXTRACTION: Error calling jit_loaded_obj.forward(dummy): {e_fwd_dummy}") + extracted_tensor = None + except Exception as e_fwd: # This covers cases where forward exists but call fails (e.g. signature mismatch) + print(f"DEBUG JIT EXTRACTION: Error calling jit_loaded_obj.forward(): {e_fwd}. Trying with dummy input as fallback.") + extracted_tensor = None # Reset + try: + print(f"DEBUG JIT EXTRACTION: Attempting jit_loaded_obj.forward(torch.empty(0)) after error.") + extracted_tensor = jit_loaded_obj.forward(torch.empty(0, device=dev)) + if not isinstance(extracted_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: jit_loaded_obj.forward(dummy) after error also did not return a tensor, got {type(extracted_tensor)}") + extracted_tensor = None + except Exception as e_fwd_dummy_after_error: + print(f"DEBUG JIT EXTRACTION: Error calling jit_loaded_obj.forward(dummy) after initial fwd error: {e_fwd_dummy_after_error}") + extracted_tensor = None + + # 1b. Try calling the module directly if forward attribute exists (covers some cases) + # This is after trying explicit .forward() as direct call might have side effects or different interpretation + if extracted_tensor is None and callable(jit_loaded_obj) and hasattr(jit_loaded_obj, 'forward'): + print(f"DEBUG JIT EXTRACTION: Attempting callable jit_loaded_obj()") + try: + extracted_tensor = jit_loaded_obj() + if not isinstance(extracted_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: callable jit_loaded_obj() did not return a tensor, got {type(extracted_tensor)}") + extracted_tensor = None + except Exception as e_call_obj: + print(f"DEBUG JIT EXTRACTION: Error calling callable jit_loaded_obj() (it had a forward attr): {e_call_obj}") + extracted_tensor = None + + + # 2. Check if 'forward' attribute *itself* is a tensor + if extracted_tensor is None and hasattr(jit_loaded_obj, 'forward') and isinstance(getattr(jit_loaded_obj, 'forward'), torch.Tensor): + print(f"DEBUG JIT EXTRACTION: jit_loaded_obj.forward IS a tensor.") + extracted_tensor = getattr(jit_loaded_obj, 'forward') + + # 3. Look for common direct tensor attributes + if extracted_tensor is None and hasattr(jit_loaded_obj, 'tensor') and isinstance(getattr(jit_loaded_obj, 'tensor'), torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor in jit_loaded_obj.tensor") + extracted_tensor = jit_loaded_obj.tensor + + if extracted_tensor is None and hasattr(jit_loaded_obj, 'data') and isinstance(getattr(jit_loaded_obj, 'data'), torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor in jit_loaded_obj.data") + extracted_tensor = jit_loaded_obj.data + + if extracted_tensor is None and hasattr(jit_loaded_obj, 'tensor_data') and isinstance(getattr(jit_loaded_obj, 'tensor_data'), torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor in jit_loaded_obj.tensor_data") + extracted_tensor = jit_loaded_obj.tensor_data + + # 4. Iterate through named_buffers (common for wrapped tensors) + if extracted_tensor is None: + print(f"DEBUG JIT EXTRACTION: Iterating named_buffers for a tensor...") + try: + for name, buffer_tensor in jit_loaded_obj.named_buffers(): + if isinstance(buffer_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor in named_buffers: {name}") + extracted_tensor = buffer_tensor + break + except Exception as e_buffers: + print(f"DEBUG JIT EXTRACTION: Error iterating named_buffers: {e_buffers}") + + + # 5. Iterate through named_parameters + if extracted_tensor is None: + print(f"DEBUG JIT EXTRACTION: Iterating named_parameters for a tensor...") + try: + for name, param_tensor in jit_loaded_obj.named_parameters(): + if isinstance(param_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor in named_parameters: {name}") + extracted_tensor = param_tensor + break + except Exception as e_params: + print(f"DEBUG JIT EXTRACTION: Error iterating named_parameters: {e_params}") + + # 6. Iterate through members (attributes) using inspect.getmembers - potentially fragile + if extracted_tensor is None: + print(f"DEBUG JIT EXTRACTION: Attempting to iterate members using inspect.getmembers...") + try: + for name, member_obj in inspect.getmembers(jit_loaded_obj): + if isinstance(member_obj, torch.Tensor): + # Avoid re-picking already checked common names if they are somehow also members + if name not in ['tensor', 'data', 'tensor_data', 'forward']: + print(f"DEBUG JIT EXTRACTION: Found tensor in member (inspect.getmembers): {name}") + extracted_tensor = member_obj + break + except RuntimeError as e_inspect: + # Specifically catch RuntimeError that was observed: "Method 'forward' is not defined" + print(f"DEBUG JIT EXTRACTION: inspect.getmembers failed with RuntimeError: {e_inspect}. Skipping this method.") + except Exception as e_inspect_other: + print(f"DEBUG JIT EXTRACTION: inspect.getmembers failed with other Exception: {e_inspect_other}. Skipping this method.") + + # 7. Iterate through named_children and inspect + if extracted_tensor is None: + print(f"DEBUG JIT EXTRACTION: Iterating named_children...") + try: + for child_name, child_module in jit_loaded_obj.named_children(): + print(f"DEBUG JIT EXTRACTION: Inspecting child: {child_name} of type {type(child_module)}") + # Try common ways to get tensor from child + if hasattr(child_module, 'forward') and callable(getattr(child_module, 'forward')) : + try: + temp_tensor = child_module.forward() + if isinstance(temp_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor by calling child {child_name}.forward()") + extracted_tensor = temp_tensor; break + except: pass + + if extracted_tensor is None and callable(child_module) and hasattr(child_module, 'forward'): # Added hasattr forward here + try: + temp_tensor = child_module() + if isinstance(temp_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor by calling child {child_name}()") + extracted_tensor = temp_tensor; break + except: pass + + if extracted_tensor is None and hasattr(child_module, 'forward') and isinstance(getattr(child_module, 'forward'), torch.Tensor): + extracted_tensor = getattr(child_module, 'forward') + print(f"DEBUG JIT EXTRACTION: Found tensor in child {child_name}.forward (as attribute)") + break + + if extracted_tensor is None and hasattr(child_module, 'tensor') and isinstance(getattr(child_module, 'tensor'), torch.Tensor): + extracted_tensor = child_module.tensor + print(f"DEBUG JIT EXTRACTION: Found tensor in child {child_name}.tensor") + break + if extracted_tensor is None and hasattr(child_module, 'data') and isinstance(getattr(child_module, 'data'), torch.Tensor): + extracted_tensor = child_module.data + print(f"DEBUG JIT EXTRACTION: Found tensor in child {child_name}.data") + break + if extracted_tensor is None and hasattr(child_module, 'tensor_data') and isinstance(getattr(child_module, 'tensor_data'), torch.Tensor): + extracted_tensor = child_module.tensor_data + print(f"DEBUG JIT EXTRACTION: Found tensor in child {child_name}.tensor_data") + break + + if extracted_tensor is None: # Check general members of child if direct attributes fail + try: + for name, member_obj in inspect.getmembers(child_module): + if isinstance(member_obj, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Found tensor in member {name} of child {child_name}") + extracted_tensor = member_obj; break + if extracted_tensor is not None: break + except Exception as e_child_inspect: + print(f"DEBUG JIT EXTRACTION: inspect.getmembers on child {child_name} failed: {e_child_inspect}") + + if extracted_tensor is not None: + print(f"DEBUG JIT EXTRACTION: Tensor found in a child module.") + else: + print(f"DEBUG JIT EXTRACTION: Tensor not found in direct children.") + except Exception as e_children: + print(f"DEBUG JIT EXTRACTION: Error iterating named_children: {e_children}") + + + if isinstance(extracted_tensor, torch.Tensor): + print(f"DEBUG JIT EXTRACTION: Successfully extracted tensor of shape {extracted_tensor.shape} from JIT module {module_path}") + return extracted_tensor.to(dev) else: - print(f"ERROR: C++ tensor loaded from {file_path_obj} with torch.jit.load is not a Tensor or ScriptModule. Type: {type(loaded_obj)}") + print(f"Warning: JIT EXTRACTION: Could not extract tensor from JIT module: {module_path}. Final extracted_type: {type(extracted_tensor)}. THIS FILE WILL BE SKIPPED.") return None + + if attempt_jit_extraction: + # If primary_jit_load_needed was true, loaded_object_from_direct_load might be the JIT module already. + # Otherwise, we need to load it with torch.jit.load. + # The critical part is that C++ outputs are *always* JIT modules now if not raw tensors. - tensor = actual_tensor.to(device).float() # Ensure tensor is on the correct device and float - return tensor - except Exception as e: - # If torch.jit.load fails (e.g. it's a plain tensor not loadable by JIT), try torch.load - # This also catches errors from the processing above if actual_tensor remains None - # print(f"INFO: torch.jit.load failed for {file_path_obj} ({e}), attempting torch.load as fallback.") - try: - tensor = torch.load(str(file_path_obj), map_location=device) # Convert Path to str for torch.load - if not isinstance(tensor, torch.Tensor): - print(f"ERROR: Fallback torch.load for {file_path_obj} did not return a tensor. Type: {type(tensor)}") + jit_module_to_process = None + if loaded_object_from_direct_load is not None and not isinstance(loaded_object_from_direct_load, torch.Tensor): + # This means torch.load succeeded but returned a JIT module directly + # (common for files saved with torch.jit.save that are actually modules) + print(f"DEBUG JIT: Using object from initial torch.load (type: {type(loaded_object_from_direct_load)}) for JIT extraction for {file_path_str}.") + jit_module_to_process = loaded_object_from_direct_load + else: + # This means initial torch.load either failed OR it was an image and returned a JIT module (handled above), + # OR it was not an image and returned a tensor (already returned). + # So, if we are here, it means torch.load failed, or we need to fresh load as JIT. + try: + print(f"DEBUG JIT: Attempting torch.jit.load for {file_path_str} as fallback/primary JIT path.") + jit_module_to_process = torch.jit.load(file_path_str, map_location=device) + except Exception as e_jit_load_explicit: + print(f"Error: torch.jit.load also failed for {file_path_str}: {e_jit_load_explicit}. Traceback: {traceback.format_exc()}. SKIPPING.") return None - return tensor.to(device).float() # Ensure tensor is on the correct device and float - except Exception as e2: - print(f"ERROR: Failed to load C++ tensor from {file_path_obj}. JIT load error: {e}. Torch load error: {e2}") - import traceback - traceback.print_exc() + + if jit_module_to_process is not None: + final_tensor = extract_tensor_from_jit_module(file_path_str, jit_module_to_process, device) + if final_tensor is not None: + return final_tensor + else: + print(f"Warning: JIT extraction path for {file_path_str} (using {type(jit_module_to_process)}) failed to extract tensor. SKIPPING file.") + return None + else: + # This case should be rare if torch.jit.load was attempted and failed, as it would have returned None above. + print(f"Warning: jit_module_to_process is None for {file_path_str} before calling extraction. SKIPPING file.") return None - def _compare_tensor_data(self, tensor1, tensor2, name, sample_idx, current_errors): - """Compare two tensors and return error metrics.""" + # If we reach here, it means initial torch.load returned a tensor (and it was returned), + # or all attempts to load and extract have failed. + print(f"Warning: load_cpp_tensor is returning None for {file_path_str} after all attempts. This indicates an issue with file content or loading logic for this specific file type when is_image={is_image}.") + return None + + def _compare_tensor_data(self, tensor1, tensor2, name, sample_idx, current_errors_dict_to_populate): + """Compare two tensors and return error metrics. Modifies current_errors_dict_to_populate in place.""" num_metrics = 11 # mae, max_err, diff_arr, mean_py_val, std_abs_err, l2_py, l2_cpp, l2_diff, cos_sim, pearson, mre nan_metrics_tuple = ( float('nan'), float('nan'), [], float('nan'), float('nan'), # Original 5 @@ -1280,27 +1584,26 @@ class ComparisonRunner: t1_cpu_temp = tensor1.cpu().detach().numpy().astype(np.float32) py_mean = np.mean(t1_cpu_temp) py_l2 = np.linalg.norm(t1_cpu_temp.flatten()) - # If only tensor2 is None, we can't calculate C++ l2 or comparison metrics - # If only tensor1 is None, py_mean and py_l2 remain NaN. - current_errors[name] = ( + # Populate current_errors_dict_to_populate directly + current_errors_dict_to_populate[name] = ( float('nan'), float('nan'), [], py_mean, float('nan'), py_l2, float('nan'), float('nan'), float('nan'), float('nan'), float('nan') ) print(f"Warning: Cannot compare '{name}' for sample {sample_idx}, one or both tensors are None.") - return + return # Return None as the function modifies dict in place t1_cpu = tensor1.cpu().detach().numpy().astype(np.float32) t2_cpu = tensor2.cpu().detach().numpy().astype(np.float32) if t1_cpu.shape != t2_cpu.shape: print(f"Warning: Shape mismatch for '{name}' sample {sample_idx}. Py: {t1_cpu.shape}, Cpp: {t2_cpu.shape}. Skipping most comparisons.") - current_errors[name] = ( + current_errors_dict_to_populate[name] = ( float('nan'), float('nan'), [], np.mean(t1_cpu), float('nan'), # MAE, MaxErr, diff_arr, MeanPy, StdAbsErr np.linalg.norm(t1_cpu.flatten()), np.linalg.norm(t2_cpu.flatten()), float('nan'), # L2Py, L2Cpp, L2Diff float('nan'), float('nan'), float('nan') # CosSim, Pearson, MRE ) - return + return # Return None # All calculations from here assume shapes match and tensors are not None t1_flat = t1_cpu.flatten() @@ -1355,7 +1658,8 @@ class ComparisonRunner: # Using (abs(t1_cpu) + epsilon) in denominator handles this. mean_rel_err = np.mean(abs_diff_elements / (np.abs(t1_cpu) + epsilon_rel_err)) - current_errors[name] = ( + # Populate current_errors_dict_to_populate directly + current_errors_dict_to_populate[name] = ( mae, max_err, diff_arr_for_hist, mean_py_val, std_abs_err, l2_norm_py, l2_norm_cpp, l2_norm_diff, cosine_sim, pearson_corr, mean_rel_err ) diff --git a/test/test_models.cpp b/test/test_models.cpp index 912167c..9415cfc 100644 --- a/test/test_models.cpp +++ b/test/test_models.cpp @@ -127,7 +127,10 @@ int main(int argc, char* argv[]) { std::vector output_layers_resnet = { "conv1_output", "bn1_output", "relu1_output", "maxpool_output", "layer1", "layer2", "layer3", "layer4", "features", - "layer1_0_shortcut_output", "layer1_0_block_output" + "layer1_0_shortcut_output", "layer1_0_block_output", + "debug_resnet_conv1_output_for_bn1_input", + // New BN1 intermediate outputs + "bn1_centered_x", "bn1_variance_plus_eps", "bn1_inv_std", "bn1_normalized_x" }; resnet_model_opt.emplace(cimp::resnet::resnet50(resnet_weights_path, output_layers_resnet, device)); (*resnet_model_opt)->to(device); @@ -291,6 +294,14 @@ int main(int argc, char* argv[]) { } else { std::cerr << " Skipping BBRegressor predict_iou for sample " << i << " (iou_feats or mod_vectors empty)." << std::endl; } + + // Save debug intermediate outputs + torch::Tensor cpp_conv3_1t_out = (*bb_regressor_model_opt_wrapped).debug_get_conv3_1t_output(resnet_outputs["layer2"].clone()); + save_tensor_to_file(cpp_conv3_1t_out, (bb_reg_out_dir / (sample_suffix + "_debug_conv3_1t_output.pt")).string()); + + torch::Tensor cpp_conv4_1t_out = (*bb_regressor_model_opt_wrapped).debug_get_conv4_1t_output(resnet_outputs["layer3"].clone()); + save_tensor_to_file(cpp_conv4_1t_out, (bb_reg_out_dir / (sample_suffix + "_debug_conv4_1t_output.pt")).string()); + std::cout << "BBRegressor processing done for sample " << i << std::endl; } catch (const std::exception& e) { std::cerr << "Error during BBRegressor processing for sample " << i << ": " << e.what() << std::endl;