Browse Source

Fix ResNet BatchNorm buffer loading and enhance BN1 debugging. Corrected the loading of the BatchNorm running_mean, running_var, and num_batches_tracked buffers in the C++ ResNet BottleneckImpl and ResNetImpl to use direct member assignment instead of assignment through named_buffers(). (named_buffers() returns a copy of the module's buffer dictionary, so assigning into it rebinds only the temporary copy and leaves the module's actual buffers at their default values.) This resolved the discrepancies with Python's BatchNorm behavior. Added detailed intermediate output saving for bn1 in both the C++ ResNet and the Python comparison script to facilitate debugging. Ensured that the Python comparison script correctly loads and compares these new ResNet intermediate tensors. Together, these changes led to numerical equivalence between Python and C++ for the ResNet conv1 and bn1 outputs, and subsequently for the layer1-4 outputs.

resnet
mht 2 months ago
parent
commit
c0e5aa7d0a
  1. 161
      cimp/resnet/resnet.cpp
  2. 854
      test/compare_models.py
  3. 13
      test/test_models.cpp

161
cimp/resnet/resnet.cpp

@ -71,9 +71,9 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir,
conv1->weight = load_named_tensor(base_weights_dir, block_param_prefix + "conv1.weight", device);
bn1->weight = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.weight", device);
bn1->bias = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.bias", device);
bn1->named_buffers()["running_mean"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_mean", device);
bn1->named_buffers()["running_var"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_var", device);
bn1->named_buffers()["num_batches_tracked"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.num_batches_tracked", device);
bn1->running_mean = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_mean", device);
bn1->running_var = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.running_var", device);
bn1->num_batches_tracked = load_named_tensor(base_weights_dir, block_param_prefix + "bn1.num_batches_tracked", device);
register_module("conv1", conv1);
register_module("bn1", bn1);
@ -83,9 +83,9 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir,
conv2->weight = load_named_tensor(base_weights_dir, block_param_prefix + "conv2.weight", device);
bn2->weight = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.weight", device);
bn2->bias = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.bias", device);
bn2->named_buffers()["running_mean"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_mean", device);
bn2->named_buffers()["running_var"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_var", device);
bn2->named_buffers()["num_batches_tracked"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.num_batches_tracked", device);
bn2->running_mean = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_mean", device);
bn2->running_var = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.running_var", device);
bn2->num_batches_tracked = load_named_tensor(base_weights_dir, block_param_prefix + "bn2.num_batches_tracked", device);
register_module("conv2", conv2);
register_module("bn2", bn2);
@ -95,9 +95,9 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir,
conv3->weight = load_named_tensor(base_weights_dir, block_param_prefix + "conv3.weight", device);
bn3->weight = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.weight", device);
bn3->bias = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.bias", device);
bn3->named_buffers()["running_mean"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_mean", device);
bn3->named_buffers()["running_var"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_var", device);
bn3->named_buffers()["num_batches_tracked"] = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.num_batches_tracked", device);
bn3->running_mean = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_mean", device);
bn3->running_var = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.running_var", device);
bn3->num_batches_tracked = load_named_tensor(base_weights_dir, block_param_prefix + "bn3.num_batches_tracked", device);
register_module("conv3", conv3);
register_module("bn3", bn3);
@ -118,17 +118,85 @@ BottleneckImpl::BottleneckImpl(const std::string& base_weights_dir,
// Forward method implementation for BottleneckImpl
torch::Tensor BottleneckImpl::forward(torch::Tensor x) {
torch::Tensor identity = x;
torch::ScalarType original_dtype = x.scalar_type();
// conv1 -> bn1 -> relu
x = conv1->forward(x);
x = bn1->forward(x);
if (!this->is_training() && bn1) {
const auto& bn_module = *bn1;
torch::Tensor input_double = x.to(torch::kFloat64);
torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor();
torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor();
torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64);
torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64);
double eps_double = bn_module.options.eps();
auto c = x.size(1);
running_mean_double = running_mean_double.reshape({1, c, 1, 1});
running_var_double = running_var_double.reshape({1, c, 1, 1});
if (weight_double.defined()) weight_double = weight_double.reshape({1, c, 1, 1});
if (bias_double.defined()) bias_double = bias_double.reshape({1, c, 1, 1});
torch::Tensor out_double = (input_double - running_mean_double) / (torch::sqrt(running_var_double + eps_double));
if (weight_double.defined()) out_double = out_double * weight_double;
if (bias_double.defined()) out_double = out_double + bias_double;
x = out_double.to(original_dtype);
} else if (bn1) {
x = bn1->forward(x);
}
x = relu->forward(x);
// conv2 -> bn2 -> relu
x = conv2->forward(x);
x = bn2->forward(x);
if (!this->is_training() && bn2) {
const auto& bn_module = *bn2;
torch::Tensor input_double = x.to(torch::kFloat64);
torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor();
torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor();
torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64);
torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64);
double eps_double = bn_module.options.eps();
auto c = x.size(1);
running_mean_double = running_mean_double.reshape({1, c, 1, 1});
running_var_double = running_var_double.reshape({1, c, 1, 1});
if (weight_double.defined()) weight_double = weight_double.reshape({1, c, 1, 1});
if (bias_double.defined()) bias_double = bias_double.reshape({1, c, 1, 1});
torch::Tensor out_double = (input_double - running_mean_double) / (torch::sqrt(running_var_double + eps_double));
if (weight_double.defined()) out_double = out_double * weight_double;
if (bias_double.defined()) out_double = out_double + bias_double;
x = out_double.to(original_dtype);
} else if (bn2) {
x = bn2->forward(x);
}
x = relu->forward(x);
// conv3 -> bn3
x = conv3->forward(x);
x = bn3->forward(x);
if (!this->is_training() && bn3) {
const auto& bn_module = *bn3;
torch::Tensor input_double = x.to(torch::kFloat64);
torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor();
torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor();
torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64);
torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64);
double eps_double = bn_module.options.eps();
auto c = x.size(1);
running_mean_double = running_mean_double.reshape({1, c, 1, 1});
running_var_double = running_var_double.reshape({1, c, 1, 1});
if (weight_double.defined()) weight_double = weight_double.reshape({1, c, 1, 1});
if (bias_double.defined()) bias_double = bias_double.reshape({1, c, 1, 1});
torch::Tensor out_double = (input_double - running_mean_double) / (torch::sqrt(running_var_double + eps_double));
if (weight_double.defined()) out_double = out_double * weight_double;
if (bias_double.defined()) out_double = out_double + bias_double;
x = out_double.to(original_dtype);
} else if (bn3) {
x = bn3->forward(x);
}
if (this->projection_shortcut) {
identity = this->projection_shortcut->forward(identity);
@ -150,18 +218,16 @@ ResNetImpl::ResNetImpl(const std::string& base_weights_dir_path,
conv1 = torch::nn::Conv2d(torch::nn::Conv2dOptions(3, 64, 7).stride(2).padding(3).bias(false));
bn1 = torch::nn::BatchNorm2d(torch::nn::BatchNorm2dOptions(64).eps(static_cast<float>(1e-5)).momentum(0.1).affine(true).track_running_stats(true));
this->conv1->weight = load_named_tensor(this->_base_weights_dir, "conv1.weight", device);
// Directly assign to the public member tensors of the bn1 module
this->bn1->weight = load_named_tensor(this->_base_weights_dir, "bn1.weight", device);
this->bn1->bias = load_named_tensor(this->_base_weights_dir, "bn1.bias", device);
this->bn1->named_buffers()["running_mean"] = load_named_tensor(this->_base_weights_dir, "bn1.running_mean", device);
this->bn1->named_buffers()["running_var"] = load_named_tensor(this->_base_weights_dir, "bn1.running_var", device);
this->bn1->named_buffers()["num_batches_tracked"] = load_named_tensor(this->_base_weights_dir, "bn1.num_batches_tracked", device);
register_module("conv1", conv1);
register_module("bn1", bn1);
this->bn1->running_mean = load_named_tensor(this->_base_weights_dir, "bn1.running_mean", device);
this->bn1->running_var = load_named_tensor(this->_base_weights_dir, "bn1.running_var", device);
this->bn1->num_batches_tracked = load_named_tensor(this->_base_weights_dir, "bn1.num_batches_tracked", device);
std::cout << "CPP ResNetImpl::bn1 running_mean sum: " << std::fixed << std::setprecision(10) << this->bn1->running_mean.sum().item<double>() << std::endl;
std::cout << "CPP ResNetImpl::bn1 running_var sum: " << std::fixed << std::setprecision(10) << this->bn1->running_var.sum().item<double>() << std::endl;
register_module("conv1", conv1);
register_module("bn1", bn1); // bn1 is already populated correctly
relu = torch::nn::ReLU(torch::nn::ReLUOptions().inplace(true));
maxpool = torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions(3).stride(2).padding(1));
@ -195,9 +261,9 @@ torch::nn::Sequential ResNetImpl::_make_layer(int64_t planes_for_block, int64_t
conv_down->weight = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "0.weight", device);
bn_down->weight = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.weight", device);
bn_down->bias = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.bias", device);
bn_down->named_buffers()["running_mean"] = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_mean", device);
bn_down->named_buffers()["running_var"] = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_var", device);
bn_down->named_buffers()["num_batches_tracked"] = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.num_batches_tracked", device);
bn_down->running_mean = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_mean", device);
bn_down->running_var = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.running_var", device);
bn_down->num_batches_tracked = load_named_tensor(this->_base_weights_dir, ds_block_prefix + "1.num_batches_tracked", device);
ds_seq->push_back(conv_down);
ds_seq->push_back(bn_down);
@ -229,9 +295,50 @@ std::map<std::string, torch::Tensor> ResNetImpl::forward(torch::Tensor x) {
};
x = conv1->forward(x);
if (should_output("conv1_output")) outputs["conv1_output"] = x;
x = bn1->forward(x);
if (should_output("conv1_output")) outputs["conv1_output"] = x;
if (should_output("debug_resnet_conv1_output_for_bn1_input")) {
outputs["debug_resnet_conv1_output_for_bn1_input"] = x.clone();
}
torch::ScalarType original_dtype_resnet_bn1 = x.scalar_type();
// Apply bn1
if (!this->is_training() && bn1) {
const auto& bn_module = *bn1;
torch::Tensor input_double = x.to(torch::kFloat64);
torch::Tensor weight_double = bn_module.weight.defined() ? bn_module.weight.to(torch::kFloat64) : torch::Tensor();
torch::Tensor bias_double = bn_module.bias.defined() ? bn_module.bias.to(torch::kFloat64) : torch::Tensor();
torch::Tensor running_mean_double = bn_module.running_mean.to(torch::kFloat64);
torch::Tensor running_var_double = bn_module.running_var.to(torch::kFloat64);
double eps_double = bn_module.options.eps();
auto c = x.size(1);
torch::Tensor reshaped_running_mean = running_mean_double.reshape({1, c, 1, 1});
torch::Tensor reshaped_running_var = running_var_double.reshape({1, c, 1, 1});
torch::Tensor reshaped_weight = weight_double.defined() ? weight_double.reshape({1, c, 1, 1}) : torch::Tensor();
torch::Tensor reshaped_bias = bias_double.defined() ? bias_double.reshape({1, c, 1, 1}) : torch::Tensor();
torch::Tensor centered_x = input_double - reshaped_running_mean;
if (should_output("bn1_centered_x")) outputs["bn1_centered_x"] = centered_x.clone();
torch::Tensor variance_plus_eps = reshaped_running_var + eps_double;
if (should_output("bn1_variance_plus_eps")) outputs["bn1_variance_plus_eps"] = variance_plus_eps.clone();
torch::Tensor inv_std = torch::rsqrt(variance_plus_eps); // Using rsqrt for potential match
if (should_output("bn1_inv_std")) outputs["bn1_inv_std"] = inv_std.clone();
torch::Tensor normalized_x = centered_x * inv_std;
if (should_output("bn1_normalized_x")) outputs["bn1_normalized_x"] = normalized_x.clone();
torch::Tensor out_double = normalized_x;
if (reshaped_weight.defined()) out_double = out_double * reshaped_weight;
if (reshaped_bias.defined()) out_double = out_double + reshaped_bias;
x = out_double.to(original_dtype_resnet_bn1);
} else if (bn1) { // Training mode or if manual is disabled
x = bn1->forward(x);
}
// End apply bn1
if (should_output("bn1_output")) outputs["bn1_output"] = x;
x = relu->forward(x);

854
test/compare_models.py

@ -37,13 +37,23 @@ def get_model_configs(root_dir_param):
return {
# ... (existing model_configs definitions)
'ResNet': {
'python_model_loader': lambda: DiMPTorchScriptWrapper(os.path.join(root_dir_param, 'pytracking_models/dimp50_ Ausdruck_ep0050.pth.tar')),
'python_model_loader': lambda: DiMPTorchScriptWrapper(os.path.join(root_dir_param, 'pytracking_models/dimp50_ausdruck_ep0050.pth.tar')),
'cpp_output_subdir': 'resnet',
'python_output_subdir': 'resnet_py', # If Python outputs are saved separately
'python_output_subdir': 'resnet_py',
'outputs_to_compare': {
'Conv1': 'conv1_output.pt', # ADDED
'BN1': 'bn1_output.pt', # ADDED
'ReLU1': 'relu1_output.pt', # ADDED for completeness before MaxPool
'Conv1': ('conv1_output.pt', 'conv1'),
'Debug ResNet Conv1->BN1 Input': ('debug_resnet_conv1_output_for_bn1_input.pt', 'conv1_pre_bn'),
# BN1 final output (manual C++ vs manual Python pre-ReLU)
'BN1': ('bn1_output.pt', 'bn1_post_relu_pre'),
# BN1 Intermediate comparisons
'BN1 Centered X': ('bn1_centered_x.pt', 'bn1_centered_x_py'),
'BN1 Var+Eps': ('bn1_variance_plus_eps.pt', 'bn1_variance_plus_eps_py'),
'BN1 InvStd': ('bn1_inv_std.pt', 'bn1_inv_std_py'),
'BN1 Normalized X': ('bn1_normalized_x.pt', 'bn1_normalized_x_py'),
'ReLU1': ('relu1_output.pt', 'conv1'),
'MaxPool': 'maxpool_output.pt',
'Features': 'features.pt',
'Layer1': 'layer1.pt',
@ -523,6 +533,9 @@ class ComparisonRunner:
cpp_mod_vec0_path = cpp_output_bb_reg_dir_path / f'sample_{i}_mod_vec0.pt'
cpp_mod_vec1_path = cpp_output_bb_reg_dir_path / f'sample_{i}_mod_vec1.pt'
cpp_iou_scores_path = cpp_output_bb_reg_dir_path / f'sample_{i}_iou_scores.pt'
# Paths for debug C++ outputs
cpp_debug_conv3_1t_path = cpp_output_bb_reg_dir_path / f'sample_{i}_debug_conv3_1t_output.pt'
cpp_debug_conv4_1t_path = cpp_output_bb_reg_dir_path / f'sample_{i}_debug_conv4_1t_output.pt'
# Load initial inputs for Python model
py_image_tensor = self.load_cpp_tensor(py_image_input_path, self.device)
@ -549,6 +562,31 @@ class ComparisonRunner:
else:
print(f"Warning: Skipping Python BB Regressor for sample {i}, image input not found at {py_image_input_path}")
# ---- Intermediate debug outputs for conv3_1t and conv4_1t ----
py_debug_conv3_1t_out = None
py_debug_conv4_1t_out = None
if py_feat_layer2 is not None:
try:
_feat2_for_debug_conv3_1t = py_feat_layer2
if _feat2_for_debug_conv3_1t.dim() == 5:
_feat2_for_debug_conv3_1t = _feat2_for_debug_conv3_1t.reshape(-1, *_feat2_for_debug_conv3_1t.shape[-3:])
with torch.no_grad(): # Ensure no_grad context
py_debug_conv3_1t_out = self.bb_regressor_from_source.conv3_1t(_feat2_for_debug_conv3_1t)
except Exception as e:
print(f"ERROR calculating Python Debug_Conv3_1t for sample {i}: {e}")
if py_feat_layer3 is not None:
try:
_feat3_for_debug_conv4_1t = py_feat_layer3
if _feat3_for_debug_conv4_1t.dim() == 5:
_feat3_for_debug_conv4_1t = _feat3_for_debug_conv4_1t.reshape(-1, *_feat3_for_debug_conv4_1t.shape[-3:])
with torch.no_grad(): # Ensure no_grad context
py_debug_conv4_1t_out = self.bb_regressor_from_source.conv4_1t(_feat3_for_debug_conv4_1t)
except Exception as e:
print(f"ERROR calculating Python Debug_Conv4_1t for sample {i}: {e}")
# ---- End intermediate debug outputs ----
# Get Python IoU features
py_iou_feat_list = [None, None] # Initialize as a list of two Nones
if py_feat_layer2 is not None and py_feat_layer3 is not None:
@ -622,8 +660,13 @@ class ComparisonRunner:
cpp_mod_vec0 = self.load_cpp_tensor(cpp_mod_vec0_path, self.device)
cpp_mod_vec1 = self.load_cpp_tensor(cpp_mod_vec1_path, self.device)
cpp_iou_scores = self.load_cpp_tensor(cpp_iou_scores_path, self.device)
# Load debug C++ tensors
cpp_debug_conv3_1t_tensor = self.load_cpp_tensor(cpp_debug_conv3_1t_path, self.device)
cpp_debug_conv4_1t_tensor = self.load_cpp_tensor(cpp_debug_conv4_1t_path, self.device)
# Comparisons
self._compare_tensor_data(py_debug_conv3_1t_out, cpp_debug_conv3_1t_tensor, "BBReg Debug_Conv3_1t", i, current_errors)
self._compare_tensor_data(py_debug_conv4_1t_out, cpp_debug_conv4_1t_tensor, "BBReg Debug_Conv4_1t", i, current_errors)
self._compare_tensor_data(py_iou_feat_list[0], cpp_iou_feat0, "BBReg PyIoUFeat0 vs CppIoUFeat0", i, current_errors)
self._compare_tensor_data(py_iou_feat_list[1], cpp_iou_feat1, "BBReg PyIoUFeat1 vs CppIoUFeat1", i, current_errors)
self._compare_tensor_data(py_modulation_list[0], cpp_mod_vec0, "BBReg PyMod0 vs CppMod0", i, current_errors)
@ -633,224 +676,311 @@ class ComparisonRunner:
if current_errors: self.all_comparison_stats[f"BBReg_Sample_{i}"] = current_errors
def compare_resnet_outputs(self):
print("Comparing ResNet outputs...")
print("\n--- Types at START of compare_resnet_outputs: ---")
if 'ResNet' in self.models: print(f" self.models['ResNet'] type: {type(self.models['ResNet'])}")
if 'Classifier' in self.models: print(f" self.models['Classifier'] type: {type(self.models['Classifier'])}")
if 'BBRegressor' in self.models: print(f" self.models['BBRegressor'] type: {type(self.models['BBRegressor'])}")
print("\\n--- Comparing ResNet Outputs ---")
if not self.models.get('ResNet'):
print("PYTHON: ResNet model not loaded, skipping ResNet comparison.")
return
py_input_common_dir = os.path.join(self.root_dir, 'test', 'input_samples', 'common')
cpp_output_resnet_dir = os.path.join(self.cpp_output_dir, 'resnet')
# Ensure self.py_resnet_output_dir is defined, e.g., in __init__ or where other py output dirs are
if not hasattr(self, 'py_resnet_output_dir') or not self.py_resnet_output_dir:
self.py_resnet_output_dir = Path(self.python_output_dir) / 'resnet'
self.py_resnet_output_dir.mkdir(parents=True, exist_ok=True)
resnet_model = self.models['ResNet']
config = self.model_configs['ResNet']
cpp_resnet_dir = os.path.join(self.cpp_output_dir, config['cpp_output_subdir'])
python_resnet_save_dir = os.path.join(self.python_output_dir, config.get('python_output_subdir', config['cpp_output_subdir']))
if not os.path.exists(python_resnet_save_dir):
os.makedirs(python_resnet_save_dir, exist_ok=True)
num_samples_to_process = self.num_samples
if num_samples_to_process == -1: # If -1, determine from available C++ output files
# This logic can be complex if C++ output is sparse. For now, let's assume if -1 it means process all *common* inputs.
# A safer way for -1 would be to count common input samples first.
common_input_glob = os.path.join(self.root_dir, "test", "input_samples", "common", "sample_*_image.pt")
num_samples_to_process = len(glob.glob(common_input_glob))
print(f"INFO: num_samples set to -1, determined {num_samples_to_process} common input samples.")
processed_samples_count = 0 # Renamed from processed_samples to avoid conflict
sample_input_base_dir = os.path.join(self.root_dir, "test", "input_samples", "common")
# Loop exactly self.num_samples times (or detected count if -1)
for sample_idx in tqdm(range(num_samples_to_process), desc="Comparing ResNet samples"):
current_errors = {} # Initialize for each sample
python_intermediate_outputs_cache = {} # Reset for each sample
# Construct the input file path based on sample_idx
sample_input_file_path = os.path.join(sample_input_base_dir, f"sample_{sample_idx}_image.pt")
if not os.path.exists(sample_input_file_path):
print(f"Warning: Input sample file {sample_input_file_path} not found for sample index {sample_idx}. Skipping ResNet sample.")
empty_errors_for_skipped_sample = {}
for output_key_config in config['outputs_to_compare'].keys():
self._compare_tensor_data(None, None, output_key_config, sample_idx, empty_errors_for_skipped_sample)
if empty_errors_for_skipped_sample:
self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = empty_errors_for_skipped_sample
continue
# --- START REINSTATED INPUT LOADING AND PREPROCESSING ---
input_tensor = self.load_cpp_tensor(sample_input_file_path, self.device, is_image=True)
if input_tensor is None:
print(f"Warning: Failed to load a valid tensor for ResNet input sample {sample_input_file_path} (sample {sample_idx}) using self.load_cpp_tensor. Skipping.")
# Populate NaNs for all expected outputs for this sample
empty_errors_for_skipped_sample = {}
for output_key_config in config['outputs_to_compare'].keys():
self._compare_tensor_data(None, None, output_key_config, sample_idx, empty_errors_for_skipped_sample)
if empty_errors_for_skipped_sample:
self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = empty_errors_for_skipped_sample
continue
# Define Path objects for directory checks
py_input_common_dir_path = Path(py_input_common_dir)
cpp_output_resnet_dir_path = Path(cpp_output_resnet_dir)
comparison_configs = [
("ResNet Conv1 Output (Pre-BN)", "_conv1_output_py.pt", "_conv1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Conv1", "_conv1_output.pt", "_conv1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir), # Assumes Py also saved conv1 output if it was meant to be same as C++ pre-bn
("ResNet BN1", "_bn1_output.pt", "_bn1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet ReLU1", "_relu1_output.pt", "_relu1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet MaxPool", "_maxpool_output.pt", "_maxpool_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Layer1.0 Block Output", "_layer1_0_block_output.pt", "_layer1_0_block_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Layer1.0 Shortcut Output", "_layer1_0_shortcut_output.pt", "_layer1_0_shortcut_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Layer1", "_layer1_output.pt", "_layer1_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Layer2", "_layer2_output.pt", "_layer2_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Layer3", "_layer3_output.pt", "_layer3_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Layer4", "_layer4_output.pt", "_layer4_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir),
("ResNet Features", "_features_output.pt", "_features_output.pt", self.py_resnet_output_dir, cpp_output_resnet_dir)
]
if not py_input_common_dir_path.exists() or not cpp_output_resnet_dir_path.exists():
print(f"ResNet input ({py_input_common_dir_path}) or C++ ResNet output dir ({cpp_output_resnet_dir_path}) not found. Skipping ResNet comparison.")
# Populate NaN for all expected ResNet comparisons if dirs are missing
for i in range(self.num_samples):
sample_key_base = f"ResNet_Sample_{i}"
current_errors = {}
self._compare_tensor_data(None, None, "ResNet Layer1", i, current_errors)
self._compare_tensor_data(None, None, "ResNet Layer2", i, current_errors)
self._compare_tensor_data(None, None, "ResNet Layer3", i, current_errors)
self._compare_tensor_data(None, None, "ResNet Layer4", i, current_errors)
self._compare_tensor_data(None, None, "ResNet Features", i, current_errors)
self.all_comparison_stats[sample_key_base] = current_errors
return
if not isinstance(input_tensor, torch.Tensor):
print(f"Warning: self.load_cpp_tensor for {sample_input_file_path} did not return a Tensor (got {type(input_tensor)}). Skipping sample {sample_idx}.")
# Populate NaNs for all expected outputs for this sample
empty_errors_for_skipped_sample = {}
for output_key_config in config['outputs_to_compare'].keys():
self._compare_tensor_data(None, None, output_key_config, sample_idx, empty_errors_for_skipped_sample)
if empty_errors_for_skipped_sample:
self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = empty_errors_for_skipped_sample
continue
for i in tqdm(range(self.num_samples), desc="ResNet samples"):
current_errors = {} # For this sample
# Preprocess the input tensor for Python's ResNet
if hasattr(self.python_wrapper, 'preprocess_image'):
processed_input_tensor = self.python_wrapper.preprocess_image(input_tensor.clone()) # Use clone
else:
print("Warning: python_wrapper.preprocess_image not found. Using input_tensor as is.")
processed_input_tensor = input_tensor.to(self.device) # Ensure device
# --- END REINSTATED INPUT LOADING AND PREPROCESSING ---
py_image_input_path = py_input_common_dir_path / f'sample_{i}_image.pt'
py_image_tensor = self.load_cpp_tensor(py_image_input_path, self.device)
# Initialize dictionaries to store Python-side outputs for the current sample
python_outputs = {} # To store outputs from the Python model for this sample
py_conv1_out, py_bn1_out, py_relu1_out, py_maxpool_out, py_layer1_out, py_layer2_out, py_layer3_out, py_layer4_out, py_features_out = None, None, None, None, None, None, None, None, None # ADDED py_conv1_out, py_bn1_out, py_relu1_out
py_layer1_0_shortcut_out = None
try:
# Python ResNet forward pass (assuming it's a JIT model or similar)
# The output of a JIT ResNet model might be a dictionary or a list/tuple of tensors
# We need to ensure we can map these to the 'outputs_to_compare' keys
print(f"PYTHON ResNet forward pass for sample {sample_idx}...")
# For ResNet, the output is a dictionary from its forward method.
# output_layers = list(config['outputs_to_compare'].keys()) # This might be too broad initially
# Define the layers we actually need from the Python ResNet forward pass.
# These should match the keys used in the Python ResNet's forward method.
# e.g., ['layer1', 'layer2', 'layer3', 'layer4', 'conv1_output', 'bn1_output', etc.]
# For now, let's define specific layers needed for the comparison.
# The JIT ResNet model we have should output a dictionary.
py_output_layers_needed = ['conv1', 'layer1', 'layer2', 'layer3', 'layer4']
# Add 'conv1_pre_bn' if we need to compare the input to BN1
if 'Debug ResNet Conv1->BN1 Input' in config['outputs_to_compare']:
py_output_layers_needed.append('conv1_pre_bn')
# If we are comparing the direct C++ BN1 output, we need 'bn1_output' from Python
if 'BN1' in config['outputs_to_compare']:
py_output_layers_needed.append('bn1_output')
# If we are comparing the C++ ReLU1 output (after BN1 and ReLU), we need 'bn1_post_relu_pre' from Python
if 'ReLU1' in config['outputs_to_compare']:
py_output_layers_needed.append('bn1_post_relu_pre')
# Add Python-side BN1 intermediate layer names if they are in outputs_to_compare
# The config value (cpp_output_filename_or_tuple) is not directly used here for this part,
# we care about the py_dict_key that will be derived from the C++ key.
bn1_intermediate_py_keys_to_request = []
if 'BN1 Centered X' in config['outputs_to_compare']:
bn1_intermediate_py_keys_to_request.append('bn1_centered_x_py')
if 'BN1 Var+Eps' in config['outputs_to_compare']:
bn1_intermediate_py_keys_to_request.append('bn1_variance_plus_eps_py')
if 'BN1 InvStd' in config['outputs_to_compare']:
bn1_intermediate_py_keys_to_request.append('bn1_inv_std_py')
if 'BN1 Normalized X' in config['outputs_to_compare']:
bn1_intermediate_py_keys_to_request.append('bn1_normalized_x_py')
for py_key in bn1_intermediate_py_keys_to_request:
if py_key not in py_output_layers_needed:
py_output_layers_needed.append(py_key)
if py_image_tensor is not None:
# Save Python's preprocessed input to conv1
# This py_image_tensor is already preprocessed by DiMPTorchScriptWrapper.extract_backbone -> preprocess_image
# which is called before this compare_resnet_outputs function if we follow the logic for py_feat_layer2, py_feat_layer3 in compare_bb_regressor
# However, here in compare_resnet_outputs, py_image_tensor comes from load_cpp_tensor(py_image_input_path, ...)
# which is the RAW image. Preprocessing for python side happens inside self.python_wrapper.extract_backbone
# or when we manually call py_model_resnet.conv1(py_image_tensor)
# Let's get the preprocessed image from the wrapper as that's the true input to Python's ResNet
# Add 'fc' if configured, though not typically used in these comparisons
if 'fc' in config['outputs_to_compare']:
py_output_layers_needed.append('fc')
# The input to python_wrapper.extract_backbone is the raw image tensor
# It then calls self.preprocess_image(im) and then self.net.extract_backbone_features(im, layers)
# So, py_image_tensor IS the raw image. We need to get the preprocessed one.
preprocessed_py_image_for_conv1 = None
if self.python_wrapper:
# Manually preprocess for saving, mimicking what extract_backbone would do before its first conv
preprocessed_py_image_for_conv1 = self.python_wrapper.preprocess_image(py_image_tensor.clone()) # Clone to avoid in-place modification of py_image_tensor
py_preprocessed_save_path = Path(self.cpp_output_dir) / 'resnet' / f'sample_{i}_image_preprocessed_python.pt'
# Ensure self.cpp_output_dir / resnet exists
(Path(self.cpp_output_dir) / 'resnet').mkdir(parents=True, exist_ok=True)
torch.save(preprocessed_py_image_for_conv1.cpu(), str(py_preprocessed_save_path))
print(f"Saved Python preprocessed image for sample {i} to {py_preprocessed_save_path}")
# Deduplicate, just in case (though construction above should be fine)
py_output_layers_needed = list(OrderedDict.fromkeys(py_output_layers_needed))
print(f"DEBUG: Requesting these layers from Python ResNet: {py_output_layers_needed}")
# Call the Python ResNet forward
# The `self.models['ResNet']` should be the loaded JIT model
# It expects the output_layers argument.
# The DiMPTorchScriptWrapper's backbone should also support this.
if hasattr(resnet_model, 'forward') and callable(getattr(resnet_model, 'forward')) and 'output_layers' in inspect.signature(resnet_model.forward).parameters:
python_model_outputs_dict = resnet_model.forward(processed_input_tensor, output_layers=py_output_layers_needed)
elif hasattr(self.python_wrapper, 'extract_backbone') and callable(getattr(self.python_wrapper, 'extract_backbone')):
# This is the case if ResNet is accessed via the DiMPTorchScriptWrapper's extract_backbone,
# which internally calls the backbone's forward with output_layers.
python_model_outputs_dict = self.python_wrapper.extract_backbone(input_tensor.clone()) # extract_backbone handles preprocessing
else:
print(f"ERROR: Cannot call forward on Python ResNet model. Type: {type(resnet_model)}")
continue
# DEBUG: Print keys from Python model output
if isinstance(python_model_outputs_dict, dict):
print(f"DEBUG RN_CMP: Keys from python_model_outputs_dict (sample {sample_idx}): {list(python_model_outputs_dict.keys())}")
else:
print("ERROR: self.python_wrapper not available to get preprocessed image for Python.")
print(f"DEBUG RN_CMP: python_model_outputs_dict is not a dict (sample {sample_idx}), type: {type(python_model_outputs_dict)}")
# Populate python_outputs based on the python_model_outputs_dict
# This maps the Python output names to the keys used in 'outputs_to_compare'
if isinstance(python_model_outputs_dict, dict):
python_outputs = python_model_outputs_dict
# If 'features' is an alias for 'layer4' in Python output
if 'layer4' in python_outputs and 'features' not in python_outputs:
python_outputs['features'] = python_outputs['layer4']
if 'conv1_output' in python_outputs:
python_intermediate_outputs_cache['conv1_output'] = python_outputs['conv1_output']
else:
print(f"ERROR: Python ResNet output is not a dict. Got {type(python_model_outputs_dict)}")
# Handle tuple/list output if necessary, mapping by order or specific logic.
# For now, we assume dict output from our ResNet.
continue
except Exception as e:
print(f"Error during Python ResNet forward pass for sample {sample_idx}: {e}")
import traceback
traceback.print_exc()
continue # Skip to next sample
for output_key, cpp_output_filename_or_tuple in config['outputs_to_compare'].items():
is_python_specific_name = isinstance(cpp_output_filename_or_tuple, tuple)
cpp_output_filename = cpp_output_filename_or_tuple[0] if is_python_specific_name else cpp_output_filename_or_tuple
# Corrected path construction for C++ ResNet tensors:
# The sample index is already part of the cpp_output_filename for ResNet outputs from C++.
# (e.g., sample_0_conv1_output.pt)
# So, we join cpp_resnet_dir directly with this filename.
# However, the C++ code actually saves ResNet outputs as sample_X_LAYERNAME.pt directly in cpp_resnet_dir,
# not in a per-sample subdirectory for ResNet outputs.
# Let's check how test_models.cpp saves them.
# test_models.cpp -> save_resnet_outputs -> file_path = resnet_output_dir + "/sample_" + std::to_string(sample_idx) + "_" + output_name;
# This means filenames are like "sample_0_conv1_output.pt" directly in "../test/output/resnet/"
correct_cpp_tensor_filename = f"sample_{sample_idx}_{cpp_output_filename}"
cpp_tensor_path = os.path.join(cpp_resnet_dir, correct_cpp_tensor_filename)
# <<< START ADDED DEBUG PRINTS >>>
print(f"DEBUG RN_CMP: Attempting to load C++ tensor for '{output_key}' (sample {sample_idx}) from: {cpp_tensor_path}")
# <<< END ADDED DEBUG PRINTS >>>
try:
with torch.no_grad():
py_model_resnet = self.models.get('ResNet')
if py_model_resnet:
current_features = preprocessed_py_image_for_conv1
py_conv1_out = py_model_resnet.conv1(current_features)
# Ensure self.py_resnet_output_dir is defined and is a Path object
if not hasattr(self, 'py_resnet_output_dir') or not self.py_resnet_output_dir:
self.py_resnet_output_dir = Path(self.python_output_dir) / 'resnet'
self.py_resnet_output_dir.mkdir(parents=True, exist_ok=True)
py_conv1_out_path = self.py_resnet_output_dir / f'sample_{i}_conv1_output_py.pt'
torch.save(py_conv1_out.cpu(), str(py_conv1_out_path))
# --- BN1 on CPU for debugging (Python) ---
py_bn1_out = py_model_resnet.bn1(py_conv1_out) # Original line
py_relu1_out = py_model_resnet.relu(py_bn1_out)
py_maxpool_out = py_model_resnet.maxpool(py_relu1_out)
x_for_py_layer1_input = py_maxpool_out
# Output of the first bottleneck block in layer1
py_layer1_0_block_out_tensor = None # Initialize to avoid ref before assignment if try fails
if hasattr(py_model_resnet, 'layer1') and len(py_model_resnet.layer1) > 0:
try:
py_layer1_0_block_out_tensor = py_model_resnet.layer1[0](x_for_py_layer1_input) # REMOVED .clone() for consistency with best Layer1.0 result
# Ensure cpp_resnet_sample_dir is defined, if not, use a fallback or define it earlier
# Assuming cpp_resnet_sample_dir is defined like: cpp_resnet_sample_dir = Path(self.cpp_output_dir) / 'resnet'
# Which should be: cpp_resnet_dir = Path(self.cpp_output_dir) / 'resnet' # as per usage elsewhere
# And then: cpp_resnet_sample_dir = cpp_resnet_dir # if sample specific subdirs are not used for this
# For safety, let's use the already established cpp_output_resnet_dir path from later in the code
# cpp_output_resnet_dir = os.path.join(self.cpp_output_dir, 'resnet')
# Need to ensure cpp_output_resnet_dir is a Path object if used with /
# From later code: cpp_output_resnet_dir_path = Path(self.cpp_output_dir) / 'resnet'
current_cpp_resnet_dir = Path(self.cpp_output_dir) / 'resnet' # Define it based on existing patterns
current_cpp_resnet_dir.mkdir(parents=True, exist_ok=True) # Ensure directory exists
py_layer1_0_block_save_path = current_cpp_resnet_dir / f'sample_{i}_layer1_0_block_output.pt'
torch.save(py_layer1_0_block_out_tensor.cpu(), str(py_layer1_0_block_save_path))
# print(f"DEBUG: Saved Python layer1[0] block output for sample {i} to {py_layer1_0_block_save_path}")
except Exception as e_block:
print(f"ERROR: Failed to get/save Python layer1[0] block output for sample {i}: {e_block}")
# Shortcut for layer1.0 (if exists)
if hasattr(py_model_resnet, 'layer1') and len(py_model_resnet.layer1) > 0 and \
hasattr(py_model_resnet.layer1[0], 'downsample') and py_model_resnet.layer1[0].downsample is not None:
py_layer1_0_shortcut_out = py_model_resnet.layer1[0].downsample(x_for_py_layer1_input.clone())
# Get full backbone outputs using the wrapper (which uses the raw image_tensor and preprocesses internally)
# This ensures layer1, layer2, etc., are from the standard path.
if self.python_wrapper:
py_backbone_outputs = self.python_wrapper.extract_backbone(py_image_tensor) # py_image_tensor is raw
else:
print("ERROR: self.python_wrapper is None, cannot extract backbone features for ResNet outputs.")
py_backbone_outputs = {}
py_layer1_out = py_backbone_outputs.get('layer1')
py_layer2_out = py_backbone_outputs.get('layer2')
py_layer3_out = py_backbone_outputs.get('layer3')
py_layer4_out = py_backbone_outputs.get('layer4')
py_features_out = py_backbone_outputs.get('layer4') # Typically layer4 is the final feature map
else:
print("ERROR: Python ResNet model not found in self.models")
except Exception as e:
print(f"ERROR: Python ResNet backbone/shortcut processing failed for sample {i}: {e}")
else:
print(f"Warning: Skipping Python ResNet for sample {i}, image input not found at {py_image_input_path}")
cpp_tensor = self.load_cpp_tensor(cpp_tensor_path, self.device)
# <<< START ADDED DEBUG PRINTS >>>
loaded_status = "None"
if cpp_tensor is not None:
loaded_status = f"Tensor with shape {cpp_tensor.shape}, dtype {cpp_tensor.dtype}, device {cpp_tensor.device}"
print(f"DEBUG RN_CMP: Loaded C++ tensor for '{output_key}' (sample {sample_idx}): {loaded_status}")
# <<< END ADDED DEBUG PRINTS >>>
if cpp_tensor is None:
print(f"Warning: C++ tensor {cpp_output_filename} for sample {sample_idx} ('{output_key}') is None or loading failed. Skipping comparison for this output.")
# _compare_tensor_data will be called with cpp_tensor=None, which handles NaN population
# Fall through to _compare_tensor_data to record NaNs
# continue # This would skip the _compare_tensor_data call entirely
# Get the corresponding Python tensor
python_tensor = None
python_output_save_path = os.path.join(python_resnet_save_dir, f"sample_{sample_idx}", cpp_output_filename) # Save with same name as C++ for consistency
# Map the 'output_key' from config to the key used in 'python_outputs' dictionary
# This requires knowing how 'outputs_to_compare' keys map to Python model output dict keys.
# Example: 'Conv1' maps to 'conv1_output', 'Features' to 'features' (which might be 'layer4'), etc.
py_dict_key = None
if output_key == 'Conv1':
py_dict_key = 'conv1_pre_bn' # Python ResNet outputs combined conv1+bn1+relu as 'conv1'
elif output_key == 'Debug ResNet Conv1->BN1 Input':
py_dict_key = 'conv1_pre_bn' # Our new specific output layer
elif output_key == 'BN1':
py_dict_key = 'bn1_output' # CHANGED to use the new hook
elif output_key == 'BN1 Centered X':
py_dict_key = 'bn1_centered_x_py'
elif output_key == 'BN1 Var+Eps':
py_dict_key = 'bn1_variance_plus_eps_py'
elif output_key == 'BN1 InvStd':
py_dict_key = 'bn1_inv_std_py'
elif output_key == 'BN1 Normalized X':
py_dict_key = 'bn1_normalized_x_py'
elif output_key == 'ReLU1':
py_dict_key = 'bn1_post_relu_pre' # Output of Python's BN1 + ReLU
elif output_key == 'MaxPool':
# MaxPool is applied *after* 'conv1' (conv1+bn1+relu) block in Python ResNet.
# However, the Python ResNet forward doesn't have a separate 'maxpool' output key.
# The output of layer1 is *after* maxpool.
# C++ saves maxpool_output.pt *before* layer1.
# This means we need to save python_outputs['conv1'] (after conv1,bn1,relu) then apply maxpool to it manually for comparison.
# OR, recognize that C++ output for maxpool is input to layer1.
# For now, this is tricky. Let's see if layer1 input in C++ matches python maxpool output.
# The Python output named 'layer1' is after the nn.Sequential that IS layer1.
# The input to C++ layer1 is the output of C++ maxpool.
# The input to Python model.layer1 is the output of model.maxpool(model.relu(model.bn1(model.conv1(x)))).
# So, Python's 'conv1' output, when passed through an nn.MaxPool2d, should match C++ 'maxpool_output.pt'.
print(f"Warning: Direct Python equivalent for C++ 'MaxPool' output is complex. Requires manual maxpool application to Python's 'conv1' output. Skipping {output_key} for now.")
continue # Skip this key for now
elif output_key == 'Layer1': py_dict_key = 'layer1'
elif output_key == 'Layer2': py_dict_key = 'layer2'
elif output_key == 'Layer3': py_dict_key = 'layer3'
elif output_key == 'Layer4': py_dict_key = 'layer4'
elif output_key == 'Features': py_dict_key = 'layer4' # 'Features' is an alias for 'layer4'
elif output_key == 'Layer1.0 Shortcut':
# Shortcut outputs are not available from the Python ResNet forward method.
print(f"Warning: Shortcut output '{output_key}' cannot be directly fetched from Python ResNet. Skipping.")
continue
else:
print(f"Warning: Unknown output_key '{output_key}' in ResNet config for Python tensor mapping. Skipping.")
continue
# Load C++ ResNet outputs
if py_dict_key and py_dict_key in python_outputs:
python_tensor = python_outputs[py_dict_key]
else:
# DEBUG: Print info if key is not found
print(f"DEBUG RN_CMP: py_dict_key '{py_dict_key}' not found in python_outputs (keys: {list(python_outputs.keys())}) for output_key '{output_key}', sample {sample_idx}")
if python_tensor is None:
print(f"Warning: Python tensor for {output_key} is None for sample {sample_idx}. Skipping.")
continue
# NEW: Debug directory listing
print(f"DEBUG: Listing contents of {cpp_output_resnet_dir_path} before loading tensors for sample {i}:")
try:
if cpp_output_resnet_dir_path.exists() and cpp_output_resnet_dir_path.is_dir():
for item_path in cpp_output_resnet_dir_path.iterdir():
print(f" - {item_path.name}")
else:
print(f" Directory {cpp_output_resnet_dir_path} does not exist or is not a directory.")
except Exception as e_list:
print(f" ERROR listing directory: {e_list}")
# END NEW # Removing this marker
time.sleep(0.5) # INCREASED to 0.5s delay to allow filesystem to sync
# Debug blocks for directory listing and direct open test were here and are now fully removed.
cpp_layer1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer1.pt')
cpp_layer2_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer2.pt')
cpp_layer3_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer3.pt')
cpp_layer4_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer4.pt')
cpp_features_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_features.pt')
cpp_layer1_0_shortcut_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer1_0_shortcut_output.pt')
cpp_maxpool_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_maxpool_output.pt')
cpp_conv1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_conv1_output.pt') # ADDED
cpp_bn1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_bn1_output.pt') # ADDED
cpp_relu1_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_relu1_output.pt') # ADDED
cpp_layer1_0_block_output_path = os.path.join(cpp_output_resnet_dir, f'sample_{i}_layer1_0_block_output.pt') # ADDED
cpp_layer1_out = self.load_cpp_tensor(cpp_layer1_path, self.device)
cpp_layer2_out = self.load_cpp_tensor(cpp_layer2_path, self.device)
cpp_layer3_out = self.load_cpp_tensor(cpp_layer3_path, self.device)
cpp_layer4_out = self.load_cpp_tensor(cpp_layer4_path, self.device)
cpp_features_out = self.load_cpp_tensor(cpp_features_path, self.device)
cpp_layer1_0_shortcut_out = self.load_cpp_tensor(cpp_layer1_0_shortcut_path, self.device)
cpp_maxpool_out = self.load_cpp_tensor(cpp_maxpool_path, self.device)
cpp_conv1_out = self.load_cpp_tensor(cpp_conv1_path, self.device) # ADDED
cpp_bn1_out = self.load_cpp_tensor(cpp_bn1_path, self.device) # ADDED
cpp_relu1_out = self.load_cpp_tensor(cpp_relu1_path, self.device) # ADDED
cpp_layer1_0_block_output_tensor = self.load_cpp_tensor(cpp_layer1_0_block_output_path, self.device) # ADDED
# Load the Python pre-BN conv1 output that was saved earlier
py_conv1_out_pre_bn_tensor = None
# Ensure self.py_resnet_output_dir is defined (it should be if the save operation worked)
if hasattr(self, 'py_resnet_output_dir') and self.py_resnet_output_dir:
py_conv1_out_pre_bn_path = self.py_resnet_output_dir / f'sample_{i}_conv1_output_py.pt'
if py_conv1_out_pre_bn_path.exists():
try:
py_conv1_out_pre_bn_tensor = torch.load(str(py_conv1_out_pre_bn_path), map_location=self.device)
except Exception as e_load_py_conv1:
print(f"Error loading Python conv1_output_py (pre-BN) for sample {i}: {e_load_py_conv1}")
else:
print(f"Warning: self.py_resnet_output_dir not defined, cannot load py_conv1_output_py.pt for sample {i}")
# Save the Python tensor (always, for record-keeping)
os.makedirs(os.path.dirname(python_output_save_path), exist_ok=True)
torch.save(python_tensor.cpu(), python_output_save_path)
# print(f"Saved Python tensor for {output_key} (sample {sample_idx}) to {python_output_save_path}")
# Comparisons
self._compare_tensor_data(py_conv1_out_pre_bn_tensor, cpp_conv1_out, "ResNet Conv1 Output (Pre-BN)", i, current_errors)
self._compare_tensor_data(py_conv1_out, cpp_conv1_out, "ResNet Conv1", i, current_errors)
self._compare_tensor_data(py_bn1_out, cpp_bn1_out, "ResNet BN1", i, current_errors)
self._compare_tensor_data(py_relu1_out, cpp_relu1_out, "ResNet ReLU1", i, current_errors)
self._compare_tensor_data(py_maxpool_out, cpp_maxpool_out, "ResNet MaxPool", i, current_errors)
self._compare_tensor_data(py_layer1_out, cpp_layer1_out, "ResNet Layer1", i, current_errors)
self._compare_tensor_data(py_layer2_out, cpp_layer2_out, "ResNet Layer2", i, current_errors)
self._compare_tensor_data(py_layer3_out, cpp_layer3_out, "ResNet Layer3", i, current_errors)
self._compare_tensor_data(py_layer4_out, cpp_layer4_out, "ResNet Layer4", i, current_errors)
self._compare_tensor_data(py_features_out, cpp_features_out, "ResNet Features", i, current_errors)
self._compare_tensor_data(py_layer1_0_shortcut_out, cpp_layer1_0_shortcut_out, "ResNet Layer1.0 Shortcut", i, current_errors)
if current_errors: self.all_comparison_stats[f"ResNet_Sample_{i}"] = current_errors
# Perform comparison
self._compare_tensor_data(python_tensor.to(self.device) if python_tensor is not None else None,
cpp_tensor,
output_key,
sample_idx,
current_errors) # current_errors is populated in place
# The line above was changed to handle python_tensor being None before .to(self.device)
# current_errors is populated by _compare_tensor_data directly.
# self.all_comparison_stats is updated after this inner loop completes for the sample.
except FileNotFoundError:
print(f"Warning: C++ output file not found: {cpp_tensor_path}. Skipping for sample {sample_idx}, output {output_key}.")
# Populate NaNs for this missing C++ file
self._compare_tensor_data(None, None, output_key, sample_idx, current_errors)
except Exception as e:
print(f"Error comparing {output_key} for sample {sample_idx}: {e}")
import traceback
traceback.print_exc()
# Populate NaNs on error
self._compare_tensor_data(None, None, output_key, sample_idx, current_errors)
# After processing all output_keys for this sample, store the collected current_errors
if current_errors: # If any comparisons were attempted (even if they resulted in NaNs)
self.all_comparison_stats[f"ResNet_Sample_{sample_idx}"] = current_errors
# processed_samples += 1 # This variable is no longer used as loop is range-based
print("--- ResNet Output Comparison Complete ---")
def generate_html_report(self):
print("\nGenerating HTML report...")
@ -1200,73 +1330,247 @@ class ComparisonRunner:
print("Preprocessed input comparison: ISSUES FOUND (details above).")
def load_cpp_tensor(self, file_path_str, device, is_image=False):
    """Load a tensor written by the C++ side from ``file_path_str``.

    Loading strategy:

    1. Return ``None`` immediately if the file does not exist.
    2. Try a plain ``torch.load``. If it yields a ``torch.Tensor``, return it
       moved to ``device``.
    3. Otherwise treat the file as a TorchScript module: reuse the object that
       ``torch.load`` returned when there is one, else call ``torch.jit.load``.
       The module is then probed for a tensor, in this order: calling
       ``forward()`` (then ``forward`` with an empty dummy tensor), calling the
       module itself, the attributes ``forward`` / ``tensor`` / ``data`` /
       ``tensor_data``, the first entry of ``named_buffers()``, the first entry
       of ``named_parameters()``, any other tensor-valued attribute, and
       finally the same call/attribute probes on each entry of
       ``named_children()``.

    Args:
        file_path_str: Path of the file to load.
        device: Passed as ``map_location`` to the loaders and used as the
            target of the final ``.to(...)``.
        is_image: Accepted for backward compatibility with existing callers;
            the loading logic does not depend on it.

    Returns:
        The loaded tensor on ``device``, or ``None`` if the file is missing,
        cannot be loaded, or no tensor can be extracted from it.
    """
    # Local import so the error path below cannot hit a NameError if the
    # module does not import traceback at file level.
    import traceback

    missing = object()  # sentinel: attribute absent or its lookup raised

    def safe_getattr(obj, attr_name):
        """getattr that maps both 'absent' and 'lookup raised' to ``missing``."""
        try:
            return getattr(obj, attr_name, missing)
        except Exception:
            return missing

    def call_for_tensor(fn, label):
        """Call ``fn()`` and return the result only if it is a tensor."""
        try:
            result = fn()
        except Exception as e_call:
            print(f"DEBUG JIT EXTRACTION: Error calling {label}: {e_call}")
            return None
        if not isinstance(result, torch.Tensor):
            print(f"DEBUG JIT EXTRACTION: {label} did not return a tensor, got {type(result)}")
            return None
        return result

    def probe_calls(obj, label, dev, try_dummy):
        """Try ``obj.forward()``, optionally ``forward(dummy)``, then ``obj()``."""
        forward = safe_getattr(obj, 'forward')
        if forward is missing:
            return None
        found = None
        if callable(forward):
            print(f"DEBUG JIT EXTRACTION: Attempting {label}.forward()")
            found = call_for_tensor(forward, f"{label}.forward()")
            if found is None and try_dummy:
                # Some exported modules declare an input they never use.
                print(f"DEBUG JIT EXTRACTION: Attempting {label}.forward(torch.empty(0))")
                found = call_for_tensor(
                    lambda: forward(torch.empty(0, device=dev)),
                    f"{label}.forward(dummy)",
                )
        if found is None and callable(obj):
            print(f"DEBUG JIT EXTRACTION: Attempting callable {label}()")
            found = call_for_tensor(obj, f"callable {label}()")
        return found

    def probe_attributes(obj, label):
        """Check the well-known attribute names for a tensor value."""
        for attr_name in ('forward', 'tensor', 'data', 'tensor_data'):
            candidate = safe_getattr(obj, attr_name)
            if isinstance(candidate, torch.Tensor):
                print(f"DEBUG JIT EXTRACTION: Found tensor in {label}.{attr_name}")
                return candidate
        return None

    def first_registered_tensor(obj, method_name):
        """Return the first tensor yielded by ``obj.<method_name>()``, if any."""
        print(f"DEBUG JIT EXTRACTION: Iterating {method_name} for a tensor...")
        try:
            for tensor_name, candidate in getattr(obj, method_name)():
                if isinstance(candidate, torch.Tensor):
                    print(f"DEBUG JIT EXTRACTION: Found tensor in {method_name}: {tensor_name}")
                    return candidate
        except Exception as e_iter:
            print(f"DEBUG JIT EXTRACTION: Error iterating {method_name}: {e_iter}")
        return None

    def first_tensor_member(obj, label):
        """Scan every attribute of ``obj`` and return the first tensor found.

        Attributes are read one at a time through ``safe_getattr`` so that a
        single property that raises (a RuntimeError "Method 'forward' is not
        defined" was observed here) does not abort the whole scan.
        """
        try:
            member_names = dir(obj)
        except Exception as e_dir:
            print(f"DEBUG JIT EXTRACTION: listing members of {label} failed: {e_dir}. Skipping this method.")
            return None
        for member_name in member_names:
            candidate = safe_getattr(obj, member_name)
            if isinstance(candidate, torch.Tensor):
                print(f"DEBUG JIT EXTRACTION: Found tensor in member {member_name} of {label}")
                return candidate
        return None

    def extract_tensor_from_jit_module(module_path, jit_loaded_obj, dev):
        """Probe a loaded (JIT) object for a tensor; return it on ``dev`` or None."""
        print(f"DEBUG JIT EXTRACTION: For {module_path}, loaded_obj type: {type(jit_loaded_obj)}")
        print(f"DEBUG JIT EXTRACTION: str(loaded_obj): {str(jit_loaded_obj)}")

        root_label = "jit_loaded_obj"
        # Probes are ordered from most to least specific; each runs only if
        # every earlier one came back empty.
        root_probes = (
            lambda: probe_calls(jit_loaded_obj, root_label, dev, try_dummy=True),
            lambda: probe_attributes(jit_loaded_obj, root_label),
            lambda: first_registered_tensor(jit_loaded_obj, 'named_buffers'),
            lambda: first_registered_tensor(jit_loaded_obj, 'named_parameters'),
            lambda: first_tensor_member(jit_loaded_obj, root_label),
        )
        extracted_tensor = None
        for probe in root_probes:
            extracted_tensor = probe()
            if extracted_tensor is not None:
                break

        if extracted_tensor is None:
            print(f"DEBUG JIT EXTRACTION: Iterating named_children...")
            try:
                for child_name, child_module in jit_loaded_obj.named_children():
                    print(f"DEBUG JIT EXTRACTION: Inspecting child: {child_name} of type {type(child_module)}")
                    child_label = f"child {child_name}"
                    extracted_tensor = probe_calls(child_module, child_label, dev, try_dummy=False)
                    if extracted_tensor is None:
                        extracted_tensor = probe_attributes(child_module, child_label)
                    if extracted_tensor is None:
                        extracted_tensor = first_tensor_member(child_module, child_label)
                    if extracted_tensor is not None:
                        break
                if extracted_tensor is not None:
                    print(f"DEBUG JIT EXTRACTION: Tensor found in a child module.")
                else:
                    print(f"DEBUG JIT EXTRACTION: Tensor not found in direct children.")
            except Exception as e_children:
                print(f"DEBUG JIT EXTRACTION: Error iterating named_children: {e_children}")

        if isinstance(extracted_tensor, torch.Tensor):
            print(f"DEBUG JIT EXTRACTION: Successfully extracted tensor of shape {extracted_tensor.shape} from JIT module {module_path}")
            return extracted_tensor.to(dev)
        print(f"Warning: JIT EXTRACTION: Could not extract tensor from JIT module: {module_path}. Final extracted_type: {type(extracted_tensor)}. THIS FILE WILL BE SKIPPED.")
        return None

    if not os.path.exists(file_path_str):
        return None

    # --- Attempt 1: plain torch.load ---------------------------------------
    loaded_object_from_direct_load = None
    try:
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
        # objects. That is only safe for files produced by our own C++ test
        # binary; do not point this at untrusted files.
        loaded_object_from_direct_load = torch.load(file_path_str, map_location=device, weights_only=False)
    except Exception as e_initial_load:
        print(f"INFO: Initial torch.load failed for {file_path_str}: {e_initial_load}. Will attempt JIT extraction.")
    else:
        if isinstance(loaded_object_from_direct_load, torch.Tensor):
            return loaded_object_from_direct_load.to(device)
        print(f"INFO: Initial torch.load of {file_path_str} yielded a non-Tensor (type: {type(loaded_object_from_direct_load)}). Will attempt JIT extraction.")

    # --- Attempt 2: treat the file as a JIT module -------------------------
    if loaded_object_from_direct_load is not None:
        # torch.load succeeded but handed back a non-tensor object; probe it
        # directly instead of reading the file a second time.
        print(f"DEBUG JIT: Using object from initial torch.load (type: {type(loaded_object_from_direct_load)}) for JIT extraction for {file_path_str}.")
        jit_module_to_process = loaded_object_from_direct_load
    else:
        try:
            print(f"DEBUG JIT: Attempting torch.jit.load for {file_path_str} as fallback/primary JIT path.")
            jit_module_to_process = torch.jit.load(file_path_str, map_location=device)
        except Exception as e_jit_load_explicit:
            print(f"Error: torch.jit.load also failed for {file_path_str}: {e_jit_load_explicit}. Traceback: {traceback.format_exc()}. SKIPPING.")
            return None

    if jit_module_to_process is None:
        print(f"Warning: jit_module_to_process is None for {file_path_str} before calling extraction. SKIPPING file.")
        return None

    final_tensor = extract_tensor_from_jit_module(file_path_str, jit_module_to_process, device)
    if final_tensor is None:
        print(f"Warning: JIT extraction path for {file_path_str} (using {type(jit_module_to_process)}) failed to extract tensor. SKIPPING file.")
    return final_tensor
def _compare_tensor_data(self, tensor1, tensor2, name, sample_idx, current_errors_dict_to_populate):
"""Compare two tensors and return error metrics. Modifies current_errors_dict_to_populate in place."""
num_metrics = 11 # mae, max_err, diff_arr, mean_py_val, std_abs_err, l2_py, l2_cpp, l2_diff, cos_sim, pearson, mre
nan_metrics_tuple = (
float('nan'), float('nan'), [], float('nan'), float('nan'), # Original 5
@ -1280,27 +1584,26 @@ class ComparisonRunner:
t1_cpu_temp = tensor1.cpu().detach().numpy().astype(np.float32)
py_mean = np.mean(t1_cpu_temp)
py_l2 = np.linalg.norm(t1_cpu_temp.flatten())
# If only tensor2 is None, we can't calculate C++ l2 or comparison metrics
# If only tensor1 is None, py_mean and py_l2 remain NaN.
current_errors[name] = (
# Populate current_errors_dict_to_populate directly
current_errors_dict_to_populate[name] = (
float('nan'), float('nan'), [], py_mean, float('nan'),
py_l2, float('nan'), float('nan'), float('nan'), float('nan'), float('nan')
)
print(f"Warning: Cannot compare '{name}' for sample {sample_idx}, one or both tensors are None.")
return
return # Return None as the function modifies dict in place
t1_cpu = tensor1.cpu().detach().numpy().astype(np.float32)
t2_cpu = tensor2.cpu().detach().numpy().astype(np.float32)
if t1_cpu.shape != t2_cpu.shape:
print(f"Warning: Shape mismatch for '{name}' sample {sample_idx}. Py: {t1_cpu.shape}, Cpp: {t2_cpu.shape}. Skipping most comparisons.")
current_errors[name] = (
current_errors_dict_to_populate[name] = (
float('nan'), float('nan'), [], np.mean(t1_cpu), float('nan'), # MAE, MaxErr, diff_arr, MeanPy, StdAbsErr
np.linalg.norm(t1_cpu.flatten()), np.linalg.norm(t2_cpu.flatten()), float('nan'), # L2Py, L2Cpp, L2Diff
float('nan'), float('nan'), float('nan') # CosSim, Pearson, MRE
)
return
return # Return None
# All calculations from here assume shapes match and tensors are not None
t1_flat = t1_cpu.flatten()
@ -1355,7 +1658,8 @@ class ComparisonRunner:
# Using (abs(t1_cpu) + epsilon) in denominator handles this.
mean_rel_err = np.mean(abs_diff_elements / (np.abs(t1_cpu) + epsilon_rel_err))
current_errors[name] = (
# Populate current_errors_dict_to_populate directly
current_errors_dict_to_populate[name] = (
mae, max_err, diff_arr_for_hist, mean_py_val, std_abs_err,
l2_norm_py, l2_norm_cpp, l2_norm_diff, cosine_sim, pearson_corr, mean_rel_err
)

13
test/test_models.cpp

@ -127,7 +127,10 @@ int main(int argc, char* argv[]) {
std::vector<std::string> output_layers_resnet = {
"conv1_output", "bn1_output", "relu1_output", "maxpool_output",
"layer1", "layer2", "layer3", "layer4", "features",
"layer1_0_shortcut_output", "layer1_0_block_output"
"layer1_0_shortcut_output", "layer1_0_block_output",
"debug_resnet_conv1_output_for_bn1_input",
// New BN1 intermediate outputs
"bn1_centered_x", "bn1_variance_plus_eps", "bn1_inv_std", "bn1_normalized_x"
};
resnet_model_opt.emplace(cimp::resnet::resnet50(resnet_weights_path, output_layers_resnet, device));
(*resnet_model_opt)->to(device);
@ -291,6 +294,14 @@ int main(int argc, char* argv[]) {
} else {
std::cerr << " Skipping BBRegressor predict_iou for sample " << i << " (iou_feats or mod_vectors empty)." << std::endl;
}
// Save debug intermediate outputs
torch::Tensor cpp_conv3_1t_out = (*bb_regressor_model_opt_wrapped).debug_get_conv3_1t_output(resnet_outputs["layer2"].clone());
save_tensor_to_file(cpp_conv3_1t_out, (bb_reg_out_dir / (sample_suffix + "_debug_conv3_1t_output.pt")).string());
torch::Tensor cpp_conv4_1t_out = (*bb_regressor_model_opt_wrapped).debug_get_conv4_1t_output(resnet_outputs["layer3"].clone());
save_tensor_to_file(cpp_conv4_1t_out, (bb_reg_out_dir / (sample_suffix + "_debug_conv4_1t_output.pt")).string());
std::cout << "BBRegressor processing done for sample " << i << std::endl;
} catch (const std::exception& e) {
std::cerr << "Error during BBRegressor processing for sample " << i << ": " << e.what() << std::endl;

Loading…
Cancel
Save