#!/usr/bin/env python3
import os
import torch
import numpy as np
import glob
import matplotlib.pyplot as plt
from pathlib import Path
import sys
import json
from tqdm import tqdm
import inspect

# Add the project root to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Import model wrappers
from pytracking.features.net_wrappers import DiMPTorchScriptWrapper


class ModelComparison:
    def __init__(self, model_dir='exported_weights', num_samples=1000):
        self.model_dir = model_dir
        self.num_samples = num_samples
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Initialize comparison output directories
        self.comparison_dir = Path('test') / 'comparison'
        self.comparison_dir.mkdir(parents=True, exist_ok=True)
        self.plots_dir = self.comparison_dir / 'plots'
        self.plots_dir.mkdir(parents=True, exist_ok=True)  # ensure the plot directory exists before figures are saved

        # Initialize models
        self._init_models()

    def _init_models(self):
        """Initialize Python models."""
        print("Loading Python models...")
        # Load DiMP components
        self.models = DiMPTorchScriptWrapper(
            model_dir=self.model_dir,
            device=self.device,
            backbone_sd='backbone',        # Directory with backbone weights
            classifier_sd='classifier',    # Directory with classifier weights
            bbregressor_sd='bb_regressor'  # Directory with bbox regressor weights
        )
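
    # NOTE: the real `load_cpp_tensor` is defined elsewhere in this file. The sketch
    # below only illustrates the contract the comparison code relies on: load a tensor
    # written by the C++ side from `path`, move it to `device`, and return None on any
    # failure. The fallback to torch.jit.load for TorchScript archives is an assumption
    # about how the C++ exporter serializes tensors, not a confirmed detail.
    def load_cpp_tensor(self, path, device):
        """Hedged sketch: load a tensor produced by the C++ pipeline, or None on failure."""
        try:
            data = torch.load(str(path), map_location=device)
            if isinstance(data, torch.Tensor):
                return data.to(device)
        except Exception:
            pass
        try:
            # Assumption: tensors saved with torch::save() on the C++ side load as a
            # TorchScript archive whose sole parameter is the tensor itself.
            container = torch.jit.load(str(path), map_location=device)
            params = list(container.parameters())
            if params:
                return params[0].to(device)
        except Exception as e:
            print(f"Warning: could not load tensor from {path}: {e}")
        return None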

    def compare_classifier(self):
        """Compare classifier model outputs between Python and C++."""
        print("\nComparing classifier outputs...")
        # Use Path objects for consistency
        input_dir_path = Path('test') / 'input_samples' / 'classifier'
        cpp_output_dir_path = Path('test') / 'output' / 'classifier'
        if not input_dir_path.exists() or not cpp_output_dir_path.exists():
            print(f"Classifier input or C++ output directory not found ({input_dir_path}, {cpp_output_dir_path}). Skipping.")
            return

        # Per-sample statistics are accumulated in self.all_errors_stats, which is
        # (re)initialized for each test run; the old train_errors/test_errors lists were removed.

        # Compare training samples
        print("\nClassifier - Comparing Training Samples...")
        for i in tqdm(range(self.num_samples), desc="Training samples"):
            current_errors = {}  # Errors for this sample
            sample_dir = input_dir_path / f'sample_{i}'
            cpp_out_sample_dir = cpp_output_dir_path / f'sample_{i}'
            py_clf_feat = None
            cpp_clf_feat = None
            if not sample_dir.exists() or not cpp_out_sample_dir.exists():
                print(f"Warning: Skipping classifier train sample {i}, files not found at {sample_dir} or {cpp_out_sample_dir}.")
                # No explicit error assignment here; _compare_tensor_data handles the None inputs.
            else:
                feat_path = sample_dir / 'backbone_feat.pt'
                feat = self.load_cpp_tensor(feat_path, self.device)
                if feat is None:
                    print(f"Critical: Failed to load input tensor {feat_path} for classifier train sample {i}.")
                else:
                    try:
                        with torch.no_grad():
                            py_clf_feat = self.models.classifier.extract_classification_feat(feat)
                    except Exception as e:
                        print(f"ERROR: Python model extract_classification_feat (train) failed for sample {i}: {e}")
                        # py_clf_feat remains None

                cpp_clf_feat_path = cpp_out_sample_dir / 'clf_features.pt'
                cpp_clf_feat = self.load_cpp_tensor(cpp_clf_feat_path, self.device)
                if cpp_clf_feat is None:
                    print(f"Warning: Failed to load C++ output tensor {cpp_clf_feat_path} for classifier train sample {i}.")

            self._compare_tensor_data(py_clf_feat, cpp_clf_feat, "Classifier Features Train", i, current_errors)
            if current_errors:
                self.all_errors_stats[f"Clf_Train_Sample_{i}"] = current_errors

        # Compare test samples
        print("\nClassifier - Comparing Test Samples...")
        for i in tqdm(range(self.num_samples), desc="Test samples"):
            current_errors = {}  # Errors for this sample
            test_sample_input_dir = input_dir_path / f'test_{i}'
            cpp_test_out_sample_dir = cpp_output_dir_path / f'test_{i}'
            py_clf_feat_test = None
            cpp_clf_feat_test = None
            if not test_sample_input_dir.exists() or not cpp_test_out_sample_dir.exists():
                print(f"Warning: Skipping classifier test sample {i}, files not found at {test_sample_input_dir} or {cpp_test_out_sample_dir}.")
            else:
                test_feat_path = test_sample_input_dir / 'test_feat.pt'
                test_feat = self.load_cpp_tensor(test_feat_path, self.device)
                if test_feat is None:
                    print(f"Critical: Failed to load input tensor {test_feat_path} for classifier test sample {i}.")
                else:
                    try:
                        with torch.no_grad():
                            py_clf_feat_test = self.models.classifier.extract_classification_feat(test_feat)
                    except Exception as e:
                        print(f"ERROR: Python model extract_classification_feat (test) failed for sample {i}: {e}")
                        # py_clf_feat_test remains None

                cpp_clf_feat_test_path = cpp_test_out_sample_dir / 'clf_feat_test.pt'
                cpp_clf_feat_test = self.load_cpp_tensor(cpp_clf_feat_test_path, self.device)
                if cpp_clf_feat_test is None:
                    print(f"Warning: Failed to load C++ output tensor {cpp_clf_feat_test_path} for classifier test sample {i}.")

            self._compare_tensor_data(py_clf_feat_test, cpp_clf_feat_test, "Classifier Features Test", i, current_errors)
            if current_errors:
                self.all_errors_stats[f"Clf_Test_Sample_{i}"] = current_errors

        # The old per-list MAE statistics and histogram plotting were removed; per-sample
        # metrics now flow into self.all_errors_stats and are summarized by generate_html_report().
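
    # NOTE: `_compare_tensor_data` is defined elsewhere in this file. The sketch below
    # mirrors the contract assumed by generate_html_report(): it stores an 11-tuple
    # (mae, max_err, diff_arr, mean_py_val, std_abs_err, l2_py, l2_cpp, l2_diff,
    # cos_sim, pearson, mre) under `name` in `errors_dict`, and uses NaNs / None when
    # either tensor is missing. The epsilon value and the choice of abs(py - cpp) as the
    # stored diff array are assumptions.
    def _compare_tensor_data(self, py_tensor, cpp_tensor, name, sample_idx, errors_dict, eps=1e-9):
        """Hedged sketch of the per-tensor comparison used throughout this class."""
        nan = float('nan')
        if py_tensor is None or cpp_tensor is None or py_tensor.shape != cpp_tensor.shape:
            errors_dict[name] = (nan, nan, None, nan, nan, nan, nan, nan, nan, nan, nan)
            return
        py = py_tensor.detach().float().cpu().numpy().ravel()
        cpp = cpp_tensor.detach().float().cpu().numpy().ravel()
        diff = py - cpp
        abs_diff = np.abs(diff)
        mae = float(abs_diff.mean())
        max_err = float(abs_diff.max())
        mean_py_val = float(py.mean())
        std_abs_err = float(abs_diff.std())
        l2_py = float(np.linalg.norm(py))
        l2_cpp = float(np.linalg.norm(cpp))
        l2_diff = float(np.linalg.norm(diff))
        cos_sim = float(np.dot(py, cpp) / (l2_py * l2_cpp + eps))
        pearson = float(np.corrcoef(py, cpp)[0, 1]) if py.size > 1 else nan
        mre = float(np.mean(abs_diff / (np.abs(py) + eps)))
        errors_dict[name] = (mae, max_err, abs_diff, mean_py_val, std_abs_err,
                             l2_py, l2_cpp, l2_diff, cos_sim, pearson, mre)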

    def compare_bb_regressor(self):
        """Compare bb_regressor model outputs between Python and C++."""
        print("\nComparing bb_regressor outputs...")
        input_dir = Path('test') / 'input_samples' / 'bb_regressor'
        cpp_output_dir = Path('test') / 'output' / 'bb_regressor'
        if not input_dir.exists() or not cpp_output_dir.exists():
            print(f"BB Regressor input or C++ output directory not found ({input_dir}, {cpp_output_dir}). Skipping.")
            return

        for i in tqdm(range(self.num_samples), desc="BB Regressor samples"):
            sample_dir = input_dir / f'sample_{i}'
            cpp_output_sample_dir = cpp_output_dir / f'sample_{i}'

            # Load input tensors for the BB Regressor for this sample
            feat_layer2_path = sample_dir / 'feat_layer2.pt'
            feat_layer3_path = sample_dir / 'feat_layer3.pt'
            init_bbox_path = sample_dir / 'init_bbox.pt'
            proposals_path = sample_dir / 'proposals.pt'
            feat_layer2 = self.load_cpp_tensor(feat_layer2_path, self.device)
            feat_layer3 = self.load_cpp_tensor(feat_layer3_path, self.device)
            init_bbox = self.load_cpp_tensor(init_bbox_path, self.device)
            proposals = self.load_cpp_tensor(proposals_path, self.device)
            if any(t is None for t in [feat_layer2, feat_layer3, init_bbox, proposals]):
                print(f"Critical: Failed to load one or more BB Regressor input tensors for sample {i}. Skipping.")
                continue

            backbone_feat_tuple = (feat_layer2, feat_layer3)

            # Get IoU features from the Python model
            # (self.models.get_backbone_bbreg_feat calls self.bb_regressor.get_iou_feat)
            with torch.no_grad():
                py_iou_feat = self.models.get_backbone_bbreg_feat({"layer2": feat_layer2, "layer3": feat_layer3})

            # Get modulation vectors; squeeze init_bbox to (batch, 4) if needed
            squeezed_init_bbox = init_bbox
            if init_bbox is not None and init_bbox.dim() == 3 and init_bbox.shape[1] == 1:
                squeezed_init_bbox = init_bbox.squeeze(1)
            with torch.no_grad():
                # Pass the original backbone features to get_modulation
                py_modulation = self.models.bb_regressor.get_modulation(backbone_feat_tuple, squeezed_init_bbox)

            # DEBUG: print shapes
            print(f"Sample {i}: py_iou_feat[0] shape: {py_iou_feat[0].shape}, py_modulation[0] shape: {py_modulation[0].shape}")
            print(f"Sample {i}: py_iou_feat[1] shape: {py_iou_feat[1].shape}, py_modulation[1] shape: {py_modulation[1].shape}")

            # Predict IoU (Python model)
            py_iou_pred = None
            try:
                with torch.no_grad():
                    py_iou_pred = self.models.bb_regressor.predict_iou(py_modulation, py_iou_feat, proposals)
            except RuntimeError as e:
                print(f"WARNING: Python model self.models.bb_regressor.predict_iou failed for sample {i}: {e}")

            # Load C++ outputs
            cpp_iou_pred_path = cpp_output_sample_dir / 'iou_pred.pt'
            cpp_modulation_0_path = cpp_output_sample_dir / 'modulation_0.pt'
            cpp_modulation_1_path = cpp_output_sample_dir / 'modulation_1.pt'
            cpp_feat_0_path = cpp_output_sample_dir / 'iou_feat_0.pt'
            cpp_feat_1_path = cpp_output_sample_dir / 'iou_feat_1.pt'
            cpp_iou_pred = self.load_cpp_tensor(cpp_iou_pred_path, self.device)
            cpp_modulation_0 = self.load_cpp_tensor(cpp_modulation_0_path, self.device)
            cpp_modulation_1 = self.load_cpp_tensor(cpp_modulation_1_path, self.device)
            cpp_feat_0 = self.load_cpp_tensor(cpp_feat_0_path, self.device)
            cpp_feat_1 = self.load_cpp_tensor(cpp_feat_1_path, self.device)

            current_errors = {}  # Errors for this sample, for the HTML report

            # Compare IoU features (py_iou_feat vs cpp_feat_0/1); _compare_tensor_data handles None inputs
            py_iou_f0 = py_iou_feat[0] if py_iou_feat and len(py_iou_feat) > 0 else None
            py_iou_f1 = py_iou_feat[1] if py_iou_feat and len(py_iou_feat) > 1 else None
            self._compare_tensor_data(py_iou_f0, cpp_feat_0, "BBReg PyIoUFeat0 vs CppIoUFeat0", i, current_errors)
            self._compare_tensor_data(py_iou_f1, cpp_feat_1, "BBReg PyIoUFeat1 vs CppIoUFeat1", i, current_errors)

            # Compare modulation vectors (py_modulation vs cpp_modulation_0/1)
            py_mod_0 = py_modulation[0] if py_modulation and len(py_modulation) > 0 else None
            py_mod_1 = py_modulation[1] if py_modulation and len(py_modulation) > 1 else None
            self._compare_tensor_data(py_mod_0, cpp_modulation_0, "BBReg PyMod0 vs CppMod0", i, current_errors)
            self._compare_tensor_data(py_mod_1, cpp_modulation_1, "BBReg PyMod1 vs CppMod1", i, current_errors)

            # Compare the final IoU prediction; None values are handled by _compare_tensor_data
            self._compare_tensor_data(py_iou_pred, cpp_iou_pred, "BBReg IoUPred", i, current_errors)

            if current_errors:
                # Add to the overall statistics if any comparisons were made/attempted.
                # Failed or skipped comparisons simply show up as NaN in the HTML report.
                self.all_errors_stats[f"BBReg_Sample_{i}"] = current_errors

        if not self.all_errors_stats:
            print("No BB Regressor comparisons were performed for this model type.")
            return

        # The old per-metric averaging and histogram plotting were removed; results are
        # summarized by generate_html_report() from self.all_errors_stats.
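
    # NOTE: `_generate_single_plot` is defined elsewhere in this file; this sketch just
    # shows the assumed behaviour relied on by generate_html_report(): save a histogram
    # of the per-element error array to `out_path`, with the summary statistics that the
    # report links to annotated in the title.
    def _generate_single_plot(self, diff_arr, title, out_path, mean_py_val, std_abs_err, mae, max_err):
        """Hedged sketch: histogram of the error distribution for one comparison/sample."""
        out_path = Path(out_path)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.hist(np.asarray(diff_arr).ravel(), bins=50)
        ax.set_xlabel('abs(py - cpp)')
        ax.set_ylabel('count')
        ax.set_title(f"{title}\nMAE={mae:.3e}  Max={max_err:.3e}  Err Std={std_abs_err:.3e}  Mean Py={mean_py_val:.3e}")
        fig.tight_layout()
        fig.savefig(out_path)
        plt.close(fig)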

    def generate_html_report(self):
        print("\nGenerating HTML report...")
        report_path = self.comparison_dir / "report.html"

        # Group the collected per-sample statistics by model component and comparison type:
        # report_data["<Model> - <Comparison>"] = {
        #     "samples": {idx: {"mae": ..., "max_err": ..., "mean_py_val": ..., "std_abs_err": ...,
        #                       "l2_py": ..., "l2_cpp": ..., "l2_diff": ..., "cos_sim": ...,
        #                       "pearson": ..., "mre": ..., "plot_path": ...}, ...},
        #     "overall_mae_mean": ..., "overall_mae_std": ..., "overall_max_err_mean": ..., ...
        # }
        report_data = {}

        for sample_key, comparisons in self.all_errors_stats.items():
            # sample_key examples: "Clf_Train_Sample_0", "Clf_Test_Sample_0", "BBReg_Sample_0"
            parts = sample_key.split("_")
            model_prefix = parts[0]  # Clf, BBReg
            sample_type_str = ""
            sample_idx = -1
            if model_prefix == "Clf":
                sample_type_str = parts[1]  # Train or Test
                sample_idx = int(parts[-1])
                model_name_key = f"Classifier {sample_type_str}"
            elif model_prefix == "BBReg":
                sample_idx = int(parts[-1])
                model_name_key = "BB Regressor"
            else:
                print(f"WARNING: Unknown sample key format in all_errors_stats: {sample_key}")
                continue

            for comparison_name, stats in comparisons.items():
                # comparison_name examples: "Classifier Features Train", "BBReg PyIoUFeat0 vs CppIoUFeat0"
                # Unpack all 11 metrics
                mae, max_err, diff_arr, mean_py_val, std_abs_err, \
                    l2_py, l2_cpp, l2_diff, cos_sim, pearson, mre = stats

                full_comparison_key = f"{model_name_key} - {comparison_name}"
                if full_comparison_key not in report_data:
                    report_data[full_comparison_key] = {
                        "samples": {},
                        "all_maes": [], "all_max_errs": [],
                        "all_mean_py_vals": [], "all_std_abs_errs": [],
                        "all_l2_py_vals": [], "all_l2_cpp_vals": [], "all_l2_diff_vals": [],
                        "all_cos_sim_vals": [], "all_pearson_vals": [], "all_mre_vals": []
                    }

                plot_filename = None
                if diff_arr is not None and len(diff_arr) > 0 and not np.all(np.isnan(diff_arr)):
                    plot_filename = (f"{model_prefix}_{sample_type_str}_sample{sample_idx}_"
                                     f"{comparison_name.replace(' ', '_').replace('/', '_')}_hist.png")
                    plot_abs_path = self.plots_dir / plot_filename
                    self._generate_single_plot(diff_arr, comparison_name, plot_abs_path,
                                               mean_py_val, std_abs_err, mae, max_err)

                report_data[full_comparison_key]["samples"][sample_idx] = {
                    "mae": mae, "max_err": max_err,
                    "mean_py_val": mean_py_val, "std_abs_err": std_abs_err,
                    "l2_py": l2_py, "l2_cpp": l2_cpp, "l2_diff": l2_diff,
                    "cos_sim": cos_sim, "pearson": pearson, "mre": mre,
                    "plot_path": plot_filename  # relative path used by the HTML report
                }

                if not np.isnan(mae):
                    report_data[full_comparison_key]["all_maes"].append(mae)
                if not np.isnan(max_err):
                    report_data[full_comparison_key]["all_max_errs"].append(max_err)
                if not np.isnan(mean_py_val):
                    report_data[full_comparison_key]["all_mean_py_vals"].append(mean_py_val)
                if not np.isnan(std_abs_err):
                    report_data[full_comparison_key]["all_std_abs_errs"].append(std_abs_err)
                if not np.isnan(l2_py):
                    report_data[full_comparison_key]["all_l2_py_vals"].append(l2_py)
                if not np.isnan(l2_cpp):
                    report_data[full_comparison_key]["all_l2_cpp_vals"].append(l2_cpp)
                if not np.isnan(l2_diff):
                    report_data[full_comparison_key]["all_l2_diff_vals"].append(l2_diff)
                if not np.isnan(cos_sim):
                    report_data[full_comparison_key]["all_cos_sim_vals"].append(cos_sim)
                if not np.isnan(pearson):
                    report_data[full_comparison_key]["all_pearson_vals"].append(pearson)
                if not np.isnan(mre):
                    report_data[full_comparison_key]["all_mre_vals"].append(mre)

        # Calculate overall (per-comparison) statistics
        for comp_key, data in report_data.items():
            data["overall_mae_mean"] = np.mean(data["all_maes"]) if data["all_maes"] else float('nan')
            data["overall_mae_std"] = np.std(data["all_maes"]) if data["all_maes"] else float('nan')
            data["overall_max_err_mean"] = np.mean(data["all_max_errs"]) if data["all_max_errs"] else float('nan')
            data["overall_mean_py_val_mean"] = np.mean(data["all_mean_py_vals"]) if data["all_mean_py_vals"] else float('nan')
            data["overall_std_abs_err_mean"] = np.mean(data["all_std_abs_errs"]) if data["all_std_abs_errs"] else float('nan')
            data["overall_l2_py_mean"] = np.mean(data["all_l2_py_vals"]) if data["all_l2_py_vals"] else float('nan')
            data["overall_l2_cpp_mean"] = np.mean(data["all_l2_cpp_vals"]) if data["all_l2_cpp_vals"] else float('nan')
            data["overall_l2_diff_mean"] = np.mean(data["all_l2_diff_vals"]) if data["all_l2_diff_vals"] else float('nan')
            data["overall_cos_sim_mean"] = np.mean(data["all_cos_sim_vals"]) if data["all_cos_sim_vals"] else float('nan')
            data["overall_pearson_mean"] = np.mean(data["all_pearson_vals"]) if data["all_pearson_vals"] else float('nan')
            data["overall_mre_mean"] = np.mean(data["all_mre_vals"]) if data["all_mre_vals"] else float('nan')

        # HTML Generation
        html_content = f"""<html>
<head><title>Python vs C++ Model Comparison</title></head>
<body>
<h1>Python vs C++ Model Comparison</h1>
<p>Number of samples per model component: {self.num_samples}</p>
<h2>Metric definitions</h2>
<ul>
  <li><b>MAE</b>: mean(abs(py - cpp)). The "Mean MAE" in the summary table is the average of these MAEs over all samples for a given comparison.</li>
  <li><b>Mean Max Error</b>: per-sample max(abs(py - cpp)), averaged over samples (mean(max(abs(py - cpp)))).</li>
  <li><b>Mean Py Val</b>: mean of the Python tensor for each sample (mean(mean(py_tensor_sample_N))).</li>
  <li><b>Std Abs Err</b>: standard deviation of abs(py - cpp) for each sample. The "Err Std" in plot titles is this value for that specific sample.</li>
  <li><b>L2 Py / L2 Cpp / L2 Diff</b>: L2 norms of the Python tensor, the C++ tensor, and their difference (py - cpp), averaged over all samples.</li>
  <li><b>Cosine Sim</b>: dot(py, cpp) / (norm(py) * norm(cpp)).</li>
  <li><b>Pearson Corr</b>: Pearson correlation between the flattened Python and C++ tensors.</li>
  <li><b>MRE</b>: mean(abs(py - cpp) / (abs(py) + epsilon)). Epsilon is a small value to prevent division by zero.</li>
</ul>
<h2>Summary</h2>
<table border="1">
<tr><th>Comparison Key</th><th>Mean MAE</th><th>Std MAE</th><th>Mean Max Error</th><th>Mean Py Val</th><th>Mean Std Abs Err</th><th>Mean L2 Py</th><th>Mean L2 Cpp</th><th>Mean L2 Diff</th><th>Mean Cosine Sim</th><th>Mean Pearson Corr</th><th>Mean MRE</th></tr>
"""
        for comp_key, data in report_data.items():
            html_content += f"""<tr>
<td>{comp_key}</td>
<td>{f"{data['overall_mae_mean']:.4e}" if not np.isnan(data['overall_mae_mean']) else 'N/A'}</td>
<td>{f"{data['overall_mae_std']:.4e}" if not np.isnan(data['overall_mae_std']) else 'N/A'}</td>
<td>{f"{data['overall_max_err_mean']:.4e}" if not np.isnan(data['overall_max_err_mean']) else 'N/A'}</td>
<td>{f"{data['overall_mean_py_val_mean']:.4e}" if not np.isnan(data['overall_mean_py_val_mean']) else 'N/A'}</td>
<td>{f"{data['overall_std_abs_err_mean']:.4e}" if not np.isnan(data['overall_std_abs_err_mean']) else 'N/A'}</td>
<td>{f"{data['overall_l2_py_mean']:.4e}" if not np.isnan(data['overall_l2_py_mean']) else 'N/A'}</td>
<td>{f"{data['overall_l2_cpp_mean']:.4e}" if not np.isnan(data['overall_l2_cpp_mean']) else 'N/A'}</td>
<td>{f"{data['overall_l2_diff_mean']:.4e}" if not np.isnan(data['overall_l2_diff_mean']) else 'N/A'}</td>
<td>{f"{data['overall_cos_sim_mean']:.4f}" if not np.isnan(data['overall_cos_sim_mean']) else 'N/A'}</td>
<td>{f"{data['overall_pearson_mean']:.4f}" if not np.isnan(data['overall_pearson_mean']) else 'N/A'}</td>
<td>{f"{data['overall_mre_mean']:.4e}" if not np.isnan(data['overall_mre_mean']) else 'N/A'}</td>
</tr>
"""
        html_content += "</table>\n"

        # Per-comparison detail sections: one table of per-sample metrics and plots each
        for comp_key, data in report_data.items():
            html_content += f"""<h2>{comp_key}</h2>
<p>Overall Mean MAE: {f'{data["overall_mae_mean"]:.4e}' if not np.isnan(data['overall_mae_mean']) else 'N/A'}</p>
"""
            html_content += ('<table border="1"><tr><th>Sample Index</th><th>MAE</th><th>Max Error</th>'
                             '<th>Mean Py Val</th><th>Std Abs Err</th><th>L2 Py</th><th>L2 Cpp</th><th>L2 Diff</th>'
                             '<th>Cosine Sim</th><th>Pearson Corr</th><th>MRE</th><th>Error Distribution Plot</th></tr>\n')
            for sample_idx, sample_data in sorted(data["samples"].items()):
                # Plot paths are stored relative to self.plots_dir; the report lives one level up.
                img_tag = f'<img src="plots/{sample_data["plot_path"]}">' if sample_data["plot_path"] else 'N/A'
                html_content += f"""<tr>
<td>{sample_idx}</td>
<td>{f"{sample_data['mae']:.4e}" if not np.isnan(sample_data['mae']) else 'N/A'}</td>
<td>{f"{sample_data['max_err']:.4e}" if not np.isnan(sample_data['max_err']) else 'N/A'}</td>
<td>{f"{sample_data['mean_py_val']:.4e}" if not np.isnan(sample_data['mean_py_val']) else 'N/A'}</td>
<td>{f"{sample_data['std_abs_err']:.4e}" if not np.isnan(sample_data['std_abs_err']) else 'N/A'}</td>
<td>{f"{sample_data['l2_py']:.4e}" if not np.isnan(sample_data['l2_py']) else 'N/A'}</td>
<td>{f"{sample_data['l2_cpp']:.4e}" if not np.isnan(sample_data['l2_cpp']) else 'N/A'}</td>
<td>{f"{sample_data['l2_diff']:.4e}" if not np.isnan(sample_data['l2_diff']) else 'N/A'}</td>
<td>{f"{sample_data['cos_sim']:.4f}" if not np.isnan(sample_data['cos_sim']) else 'N/A'}</td>
<td>{f"{sample_data['pearson']:.4f}" if not np.isnan(sample_data['pearson']) else 'N/A'}</td>
<td>{f"{sample_data['mre']:.4e}" if not np.isnan(sample_data['mre']) else 'N/A'}</td>
<td>{img_tag}</td>
</tr>
"""