import math
import torch.nn as nn
from collections import OrderedDict
import ltr.models.target_classifier.linear_filter as target_clf
import ltr.models.target_classifier.features as clf_features
import ltr.models.target_classifier.initializer as clf_initializer
import ltr.models.target_classifier.optimizer as clf_optimizer
import ltr.models.bbreg as bbmodels
import ltr.models.backbone as backbones
from ltr import model_constructor


class DiMPnet(nn.Module):
    """The DiMP network.
    args:
        feature_extractor:  Backbone feature extractor network. Must return a dict of feature maps.
        classifier:  Target classification module.
        bb_regressor:  Bounding box regression module.
        classification_layer:  Name of the backbone feature layer to use for classification.
        bb_regressor_layer:  Names of the backbone layers to use for bounding box regression."""

    def __init__(self, feature_extractor, classifier, bb_regressor, classification_layer, bb_regressor_layer):
        super().__init__()

        self.feature_extractor = feature_extractor
        self.classifier = classifier
        self.bb_regressor = bb_regressor
        self.classification_layer = [classification_layer] if isinstance(classification_layer, str) else classification_layer
        self.bb_regressor_layer = bb_regressor_layer
        self.output_layers = sorted(list(set(self.classification_layer + self.bb_regressor_layer)))

    def get_backbone_clf_feat(self, backbone_feat):
        """Get the backbone feature maps used for classification."""
        feat = OrderedDict({l: backbone_feat[l] for l in self.classification_layer})
        if len(self.classification_layer) == 1:
            return feat[self.classification_layer[0]]
        return feat

    def get_backbone_bbreg_feat(self, backbone_feat):
        """Get the backbone feature maps used for bounding box regression."""
        return [backbone_feat[l] for l in self.bb_regressor_layer]

    def extract_classification_feat(self, backbone_feat):
        """Extract classification features from the given backbone feature maps."""
        return self.classifier.extract_classification_feat(self.get_backbone_clf_feat(backbone_feat))

    def extract_backbone_features(self, im, layers=None):
        """Run the backbone on an image batch and return the requested feature layers."""
        if layers is None:
            layers = self.output_layers
        return self.feature_extractor(im, layers)


@model_constructor
def dimpnet50(filter_size=1, optim_iter=5, optim_init_step=1.0, optim_init_reg=0.01,
              classification_layer='layer3', feat_stride=16, backbone_pretrained=True, clf_feat_blocks=0,
              clf_feat_norm=True, init_filter_norm=False, final_conv=True,
              out_feature_dim=512, init_gauss_sigma=1.0, num_dist_bins=5, bin_displacement=1.0,
              mask_init_factor=4.0, iou_input_dim=(256, 256), iou_inter_dim=(256, 256),
              score_act='relu', act_param=None, target_mask_act='sigmoid',
              detach_length=float('Inf'), frozen_backbone_layers=()):
    # Backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained, frozen_layers=frozen_backbone_layers)

    # Feature normalization
    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    # Classifier features
    if classification_layer == 'layer3':
        feature_dim = 256
    elif classification_layer == 'layer4':
        feature_dim = 512
    else:
        raise Exception

    clf_feature_extractor = clf_features.residual_bottleneck(feature_dim=feature_dim,
                                                             num_blocks=clf_feat_blocks, l2norm=clf_feat_norm,
                                                             final_conv=final_conv, norm_scale=norm_scale,
                                                             out_dim=out_feature_dim)

    # Initializer for the DiMP classifier
    initializer = clf_initializer.FilterInitializerLinear(filter_size=filter_size, filter_norm=init_filter_norm,
                                                          feature_dim=out_feature_dim)

    # Optimizer for the DiMP classifier
    optimizer = clf_optimizer.DiMPSteepestDescentGN(num_iter=optim_iter, feat_stride=feat_stride,
                                                    init_step_length=optim_init_step, init_filter_reg=optim_init_reg,
                                                    init_gauss_sigma=init_gauss_sigma, num_dist_bins=num_dist_bins,
                                                    bin_displacement=bin_displacement, mask_init_factor=mask_init_factor,
                                                    score_act=score_act, act_param=act_param, mask_act=target_mask_act,
                                                    detach_length=detach_length)

    # The classifier module
    classifier = target_clf.LinearFilter(filter_size=filter_size, filter_initializer=initializer,
                                         filter_optimizer=optimizer, feature_extractor=clf_feature_extractor)

    # Bounding box regressor
    bb_regressor = bbmodels.AtomIoUNet(input_dim=(4*128, 4*256), pred_input_dim=iou_input_dim,
                                       pred_inter_dim=iou_inter_dim)

    # DiMP network
    net = DiMPnet(feature_extractor=backbone_net, classifier=classifier, bb_regressor=bb_regressor,
                  classification_layer=classification_layer, bb_regressor_layer=['layer2', 'layer3'])
    return net
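

# Minimal usage sketch (illustrative, not part of the original module): build the
# network with the dimpnet50 constructor above and run backbone / classification
# feature extraction on a dummy image batch. The (1, 3, 288, 288) input shape and
# backbone_pretrained=False are assumptions chosen to keep this smoke test
# self-contained; the full tracking forward pass additionally needs train frames
# and target boxes for the classifier and the IoU regressor.
if __name__ == '__main__':
    import torch

    net = dimpnet50(backbone_pretrained=False)
    net.eval()

    with torch.no_grad():
        im = torch.randn(1, 3, 288, 288)                           # dummy image batch (B, C, H, W)
        backbone_feat = net.extract_backbone_features(im)           # dict over net.output_layers ('layer2', 'layer3')
        clf_feat = net.extract_classification_feat(backbone_feat)   # features fed to the DiMP classifier
        bbreg_feat = net.get_backbone_bbreg_feat(backbone_feat)     # features fed to the IoU regressor
        print(clf_feat.shape, [f.shape for f in bbreg_feat])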