import collections.abc as container_abc
from collections import OrderedDict
from math import ceil, floor

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import model_zoo

# Optional coordinate-attention module; it is only used by the commented-out
# CoordAtt head in EfficientNetV2 below, so the import is kept commented
# alongside it. Re-enable both together.
# from coordatt import CoordAtt

# class CoordAtt(nn.Module):
#     def __init__(self, inp, oup, groups=32):
#         super(CoordAtt, self).__init__()
#         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
#         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
#         mip = max(8, inp // groups)
#         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
#         self.bn1 = nn.BatchNorm2d(mip)
#         self.conv2 = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
#         self.conv3 = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
#         self.relu = h_swish()
#
#     def forward(self, x):
#         identity = x
#         n, c, h, w = x.size()
#         x_h = self.pool_h(x)
#         x_w = self.pool_w(x).permute(0, 1, 3, 2)
#         y = torch.cat([x_h, x_w], dim=2)
#         y = self.conv1(y)
#         y = self.bn1(y)
#         y = self.relu(y)
#         x_h, x_w = torch.split(y, [h, w], dim=2)
#         x_w = x_w.permute(0, 1, 3, 2)
#         x_h = self.conv2(x_h).sigmoid()
#         x_w = self.conv3(x_w).sigmoid()
#         x_h = x_h.expand(-1, -1, h, w)
#         x_w = x_w.expand(-1, -1, h, w)
#         y = identity * x_w * x_h
#         return y


def _pair(x):
    if isinstance(x, container_abc.Iterable):
        return x
    return (x, x)


def torch_conv_out_spatial_shape(in_spatial_shape, kernel_size, stride):
    if in_spatial_shape is None:
        return None
    # in_spatial_shape -> [H, W]
    hin, win = _pair(in_spatial_shape)
    kh, kw = _pair(kernel_size)
    sh, sw = _pair(stride)
    # padding=1 and dilation=1 are baked into the formula, since those are the
    # fixed values used by every non-TF-style convolution in EfficientNetV2:
    # out = floor((in + 2*pad - dil*(k - 1) - 1) / stride + 1)
    hout = int(floor((hin + 2 - kh) / sh + 1))
    wout = int(floor((win + 2 - kw) / sw + 1))
    return hout, wout


def get_activation(act_fn: str, **kwargs):
    if act_fn in ('silu', 'swish'):
        return nn.SiLU(**kwargs)
    elif act_fn == 'relu':
        return nn.ReLU(**kwargs)
    elif act_fn == 'relu6':
        return nn.ReLU6(**kwargs)
    elif act_fn == 'elu':
        return nn.ELU(**kwargs)
    elif act_fn == 'leaky_relu':
        return nn.LeakyReLU(**kwargs)
    elif act_fn == 'selu':
        return nn.SELU(**kwargs)
    elif act_fn == 'mish':
        return nn.Mish(**kwargs)
    else:
        raise ValueError('Unsupported act_fn {}'.format(act_fn))


def round_filters(filters, width_coefficient, depth_divisor=8):
    """Round number of filters based on width multiplier."""
    min_depth = depth_divisor
    filters *= width_coefficient
    new_filters = max(min_depth,
                      int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
    return int(new_filters)


def round_repeats(repeats, depth_coefficient):
    """Round number of block repeats based on depth multiplier."""
    return int(ceil(depth_coefficient * repeats))


class DropConnect(nn.Module):
    def __init__(self, rate=0.5):
        super(DropConnect, self).__init__()
        self.keep_prob = None
        self.set_rate(rate)

    def set_rate(self, rate):
        if not 0 <= rate < 1:
            raise ValueError("rate must be 0<=rate<1, got {} instead".format(rate))
        self.keep_prob = 1 - rate

    def forward(self, x):
        if self.training:
            # per-sample Bernoulli mask: keep the whole residual branch with
            # probability keep_prob, and rescale to preserve the expectation
            random_tensor = self.keep_prob + torch.rand([x.size(0), 1, 1, 1],
                                                        dtype=x.dtype,
                                                        device=x.device)
            binary_tensor = torch.floor(random_tensor)
            return torch.mul(torch.div(x, self.keep_prob), binary_tensor)
        else:
            return x
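
# Illustrative sanity check (not part of the original module): how the rounding
# helpers scale a stage under hypothetical width/depth coefficients.
# round_filters snaps channel counts to multiples of the depth divisor (8);
# round_repeats simply takes the ceiling.
def _demo_round_helpers():
    assert round_filters(48, width_coefficient=1.1) == 56  # 48 * 1.1 = 52.8 -> nearest multiple of 8
    assert round_filters(32, width_coefficient=1.0) == 32  # unchanged when coefficient is 1.0
    assert round_repeats(5, depth_coefficient=1.2) == 6    # ceil(5 * 1.2)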
class SamePaddingConv2d(nn.Module):
    def __init__(self,
                 in_spatial_shape,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 dilation=1,
                 enforce_in_spatial_shape=False,
                 **kwargs):
        super(SamePaddingConv2d, self).__init__()

        self._in_spatial_shape = _pair(in_spatial_shape)
        # if enabled, throw an exception when the input spatial shape does not
        # match in_spatial_shape in self.forward()
        self.enforce_in_spatial_shape = enforce_in_spatial_shape
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        dilation = _pair(dilation)

        in_height, in_width = self._in_spatial_shape
        filter_height, filter_width = kernel_size
        stride_height, stride_width = stride
        dilation_height, dilation_width = dilation

        out_height = int(ceil(float(in_height) / float(stride_height)))
        out_width = int(ceil(float(in_width) / float(stride_width)))

        pad_along_height = max((out_height - 1) * stride_height +
                               filter_height +
                               (filter_height - 1) * (dilation_height - 1) -
                               in_height, 0)
        pad_along_width = max((out_width - 1) * stride_width +
                              filter_width +
                              (filter_width - 1) * (dilation_width - 1) -
                              in_width, 0)

        pad_top = pad_along_height // 2
        pad_bottom = pad_along_height - pad_top
        pad_left = pad_along_width // 2
        pad_right = pad_along_width - pad_left

        paddings = (pad_left, pad_right, pad_top, pad_bottom)
        if any(p > 0 for p in paddings):
            self.zero_pad = nn.ZeroPad2d(paddings)
        else:
            self.zero_pad = None

        self.conv = nn.Conv2d(in_channels=in_channels,
                              out_channels=out_channels,
                              kernel_size=kernel_size,
                              stride=stride,
                              dilation=dilation,
                              **kwargs)

        self._out_spatial_shape = (out_height, out_width)

    @property
    def out_spatial_shape(self):
        return self._out_spatial_shape

    def check_spatial_shape(self, x):
        if x.size(2) != self._in_spatial_shape[0] or \
                x.size(3) != self._in_spatial_shape[1]:
            raise ValueError(
                "Expected input spatial shape {}, got {} instead".format(
                    self._in_spatial_shape, x.shape[2:]))

    def forward(self, x):
        if self.enforce_in_spatial_shape:
            self.check_spatial_shape(x)
        if self.zero_pad is not None:
            x = self.zero_pad(x)
        x = self.conv(x)
        return x


class SqueezeExcitate(nn.Module):
    def __init__(self,
                 in_channels,
                 se_size,
                 activation=None):
        super(SqueezeExcitate, self).__init__()
        self.dim_reduce = nn.Conv2d(in_channels=in_channels,
                                    out_channels=se_size,
                                    kernel_size=1)
        self.dim_restore = nn.Conv2d(in_channels=se_size,
                                     out_channels=in_channels,
                                     kernel_size=1)
        self.activation = F.relu if activation is None else activation

    def forward(self, x):
        inp = x
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = self.dim_reduce(x)
        x = self.activation(x)
        x = self.dim_restore(x)
        x = torch.sigmoid(x)
        return torch.mul(inp, x)
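
# Illustrative shape check (not part of the original module): with TF-style
# "SAME" padding, a stride-2 conv produces ceil(in / 2) outputs regardless of
# kernel size, which is what keeps tf_style_conv weight layouts compatible.
def _demo_same_padding():
    conv = SamePaddingConv2d(in_spatial_shape=224, in_channels=3,
                             out_channels=8, kernel_size=3, stride=2)
    assert conv.out_spatial_shape == (112, 112)
    out = conv(torch.randn(1, 3, 224, 224))
    assert out.shape == (1, 8, 112, 112)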
class MBConvBlockV2(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expansion_factor,
                 act_fn,
                 act_kwargs=None,
                 bn_epsilon=None,
                 bn_momentum=None,
                 se_size=None,
                 drop_connect_rate=None,
                 bias=False,
                 tf_style_conv=False,
                 in_spatial_shape=None):
        super().__init__()

        if act_kwargs is None:
            act_kwargs = {}
        exp_channels = in_channels * expansion_factor

        self.ops_lst = []

        # expansion convolution
        if expansion_factor != 1:
            self.expand_conv = nn.Conv2d(in_channels=in_channels,
                                         out_channels=exp_channels,
                                         kernel_size=1,
                                         bias=bias)
            self.expand_bn = nn.BatchNorm2d(num_features=exp_channels,
                                            eps=bn_epsilon,
                                            momentum=bn_momentum)
            self.expand_act = get_activation(act_fn, **act_kwargs)
            self.ops_lst.extend([self.expand_conv, self.expand_bn, self.expand_act])

        # depth-wise convolution
        if tf_style_conv:
            self.dp_conv = SamePaddingConv2d(in_spatial_shape=in_spatial_shape,
                                             in_channels=exp_channels,
                                             out_channels=exp_channels,
                                             kernel_size=kernel_size,
                                             stride=stride,
                                             groups=exp_channels,
                                             bias=bias)
            self.out_spatial_shape = self.dp_conv.out_spatial_shape
        else:
            self.dp_conv = nn.Conv2d(in_channels=exp_channels,
                                     out_channels=exp_channels,
                                     kernel_size=kernel_size,
                                     stride=stride,
                                     padding=1,
                                     groups=exp_channels,
                                     bias=bias)
            self.out_spatial_shape = torch_conv_out_spatial_shape(in_spatial_shape,
                                                                  kernel_size,
                                                                  stride)

        self.dp_bn = nn.BatchNorm2d(num_features=exp_channels,
                                    eps=bn_epsilon,
                                    momentum=bn_momentum)
        self.dp_act = get_activation(act_fn, **act_kwargs)
        self.ops_lst.extend([self.dp_conv, self.dp_bn, self.dp_act])

        # squeeze-and-excite
        if se_size is not None:
            self.se = SqueezeExcitate(exp_channels,
                                      se_size,
                                      activation=get_activation(act_fn, **act_kwargs))
            self.ops_lst.append(self.se)

        # projection layer
        self.project_conv = nn.Conv2d(in_channels=exp_channels,
                                      out_channels=out_channels,
                                      kernel_size=1,
                                      bias=bias)
        self.project_bn = nn.BatchNorm2d(num_features=out_channels,
                                         eps=bn_epsilon,
                                         momentum=bn_momentum)
        # no activation function in the projection layer
        self.ops_lst.extend([self.project_conv, self.project_bn])

        self.skip_enabled = in_channels == out_channels and stride == 1

        if self.skip_enabled and drop_connect_rate is not None:
            self.drop_connect = DropConnect(drop_connect_rate)
            self.ops_lst.append(self.drop_connect)

    def forward(self, x):
        inp = x
        for op in self.ops_lst:
            x = op(x)

        if self.skip_enabled:
            return x + inp
        else:
            return x
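
# Illustrative usage (not part of the original module): an MBConv block with
# expansion factor 4 and stride 1 keeps channels and resolution, so the skip
# connection is enabled. Sizes are hypothetical; bn_epsilon/bn_momentum must
# be given explicitly since the block's defaults of None are only valid when
# EfficientNetV2 supplies real values.
def _demo_mbconv_block():
    block = MBConvBlockV2(in_channels=16, out_channels=16, kernel_size=3,
                          stride=1, expansion_factor=4, act_fn='silu',
                          bn_epsilon=1e-3, bn_momentum=0.01,
                          se_size=4, drop_connect_rate=0.1)
    assert block.skip_enabled
    out = block(torch.randn(2, 16, 32, 32))
    assert out.shape == (2, 16, 32, 32)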
class FusedMBConvBlockV2(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expansion_factor,
                 act_fn,
                 act_kwargs=None,
                 bn_epsilon=None,
                 bn_momentum=None,
                 se_size=None,
                 drop_connect_rate=None,
                 bias=False,
                 tf_style_conv=False,
                 in_spatial_shape=None):
        super().__init__()

        if act_kwargs is None:
            act_kwargs = {}
        exp_channels = in_channels * expansion_factor

        self.ops_lst = []

        # expansion convolution (fused: the k x k conv performs the expansion)
        expansion_out_shape = in_spatial_shape
        if expansion_factor != 1:
            if tf_style_conv:
                self.expand_conv = SamePaddingConv2d(in_spatial_shape=in_spatial_shape,
                                                     in_channels=in_channels,
                                                     out_channels=exp_channels,
                                                     kernel_size=kernel_size,
                                                     stride=stride,
                                                     bias=bias)
                expansion_out_shape = self.expand_conv.out_spatial_shape
            else:
                self.expand_conv = nn.Conv2d(in_channels=in_channels,
                                             out_channels=exp_channels,
                                             kernel_size=kernel_size,
                                             padding=1,
                                             stride=stride,
                                             bias=bias)
                expansion_out_shape = torch_conv_out_spatial_shape(in_spatial_shape,
                                                                   kernel_size,
                                                                   stride)
            self.expand_bn = nn.BatchNorm2d(num_features=exp_channels,
                                            eps=bn_epsilon,
                                            momentum=bn_momentum)
            self.expand_act = get_activation(act_fn, **act_kwargs)
            self.ops_lst.extend([self.expand_conv, self.expand_bn, self.expand_act])

        # squeeze-and-excite
        if se_size is not None:
            self.se = SqueezeExcitate(exp_channels,
                                      se_size,
                                      activation=get_activation(act_fn, **act_kwargs))
            self.ops_lst.append(self.se)

        # the skip connection depends on the block's overall stride, so decide
        # it before kernel_size/stride are overridden for the projection layer
        self.skip_enabled = in_channels == out_channels and stride == 1

        # projection layer: 1x1/stride 1 when the expansion conv has already
        # applied the spatial kernel and stride
        kernel_size = 1 if expansion_factor != 1 else kernel_size
        stride = 1 if expansion_factor != 1 else stride
        if tf_style_conv:
            self.project_conv = SamePaddingConv2d(in_spatial_shape=expansion_out_shape,
                                                  in_channels=exp_channels,
                                                  out_channels=out_channels,
                                                  kernel_size=kernel_size,
                                                  stride=stride,
                                                  bias=bias)
            self.out_spatial_shape = self.project_conv.out_spatial_shape
        else:
            self.project_conv = nn.Conv2d(in_channels=exp_channels,
                                          out_channels=out_channels,
                                          kernel_size=kernel_size,
                                          stride=stride,
                                          padding=1 if kernel_size > 1 else 0,
                                          bias=bias)
            self.out_spatial_shape = torch_conv_out_spatial_shape(expansion_out_shape,
                                                                  kernel_size,
                                                                  stride)
        self.project_bn = nn.BatchNorm2d(num_features=out_channels,
                                         eps=bn_epsilon,
                                         momentum=bn_momentum)
        self.ops_lst.extend([self.project_conv, self.project_bn])

        if expansion_factor == 1:
            self.project_act = get_activation(act_fn, **act_kwargs)
            self.ops_lst.append(self.project_act)

        if self.skip_enabled and drop_connect_rate is not None:
            self.drop_connect = DropConnect(drop_connect_rate)
            self.ops_lst.append(self.drop_connect)

    def forward(self, x):
        inp = x
        for op in self.ops_lst:
            x = op(x)

        if self.skip_enabled:
            return x + inp
        else:
            return x
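
# Illustrative usage (not part of the original module): a fused block with
# expansion applies its 3x3 stride-2 conv in the expansion step, so the
# projection falls back to 1x1/stride 1; sizes here are hypothetical.
def _demo_fused_mbconv_block():
    block = FusedMBConvBlockV2(in_channels=24, out_channels=48, kernel_size=3,
                               stride=2, expansion_factor=4, act_fn='silu',
                               bn_epsilon=1e-3, bn_momentum=0.01)
    out = block(torch.randn(2, 24, 64, 64))
    assert out.shape == (2, 48, 32, 32)  # spatial dims halved by the expansion conv
    assert not block.skip_enabled        # stride 2 and channel change disable skip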
class EfficientNetV2(nn.Module):
    # per-stage configuration and pretrained-weight locations for each variant
    _models = {
        'b0': {'num_repeat': [1, 2, 2, 3, 5, 8],
               'kernel_size': [3, 3, 3, 3, 3, 3],
               'stride': [1, 2, 2, 2, 1, 2],
               'expand_ratio': [1, 4, 4, 4, 6, 6],
               'in_channel': [32, 16, 32, 48, 96, 112],
               'out_channel': [16, 32, 48, 96, 112, 192],
               'se_ratio': [None, None, None, 0.25, 0.25, 0.25],
               'conv_type': [1, 1, 1, 0, 0, 0],
               'is_feature_stage': [False, True, True, False, True, True],
               'width_coefficient': 1.0,
               'depth_coefficient': 1.0,
               'train_size': 192,
               'eval_size': 224,
               'dropout': 0.2,
               'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmlnUVBhWkZRcWNXR3dINmRLP2U9UUI5ZndH/root/content',
               'model_name': 'efficientnet_v2_b0_21k_ft1k-a91e14c5.pth'},
        'b1': {'num_repeat': [1, 2, 2, 3, 5, 8],
               'kernel_size': [3, 3, 3, 3, 3, 3],
               'stride': [1, 2, 2, 2, 1, 2],
               'expand_ratio': [1, 4, 4, 4, 6, 6],
               'in_channel': [32, 16, 32, 48, 96, 112],
               'out_channel': [16, 32, 48, 96, 112, 192],
               'se_ratio': [None, None, None, 0.25, 0.25, 0.25],
               'conv_type': [1, 1, 1, 0, 0, 0],
               'is_feature_stage': [False, True, True, False, True, True],
               'width_coefficient': 1.0,
               'depth_coefficient': 1.1,
               'train_size': 192,
               'eval_size': 240,
               'dropout': 0.2,
               'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmlnUVJnVGV5UndSY2J2amwtP2U9dTBiV1lO/root/content',
               'model_name': 'efficientnet_v2_b1_21k_ft1k-58f4fb47.pth'},
        'b2': {'num_repeat': [1, 2, 2, 3, 5, 8],
               'kernel_size': [3, 3, 3, 3, 3, 3],
               'stride': [1, 2, 2, 2, 1, 2],
               'expand_ratio': [1, 4, 4, 4, 6, 6],
               'in_channel': [32, 16, 32, 48, 96, 112],
               'out_channel': [16, 32, 48, 96, 112, 192],
               'se_ratio': [None, None, None, 0.25, 0.25, 0.25],
               'conv_type': [1, 1, 1, 0, 0, 0],
               'is_feature_stage': [False, True, True, False, True, True],
               'width_coefficient': 1.1,
               'depth_coefficient': 1.2,
               'train_size': 208,
               'eval_size': 260,
               'dropout': 0.3,
               'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmlnUVY4M2NySVFZbU41X0tGP2U9ZERZVmxK/root/content',
               'model_name': 'efficientnet_v2_b2_21k_ft1k-db4ac0ee.pth'},
        'b3': {'num_repeat': [1, 2, 2, 3, 5, 8],
               'kernel_size': [3, 3, 3, 3, 3, 3],
               'stride': [1, 2, 2, 2, 1, 2],
               'expand_ratio': [1, 4, 4, 4, 6, 6],
               'in_channel': [32, 16, 32, 48, 96, 112],
               'out_channel': [16, 32, 48, 96, 112, 192],
               'se_ratio': [None, None, None, 0.25, 0.25, 0.25],
               'conv_type': [1, 1, 1, 0, 0, 0],
               'is_feature_stage': [False, True, True, False, True, True],
               'width_coefficient': 1.2,
               'depth_coefficient': 1.4,
               'train_size': 240,
               'eval_size': 300,
               'dropout': 0.3,
               'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmlnUVpkamdZUzhhaDdtTTZLP2U9anA4VWN2/root/content',
               'model_name': 'efficientnet_v2_b3_21k_ft1k-3da5874c.pth'},
        's': {'num_repeat': [2, 4, 4, 6, 9, 15],
              'kernel_size': [3, 3, 3, 3, 3, 3],
              'stride': [1, 2, 2, 2, 1, 2],
              'expand_ratio': [1, 4, 4, 4, 6, 6],
              'in_channel': [24, 24, 48, 64, 128, 160],
              'out_channel': [24, 48, 64, 128, 160, 256],
              'se_ratio': [None, None, None, 0.25, 0.25, 0.25],
              'conv_type': [1, 1, 1, 0, 0, 0],
              'is_feature_stage': [False, True, True, False, True, True],
              'width_coefficient': 1.0,
              'depth_coefficient': 1.0,
              'train_size': 300,
              'eval_size': 384,
              'dropout': 0.2,
              'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmllbFF5VWJOZzd0cmhBbm8/root/content',
              'model_name': 'efficientnet_v2_s_21k_ft1k-dbb43f38.pth'},
        'm': {'num_repeat': [3, 5, 5, 7, 14, 18, 5],
              'kernel_size': [3, 3, 3, 3, 3, 3, 3],
              'stride': [1, 2, 2, 2, 1, 2, 1],
              'expand_ratio': [1, 4, 4, 4, 6, 6, 6],
              'in_channel': [24, 24, 48, 80, 160, 176, 304],
              'out_channel': [24, 48, 80, 160, 176, 304, 512],
              'se_ratio': [None, None, None, 0.25, 0.25, 0.25, 0.25],
              'conv_type': [1, 1, 1, 0, 0, 0, 0],
              'is_feature_stage': [False, True, True, False, True, False, True],
              'width_coefficient': 1.0,
              'depth_coefficient': 1.0,
              'train_size': 384,
              'eval_size': 480,
              'dropout': 0.3,
              'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmllN1ZDazRFb0o1bnlyNUE/root/content',
              'model_name': 'efficientnet_v2_m_21k_ft1k-da8e56c0.pth'},
        'l': {'num_repeat': [4, 7, 7, 10, 19, 25, 7],
              'kernel_size': [3, 3, 3, 3, 3, 3, 3],
              'stride': [1, 2, 2, 2, 1, 2, 1],
              'expand_ratio': [1, 4, 4, 4, 6, 6, 6],
              'in_channel': [32, 32, 64, 96, 192, 224, 384],
              'out_channel': [32, 64, 96, 192, 224, 384, 640],
              'se_ratio': [None, None, None, 0.25, 0.25, 0.25, 0.25],
              'conv_type': [1, 1, 1, 0, 0, 0, 0],
              'is_feature_stage': [False, True, True, False, True, False, True],
              'feature_stages': [1, 2, 4, 6],
              'width_coefficient': 1.0,
              'depth_coefficient': 1.0,
              'train_size': 384,
              'eval_size': 480,
              'dropout': 0.4,
              'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmlmcmIyRHEtQTBhUTBhWVE/root/content',
              'model_name': 'efficientnet_v2_l_21k_ft1k-08121eee.pth'},
        'xl': {'num_repeat': [4, 8, 8, 16, 24, 32, 8],
               'kernel_size': [3, 3, 3, 3, 3, 3, 3],
               'stride': [1, 2, 2, 2, 1, 2, 1],
               'expand_ratio': [1, 4, 4, 4, 6, 6, 6],
               'in_channel': [32, 32, 64, 96, 192, 256, 512],
               'out_channel': [32, 64, 96, 192, 256, 512, 640],
               'se_ratio': [None, None, None, 0.25, 0.25, 0.25, 0.25],
               'conv_type': [1, 1, 1, 0, 0, 0, 0],
               'is_feature_stage': [False, True, True, False, True, False, True],
               'feature_stages': [1, 2, 4, 6],
               'width_coefficient': 1.0,
               'depth_coefficient': 1.0,
               'train_size': 384,
               'eval_size': 512,
               'dropout': 0.4,
               'weight_url': 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBdGlRcHc5VGNjZmlmVXQtRHJLa21taUkxWkE/root/content',
               'model_name': 'efficientnet_v2_xl_21k_ft1k-1fcc9744.pth'},
    }
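
    # Schema of the entries above, as consumed by __init__: the list-valued
    # keys are per-stage settings; 'conv_type' selects FusedMBConvBlockV2 (1)
    # or MBConvBlockV2 (0); 'is_feature_stage' marks stages whose last block
    # output is returned by get_features(); an 'se_ratio' of None disables
    # squeeze-and-excite for that stage.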
    def __init__(self,
                 model_name,
                 in_channels=3,
                 n_classes=1000,
                 tf_style_conv=False,
                 in_spatial_shape=None,
                 activation='silu',
                 activation_kwargs=None,
                 bias=False,
                 drop_connect_rate=0.2,
                 dropout_rate=None,
                 bn_epsilon=1e-3,
                 bn_momentum=0.01,
                 pretrained=False,
                 progress=False):
        super().__init__()

        self.blocks = nn.ModuleList()
        self.model_name = model_name
        self.cfg = self._models[model_name]

        if tf_style_conv and in_spatial_shape is None:
            in_spatial_shape = self.cfg['eval_size']

        activation_kwargs = {} if activation_kwargs is None else activation_kwargs
        dropout_rate = self.cfg['dropout'] if dropout_rate is None else dropout_rate
        _input_ch = in_channels

        self.feature_block_ids = []

        # stem
        if tf_style_conv:
            self.stem_conv = SamePaddingConv2d(
                in_spatial_shape=in_spatial_shape,
                in_channels=in_channels,
                out_channels=round_filters(self.cfg['in_channel'][0],
                                           self.cfg['width_coefficient']),
                kernel_size=3,
                stride=2,
                bias=bias)
            in_spatial_shape = self.stem_conv.out_spatial_shape
        else:
            self.stem_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=round_filters(self.cfg['in_channel'][0],
                                           self.cfg['width_coefficient']),
                kernel_size=3,
                stride=2,
                padding=1,
                bias=bias)
        self.stem_bn = nn.BatchNorm2d(
            num_features=round_filters(self.cfg['in_channel'][0],
                                       self.cfg['width_coefficient']),
            eps=bn_epsilon,
            momentum=bn_momentum)
        self.stem_act = get_activation(activation, **activation_kwargs)

        drop_connect_rates = self.get_dropconnect_rates(drop_connect_rate)

        stages = zip(*[self.cfg[x] for x in
                       ['num_repeat', 'kernel_size', 'stride', 'expand_ratio',
                        'in_channel', 'out_channel', 'se_ratio', 'conv_type',
                        'is_feature_stage']])

        idx = 0
        for stage_args in stages:
            # NB: in_channels shadows the constructor argument here; the
            # original value was saved above as _input_ch
            (num_repeat, kernel_size, stride, expand_ratio, in_channels,
             out_channels, se_ratio, conv_type, is_feature_stage) = stage_args

            in_channels = round_filters(in_channels, self.cfg['width_coefficient'])
            out_channels = round_filters(out_channels, self.cfg['width_coefficient'])
            num_repeat = round_repeats(num_repeat, self.cfg['depth_coefficient'])

            conv_block = MBConvBlockV2 if conv_type == 0 else FusedMBConvBlockV2

            for _ in range(num_repeat):
                se_size = None if se_ratio is None else max(1, int(in_channels * se_ratio))
                _b = conv_block(in_channels=in_channels,
                                out_channels=out_channels,
                                kernel_size=kernel_size,
                                stride=stride,
                                expansion_factor=expand_ratio,
                                act_fn=activation,
                                act_kwargs=activation_kwargs,
                                bn_epsilon=bn_epsilon,
                                bn_momentum=bn_momentum,
                                se_size=se_size,
                                drop_connect_rate=drop_connect_rates[idx],
                                bias=bias,
                                tf_style_conv=tf_style_conv,
                                in_spatial_shape=in_spatial_shape)
                self.blocks.append(_b)
                idx += 1

                if tf_style_conv:
                    in_spatial_shape = _b.out_spatial_shape

                # only the first block in a stage changes channels and stride
                in_channels = out_channels
                stride = 1

            if is_feature_stage:
                self.feature_block_ids.append(idx - 1)

        head_conv_out_channels = round_filters(1280, self.cfg['width_coefficient'])

        self.head_conv = nn.Conv2d(in_channels=in_channels,
                                   out_channels=head_conv_out_channels,
                                   kernel_size=1,
                                   bias=bias)
        self.head_bn = nn.BatchNorm2d(num_features=head_conv_out_channels,
                                      eps=bn_epsilon,
                                      momentum=bn_momentum)
        self.head_act = get_activation(activation, **activation_kwargs)
        # self.CoordAtt1 = CoordAtt(head_conv_out_channels, head_conv_out_channels)
        self.dropout = nn.Dropout(p=dropout_rate)

        self.avpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(head_conv_out_channels, n_classes)

        if pretrained:
            self._load_state(_input_ch, n_classes, progress, tf_style_conv)
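
    # Note on _load_state: the released checkpoints appear to use the
    # tf_style_conv layout, where each conv is wrapped as '<name>.conv'; for
    # plain nn.Conv2d layers the '.conv.' infix is stripped from the keys so
    # the same weights load into either layout.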
    def _load_state(self, in_channels, n_classes, progress, tf_style_conv):
        state_dict = model_zoo.load_url(self.cfg['weight_url'],
                                        progress=progress,
                                        file_name=self.cfg['model_name'])

        strict = True
        if not tf_style_conv:
            state_dict = OrderedDict(
                [(k.replace('.conv.', '.'), v) if '.conv.' in k else (k, v)
                 for k, v in state_dict.items()])

        if in_channels != 3:
            if tf_style_conv:
                state_dict.pop('stem_conv.conv.weight')
            else:
                state_dict.pop('stem_conv.weight')
            strict = False

        if n_classes != 1000:
            state_dict.pop('fc.weight')
            state_dict.pop('fc.bias')
            strict = False

        self.load_state_dict(state_dict, strict=strict)
        print("Model weights loaded successfully.")

    def get_dropconnect_rates(self, drop_connect_rate):
        # linearly scale the rate from 0 up to drop_connect_rate over all blocks
        nr = self.cfg['num_repeat']
        dc = self.cfg['depth_coefficient']
        total = sum(round_repeats(nr[i], dc) for i in range(len(nr)))
        return [drop_connect_rate * i / total for i in range(total)]

    def get_features(self, x):
        x = self.stem_act(self.stem_bn(self.stem_conv(x)))

        features = []
        feat_idx = 0
        for block_idx, block in enumerate(self.blocks):
            x = block(x)
            # guard feat_idx so blocks after the last feature stage are safe
            if feat_idx < len(self.feature_block_ids) and \
                    block_idx == self.feature_block_ids[feat_idx]:
                features.append(x)
                feat_idx += 1

        return features

    def forward(self, x):
        x = self.stem_act(self.stem_bn(self.stem_conv(x)))

        for block in self.blocks:
            x = block(x)

        x = self.head_act(self.head_bn(self.head_conv(x)))
        # x = self.CoordAtt1(x)
        x = self.dropout(torch.flatten(self.avpool(x), 1))
        x = self.fc(x)
        return x
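

# Minimal usage sketch (not part of the original module): build an untrained
# 's' variant and run a dummy batch; set pretrained=True to download the
# OneDrive checkpoint listed in _models instead.
if __name__ == "__main__":
    model = EfficientNetV2('s', n_classes=10, pretrained=False)
    model.eval()
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 384, 384))
        feats = model.get_features(torch.randn(1, 3, 384, 384))
    print(logits.shape)                 # torch.Size([1, 10])
    print([f.shape[1] for f in feats])  # channel count of each feature map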