@@ -2164,8 +2164,16 @@ def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
21642164 input_size = (3 , 384 , 384 ), crop_pct = 1.0 ),
21652165 'vit_so150m_patch16_reg4_map_256.untrained' : _cfg (
21662166 input_size = (3 , 256 , 256 )),
2167- 'vit_so150m2_patch16_reg1_gap_256.untrained' : _cfg (
2168- input_size = (3 , 256 , 256 ), crop_pct = 0.95 ),
2167+ 'vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1k' : _cfg (
2168+ hf_hub_id = 'timm/' ,
2169+ input_size = (3 , 256 , 256 ), crop_pct = 1.0 ),
2170+ 'vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k' : _cfg (
2171+ hf_hub_id = 'timm/' ,
2172+ num_classes = 11821 ,
2173+ input_size = (3 , 256 , 256 ), crop_pct = 1.0 ),
2174+ 'vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1k' : _cfg (
2175+ hf_hub_id = 'timm/' ,
2176+ input_size = (3 , 384 , 384 ), crop_pct = 1.0 ),
21692177
21702178 'vit_intern300m_patch14_448.ogvl_dist' : _cfg (
21712179 hf_hub_id = 'timm/' ,
@@ -3518,6 +3526,18 @@ def vit_so150m2_patch16_reg1_gap_256(pretrained: bool = False, **kwargs) -> Visi
35183526 return model
35193527
35203528
@register_model
def vit_so150m2_patch16_reg1_gap_384(pretrained: bool = False, **kwargs) -> VisionTransformer:
    """SO150M v2 at 384x384 (shape optimized, but diff than paper def, optimized for GPU).

    Global-average-pool variant with one register token and no class token;
    uses layer-scale init and no qkv bias.
    """
    args = dict(
        patch_size=16,
        embed_dim=832,
        depth=21,
        num_heads=13,
        mlp_ratio=34 / 13,
        init_values=1e-5,
        qkv_bias=False,
        class_token=False,
        reg_tokens=1,
        global_pool='avg',
    )
    # Caller kwargs take precedence over the model defaults above.
    args.update(kwargs)
    return _create_vision_transformer(
        'vit_so150m2_patch16_reg1_gap_384', pretrained=pretrained, **args)
3539+
3540+
35213541@register_model
35223542def vit_intern300m_patch14_448 (pretrained : bool = False , ** kwargs ) -> VisionTransformer :
35233543 model_args = dict (
0 commit comments