NVIDIA
diff --git a/‎docker/requirements-pip-pytorch.txt‎
Lines changed: 3 additions & 1 deletion b/‎docker/requirements-pip-pytorch.txt‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎nvidia_tao_pytorch/core/mmlab/mmclassification/classification_default_config.py‎
Lines changed: 2 additions & 0 deletions b/‎nvidia_tao_pytorch/core/mmlab/mmclassification/classification_default_config.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎nvidia_tao_pytorch/cv/classification/experiment_specs/export_clip_imagenet.yaml‎
Lines changed: 71 additions & 0 deletions b/‎nvidia_tao_pytorch/cv/classification/experiment_specs/export_clip_imagenet.yaml‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎nvidia_tao_pytorch/cv/classification/experiment_specs/export_dinov2_imagenet.yaml‎
Lines changed: 69 additions & 0 deletions b/‎nvidia_tao_pytorch/cv/classification/experiment_specs/export_dinov2_imagenet.yaml‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎nvidia_tao_pytorch/cv/classification/experiment_specs/inference_clip_imagenet.yaml‎
Lines changed: 69 additions & 0 deletions b/‎nvidia_tao_pytorch/cv/classification/experiment_specs/inference_clip_imagenet.yaml‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎nvidia_tao_pytorch/cv/classification/experiment_specs/inference_dinov2_imagenet.yaml‎
Lines changed: 67 additions & 0 deletions b/‎nvidia_tao_pytorch/cv/classification/experiment_specs/inference_dinov2_imagenet.yaml‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎nvidia_tao_pytorch/cv/classification/experiment_specs/test_clip_imagenet.yaml‎
Lines changed: 69 additions & 0 deletions b/‎nvidia_tao_pytorch/cv/classification/experiment_specs/test_clip_imagenet.yaml‎
Lines changed: 69 additions & 0 deletions
@@ -7,5 +7,7 @@ pytorch-lightning==1.8.5
 pytorch_metric_learning==1.7.1
 pytorch-msssim
 thop
-timm==0.6.11
+timm>=0.9.6.dev0  # NOTE(review): open-ended lower bound replaces the exact pin removed above; confirm the intended range
 torchmetrics==0.10.3
+open-clip-torch[training]==2.20.0  # exact pin, requested with the "training" extra
+ftfy  # NOTE(review): unpinned; confirm whether a version constraint is wanted
@@ -265,6 +265,8 @@ class BackboneConfig:
 
     type: str = "fan_tiny_8_p4_hybrid"
     custom_args: Optional[Dict[Any, Any]] = None
+    freeze: bool = False
+    pretrained: Optional[str] = None
 
 
 @dataclass
 
@@ -0,0 +1,71 @@
+results_dir: ???  # placeholder to be filled in; presumably the loader's mandatory-value marker (confirm)
+
+export:  # settings for the export task of the CLIP ImageNet spec
+  checkpoint: ???  # placeholder, written unquoted like every other placeholder in this spec
+  onnx_file: ???
+  verify: true
+
+dataset:
+  data:
+    samples_per_gpu: 512
+    workers_per_gpu: 8
+    train:
+      data_prefix: ???
+      pipeline:  # Augmentations alone
+        - type: RandomResizedCrop
+          size: 224
+        - type: RandomFlip
+          flip_prob: 0.5
+          direction: "horizontal"
+        - type: ColorJitter
+          brightness: 0.4
+          contrast: 0.4
+          saturation: 0.4
+    val:
+      data_prefix: ???
+    test:
+      data_prefix: ???
+
+model:
+  backbone:
+    type: "open_clip"
+    custom_args:
+      model_name: "ViT-B-32"
+    freeze: true
+    pretrained: ???
+  init_cfg:
+    checkpoint: ???
+  head:
+    type: LinearClsHead
+    num_classes: 1000
+    in_channels: 512  # presumably the feature width of the ViT-B-32 backbone - TODO confirm
+    loss:
+      type: CrossEntropyLoss
+      loss_weight: 1.0
+      use_soft: false
+    topk: [1, 5]
+
+train:  # NOTE(review): training settings inside an export spec; confirm the export task reads them
+  train_config:
+    find_unused_parameters: true
+    optimizer:
+      type: AdamW
+      lr: 1.0e-3  # mantissa carries a decimal point so YAML 1.1 loaders also resolve a float
+    lr_config:
+      policy: CosineAnnealing
+      min_lr: 0
+      warmup: "linear"
+      warmup_iters: 10
+      warmup_by_epoch: false
+    optimizer_config:
+      grad_clip:
+        max_norm: 5.0
+    runner:
+      max_epochs: 10
+    checkpoint_config:
+      interval: 1
+    logging:
+      interval: 50
+    validate: true
+    evaluation:
+      interval: 1
@@ -0,0 +1,69 @@
+results_dir: ???  # placeholder to be filled in; presumably the loader's mandatory-value marker (confirm)
+
+export:  # settings for the export task of the DINOv2 ImageNet spec
+  checkpoint: ???  # placeholder, written unquoted like every other placeholder in this spec
+  onnx_file: ???
+  verify: true
+
+dataset:
+  data:
+    samples_per_gpu: 512
+    workers_per_gpu: 8
+    train:
+      data_prefix: ???
+      pipeline:  # Augmentations alone
+        - type: RandomResizedCrop
+          size: 224
+        - type: RandomFlip
+          flip_prob: 0.5
+          direction: "horizontal"
+        - type: ColorJitter
+          brightness: 0.4
+          contrast: 0.4
+          saturation: 0.4
+    val:
+      data_prefix: ???
+    test:
+      data_prefix: ???
+
+model:
+  backbone:
+    type: "vit_large_patch14_dinov2_swiglu"
+    freeze: true
+    pretrained: ???
+  init_cfg:
+    checkpoint: ???
+  head:
+    type: LinearClsHead
+    num_classes: 1000
+    in_channels: 1024  # presumably the feature width of the backbone named above - TODO confirm
+    loss:
+      type: CrossEntropyLoss
+      loss_weight: 1.0
+      use_soft: false
+    topk: [1, 5]
+
+train:  # NOTE(review): training settings inside an export spec; confirm the export task reads them
+  train_config:
+    find_unused_parameters: true
+    optimizer:
+      type: AdamW
+      lr: 1.0e-3  # mantissa carries a decimal point so YAML 1.1 loaders also resolve a float
+    lr_config:
+      policy: CosineAnnealing
+      min_lr: 5.0e-6  # explicit decimal point for the same reason as lr
+      warmup: "linear"
+      warmup_iters: 10
+      warmup_by_epoch: false
+    optimizer_config:
+      grad_clip:
+        max_norm: 5.0
+    runner:
+      max_epochs: 10
+    checkpoint_config:
+      interval: 1
+    logging:
+      interval: 50
+    validate: true
+    evaluation:
+      interval: 1
@@ -0,0 +1,69 @@
+results_dir: ???  # placeholder to be filled in; presumably the loader's mandatory-value marker (confirm)
+
+inference:  # settings for the inference task of the CLIP ImageNet spec
+  checkpoint: ???
+
+dataset:
+  data:
+    samples_per_gpu: 512
+    workers_per_gpu: 8
+    train:
+      data_prefix: ???
+      pipeline:  # Augmentations alone
+        - type: RandomResizedCrop
+          size: 224
+        - type: RandomFlip
+          flip_prob: 0.5
+          direction: "horizontal"
+        - type: ColorJitter
+          brightness: 0.4
+          contrast: 0.4
+          saturation: 0.4
+    val:
+      data_prefix: ???
+    test:
+      data_prefix: ???
+
+model:
+  backbone:
+    type: "open_clip"
+    custom_args:
+      model_name: "ViT-B-32"
+    freeze: true
+    pretrained: ???
+  init_cfg:
+    checkpoint: ???
+  head:
+    type: LinearClsHead
+    num_classes: 1000
+    in_channels: 512  # presumably the feature width of the ViT-B-32 backbone - TODO confirm
+    loss:
+      type: CrossEntropyLoss
+      loss_weight: 1.0
+      use_soft: false
+    topk: [1, 5]
+
+train:  # NOTE(review): training settings inside an inference spec; confirm the inference task reads them
+  train_config:
+    find_unused_parameters: true
+    optimizer:
+      type: AdamW
+      lr: 1.0e-3  # mantissa carries a decimal point so YAML 1.1 loaders also resolve a float
+    lr_config:
+      policy: CosineAnnealing
+      min_lr: 0
+      warmup: "linear"
+      warmup_iters: 10
+      warmup_by_epoch: false
+    optimizer_config:
+      grad_clip:
+        max_norm: 5.0
+    runner:
+      max_epochs: 10
+    checkpoint_config:
+      interval: 1
+    logging:
+      interval: 50
+    validate: true
+    evaluation:
+      interval: 1
@@ -0,0 +1,67 @@
+results_dir: ???  # placeholder to be filled in; presumably the loader's mandatory-value marker (confirm)
+
+inference:  # settings for the inference task of the DINOv2 ImageNet spec
+  checkpoint: ???
+
+dataset:
+  data:
+    samples_per_gpu: 512
+    workers_per_gpu: 8
+    train:
+      data_prefix: ???
+      pipeline:  # Augmentations alone
+        - type: RandomResizedCrop
+          size: 224
+        - type: RandomFlip
+          flip_prob: 0.5
+          direction: "horizontal"
+        - type: ColorJitter
+          brightness: 0.4
+          contrast: 0.4
+          saturation: 0.4
+    val:
+      data_prefix: ???
+    test:
+      data_prefix: ???
+
+model:
+  backbone:
+    type: "vit_large_patch14_dinov2_swiglu"
+    freeze: true
+    pretrained: ???
+  init_cfg:
+    checkpoint: ???
+  head:
+    type: LinearClsHead
+    num_classes: 1000
+    in_channels: 1024  # presumably the feature width of the backbone named above - TODO confirm
+    loss:
+      type: CrossEntropyLoss
+      loss_weight: 1.0
+      use_soft: false
+    topk: [1, 5]
+
+train:  # NOTE(review): training settings inside an inference spec; confirm the inference task reads them
+  train_config:
+    find_unused_parameters: true
+    optimizer:
+      type: AdamW
+      lr: 1.0e-3  # mantissa carries a decimal point so YAML 1.1 loaders also resolve a float
+    lr_config:
+      policy: CosineAnnealing
+      min_lr: 5.0e-6  # explicit decimal point for the same reason as lr
+      warmup: "linear"
+      warmup_iters: 10
+      warmup_by_epoch: false
+    optimizer_config:
+      grad_clip:
+        max_norm: 5.0
+    runner:
+      max_epochs: 10
+    checkpoint_config:
+      interval: 1
+    logging:
+      interval: 50
+    validate: true
+    evaluation:
+      interval: 1
@@ -0,0 +1,69 @@
+results_dir: ???  # placeholder to be filled in; presumably the loader's mandatory-value marker (confirm)
+
+evaluate:  # settings for the evaluation task of the CLIP ImageNet test spec
+  checkpoint: ???
+
+dataset:
+  data:
+    samples_per_gpu: 512
+    workers_per_gpu: 8
+    train:
+      data_prefix: ???
+      pipeline:  # Augmentations alone
+        - type: RandomResizedCrop
+          size: 224
+        - type: RandomFlip
+          flip_prob: 0.5
+          direction: "horizontal"
+        - type: ColorJitter
+          brightness: 0.4
+          contrast: 0.4
+          saturation: 0.4
+    val:
+      data_prefix: ???
+    test:
+      data_prefix: ???
+
+model:
+  backbone:
+    type: "open_clip"
+    custom_args:
+      model_name: "ViT-B-32"
+    freeze: true
+    pretrained: ???
+  init_cfg:
+    checkpoint: ???
+  head:
+    type: LinearClsHead
+    num_classes: 1000
+    in_channels: 512  # presumably the feature width of the ViT-B-32 backbone - TODO confirm
+    loss:
+      type: CrossEntropyLoss
+      loss_weight: 1.0
+      use_soft: false
+    topk: [1, 5]
+
+train:  # NOTE(review): training settings inside an evaluation spec; confirm the evaluate task reads them
+  train_config:
+    find_unused_parameters: true
+    optimizer:
+      type: AdamW
+      lr: 1.0e-3  # mantissa carries a decimal point so YAML 1.1 loaders also resolve a float
+    lr_config:
+      policy: CosineAnnealing
+      min_lr: 0
+      warmup: "linear"
+      warmup_iters: 10
+      warmup_by_epoch: false
+    optimizer_config:
+      grad_clip:
+        max_norm: 5.0
+    runner:
+      max_epochs: 10
+    checkpoint_config:
+      interval: 1
+    logging:
+      interval: 50
+    validate: true
+    evaluation:
+      interval: 1