rwightman · November 18, 2024 02:32 · Oct 31, 2024 · Sep 17, 2024 · Jun 24, 2024 · Jun 24, 2024
diff --git a/resnet18_34_ra4_e3600.yaml b/resnet18_34_ra4_e3600.yaml
@@ -0,0 +1,151 @@
+aa: rand-m7-inc1-mstd1.0
+amp: true
+amp_dtype: float16
+amp_impl: native
+aug_repeats: 3.0
+aug_splits: 0
+batch_size: 896
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: true
+checkpoint_hist: 10
+class_map: ''
+clip_grad: null
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: 0.9
+cutmix: 0.0
+cutmix_minmax: null
+data_dir: /data/imagenet/
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.25
+drop_block: null
+drop_connect: null
+drop_path: null
+epoch_repeats: 0.0
+epochs: 3600
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: 0.05
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: 0.05
+head_init_bias: null
+head_init_scale: null
+hflip: 0.5
+img_size: null
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.002
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean:
+- 0.5
+min_lr: 0.0
+mixup: 0.0
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 1.0
+mixup_switch_prob: 0.5
+model: resnetv2_34
+model_ema: true
+model_ema_decay: 0.99995
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas:
+- 0.6
+- 0.995
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.25
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std:
+- 0.5
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchcompile_mode: null
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 5
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.1
+worker_seeding: all
+workers: 10
diff --git a/resnet50_ra4_e3600.yaml b/resnet50_ra4_e3600.yaml
@@ -0,0 +1,150 @@
+aa: rand-m8-inc1-mstd1.0
+amp: true
+amp_dtype: float16
+amp_impl: native
+aug_repeats: 3.0
+aug_splits: 0
+batch_size: 768
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: true
+checkpoint_hist: 10
+class_map: ''
+clip_grad: null
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: 0.95
+cutmix: 0.0
+cutmix_minmax: null
+data_dir: /data/imagenet/
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.3
+drop_block: null
+drop_connect: null
+drop_path: 0.1
+epoch_repeats: 0.0
+epochs: 3600
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: 0.05
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: 0.05
+head_init_bias: null
+head_init_scale: 0.0
+hflip: 0.5
+img_size: null
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.002
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean:
+- 0.5
+min_lr: 0.0
+mixup: 0.0
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 1.0
+mixup_switch_prob: 0.5
+model: resnet50d
+model_ema: true
+model_ema_decay: 0.99995
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas:
+- 0.6
+- 0.995
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.3
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std:
+- 0.5
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 5
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.125
+worker_seeding: all
+workers: 12
diff --git a/_README_MobileNetV4.md b/_README_MobileNetV4.md
@@ -6,4 +6,6 @@ Note the # of GPUs, this needs to be taken into consideration for global batch s
 
 Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_size` and a sqrt scaling according to the global batch size.
 
-Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models, allow a larger (closer to 1) beta2 for adam, otherwise beta2 on the adam, or the LR needed to be reduced to avoid instability with the hybrids.
+Models with `ix` in the tag use an alternative init for the MQA attention module projections: xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve the stability of the hybrid models and allowed a larger (closer to 1) beta2 for Adam; otherwise, either the Adam beta2 or the LR needed to be reduced to avoid instability with the hybrids.
+
+To use one of the .yaml files, pass it via the `--config` argument of the timm `train.py` script, e.g. `train.py --config mnv4.yaml --data-dir /where/my/data ... <other arg overrides>`
diff --git a/_README.md → _README_MobileNetV4.md b/_README.md → _README_MobileNetV4.md
diff --git a/_README.md b/_README.md
@@ -4,6 +4,6 @@ Included yaml files are timm train script configs for training MobileNetV4 model
 
 Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling.
 
-Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.
+Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_size` and a sqrt scaling according to the global batch size.
 
 Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models, allow a larger (closer to 1) beta2 for adam, otherwise beta2 on the adam, or the LR needed to be reduced to avoid instability with the hybrids.
diff --git a/_README.md b/_README.md
@@ -4,4 +4,6 @@ Included yaml files are timm train script configs for training MobileNetV4 model
 
 Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling.
 
-Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.
+Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.
+
+Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models, allow a larger (closer to 1) beta2 for adam, otherwise beta2 on the adam, or the LR needed to be reduced to avoid instability with the hybrids.
diff --git a/_README.md b/_README.md
@@ -1,4 +1,4 @@
-* MobileNetV4
+# MobileNetV4 Hparams
 
 Included yaml files are timm train script configs for training MobileNetV4 models in timm (see on HF Hub: https://huggingface.co/collections/timm/mobilenetv4-pretrained-weights-6669c22cda4db4244def9637)
 

diff --git a/_README.md b/_README.md
@@ -1 +1,7 @@
-* MobileNetV4
+* MobileNetV4
+
+Included yaml files are timm train script configs for training MobileNetV4 models in timm (see on HF Hub: https://huggingface.co/collections/timm/mobilenetv4-pretrained-weights-6669c22cda4db4244def9637)
+
+Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling.
+
+Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.
diff --git a/_README.md b/_README.md
@@ -0,0 +1 @@
+* MobileNetV4
diff --git a/mnv4_cbm_r224_e500_gpu2.yaml b/mnv4_cbm_r224_e500_gpu2.yaml
@@ -0,0 +1,147 @@
+aa: rand-m9-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 512
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 0.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.1
+epoch_repeats: 0.0
+epochs: 500
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: null
+hflip: 0.5
+img_size: 224
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: 0.002
+lr_base: 0.1
+lr_base_scale: ''
+lr_base_size: 256
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 0
+mixup: 0.2
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 1.0
+mixup_switch_prob: 0.5
+model: mobilenetv4_conv_blur_medium
+model_ema: true
+model_ema_decay: 0.9998
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: nadamw
+opt_betas: null
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.25
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.1
+worker_seeding: all
+workers: 8
diff --git a/mnv4_cl_r256_e500_gpu8.yaml b/mnv4_cl_r256_e500_gpu8.yaml
@@ -0,0 +1,147 @@
+aa: rand-m9-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 512
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 1.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.35
+epoch_repeats: 0.0
+epochs: 500
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: 0.0
+hflip: 0.5
+img_size: null
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: 0.003
+lr_base: 0.1
+lr_base_scale: ''
+lr_base_size: 256
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 0.0
+mixup: 0.8
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 0.8
+mixup_switch_prob: 0.5
+model: mobilenetv4_conv_large
+model_ema: true
+model_ema_decay: 0.9997
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas: null
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.3
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.2
+worker_seeding: all
+workers: 8
diff --git a/mnv4_cl_r384_e600_gpu8.yaml b/mnv4_cl_r384_e600_gpu8.yaml
@@ -0,0 +1,147 @@
+aa: rand-m8-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 256
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 1.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.35
+epoch_repeats: 0.0
+epochs: 600
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: 0.0
+hflip: 0.5
+img_size: 384
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.003
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 1.0e-06
+mixup: 0.8
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 0.8
+mixup_switch_prob: 0.5
+model: mobilenetv4_conv_large
+model_ema: true
+model_ema_decay: 0.9999
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas: null
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.35
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.2
+worker_seeding: all
+workers: 8
diff --git a/mnv4_cm_r224_e500_gpu2.yaml b/mnv4_cm_r224_e500_gpu2.yaml
@@ -0,0 +1,147 @@
+aa: rand-m9-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 512
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 0.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.1
+epoch_repeats: 0.0
+epochs: 500
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: null
+hflip: 0.5
+img_size: 224
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: 0.002
+lr_base: 0.1
+lr_base_scale: ''
+lr_base_size: 256
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 0
+mixup: 0.2
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 1.0
+mixup_switch_prob: 0.5
+model: mobilenetv4_conv_medium
+model_ema: true
+model_ema_decay: 0.9998
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: nadamw
+opt_betas: null
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.25
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.1
+worker_seeding: all
+workers: 8
diff --git a/mnv4_cm_r256_e500_gpu2.yaml b/mnv4_cm_r256_e500_gpu2.yaml
@@ -0,0 +1,147 @@
+aa: rand-m8-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 512
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 0.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.1
+epoch_repeats: 0.0
+epochs: 500
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: null
+hflip: 0.5
+img_size: 256
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: 0.002
+lr_base: 0.1
+lr_base_scale: ''
+lr_base_size: 256
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 0
+mixup: 0.2
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 0.8
+mixup_switch_prob: 0.5
+model: mobilenetv4_conv_medium
+model_ema: true
+model_ema_decay: 0.9998
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas: null
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.25
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.1
+worker_seeding: all
+workers: 8
diff --git a/mnv4_cs_r224_e2400_gpu4.yaml b/mnv4_cs_r224_e2400_gpu4.yaml
@@ -0,0 +1,149 @@
+aa: rand-m8-inc1-mstd1.0
+amp: true
+amp_dtype: float16
+amp_impl: native
+aug_repeats: 3.0
+aug_splits: 0
+batch_size: 1024
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: true
+checkpoint_hist: 10
+class_map: ''
+clip_grad: null
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 0.0
+cutmix_minmax: null
+data: /raid/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.25
+drop_block: null
+drop_connect: null
+drop_path: null
+epoch_repeats: 0.0
+epochs: 2400
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: 0.05
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: 0.1
+head_init_bias: null
+head_init_scale: null
+hflip: 0.5
+img_size: null
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.002
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 0.0
+mixup: 0.0
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 1.0
+mixup_switch_prob: 0.5
+model: mobilenetv4_conv_small
+model_ema: true
+model_ema_decay: 0.99995
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas:
+- 0.6
+- 0.995
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.25
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: null
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 5
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.06
+worker_seeding: all
+workers: 8
diff --git a/mnv4_hl_r384_e600_gpu8.yaml b/mnv4_hl_r384_e600_gpu8.yaml
@@ -0,0 +1,149 @@
+aa: rand-m9-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 192
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 1.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.35
+epoch_repeats: 0.0
+epochs: 600
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: 0.0
+hflip: 0.5
+img_size: 384
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.003
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 1.0e-06
+mixup: 0.8
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 0.8
+mixup_switch_prob: 0.5
+model: mobilenetv4_hybrid_large
+model_ema: true
+model_ema_decay: 0.9999
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas:
+- 0.9
+- 0.98
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.35
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.2
+worker_seeding: all
+workers: 8
diff --git a/mnv4_hl_r384_e600_ix_gpu8.yaml b/mnv4_hl_r384_e600_ix_gpu8.yaml
@@ -0,0 +1,149 @@
+aa: rand-m9-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 192
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 1.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.35
+epoch_repeats: 0.0
+epochs: 600
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: 0.0
+hflip: 0.5
+img_size: 384
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.003
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 1.0e-06
+mixup: 0.8
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 0.8
+mixup_switch_prob: 0.5
+model: mobilenetv4_hybrid_large
+model_ema: true
+model_ema_decay: 0.9999
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas:
+- 0.9
+- 0.99
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.35
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.2
+worker_seeding: all
+workers: 8
diff --git a/mnv4_hm_r224_e500_gpu2.yaml b/mnv4_hm_r224_e500_gpu2.yaml
@@ -0,0 +1,147 @@
+aa: rand-m9-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 512
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 0.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.1
+epoch_repeats: 0.0
+epochs: 500
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: null
+hflip: 0.5
+img_size: null
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: 0.001
+lr_base: 0.1
+lr_base_scale: ''
+lr_base_size: 256
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 0
+mixup: 0.2
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 1.0
+mixup_switch_prob: 0.5
+model: mobilenetv4_hybrid_medium
+model_ema: true
+model_ema_decay: 0.9998
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: nadamw
+opt_betas: null
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.25
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.1
+worker_seeding: all
+workers: 8
diff --git a/mnv4_hm_r256_e550_ix_gpu8.yaml b/mnv4_hm_r256_e550_ix_gpu8.yaml
@@ -0,0 +1,149 @@
+aa: rand-m8-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 512
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 1.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.1
+epoch_repeats: 0.0
+epochs: 550
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: 0.0
+hflip: 0.5
+img_size: 256
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.002
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 1.0e-06
+mixup: 0.8
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 0.5
+mixup_switch_prob: 0.5
+model: mobilenetv4_hybrid_medium
+model_ema: true
+model_ema_decay: 0.9999
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas:
+- 0.9
+- 0.99
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.3
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.141
+worker_seeding: all
+workers: 8
diff --git a/mnv4_hm_r384_e550_ix_gpu8.yaml b/mnv4_hm_r384_e550_ix_gpu8.yaml
@@ -0,0 +1,149 @@
+aa: rand-m9-inc1-mstd1.0
+amp: true
+amp_dtype: bfloat16
+amp_impl: native
+aug_repeats: 0
+aug_splits: 0
+batch_size: 384
+bce_loss: false
+bce_pos_weight: null
+bce_sum: false
+bce_target_thresh: null
+bn_eps: null
+bn_momentum: null
+channels_last: false
+checkpoint_hist: 10
+class_map: ''
+clip_grad: 5.0
+clip_mode: norm
+color_jitter: 0.4
+color_jitter_prob: null
+cooldown_epochs: 0
+crop_pct: null
+cutmix: 1.0
+cutmix_minmax: null
+data: /data/imagenet/
+data_dir: null
+dataset: ''
+dataset_download: false
+decay_epochs: 90
+decay_milestones:
+- 90
+- 180
+- 270
+decay_rate: 0.1
+device: cuda
+device_modules: null
+dist_bn: reduce
+drop: 0.2
+drop_block: null
+drop_connect: null
+drop_path: 0.1
+epoch_repeats: 0.0
+epochs: 550
+eval_metric: top1
+experiment: ''
+fast_norm: false
+fuser: ''
+gaussian_blur_prob: null
+gp: null
+grad_accum_steps: 1
+grad_checkpointing: false
+grayscale_prob: null
+head_init_bias: null
+head_init_scale: 0.0
+hflip: 0.5
+img_size: 384
+in_chans: null
+initial_checkpoint: ''
+input_img_mode: null
+input_key: null
+input_size: null
+interpolation: ''
+jsd_loss: false
+layer_decay: null
+local_rank: 0
+log_interval: 50
+log_wandb: false
+lr: null
+lr_base: 0.002
+lr_base_scale: ''
+lr_base_size: 4096
+lr_cycle_decay: 0.5
+lr_cycle_limit: 1
+lr_cycle_mul: 1.0
+lr_k_decay: 1.0
+lr_noise: null
+lr_noise_pct: 0.67
+lr_noise_std: 1.0
+mean: null
+min_lr: 1.0e-06
+mixup: 0.8
+mixup_mode: batch
+mixup_off_epoch: 0
+mixup_prob: 0.5
+mixup_switch_prob: 0.5
+model: mobilenetv4_hybrid_medium
+model_ema: true
+model_ema_decay: 0.9999
+model_ema_force_cpu: false
+model_ema_warmup: true
+model_kwargs: {}
+momentum: 0.9
+no_aug: false
+no_ddp_bb: false
+no_prefetcher: false
+no_resume_opt: false
+num_classes: null
+opt: adamw
+opt_betas:
+- 0.9
+- 0.99
+opt_eps: null
+opt_kwargs: {}
+output: ''
+patience_epochs: 10
+pin_mem: false
+pretrained: false
+pretrained_path: null
+ratio:
+- 0.75
+- 1.3333333333333333
+recount: 1
+recovery_interval: 0
+remode: pixel
+reprob: 0.3
+resplit: false
+resume: ''
+save_images: false
+scale:
+- 0.08
+- 1.0
+sched: cosine
+sched_on_updates: true
+seed: 42
+smoothing: 0.1
+split_bn: false
+start_epoch: null
+std: null
+sync_bn: false
+synchronize_step: false
+target_key: null
+torchcompile: inductor
+torchscript: false
+train_crop_mode: null
+train_interpolation: random
+train_num_samples: null
+train_split: train
+tta: 0
+use_multi_epochs_loader: false
+val_num_samples: null
+val_split: validation
+validation_batch_size: null
+vflip: 0.0
+warmup_epochs: 20
+warmup_lr: 0.0
+warmup_prefix: true
+weight_decay: 0.15
+worker_seeding: all
+workers: 8