Last active
November 18, 2024 02:32
Revisions
-
rwightman revised this gist
Oct 31, 2024 . 2 changed files with 301 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,151 @@ aa: rand-m7-inc1-mstd1.0 amp: true amp_dtype: float16 amp_impl: native aug_repeats: 3.0 aug_splits: 0 batch_size: 896 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: true checkpoint_hist: 10 class_map: '' clip_grad: null clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: 0.9 cutmix: 0.0 cutmix_minmax: null data_dir: /data/imagenet/ dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.25 drop_block: null drop_connect: null drop_path: null epoch_repeats: 0.0 epochs: 3600 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: 0.05 gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: 0.05 head_init_bias: null head_init_scale: null hflip: 0.5 img_size: null in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.002 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: - 0.5 min_lr: 0.0 mixup: 0.0 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 1.0 mixup_switch_prob: 0.5 model: resnetv2_34 model_ema: true model_ema_decay: 0.99995 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: 
- 0.6 - 0.995 opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel reprob: 0.25 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: - 0.5 sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchcompile_mode: null torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 5 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.1 worker_seeding: all workers: 10 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,150 @@ aa: rand-m8-inc1-mstd1.0 amp: true amp_dtype: float16 amp_impl: native aug_repeats: 3.0 aug_splits: 0 batch_size: 768 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: true checkpoint_hist: 10 class_map: '' clip_grad: null clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: 0.95 cutmix: 0.0 cutmix_minmax: null data_dir: /data/imagenet/ dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.3 drop_block: null drop_connect: null drop_path: 0.1 epoch_repeats: 0.0 epochs: 3600 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: 0.05 gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: 0.05 head_init_bias: null head_init_scale: 0.0 hflip: 0.5 img_size: null in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.002 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: - 0.5 min_lr: 0.0 mixup: 0.0 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 1.0 mixup_switch_prob: 0.5 model: resnet50d model_ema: true model_ema_decay: 0.99995 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: - 0.6 - 0.995 opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel 
reprob: 0.3 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: - 0.5 sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 5 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.125 worker_seeding: all workers: 12 -
rwightman revised this gist
Sep 17, 2024 . 1 changed file with 3 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -6,4 +6,6 @@ Note the # of GPUs, this needs to be taken into consideration for global batch s Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_size` and a sqrt scaling according to the global batch size. Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models and allows a larger (closer to 1) beta2 for Adam; otherwise, either beta2 or the LR needed to be reduced to avoid instability with the hybrids. To easily use the .yaml file, use the --config argument for the timm train.py script, e.g.: `train.py --config mnv4.yaml --data-dir /where/my/data ... <other arg overrides>` -
rwightman renamed this gist
Jun 24, 2024 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
rwightman revised this gist
Jun 24, 2024 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -4,6 +4,6 @@ Included yaml files are timm train script configs for training MobileNetV4 model Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling. Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_size` and a sqrt scaling according to the global batch size. Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models and allows a larger (closer to 1) beta2 for Adam; otherwise, either beta2 or the LR needed to be reduced to avoid instability with the hybrids. -
rwightman revised this gist
Jun 24, 2024 . 1 changed file with 3 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -4,4 +4,6 @@ Included yaml files are timm train script configs for training MobileNetV4 model Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling. Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_size` and a sqrt scaling according to the global batch size. Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models and allows a larger (closer to 1) beta2 for Adam; otherwise, either beta2 or the LR needed to be reduced to avoid instability with the hybrids. -
rwightman revised this gist
Jun 24, 2024 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,4 @@ # MobileNetV4 Hparams Included yaml files are timm train script configs for training MobileNetV4 models in timm (see on HF Hub: https://huggingface.co/collections/timm/mobilenetv4-pretrained-weights-6669c22cda4db4244def9637) -
rwightman revised this gist
Jun 24, 2024 . 1 changed file with 7 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -1 +1,7 @@ * MobileNetV4 Included yaml files are timm train script configs for training MobileNetV4 models in timm (see on HF Hub: https://huggingface.co/collections/timm/mobilenetv4-pretrained-weights-6669c22cda4db4244def9637) Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling. Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_size` and a sqrt scaling according to the global batch size. -
rwightman created this gist
Jun 24, 2024 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1 @@ * MobileNetV4 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,147 @@ aa: rand-m9-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 512 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 0.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.1 epoch_repeats: 0.0 epochs: 500 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: null hflip: 0.5 img_size: 224 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: 0.002 lr_base: 0.1 lr_base_scale: '' lr_base_size: 256 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null 
min_lr: 0 mixup: 0.2 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 1.0 mixup_switch_prob: 0.5 model: mobilenetv4_conv_blur_medium model_ema: true model_ema_decay: 0.9998 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: nadamw opt_betas: null opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel reprob: 0.25 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.1 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,147 @@ aa: rand-m9-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 512 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 1.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.35 epoch_repeats: 0.0 epochs: 500 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: 0.0 hflip: 0.5 img_size: null in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: 0.003 lr_base: 0.1 lr_base_scale: '' lr_base_size: 256 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 0.0 mixup: 0.8 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 0.8 mixup_switch_prob: 0.5 model: mobilenetv4_conv_large model_ema: true model_ema_decay: 0.9997 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: null opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 
remode: pixel reprob: 0.3 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.2 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,147 @@ aa: rand-m8-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 256 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 1.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.35 epoch_repeats: 0.0 epochs: 600 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: 0.0 hflip: 0.5 img_size: 384 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false 
layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.003 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 1.0e-06 mixup: 0.8 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 0.8 mixup_switch_prob: 0.5 model: mobilenetv4_conv_large model_ema: true model_ema_decay: 0.9999 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: null opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel reprob: 0.35 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.2 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,147 @@ aa: rand-m9-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 512 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 0.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.1 epoch_repeats: 0.0 epochs: 500 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: null hflip: 0.5 img_size: 224 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: 0.002 lr_base: 0.1 lr_base_scale: '' lr_base_size: 256 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 0 mixup: 0.2 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 1.0 mixup_switch_prob: 0.5 model: mobilenetv4_conv_medium model_ema: true model_ema_decay: 0.9998 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: nadamw opt_betas: null opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 
remode: pixel reprob: 0.25 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.1 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,147 @@ aa: rand-m8-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 512 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 0.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.1 epoch_repeats: 0.0 epochs: 500 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: null hflip: 0.5 img_size: 256 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: 
false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: 0.002 lr_base: 0.1 lr_base_scale: '' lr_base_size: 256 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 0 mixup: 0.2 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 0.8 mixup_switch_prob: 0.5 model: mobilenetv4_conv_medium model_ema: true model_ema_decay: 0.9998 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: null opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel reprob: 0.25 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.1 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,149 @@ aa: rand-m8-inc1-mstd1.0 amp: true amp_dtype: float16 amp_impl: native aug_repeats: 3.0 aug_splits: 0 batch_size: 1024 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: true checkpoint_hist: 10 class_map: '' clip_grad: null clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 0.0 cutmix_minmax: null data: /raid/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.25 drop_block: null drop_connect: null drop_path: null epoch_repeats: 0.0 epochs: 2400 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: 0.05 gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: 0.1 head_init_bias: null head_init_scale: null hflip: 0.5 img_size: null in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.002 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 0.0 mixup: 0.0 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 1.0 mixup_switch_prob: 0.5 model: mobilenetv4_conv_small model_ema: true model_ema_decay: 0.99995 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: - 0.6 - 0.995 opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 
recovery_interval: 0 remode: pixel reprob: 0.25 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: null torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 5 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.06 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,149 @@ aa: rand-m9-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 192 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 1.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.35 epoch_repeats: 0.0 epochs: 600 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: 0.0 hflip: 0.5 img_size: 384 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: 
'' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.003 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 1.0e-06 mixup: 0.8 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 0.8 mixup_switch_prob: 0.5 model: mobilenetv4_hybrid_large model_ema: true model_ema_decay: 0.9999 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: - 0.9 - 0.98 opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel reprob: 0.35 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.2 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,149 @@ aa: rand-m9-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 192 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 1.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.35 epoch_repeats: 0.0 epochs: 600 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: 0.0 hflip: 0.5 img_size: 384 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.003 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 1.0e-06 mixup: 0.8 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 0.8 mixup_switch_prob: 0.5 model: mobilenetv4_hybrid_large model_ema: true model_ema_decay: 0.9999 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: - 0.9 - 0.99 opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 
recovery_interval: 0 remode: pixel reprob: 0.35 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.2 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,147 @@ aa: rand-m9-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 512 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 0.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.1 epoch_repeats: 0.0 epochs: 500 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: null hflip: 0.5 img_size: null in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null 
interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: 0.001 lr_base: 0.1 lr_base_scale: '' lr_base_size: 256 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 0 mixup: 0.2 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 1.0 mixup_switch_prob: 0.5 model: mobilenetv4_hybrid_medium model_ema: true model_ema_decay: 0.9998 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: nadamw opt_betas: null opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel reprob: 0.25 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.1 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,149 @@ aa: rand-m8-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 512 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 1.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.1 epoch_repeats: 0.0 epochs: 550 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: 0.0 hflip: 0.5 img_size: 256 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.002 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 1.0e-06 mixup: 0.8 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 0.5 mixup_switch_prob: 0.5 model: mobilenetv4_hybrid_medium model_ema: true model_ema_decay: 0.9999 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: - 0.9 - 0.99 opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 
recovery_interval: 0 remode: pixel reprob: 0.3 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.141 worker_seeding: all workers: 8 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,149 @@ aa: rand-m9-inc1-mstd1.0 amp: true amp_dtype: bfloat16 amp_impl: native aug_repeats: 0 aug_splits: 0 batch_size: 384 bce_loss: false bce_pos_weight: null bce_sum: false bce_target_thresh: null bn_eps: null bn_momentum: null channels_last: false checkpoint_hist: 10 class_map: '' clip_grad: 5.0 clip_mode: norm color_jitter: 0.4 color_jitter_prob: null cooldown_epochs: 0 crop_pct: null cutmix: 1.0 cutmix_minmax: null data: /data/imagenet/ data_dir: null dataset: '' dataset_download: false decay_epochs: 90 decay_milestones: - 90 - 180 - 270 decay_rate: 0.1 device: cuda device_modules: null dist_bn: reduce drop: 0.2 drop_block: null drop_connect: null drop_path: 0.1 epoch_repeats: 0.0 epochs: 550 eval_metric: top1 experiment: '' fast_norm: false fuser: '' gaussian_blur_prob: null gp: null grad_accum_steps: 1 grad_checkpointing: false grayscale_prob: null head_init_bias: null head_init_scale: 0.0 hflip: 0.5 img_size: 384 in_chans: null initial_checkpoint: '' input_img_mode: null input_key: null input_size: null 
interpolation: '' jsd_loss: false layer_decay: null local_rank: 0 log_interval: 50 log_wandb: false lr: null lr_base: 0.002 lr_base_scale: '' lr_base_size: 4096 lr_cycle_decay: 0.5 lr_cycle_limit: 1 lr_cycle_mul: 1.0 lr_k_decay: 1.0 lr_noise: null lr_noise_pct: 0.67 lr_noise_std: 1.0 mean: null min_lr: 1.0e-06 mixup: 0.8 mixup_mode: batch mixup_off_epoch: 0 mixup_prob: 0.5 mixup_switch_prob: 0.5 model: mobilenetv4_hybrid_medium model_ema: true model_ema_decay: 0.9999 model_ema_force_cpu: false model_ema_warmup: true model_kwargs: {} momentum: 0.9 no_aug: false no_ddp_bb: false no_prefetcher: false no_resume_opt: false num_classes: null opt: adamw opt_betas: - 0.9 - 0.99 opt_eps: null opt_kwargs: {} output: '' patience_epochs: 10 pin_mem: false pretrained: false pretrained_path: null ratio: - 0.75 - 1.3333333333333333 recount: 1 recovery_interval: 0 remode: pixel reprob: 0.3 resplit: false resume: '' save_images: false scale: - 0.08 - 1.0 sched: cosine sched_on_updates: true seed: 42 smoothing: 0.1 split_bn: false start_epoch: null std: null sync_bn: false synchronize_step: false target_key: null torchcompile: inductor torchscript: false train_crop_mode: null train_interpolation: random train_num_samples: null train_split: train tta: 0 use_multi_epochs_loader: false val_num_samples: null val_split: validation validation_batch_size: null vflip: 0.0 warmup_epochs: 20 warmup_lr: 0.0 warmup_prefix: true weight_decay: 0.15 worker_seeding: all workers: 8