Skip to content

Instantly share code, notes, and snippets.

@rwightman
Last active November 18, 2024 02:32

Revisions

  1. rwightman revised this gist Oct 31, 2024. 2 changed files with 301 additions and 0 deletions.
    151 changes: 151 additions & 0 deletions resnet18_34_ra4_e3600.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,151 @@
    aa: rand-m7-inc1-mstd1.0
    amp: true
    amp_dtype: float16
    amp_impl: native
    aug_repeats: 3.0
    aug_splits: 0
    batch_size: 896
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: true
    checkpoint_hist: 10
    class_map: ''
    clip_grad: null
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: 0.9
    cutmix: 0.0
    cutmix_minmax: null
    data_dir: /data/imagenet/
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.25
    drop_block: null
    drop_connect: null
    drop_path: null
    epoch_repeats: 0.0
    epochs: 3600
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: 0.05
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: 0.05
    head_init_bias: null
    head_init_scale: null
    hflip: 0.5
    img_size: null
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.002
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean:
    - 0.5
    min_lr: 0.0
    mixup: 0.0
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 1.0
    mixup_switch_prob: 0.5
    model: resnetv2_34
    model_ema: true
    model_ema_decay: 0.99995
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas:
    - 0.6
    - 0.995
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.25
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std:
    - 0.5
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchcompile_mode: null
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 5
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.1
    worker_seeding: all
    workers: 10
    150 changes: 150 additions & 0 deletions resnet50_ra4_e3600.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,150 @@
    aa: rand-m8-inc1-mstd1.0
    amp: true
    amp_dtype: float16
    amp_impl: native
    aug_repeats: 3.0
    aug_splits: 0
    batch_size: 768
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: true
    checkpoint_hist: 10
    class_map: ''
    clip_grad: null
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: 0.95
    cutmix: 0.0
    cutmix_minmax: null
    data_dir: /data/imagenet/
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.3
    drop_block: null
    drop_connect: null
    drop_path: 0.1
    epoch_repeats: 0.0
    epochs: 3600
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: 0.05
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: 0.05
    head_init_bias: null
    head_init_scale: 0.0
    hflip: 0.5
    img_size: null
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.002
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean:
    - 0.5
    min_lr: 0.0
    mixup: 0.0
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 1.0
    mixup_switch_prob: 0.5
    model: resnet50d
    model_ema: true
    model_ema_decay: 0.99995
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas:
    - 0.6
    - 0.995
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.3
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std:
    - 0.5
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 5
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.125
    worker_seeding: all
    workers: 12
  2. rwightman revised this gist Sep 17, 2024. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion _README_MobileNetV4.md
    Original file line number Diff line number Diff line change
    @@ -6,4 +6,6 @@ Note the # of GPUs, this needs to be taken into consideration for global batch s

    Also note that some models have `lr` set to a non-null value; when set, this LR is used directly. Otherwise, training falls back to `lr_base`, and the effective rate is calculated from `lr_base_size` with sqrt scaling according to the global batch size.

    Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models, allow a larger (closer to 1) beta2 for adam, otherwise beta2 on the adam, or the LR needed to be reduced to avoid instability with the hybrids.
    Models with `ix` in the tag use an alternative init for the MQA attention model projections: xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve the stability of the hybrid models and allowed a larger (closer to 1) beta2 for Adam; otherwise, either Adam's beta2 or the LR needed to be reduced to avoid instability with the hybrids.

    To use one of the .yaml files, pass it via the `--config` argument of the timm `train.py` script, e.g.: `train.py --config mnv4.yaml --data-dir /where/my/data ... <other arg overrides>`
  3. rwightman renamed this gist Jun 24, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  4. rwightman revised this gist Jun 24, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion _README.md
    Original file line number Diff line number Diff line change
    @@ -4,6 +4,6 @@ Included yaml files are timm train script configs for training MobileNetV4 model

    Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling.

    Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.
    Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_size` and a sqrt scaling according to the global batch size.

    Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models, allow a larger (closer to 1) beta2 for adam, otherwise beta2 on the adam, or the LR needed to be reduced to avoid instability with the hybrids.
  5. rwightman revised this gist Jun 24, 2024. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion _README.md
    Original file line number Diff line number Diff line change
    @@ -4,4 +4,6 @@ Included yaml files are timm train script configs for training MobileNetV4 model

    Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling.

    Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.
    Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.

    Models with `ix` in the tag are using an alternative init for the MQA attention model projections, xavier (glorot) uniform instead of the efficientnet/mobilenet defaults. This seemed to improve stability of the hybrid models, allow a larger (closer to 1) beta2 for adam, otherwise beta2 on the adam, or the LR needed to be reduced to avoid instability with the hybrids.
  6. rwightman revised this gist Jun 24, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion _README.md
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,4 @@
    * MobileNetV4
    # MobileNetV4 Hparams

    Included yaml files are timm train script configs for training MobileNetV4 models in timm (see on HF Hub: https://huggingface.co/collections/timm/mobilenetv4-pretrained-weights-6669c22cda4db4244def9637)

  7. rwightman revised this gist Jun 24, 2024. 1 changed file with 7 additions and 1 deletion.
    8 changes: 7 additions & 1 deletion _README.md
    Original file line number Diff line number Diff line change
    @@ -1 +1,7 @@
    * MobileNetV4
    * MobileNetV4

    Included yaml files are timm train script configs for training MobileNetV4 models in timm (see on HF Hub: https://huggingface.co/collections/timm/mobilenetv4-pretrained-weights-6669c22cda4db4244def9637)

    Note the # of GPUs, this needs to be taken into consideration for global batch size equivalence, and LR scaling.

    Also note, some models have `lr` set to a non null value, this LR is used directly if set. Otherwise, it falls back to `lr_base` and the used rate is calculated based on `lr_base_sized` and a sqrt scaling according to the global batch size.
  8. rwightman created this gist Jun 24, 2024.
    1 change: 1 addition & 0 deletions _README.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    * MobileNetV4
    147 changes: 147 additions & 0 deletions mnv4_cbm_r224_e500_gpu2.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,147 @@
    aa: rand-m9-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 512
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 0.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.1
    epoch_repeats: 0.0
    epochs: 500
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: null
    hflip: 0.5
    img_size: 224
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: 0.002
    lr_base: 0.1
    lr_base_scale: ''
    lr_base_size: 256
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 0
    mixup: 0.2
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 1.0
    mixup_switch_prob: 0.5
    model: mobilenetv4_conv_blur_medium
    model_ema: true
    model_ema_decay: 0.9998
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: nadamw
    opt_betas: null
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.25
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.1
    worker_seeding: all
    workers: 8
    147 changes: 147 additions & 0 deletions mnv4_cl_r256_e500_gpu8.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,147 @@
    aa: rand-m9-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 512
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 1.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.35
    epoch_repeats: 0.0
    epochs: 500
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: 0.0
    hflip: 0.5
    img_size: null
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: 0.003
    lr_base: 0.1
    lr_base_scale: ''
    lr_base_size: 256
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 0.0
    mixup: 0.8
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 0.8
    mixup_switch_prob: 0.5
    model: mobilenetv4_conv_large
    model_ema: true
    model_ema_decay: 0.9997
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas: null
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.3
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.2
    worker_seeding: all
    workers: 8
    147 changes: 147 additions & 0 deletions mnv4_cl_r384_e600_gpu8.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,147 @@
    aa: rand-m8-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 256
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 1.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.35
    epoch_repeats: 0.0
    epochs: 600
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: 0.0
    hflip: 0.5
    img_size: 384
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.003
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 1.0e-06
    mixup: 0.8
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 0.8
    mixup_switch_prob: 0.5
    model: mobilenetv4_conv_large
    model_ema: true
    model_ema_decay: 0.9999
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas: null
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.35
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.2
    worker_seeding: all
    workers: 8
    147 changes: 147 additions & 0 deletions mnv4_cm_r224_e500_gpu2.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,147 @@
    aa: rand-m9-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 512
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 0.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.1
    epoch_repeats: 0.0
    epochs: 500
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: null
    hflip: 0.5
    img_size: 224
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: 0.002
    lr_base: 0.1
    lr_base_scale: ''
    lr_base_size: 256
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 0
    mixup: 0.2
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 1.0
    mixup_switch_prob: 0.5
    model: mobilenetv4_conv_medium
    model_ema: true
    model_ema_decay: 0.9998
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: nadamw
    opt_betas: null
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.25
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.1
    worker_seeding: all
    workers: 8
    147 changes: 147 additions & 0 deletions mnv4_cm_r256_e500_gpu2.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,147 @@
    aa: rand-m8-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 512
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 0.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.1
    epoch_repeats: 0.0
    epochs: 500
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: null
    hflip: 0.5
    img_size: 256
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: 0.002
    lr_base: 0.1
    lr_base_scale: ''
    lr_base_size: 256
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 0
    mixup: 0.2
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 0.8
    mixup_switch_prob: 0.5
    model: mobilenetv4_conv_medium
    model_ema: true
    model_ema_decay: 0.9998
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas: null
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.25
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.1
    worker_seeding: all
    workers: 8
    149 changes: 149 additions & 0 deletions mnv4_cs_r224_e2400_gpu4.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,149 @@
    aa: rand-m8-inc1-mstd1.0
    amp: true
    amp_dtype: float16
    amp_impl: native
    aug_repeats: 3.0
    aug_splits: 0
    batch_size: 1024
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: true
    checkpoint_hist: 10
    class_map: ''
    clip_grad: null
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 0.0
    cutmix_minmax: null
    data: /raid/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.25
    drop_block: null
    drop_connect: null
    drop_path: null
    epoch_repeats: 0.0
    epochs: 2400
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: 0.05
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: 0.1
    head_init_bias: null
    head_init_scale: null
    hflip: 0.5
    img_size: null
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.002
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 0.0
    mixup: 0.0
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 1.0
    mixup_switch_prob: 0.5
    model: mobilenetv4_conv_small
    model_ema: true
    model_ema_decay: 0.99995
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas:
    - 0.6
    - 0.995
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.25
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: null
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 5
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.06
    worker_seeding: all
    workers: 8
    149 changes: 149 additions & 0 deletions mnv4_hl_r384_e600_gpu8.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,149 @@
    aa: rand-m9-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 192
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 1.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.35
    epoch_repeats: 0.0
    epochs: 600
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: 0.0
    hflip: 0.5
    img_size: 384
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.003
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 1.0e-06
    mixup: 0.8
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 0.8
    mixup_switch_prob: 0.5
    model: mobilenetv4_hybrid_large
    model_ema: true
    model_ema_decay: 0.9999
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas:
    - 0.9
    - 0.98
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.35
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.2
    worker_seeding: all
    workers: 8
    149 changes: 149 additions & 0 deletions mnv4_hl_r384_e600_ix_gpu8.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,149 @@
    aa: rand-m9-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 192
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 1.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.35
    epoch_repeats: 0.0
    epochs: 600
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: 0.0
    hflip: 0.5
    img_size: 384
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.003
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 1.0e-06
    mixup: 0.8
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 0.8
    mixup_switch_prob: 0.5
    model: mobilenetv4_hybrid_large
    model_ema: true
    model_ema_decay: 0.9999
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas:
    - 0.9
    - 0.99
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.35
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.2
    worker_seeding: all
    workers: 8
    147 changes: 147 additions & 0 deletions mnv4_hm_r224_e500_gpu2.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,147 @@
    aa: rand-m9-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 512
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 0.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.1
    epoch_repeats: 0.0
    epochs: 500
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: null
    hflip: 0.5
    img_size: null
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: 0.001
    lr_base: 0.1
    lr_base_scale: ''
    lr_base_size: 256
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 0
    mixup: 0.2
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 1.0
    mixup_switch_prob: 0.5
    model: mobilenetv4_hybrid_medium
    model_ema: true
    model_ema_decay: 0.9998
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: nadamw
    opt_betas: null
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.25
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.1
    worker_seeding: all
    workers: 8
    149 changes: 149 additions & 0 deletions mnv4_hm_r256_e550_ix_gpu8.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,149 @@
    aa: rand-m8-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 512
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 1.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.1
    epoch_repeats: 0.0
    epochs: 550
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: 0.0
    hflip: 0.5
    img_size: 256
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.002
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 1.0e-06
    mixup: 0.8
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 0.5
    mixup_switch_prob: 0.5
    model: mobilenetv4_hybrid_medium
    model_ema: true
    model_ema_decay: 0.9999
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas:
    - 0.9
    - 0.99
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.3
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.141
    worker_seeding: all
    workers: 8
    149 changes: 149 additions & 0 deletions mnv4_hm_r384_e550_ix_gpu8.yaml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,149 @@
    aa: rand-m9-inc1-mstd1.0
    amp: true
    amp_dtype: bfloat16
    amp_impl: native
    aug_repeats: 0
    aug_splits: 0
    batch_size: 384
    bce_loss: false
    bce_pos_weight: null
    bce_sum: false
    bce_target_thresh: null
    bn_eps: null
    bn_momentum: null
    channels_last: false
    checkpoint_hist: 10
    class_map: ''
    clip_grad: 5.0
    clip_mode: norm
    color_jitter: 0.4
    color_jitter_prob: null
    cooldown_epochs: 0
    crop_pct: null
    cutmix: 1.0
    cutmix_minmax: null
    data: /data/imagenet/
    data_dir: null
    dataset: ''
    dataset_download: false
    decay_epochs: 90
    decay_milestones:
    - 90
    - 180
    - 270
    decay_rate: 0.1
    device: cuda
    device_modules: null
    dist_bn: reduce
    drop: 0.2
    drop_block: null
    drop_connect: null
    drop_path: 0.1
    epoch_repeats: 0.0
    epochs: 550
    eval_metric: top1
    experiment: ''
    fast_norm: false
    fuser: ''
    gaussian_blur_prob: null
    gp: null
    grad_accum_steps: 1
    grad_checkpointing: false
    grayscale_prob: null
    head_init_bias: null
    head_init_scale: 0.0
    hflip: 0.5
    img_size: 384
    in_chans: null
    initial_checkpoint: ''
    input_img_mode: null
    input_key: null
    input_size: null
    interpolation: ''
    jsd_loss: false
    layer_decay: null
    local_rank: 0
    log_interval: 50
    log_wandb: false
    lr: null
    lr_base: 0.002
    lr_base_scale: ''
    lr_base_size: 4096
    lr_cycle_decay: 0.5
    lr_cycle_limit: 1
    lr_cycle_mul: 1.0
    lr_k_decay: 1.0
    lr_noise: null
    lr_noise_pct: 0.67
    lr_noise_std: 1.0
    mean: null
    min_lr: 1.0e-06
    mixup: 0.8
    mixup_mode: batch
    mixup_off_epoch: 0
    mixup_prob: 0.5
    mixup_switch_prob: 0.5
    model: mobilenetv4_hybrid_medium
    model_ema: true
    model_ema_decay: 0.9999
    model_ema_force_cpu: false
    model_ema_warmup: true
    model_kwargs: {}
    momentum: 0.9
    no_aug: false
    no_ddp_bb: false
    no_prefetcher: false
    no_resume_opt: false
    num_classes: null
    opt: adamw
    opt_betas:
    - 0.9
    - 0.99
    opt_eps: null
    opt_kwargs: {}
    output: ''
    patience_epochs: 10
    pin_mem: false
    pretrained: false
    pretrained_path: null
    ratio:
    - 0.75
    - 1.3333333333333333
    recount: 1
    recovery_interval: 0
    remode: pixel
    reprob: 0.3
    resplit: false
    resume: ''
    save_images: false
    scale:
    - 0.08
    - 1.0
    sched: cosine
    sched_on_updates: true
    seed: 42
    smoothing: 0.1
    split_bn: false
    start_epoch: null
    std: null
    sync_bn: false
    synchronize_step: false
    target_key: null
    torchcompile: inductor
    torchscript: false
    train_crop_mode: null
    train_interpolation: random
    train_num_samples: null
    train_split: train
    tta: 0
    use_multi_epochs_loader: false
    val_num_samples: null
    val_split: validation
    validation_batch_size: null
    vflip: 0.0
    warmup_epochs: 20
    warmup_lr: 0.0
    warmup_prefix: true
    weight_decay: 0.15
    worker_seeding: all
    workers: 8