checkpoints:
  checkpoint_interval: 10
  checkpoints_path: checkpoints
  checkpoints_path_is_shared_file_system: false
  resume_checkpoint_path: null
  save_initial_state: false
data_stages:
- name: Stable Training Stage
  start_training_step: 1
  data:
    dataset:
      dataset_overwrite_cache: false
      dataset_processing_num_proc_per_process: 1
      hf_dataset_config_name: null
      hf_dataset_or_datasets: /p/fastdata/mmlaion/alpaca-cleaned
      hf_dataset_splits: train
      text_column_name: output
    num_loading_workers: 1
    seed: 42
general:
  benchmark_csv_path: null
  consumed_train_samples: null
  ignore_sanity_checks: false
  project: debug
  run: tiny_llama_%date_%jobid
  seed: 42
  step: null
lighteval: null
logging:
  iteration_step_info_interval: 1
  log_level: info
  log_level_replica: info
model:
  ddp_bucket_cap_mb: 25
  dtype: bfloat16
  init_method:
    std: 0.025
  make_vocab_size_divisible_by: 1
  model_config:
    bos_token_id: 1
    eos_token_id: 2
    hidden_act: gelu
    hidden_size: 7168
    initializer_range: 0.02
    intermediate_size: 28672
    is_llama_config: true
    max_position_embeddings: 2048
    num_attention_heads: 56
    num_hidden_layers: 54
    num_key_value_heads: 56
    pad_token_id: null
    pretraining_tp: 1
    rms_norm_eps: 1.0e-05
    rope_scaling: null
    tie_word_embeddings: true
    use_cache: true
    vocab_size: 50257
optimizer:
  accumulate_grad_in_fp32: true
  adam_beta1: 0.9
  adam_beta2: 0.95
  adam_eps: 1.0e-08
  clip_grad: 1.0
  learning_rate_scheduler:
    learning_rate: 0.0003
    lr_decay_starting_step: null
    lr_decay_steps: 8
    lr_decay_style: cosine
    lr_warmup_steps: 10
    lr_warmup_style: linear
    min_decay_lr: 1.0e-05
  torch_adam_is_fused: true
  weight_decay: 0.01
  zero_stage: 0
parallelism:
  dp: 1
  pp: 8
  pp_engine: 1f1b
  # pp_engine: afab
  tp: 4
  tp_linear_async_communication: true
  tp_mode: REDUCE_SCATTER
profiler: null
tokenizer:
  tokenizer_max_length: null
  tokenizer_name_or_path: gpt2
  tokenizer_revision: null
tokens:
  batch_accumulation_per_replica: 8
  limit_test_batches: 0
  limit_val_batches: 0
  micro_batch_size: 1
  sequence_length: 2048
  train_steps: 14
  val_check_interval: -1
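
For reference, the parallelism and tokens sections together fix the launch footprint: dp x pp x tp = 1 x 8 x 4, so the run expects 32 GPUs, and each optimizer step processes dp x micro_batch_size x batch_accumulation_per_replica = 8 sequences of 2048 tokens. Below is a minimal sketch that derives these numbers from the config; the filename config_tiny_llama.yaml is an assumption, adjust it to wherever you save the file.

# Sketch (not part of the gist): load the config above and derive the
# quantities implied by the parallelism/tokens sections.
# Assumes the YAML is saved as "config_tiny_llama.yaml".
import yaml  # pip install pyyaml

with open("config_tiny_llama.yaml") as f:
    cfg = yaml.safe_load(f)

par = cfg["parallelism"]
tok = cfg["tokens"]
model = cfg["model"]["model_config"]

# Total GPUs required: data-parallel x pipeline-parallel x tensor-parallel.
world_size = par["dp"] * par["pp"] * par["tp"]  # 1 * 8 * 4 = 32

# Sequences per optimizer step across all data-parallel replicas.
global_batch = (par["dp"] * tok["micro_batch_size"]
                * tok["batch_accumulation_per_replica"])  # 1 * 1 * 8 = 8

# Tokens consumed per optimizer step.
tokens_per_step = global_batch * tok["sequence_length"]  # 8 * 2048 = 16384

# Sanity check mirroring the model config: head_dim must be an integer.
assert model["hidden_size"] % model["num_attention_heads"] == 0  # 7168 / 56 = 128

print(f"world_size={world_size}, global_batch={global_batch}, "
      f"tokens_per_step={tokens_per_step}")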