sshleifer · July 22, 2021 23:39 · Jul 22, 2021 · Jul 22, 2021 · Jul 22, 2021 · Jul 22, 2021
diff --git a/optim_cmds.md b/optim_cmds.md
@@ -20,7 +20,7 @@ launch_optim_experiment () {
       --checkpoints-dir . \
       --constraint volta32gb \
       --partition learnfair \
-      --resume-failed  --no-save  "$@"
+      --resume-failed  --no-save  --mu 7200 "$@"
 }
 ```
 #### Commands for different optimizers

diff --git a/optim_cmds.md b/optim_cmds.md
@@ -1,10 +1,10 @@
 ### Setup
-- `git clone git@github.com:fairinternal/fairseq-py.git && cd fairseq-py && git checkout gshard`
+- `git clone git@github.com:fairinternal/fairseq-py.git && cd fairseq-py && git checkout stable-emb`
 - if you don't have the fairseq conda env, follow [these instructions](https://fb.workplace.com/groups/fairseq/permalink/262715387865587/)
 - `pip install numpy==1.20`. (optional, but some people needed this)
 - `pip install fairscale` (should be > 0.3.7, as of writing)
-- install `bitsandbytes` following [this](https://gist.github.com/TimDettmers/c4ffe346f095ee4481aa3d4b4ad2ffe0)
-
+-  on FAIR cluster: `pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda110 -U`
+-  OR on AWS: `pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U`
 
 ### Common Logic for all commands
 Edit this as needed 
@@ -28,7 +28,7 @@ launch_optim_experiment () {
 export NCCL_DEBUG="WARN"
 launch_optim_experiment -p opt_exp --opt adam16
 launch_optim_experiment -p opt_exp --opt adam
-launch_optim_experiment -p opt_exp --opt adam8bit
+launch_optim_experiment -p opt_exp --opt adam8bit --stable
 launch_optim_experiment -p opt_exp.no_mo --opt adafactor
 launch_optim_experiment -p opt_exp.yes_mo --opt adafactor --adafactor-use-momentum
 ```

diff --git a/optim_cmds.md b/optim_cmds.md
@@ -15,7 +15,7 @@ launch_optim_experiment () {
       --ddp no_c10d \
       --dl 12 \
       --embed-dim 2048 \
-      --bs 8 --li 50 \
+      --bs 4 --li 50 \
       --epg 0 \
       --checkpoints-dir . \
       --constraint volta32gb \

diff --git a/optim_cmds.md b/optim_cmds.md
@@ -15,7 +15,7 @@ launch_optim_experiment () {
       --ddp no_c10d \
       --dl 12 \
       --embed-dim 2048 \
-      --bs 8 --li 50 --mu 10 \
+      --bs 8 --li 50 \
       --epg 0 \
       --checkpoints-dir . \
       --constraint volta32gb \

diff --git a/optim_cmds.md b/optim_cmds.md
@@ -20,7 +20,7 @@ launch_optim_experiment () {
       --checkpoints-dir . \
       --constraint volta32gb \
       --partition learnfair \
-      --resume-failed  "$@"  --no-save
+      --resume-failed  --no-save  "$@"
 }
 ```
 #### Commands for different optimizers

diff --git a/optim_cmds.md b/optim_cmds.md
@@ -0,0 +1,37 @@
+### Setup
+- `git clone git@github.com:fairinternal/fairseq-py.git && cd fairseq-py && git checkout gshard`
+- if you don't have the fairseq conda env, follow [these instructions](https://fb.workplace.com/groups/fairseq/permalink/262715387865587/)
+- `pip install numpy==1.20`. (optional, but some people needed this)
+- `pip install fairscale` (should be > 0.3.7, as of writing)
+- install `bitsandbytes` following [this](https://gist.github.com/TimDettmers/c4ffe346f095ee4481aa3d4b4ad2ffe0)
+
+
+### Common Logic for all commands
+Edit this as needed 
+```bash
+launch_optim_experiment () {
+    ./fb_sweep/benchmark_lm.py \
+      -g 8 -t 1 -n 1 \
+      --ddp no_c10d \
+      --dl 12 \
+      --embed-dim 2048 \
+      --bs 8 --li 50 --mu 10 \
+      --epg 0 \
+      --checkpoints-dir . \
+      --constraint volta32gb \
+      --partition learnfair \
+      --resume-failed  "$@"  --no-save
+}
+```
+#### Commands for different optimizers
+```bash
+export NCCL_DEBUG="WARN"
+launch_optim_experiment -p opt_exp --opt adam16
+launch_optim_experiment -p opt_exp --opt adam
+launch_optim_experiment -p opt_exp --opt adam8bit
+launch_optim_experiment -p opt_exp.no_mo --opt adafactor
+launch_optim_experiment -p opt_exp.yes_mo --opt adafactor --adafactor-use-momentum
+```
+
+
+- Note: for some hyperparameters, you must manually edit `fb_sweep/benchmark_lm.py`.