@kyunghyuncho
Created August 13, 2023 20:19
7B LLM cost and tokens
n_months = 4 # let's say 4 months of continuous training
sec_per_grad = 1.7 # 1.7 seconds per gradient computation (a very rough number)
instance_price_per_hour = 24.01 # assuming AWS p4de instances with 1yr reservation
discount = 0.2 # assuming you can get some discount from AWS or whatever your cloud vendor is
n_gpu = 8 # 8x A100's with 80G each
n_instances = 4 # the number of AWS p4de instances
ex_per_gpu = 2 # the number of training examples processed per gpu
n_tokens_per_ex = 2000 # the length of each training example
n_tokens_per_grad = n_gpu * n_instances * ex_per_gpu * n_tokens_per_ex # the number of tokens per gradient computation; 128,000
n_grads = n_months * 30 * 24 * 60 * 60 / sec_per_grad # the number of gradient computations over n_months (assume 30 days a month); 6,098,823.529411765
n_total_tokens = n_grads * n_tokens_per_grad # the total number of tokens over n_months; 780,649,411,764.7059 tokens
total_price = n_months * 30 * 24 * instance_price_per_hour * (1. - discount) * n_instances # total price over n_months; 221,276.16
print(f'total price = ${total_price:,.2f} with {int(n_total_tokens):,} total tokens')
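One figure the snippet doesn't print, but that follows directly from the same constants, is the unit cost per billion training tokens (roughly $283 under these assumptions). A minimal sketch, recomputing from the gist's own numbers:

```python
# Derived figure (not in the original gist): cost per billion training tokens,
# using the same assumptions as the snippet above.
n_months = 4                      # 4 months of continuous training
sec_per_grad = 1.7                # seconds per gradient computation (rough)
instance_price_per_hour = 24.01   # AWS p4de, 1yr reservation
discount = 0.2                    # assumed cloud-vendor discount
n_gpu = 8                         # 8x A100 80GB per instance
n_instances = 4                   # number of p4de instances
ex_per_gpu = 2                    # training examples per GPU per step
n_tokens_per_ex = 2000            # tokens per training example

n_tokens_per_grad = n_gpu * n_instances * ex_per_gpu * n_tokens_per_ex
n_grads = n_months * 30 * 24 * 60 * 60 / sec_per_grad
n_total_tokens = n_grads * n_tokens_per_grad
total_price = n_months * 30 * 24 * instance_price_per_hour * (1. - discount) * n_instances

# Normalize: dollars per billion tokens processed.
price_per_billion_tokens = total_price / (n_total_tokens / 1e9)
print(f'~${price_per_billion_tokens:,.2f} per billion training tokens')
```

This gives about $283 per billion tokens, a handy unit for comparing against other hardware or pricing options.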