Wei (w32zhong)
⛹️ Trying to keep up.
@w32zhong
w32zhong / Dockerfile
Last active February 28, 2025 16:34
Example dockerfile
FROM nvcr.io/nvidia/pytorch:23.11-py3
WORKDIR /workspace
# the requirements file must be ADDed before it can be installed
ADD requirements.txt r2.txt
RUN pip install -r r2.txt
# FlashAttention-2 compatibility copied from https://github.com/Dao-AILab/flash-attention/issues/836#issuecomment-1951433985
RUN pip install flash-attn==2.5.1.post1
RUN apt update && apt install -y tmux git-lfs
RUN pip install nvitop
ADD . myproject
WORKDIR /workspace/myproject
@w32zhong
w32zhong / grpo_demo.py
Created February 18, 2025 01:17 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
#
# See https://github.com/willccbb/verifiers for ongoing developments
#
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
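The preview stops at the imports. For context, a minimal sketch of the TRL GRPO API these imports feed into, assuming the documented GRPOTrainer interface; the model id, dataset, and reward function below are placeholders, not the gist's (the gist's own reward, judging by the `re` import, is regex-based):

# Placeholder reward: prefer completions near 20 characters.
def reward_len(completions, **kwargs):
    return [-abs(20 - len(c)) for c in completions]

training_args = GRPOConfig(output_dir="grpo-out", logging_steps=10)
trainer = GRPOTrainer(
    model="meta-llama/Llama-3.2-1B-Instruct",  # placeholder model id
    reward_funcs=reward_len,
    args=training_args,
    train_dataset=load_dataset("trl-lib/tldr", split="train"),  # placeholder dataset
)
trainer.train()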
@w32zhong
w32zhong / steps.md
Last active February 10, 2025 22:09

EAGLE v1 Replication

Set up the environment and run an inference test:

git clone --branch v1 --depth 1 https://github.com/SafeAILab/EAGLE.git EAGLE-v1
cd EAGLE-v1
wget https://raw.githubusercontent.com/w32zhong/EAGLE/refs/heads/eagle-v1-save/application/test_v1.py -O eagle/application/test_v1.py
pip install -e .
pip install transformers==4.36.2
pip install accelerate==0.21.0
pip install datasets==3.2.0
python eagle/application/test_v1.py  # run the downloaded inference test (invocation assumed; adjust model paths inside as needed)
@w32zhong
w32zhong / gpu_vram_estimate.py
Created October 5, 2024 17:14
GPU vram estimate for pre-training LLMs.
import math
def act_mem(layers, seqlen, h_dim, heads, precision=2, bs=1):
    """ Returns amount of GPU VRAM (in GiB) required to store
    intermediate activations for traditional Transformer blocks
    """
    mem_bytes = layers * precision * seqlen * bs * h_dim * (
        16 + 2/precision + 2*heads*seqlen/h_dim
        + heads*seqlen/(precision*h_dim)
    )
    return mem_bytes / 1024**3  # bytes -> GiB (the preview truncates before this return)
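A quick sanity check of act_mem with Llama-7B-like shapes (the numbers are illustrative, not from the gist):

# 32 layers, 4096-token context, hidden size 4096, 32 heads, fp16 (2 bytes/value), batch 1
print(round(act_mem(layers=32, seqlen=4096, h_dim=4096, heads=32, precision=2, bs=1), 1), "GiB")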
import time  # needed for the timing below

def test(method, bits, random_top_layer, quantize_top_layer, results={}):
    # `prompt`, `test_vanilla`, and `test_eagle` are defined elsewhere in
    # the gist; the listing truncates this preview mid-function.
    print(prompt)
    start_time = time.time()
    if method == 'vanilla':
        cnt_tokens = test_vanilla(bits)
    elif method == 'eagle':
        cnt_tokens = test_eagle(bits,
            random_top_layer=random_top_layer,
            quantize_top_layer=quantize_top_layer
        )
from tvm.script import ir as I
from tvm.script import tir as T

@I.ir_module
class Module:
    @T.prim_func
    def main(var_A: T.handle, B: T.Buffer((768, 384), "int8"), Scale: T.Buffer((768, 3), "float16"), Zeros: T.Buffer((768, 3), "float16"), var_D: T.handle):
        T.func_attr({"dequantize_info": {"B_decode": {"decode_block": "B_decode", "fast_decoding": T.bool(False), "group_size": 256, "source_format": {"bits": 4, "format": "uint"}, "storage_dtype": "int8", "target_format": "float16", "with_scaling": T.bool(True), "with_zeros": T.bool(True), "zeros_mode": "rescale"}}, "dlight.tensorcore_prenormlized": T.bool(True), "opt_shapes": {"m": [2, 12]}, "tir.is_scheduled": 1, "tir.noalias": T.bool(True)})
        m = T.int32()
        A = T.match_buffer(var_A, (m, 768), "float16")
        D = T.match_buffer(var_D, (m, 768), "float16")
        # with T.block("root"):
        A_reindex_pad_shared_dyn = T.alloc_buffer((1, (m + 127) // 128 * 128, 768), "float16", scope="shared.dyn")
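The kernel body is cut off above. As a hedged sketch only: a prescheduled TVMScript module like this is typically compiled and invoked roughly as follows (target, device, and the dynamic dimension m are assumptions):

import numpy as np
import tvm

rt_mod = tvm.build(Module, target="cuda")  # requires a GPU-enabled TVM build
dev = tvm.cuda(0)
m = 4  # dynamic row count; the opt_shapes attr above hints at m in [2, 12]
A = tvm.nd.array(np.random.randn(m, 768).astype("float16"), dev)
B = tvm.nd.array(np.zeros((768, 384), dtype="int8"), dev)      # packed 4-bit weights
Scale = tvm.nd.array(np.ones((768, 3), dtype="float16"), dev)
Zeros = tvm.nd.array(np.zeros((768, 3), dtype="float16"), dev)
D = tvm.nd.array(np.empty((m, 768), dtype="float16"), dev)     # output buffer
rt_mod["main"](A, B, Scale, Zeros, D)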
%pip install ipympl
%matplotlib ipympl
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider

# draw initial triangle (one row of x- and one of y-coordinates, repeating the
# first vertex to close the shape; the preview truncates after the first row,
# so the second row here is an assumed completion)
triangle = np.array([
    [1, 4, 1, 1],
    [1, 1, 3, 1],  # assumed y-coordinates
])
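The preview never reaches the gist's widget code; a minimal sketch of how the imported Slider is typically wired up (axis placement and the scaling behavior are assumptions, not the gist's):

fig, ax = plt.subplots()
line, = ax.plot(triangle[0], triangle[1])
slider_ax = fig.add_axes([0.2, 0.02, 0.6, 0.03])
scale = Slider(slider_ax, 'scale', 0.1, 3.0, valinit=1.0)

def update(val):
    # redraw the triangle scaled about the origin
    line.set_data(triangle[0] * scale.val, triangle[1] * scale.val)
    fig.canvas.draw_idle()

scale.on_changed(update)
plt.show()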
@w32zhong
w32zhong / bark.py
Last active May 15, 2023 21:37
Bark
# pip install git+https://github.com/suno-ai/bark.git && pip uninstall -y torch torchvision torchaudio && pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118
text_prompt = """
For all neural retrievers, we fine-tune them on top of a further-pretrained backbone using batched triplets.
Each triplet contains a query q, and a pair of positive and negative passages, p plus and p minus.
We use passages of other training instances as additional negatives, which is a common practice to get more training samples for free.
"""
from bark import SAMPLE_RATE, generate_audio, preload_models
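The preview ends at the import; Bark's documented flow from here is short (the output filename is illustrative):

from scipy.io.wavfile import write as write_wav

preload_models()                           # download and cache the Bark weights
audio_array = generate_audio(text_prompt)  # synthesize speech for the prompt above
write_wav("bark_out.wav", SAMPLE_RATE, audio_array)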
@w32zhong
w32zhong / build.sh
Last active May 11, 2023 02:53
Build PyTorch 2.0
# clone pytorch at b004c0b3c6a1ee39ba0b512a00d95e7f83852556 with all submodules.
git clone -b main --recursive https://github.com/pytorch/pytorch
cd pytorch
git checkout b004c0b3c6a1ee39ba0b512a00d95e7f83852556  # pin the commit named in the comment above
git submodule update --init --recursive                # resync submodules to the pinned commit
inotifywait --event create -rm /home/tk/anaconda3/envs/pytorch-ref/
conda deactivate
conda env remove -n pytorch-src
conda create -n pytorch-src python=3.11
conda create --name llama -c conda-forge python=3.8
conda activate llama
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118
python -c 'import torch; print(torch.cuda.is_available())'
conda install -c conda-forge gxx_linux-64=10.4.0
conda install cuda -c nvidia/label/cuda-11.8.0
#pip install packaging flash-attn