Skip to content

Instantly share code, notes, and snippets.

View huseinzol05's full-sized avatar
🍵
Hunting cendol!

HUSEIN ZOLKEPLI huseinzol05

🍵
Hunting cendol!
View GitHub Profile
@huseinzol05
huseinzol05 / test-flash-multipacking.ipynb
Last active July 23, 2025 06:35
HuggingFace Transformers 4.51.3 Flash multipacking
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
"""
https://www.oumi.ai/docs/en/latest/_modules/oumi/core/callbacks/hf_mfu_callback.html
"""
import torch
import wandb
from transformers import TrainerCallback, TrainerState, TrainerControl
# Theoretical Peak Tensor Core Performance (BF16 / FP16)
DEVICES = {
@huseinzol05
huseinzol05 / simple_all_reduce_ray.py
Created July 13, 2025 07:36
Simple all-reduce stress test across nodes using Ray
import os
import time
import ray
import torch
import torch.distributed as dist
from ray import train
from ray.train import ScalingConfig
from ray.train.torch import TorchTrainer, get_device
class RayConnection:
@huseinzol05
huseinzol05 / upload-audio.ipynb
Created March 9, 2025 07:12
distributed zip files
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@huseinzol05
huseinzol05 / noob-parquet-datasets.ipynb
Created February 13, 2025 03:21
Read multiple parquet files as one dataset with well-defined sizes
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import torch
import torch.nn.functional as F
from torch.nn.attention.flex_attention import flex_attention, create_block_mask
import torch
# Fixed sizes used by the script below. Presumably these parameterize the
# flex_attention experiment (number of heads, per-head dimension, sequence
# length, chunk length, batch size) — TODO confirm against the code that
# consumes them, which is not visible here.
head_num = 16
dim = 128
seq_len = 100
chunk_size = 5
batch_size = 1
@huseinzol05
huseinzol05 / grpo_demo.py
Created January 30, 2025 22:27 — forked from willccbb/grpo_demo.py
GRPO Llama-1B LOL
# train_grpo.py
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
# Load and prep dataset
@huseinzol05
huseinzol05 / unzip.py
Last active February 21, 2025 01:47
distributed unzip
from glob import glob
from tqdm import tqdm
from multiprocess import Pool
import itertools
import zipfile
import os
def chunks(l, n):
    """Yield consecutive slices of ``l`` paired with their chunk index.

    Args:
        l: A sliceable sequence supporting ``len()`` (e.g. a list).
        n: Maximum number of items per chunk; must be a positive integer.
            ``range()`` raises ``ValueError`` for a step of zero once the
            generator is first advanced.

    Yields:
        Tuples ``(chunk, index)`` where ``chunk`` is ``l[i:i + n]`` and
        ``index`` is the zero-based position of that chunk. The final chunk
        may hold fewer than ``n`` items; an empty ``l`` yields nothing.
    """
    for i in range(0, len(l), n):
        # i advances in steps of n, so i // n is the running chunk number.
        yield (l[i: i + n], i // n)
@huseinzol05
huseinzol05 / run-predict.py
Created October 23, 2024 02:20
Simple Python script to run evaluation using Bedrock. This script was intended for Fahim Surani (AWS), but if you are not him, please feel free to use it.
import json
import requests
import os
import boto3
import shutil
from tqdm import tqdm
client = boto3.client(
'bedrock-runtime',
region_name='us-west-2',
@huseinzol05
huseinzol05 / install.sh
Created June 25, 2024 07:30
NCCL test OpenMPI
# Fetch the NCCL tests sources and work from inside the checkout.
git clone https://github.com/nvidia/nccl-tests && cd nccl-tests
# Install the Open MPI runtime, common files and development headers
# required for the MPI-enabled build below.
sudo apt-get install openmpi-bin openmpi-common libopenmpi-dev -y
# Build the tests with MPI support. MPI_HOME is the Open MPI location used
# here; presumably the Debian/Ubuntu x86_64 package path — confirm on your system.
make MPI=1 MPI_HOME=/usr/lib/x86_64-linux-gnu/openmpi
# Local run of the all-reduce benchmark. Per the nccl-tests README:
#   -b / -e  minimum / maximum message size (8 bytes up to 128M)
#   -f       multiplication factor between successive sizes
#   -g       number of GPUs per thread
./build/all_reduce_perf -b 8 -e 128M -f 2 -g 4
# Multi-host run: launch 10 MPI processes on the two listed hosts with the
# same benchmark arguments as the local run.
mpirun -np 10 -H 10.224.0.47,10.224.0.70 ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 4