This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extracted and simplified the two-level aggregation approach (first, parallel aggregation in blocks, then final sequential aggregation) from https://github.com/volcengine/verl/blob/main/verl/utils/kernel/kernels.py | |
# Examples of single-level sequential, online aggregation approaches: | |
# - https://github.com/linkedin/Liger-Kernel/blob/main/src/liger_kernel/ops/cross_entropy.py | |
# - https://github.com/Dao-AILab/flash-attention/blob/main/flash_attn/ops/triton/cross_entropy.py | |
# logsumexp_torch has some eager pseudo-code in PyTorch which emulates what Triton does, except that BLOCK_SIZE_M equals M
# tl.program_id(axis=0).to(tl.int64) is used for https://arxiv.org/abs/2410.10989 and https://github.com/linkedin/Liger-Kernel/blob/05b43a14913ced3776aa3fc50020089b8c0d63c1/src/liger_kernel/ops/cross_entropy.py#L77-L79 | |
# sample_verl.pt is derived from the inputs (logits = torch.matmul(hidden, weights)) uploaded by @WindowsXP-Beta in https://github.com/volcengine/verl/issues/2656#issuecomment-3131136498
# created for |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nvidia-smi -q -x | grep "</pid>" | tr -d "</pid>\t" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://github.com/pytorch/pytorch/issues/158710 | |
# https://github.com/pytorch/pytorch/issues/158698 | |
# https://github.com/pytorch/pytorch/issues/69431 | |
import torch | |
def to_(tensor1d, dtype, *, chunks = 0, split_size = 0): | |
# TODO: instead of clone() maybe could copy_ into a buffer, clone() does not allow using a buffer | |
# TODO: unclear if these codes can support autograd, and if so, will it remember too much in saved_for_backward | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://en.wikipedia.org/wiki/Base64 | |
# 00123456 00ABCDEF 00abcdef 00uvwxyz | |
# 123456AB CDEFabcd efuvwxyz | |
# this code does not support batches. adapting for e.g. concatenated varlen format is possible, but need to handle/preserve varlen information and paddings in some way | |
import torch | |
def base64_encode_padded(input_as_uint8_tensor): | |
base64_alphabet, base64_pad = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/', '=' | |
device = input_as_uint8_tensor.device |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Nebius s3 | |
# sudo apt-get install s3cmd # export the vars below or configure access_key / secret_key | |
# export AWS_ACCESS_KEY_ID=... | |
# export AWS_SECRET_ACCESS_KEY=... | |
# https://s3tools.org/usage | |
# s3cmd -c ~/.nebius.s3cfg ls | |
[default] | |
# access_key=... | |
# secret_key=... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PYTHONPATH=. python ... | |
import os | |
import torch | |
def cuda_oom_hook(device, alloc, device_alloc, device_free, info = dict(counter = 0), snapshot_dump_file_pattern = './memory_snapshot_{pid}_{oom_counter}.pt'):
    '''
    Print diagnostics about a CUDA out-of-memory event.

    device, alloc, device_alloc, device_free: values describing the OOM event; they are only printed here.
    info: mutable state dict; the OOM number is read from info['oom_counter'].
    snapshot_dump_file_pattern: path template with {pid} and {oom_counter} placeholders (not used in the lines visible here).
    '''
    # NOTE(review): the shared mutable default `info` looks intentional (state kept between calls) - confirm.
    # The default dict is created with the key 'counter' while the code reads 'oom_counter',
    # which raised KeyError on every call; seed 'oom_counter' from 'counter' so both keys are present.
    info.setdefault('oom_counter', info.get('counter', 0))
    memory_summary = torch.cuda.memory_summary(device = device)
    memory_snapshot = torch.cuda.memory._snapshot(device = device)
    pid = os.getpid()
    print('device:', device, 'oom#:', info['oom_counter'], 'pid:', pid, 'alloc:', alloc, 'device_alloc:', device_alloc, 'device_free:', device_free)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
import itertools | |
inputs = list(range(111)) | |
batchsize = 10 | |
num_workers = 4 | |
batches = itertools.batched(inputs, batchsize) | |
def reducer(xs): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: bash cache_hf_model.sh Qwen/Qwen3-8B
# export HF_HOME=/my/cache/HF_HOME
# "$@" is quoted so that arguments are passed through without word-splitting or globbing; the Python code uses the last argument as the model name
python -c 'import sys, transformers; transformers.AutoModel.from_pretrained(sys.argv[-1], trust_remote_code=True, device_map="meta")' "$@"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_dotenv(dotenv_path = '.env'): | |
# https://snarky.ca/use-toml-for-env-files/ | |
# https://github.com/theskumar/python-dotenv | |
''' | |
# such simple key-value files are toml subset and can be read via tomllib without external packages or hacks | |
a="b" | |
c="d" | |
''' | |
import os, tomllib | |
os.environ.update(tomllib.load(open(dotenv_path, 'rb'))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import fsspec

# 'rt' must be passed explicitly, as in fsspec the default mode is 'rb'
# a context manager must be used, as in fsspec the result of fsspec.open(...) does not have method read()
with fsspec.open(sys.argv[1], 'rt') as f:
    print(f.read())
# echo world > hello.txt | |
# python catfsspec.py hello.txt | |
# python catfsspec.py file://hello.txt | |
# python catfsspec.py s3://mybucket/hello.txt |
NewerOlder