# train_grpo.py
import re
from dataclasses import dataclass, field
from typing import *

import torch
from datasets import load_dataset, Dataset, load_from_disk
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer, TrlParser
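A minimal sketch of how these imports come together in a GRPO run, closely following TRL's documented quickstart; the model id, dataset, and length-based reward below are illustrative stand-ins, not part of this gist:

from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

dataset = load_dataset("trl-lib/tldr", split="train")

def reward_len(completions, **kwargs):
    # Toy reward: prefer completions close to 200 characters.
    return [-abs(200 - len(completion)) for completion in completions]

training_args = GRPOConfig(output_dir="grpo-out", logging_steps=10)
trainer = GRPOTrainer(
    model="Qwen/Qwen2-0.5B-Instruct",  # illustrative model id
    reward_funcs=reward_len,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()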
@infoslack
infoslack / grpo_demo.py
Created January 27, 2025 17:59
Group Relative Policy Optimization (GRPO) implementation
# This implementation is based on the paper: https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf
#
# pip install torch transformers
# python grpo_demo.py
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertModel
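The heart of GRPO, per the DeepSeek-R1 paper linked above, is replacing a learned value baseline with rewards normalized within each group of G samples drawn for the same prompt. A small illustrative sketch of that step (shapes and names are assumptions, not taken from this gist):

import torch

def group_relative_advantages(rewards: torch.Tensor) -> torch.Tensor:
    # rewards: (num_prompts, G) raw rewards for G completions per prompt.
    mean = rewards.mean(dim=1, keepdim=True)
    std = rewards.std(dim=1, keepdim=True)
    # Each completion is scored relative to its own group.
    return (rewards - mean) / (std + 1e-8)

rewards = torch.tensor([[1.0, 0.0, 0.5, 0.0]])
print(group_relative_advantages(rewards))  # positive for above-average samples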
@willccbb
willccbb / grpo_demo.py
Last active April 25, 2025 22:27
GRPO Llama-1B
# train_grpo.py
#
# See https://github.com/willccbb/verifiers for ongoing developments
#
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
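The full gist pairs these imports with rule-based reward functions. A representative format-check reward in the style it uses, for conversational completions (the exact tag pattern and score are illustrative):

def format_reward_func(completions, **kwargs) -> list[float]:
    # Reward completions that follow a <reasoning>...</reasoning><answer>...</answer> layout.
    pattern = r"<reasoning>.*?</reasoning>\s*<answer>.*?</answer>"
    responses = [completion[0]["content"] for completion in completions]
    return [0.5 if re.search(pattern, r, re.DOTALL) else 0.0 for r in responses]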
@abodacs
abodacs / whisper-static-cache.ipynb
Created June 3, 2024 09:53 — forked from huseinzol05/whisper-static-cache.ipynb
example of whisper static cache
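The notebook itself does not render here, but roughly, a "static cache" in recent transformers releases is a pre-allocated KV cache that keeps decoding shapes fixed (and hence torch.compile-friendly). A sketch of what the notebook likely demonstrates; the model id and API details below are assumptions, not taken from the notebook:

import numpy as np
from transformers import AutoProcessor, WhisperForConditionalGeneration

processor = AutoProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

# One second of silence as a stand-in for real audio.
inputs = processor(np.zeros(16000, dtype=np.float32), sampling_rate=16000, return_tensors="pt")
generated = model.generate(inputs.input_features, cache_implementation="static")
print(processor.batch_decode(generated, skip_special_tokens=True))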
@lewtun
lewtun / sft_trainer.py
Last active April 21, 2025 16:04
Fine-tuning Mistral 7B with TRL & DeepSpeed ZeRO-3
# This is a modified version of TRL's `SFTTrainer` example (https://github.com/huggingface/trl/blob/main/examples/scripts/sft_trainer.py),
# adapted to run with DeepSpeed ZeRO-3 and Mistral-7B-v0.1. The settings below were run on 1 node of 8 x A100 (80GB) GPUs.
#
# Usage:
# - Install the latest transformers & accelerate versions: `pip install -U transformers accelerate`
# - Install deepspeed: `pip install deepspeed==0.9.5`
# - Install TRL from main: `pip install git+https://github.com/huggingface/trl.git`
# - Clone the repo: `git clone https://github.com/huggingface/trl.git`
# - Copy this Gist into trl/examples/scripts
# - Run from root of trl repo with: accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml --gradient_accumulation_steps 8 examples/scripts/sft_trainer.py
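Reduced to its core, the script builds an SFTTrainer around the base model. A sketch using TRL's current SFTConfig API (the 2023-era script used TrainingArguments plus extra SFTTrainer arguments; the dataset below is illustrative):

from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
args = SFTConfig(
    output_dir="mistral-7b-sft",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # matches the launch command above
    bf16=True,
)
trainer = SFTTrainer(model="mistralai/Mistral-7B-v0.1", args=args, train_dataset=dataset)
trainer.train()

Note that ZeRO-3 itself comes from the accelerate config file passed on the command line, not from the Python code.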
@younesbelkada
younesbelkada / train_adapters_transformers.py
Created August 3, 2023 09:43
Train adapters using transformers integration of PEFT
from datasets import load_dataset
import torch
from peft import LoraConfig, prepare_model_for_int8_training
from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
dataset_name = "timdettmers/openassistant-guanaco"
dataset = load_dataset(dataset_name, split="train")
model_name = "facebook/opt-350m"
@younesbelkada
younesbelkada / finetune_mpt30b_guanaco.py
Last active August 30, 2023 06:04
Fine-tune MPT-30B on the Guanaco dataset and turn it into a chatbot - read the docstrings to install the correct versions of the required libraries.
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
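The preview above only shows the license header. A condensed sketch of the kind of 4-bit QLoRA loading such a script typically performs for MPT-30B (all settings here are illustrative assumptions):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mosaicml/mpt-30b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    trust_remote_code=True,  # MPT ships custom modeling code
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)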
@younesbelkada
younesbelkada / finetune_sft_trl.py
Last active August 8, 2024 20:21
Benchmarking SFT trainer with 8bit models
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
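Again only the license header survives in the preview. For the benchmarking the description mentions, a small hedged harness around any configured trainer (names are illustrative, not from the gist):

import time
import torch
from transformers import Trainer

def benchmark_train(trainer: Trainer) -> None:
    # Report peak GPU memory and wall-clock time for one training run.
    torch.cuda.reset_peak_memory_stats()
    start = time.time()
    trainer.train()
    print(f"peak memory: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
    print(f"wall time:   {time.time() - start:.1f} s")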
@abodacs
abodacs / GPT4all-langchain-demo.ipynb
Created April 4, 2023 10:52 — forked from psychemedia/GPT4all-langchain-demo.ipynb
Example of running GPT4all local LLM via langchain in a Jupyter notebook (Python)
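This notebook also fails to render; a minimal sketch of the pattern it describes, using langchain's GPT4All wrapper as it existed in early 2023 (the model path is a placeholder; current langchain moves this class to `langchain_community.llms` and prefers `.invoke()`):

from langchain.llms import GPT4All

llm = GPT4All(model="./models/gpt4all-converted.bin")  # path to a local GPT4All weights file
print(llm("What is a large language model?"))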
| ContractAddress | TokenName | TokenSymbol | Holder count | Transfer count | # of times appears in list | Notes |
|---|---|---|---|---|---|---|
| 0x420b595d8b648971b3bfcf46e66544c384860536 | VenmoCash | VMO | 1 | 6 | 2 | |
| 0xdeeb40536e94be7226b77fb89d7d3cd65a82fb85 | Zoom Protocol | ZOM | 1 | 9 | 2 | |
| 0xe670848d54788997942ecf938cd23b09550bae73 | TARO | TARO | 1 | 4 | 2 | |
| 0xf28fec34928a1dc19b650104ae082665b66f720e | ETH/BTC Long-Only Alpha | XTF.SWCEBL | 1 | 4 | 2 | |
| 0x030385efc63ebda6021d9098b1fcc422547d83d3 | Tacos @ Taconomics.io | $TACO | 2 | 5 | 2 | |
| 0x03bb9bbf0423e44370e88ec5fc31eecf4e2b4ac2 | STVKE.Network | STV | 2 | 9 | 2 | |
| 0x05e850909664a3cf926ca4777c3ec1577d36ec18 | OnFlow | Flow | 2 | 8 | 2 | |
| 0x06ca771a689d6d5f5e435be2ef1d1ffc6bdb3b4c | Wing Token | WING | 2 | 8 | 2 | |
| 0x08a958bdc9e0beb0c3ee2ec6e9c0013f14ce66e5 | Harold Returns | KEKW | 2 | 6 | 2 | |