A "Best of the Best Practices" (BOBP) guide to developing in Python.
- "Build tools for others that you want to be built for you." - Kenneth Reitz
- "Simplicity is always better than functionality." - Pieter Hintjens
# train_grpo.py | |
# | |
# See https://github.com/willccbb/verifiers for ongoing developments | |
# | |
import re | |
import torch | |
from datasets import load_dataset, Dataset | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
from peft import LoraConfig | |
from trl import GRPOConfig, GRPOTrainer |
# Advent of Code template by @MathisHammel | |
# TODO | |
# - Make a snapshot of the file when a submission is correct | |
# - Display the rank when submission is accepted | |
# - Utility function to rotate/flip a 2D array | |
# - Cycle length detector/extrapolator to make loops faster | |
# - Put examples in cache | |
# - Warning if DAY is not the current day |
<!doctype html> | |
<html> | |
<head> | |
<title>Site Maintenance</title> | |
<meta charset="utf-8"/> | |
<meta name="robots" content="noindex"/> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<style> | |
body { text-align: center; padding: 150px; } | |
h1 { font-size: 50px; } |