Skip to content

Instantly share code, notes, and snippets.

@NohTow
NohTow / evaluation_bright.py
Created May 23, 2025 12:00
Boilerplate to reproduce the evaluation of Reason-ModernColBERT
"""Evaluation script for the BRIGHT dataset with PyLate models"""
from __future__ import annotations
import argparse
import os
import mteb
import srsly
@NohTow
NohTow / train_reason_moderncolbert.py
Created May 22, 2025 12:43
Boilerplate to reproduce the training of Reason-ModernColBERT
from datasets import load_dataset
from sentence_transformers import (
SentenceTransformerTrainer,
SentenceTransformerTrainingArguments,
)
from pylate import losses, models, utils
def main():
# As ReasonIR does not re-upload the BRIGHT data, we need to load it from the original source
@NohTow
NohTow / beir_eval.py
Created May 15, 2025 11:29
BEIR Plaid PyLate boilerplate
"""Evaluation script for the SciFact dataset using the Beir library."""
from __future__ import annotations
import argparse
import os
import srsly
from pylate import evaluation, indexes, models, retrieve
@NohTow
NohTow / longembed_eval.py
Created May 15, 2025 11:28
Long embed PyLate evaluation boilerplate
"""Evaluation script for the SciFact dataset using the Beir library."""
from __future__ import annotations
import os
import mteb
import srsly
from pylate import evaluation, indexes, models, retrieve
@NohTow
NohTow / train-gte-moderncolbert.py
Last active June 27, 2025 05:02
GTE-ModernColBERT training boilerplate
from datasets import load_dataset
from sentence_transformers import (
SentenceTransformerTrainer,
SentenceTransformerTrainingArguments,
)
from pylate import evaluation, losses, models, utils
# Load the datasets required for knowledge distillation (train, queries, documents)
train = load_dataset(