This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hugging Face Hub ID of the dataset. If the data is not on HF, set the variable to 'other'.
# NOTE: The default config is used.
DATA_HUB_ID = "gsarti/eureka-rebus-calamita-2024"

# Task type. Accepted values: multiple_choice, open-ended
OUTPUT_TYPE = "open-ended"

### Prompting details
# Template string used to compile the prompt. Use {{}} variables to fill using the dataset columns.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hugging Face Hub ID of the dataset. If the data is not on HF, set the variable to 'other'.
# NOTE: The default config is used.
DATA_HUB_ID = "gsarti/eureka-rebus-calamita-2024"

# Task type. Accepted values: multiple_choice, open-ended
OUTPUT_TYPE = "open-ended"

### Prompting details
# Template string used to compile the prompt. Use {{}} variables to fill using the dataset columns.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
from datasets import load_dataset | |
from comet import download_model, load_from_checkpoint | |
def replace_missing(scores, sentences, default = -100): | |
out = [] | |
for score, sentence in zip(scores, sentences): | |
if not sentence: | |
out.append(default) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import datasets | |
import jax | |
from flax.jax_utils import replicate | |
from flax.training.common_utils import shard | |
from tqdm import tqdm | |
from transformers import FlaxMarianMTModel, MarianTokenizer | |
def translate(args): |