# Copyright (c) 2019-present, Thomas Wolf.
# All rights reserved. This source code is licensed under the MIT-style license.
""" A very small and self-contained gist to train a GPT-2 transformer model on wikitext-103 """
import os
from collections import namedtuple
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from ignite.engine import Engine, Events |
# Brian Abelson @brianabelson
# Harmony Institute
# December 5, 2012
# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
# it fits topic models using latent Dirichlet allocation
# it provides arguments for cleaning the input text and for tuning the parameters of the model
# it also returns a lot of useful information about the topics/documents in a format that you can easily join back to your original data
# this allows you to easily model outcomes based on the distribution of topics within a collection of texts |
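
# Below is a minimal usage sketch (not part of the original gist) of the underlying
# call the wrapper builds on: lexicalize() and lda.collapsed.gibbs.sampler() from the
# "lda" package. The example documents and parameter values are illustrative only,
# and the cleaning/tuning arguments of the lda() wrapper itself are not shown here.
library(lda)

docs <- c("the cat sat on the mat",
          "dogs and cats make good pets",
          "stock markets fell sharply today")

# lexicalize() tokenizes raw text into the word-index/count format
# expected by lda.collapsed.gibbs.sampler()
corpus <- lexicalize(docs, lower = TRUE)

set.seed(1)
fit <- lda.collapsed.gibbs.sampler(documents      = corpus$documents,
                                   K              = 2,     # number of topics
                                   vocab          = corpus$vocab,
                                   num.iterations = 500,
                                   alpha          = 0.1,    # document-topic prior
                                   eta            = 0.1)    # topic-word prior

# per-document topic proportions: one row per input text, so the matrix can be
# joined back onto the original data and used to model outcomes
doc_topics <- t(apply(fit$document_sums, 2, function(x) x / sum(x)))

# top words for each topic
top.topic.words(fit$topics, num.words = 5, by.score = TRUE)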