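# Benchmark: time one pytorch-lightning fit() over random data, first with the
# dataset held in CPU memory (so each batch is moved to the GPU by the trainer)
# and then with the dataset preloaded onto the GPU via BinaryDataset.cuda().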
# Number of inputs
NIN = 1000
NHID = 10
# Number of examples
EXAMPLES = 100000

import timeit
import logging
from collections import OrderedDict

import pytorch_lightning as pl
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
from torch import optim
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
import numpy as np

np.random.seed(0)
X = np.random.random((EXAMPLES, NIN))
Y = np.random.random((EXAMPLES, 1))
class BinaryDataset(torch.utils.data.Dataset):
    def __init__(self, x, y):
        self.x = torch.Tensor(x).float()
        self.y = torch.Tensor(y).float()

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index, :], self.y[index]

    def cuda(self):
        # Move every tensor in the dataset to GPU memory up front
        self.x = self.x.to('cuda')
        self.y = self.y.to('cuda')

binaryDataset = BinaryDataset(X, Y)
class BinaryModule(pl.LightningModule):
    def __init__(self, binaryDataset):
        super().__init__()
        self.dataset = binaryDataset
        # build model
        self.__build_model()

    def __build_model(self):
        self.fc1 = nn.Linear(NIN, NHID)
        self.do1 = nn.Dropout(0.2)
        self.out = nn.Linear(NHID, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.do1(x)
        x = self.out(x)
        return x

    def loss(self, pred, true):
        loss_val = F.mse_loss(pred, true)
        return loss_val

    def _step(self, batch, batch_idx, name, training_step=False):
        x, y = batch
        pred = self.forward(x)
        loss_val = self.loss(pred, y)
        # in DP mode (default), if the result is a scalar, add another dim at the beginning
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
        tqdm_dict = OrderedDict({name: loss_val})
        if training_step:
            return OrderedDict({
                'loss': loss_val,
                'progress_bar': tqdm_dict,
                'log': tqdm_dict
            })
        else:
            return tqdm_dict

    def training_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="train_loss", training_step=True)

    def _epoch_end(self, outputs, name):
        # With DP training the per-replica values may need to be averaged individually;
        # see the pytorch-lightning siamese network example.
        # if self.trainer.use_dp or self.trainer.use_ddp2:
        #     val_acc = torch.mean(val_acc)
        avg_loss = torch.stack([x[name] for x in outputs]).mean()
        tqdm_dict = {name: avg_loss}
        result = OrderedDict({name: avg_loss, 'progress_bar': tqdm_dict, 'log': tqdm_dict})
        return result
    # ---------------------
    # TRAINING SETUP
    # ---------------------
    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(),
                              lr=0.01, momentum=0.90)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=10)
        return [optimizer], [scheduler]

    def __dataloader(self, train, dataset):
        # when using multi-node (ddp) we need to add the datasampler
        train_sampler = None
        if self.use_ddp:
            train_sampler = DistributedSampler(dataset)
        should_shuffle = train and train_sampler is None
        loader = DataLoader(
            dataset=dataset,
            batch_size=len(dataset),
            shuffle=should_shuffle,
            sampler=train_sampler,
            num_workers=0,
            drop_last=True
        )
        return loader

    @pl.data_loader
    def train_dataloader(self):
        logging.info('training data loader called')
        return self.__dataloader(train=True, dataset=self.dataset)
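# Time a single fit() with the data resident in CPU memory, then move the
# dataset onto the GPU and time the same fit() again.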
def fit():
    trainer_gpu.fit(model_gpu)

model_gpu = BinaryModule(binaryDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print(timeit.timeit(fit, number=1))
print("Don't load to GPU")

binaryDataset.cuda()
model_gpu = BinaryModule(binaryDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print(timeit.timeit(fit, number=1))
print("Load to GPU")
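# Same benchmark for a siamese-style distance model: two input tables are
# embedded by a shared Table2Representation network, trained so the mean
# absolute difference between the embeddings matches a random target distance,
# and fit() is timed with the dataset on the CPU and then preloaded onto the GPU.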
# Number of inputs
NIN = 100
NHID = 1000
# Size of the learned representation
NOUT = 200
# Number of examples
EXAMPLES = 100000
# Batch size
BATCH_SIZE = 1000

import timeit
import logging
from collections import OrderedDict

import pytorch_lightning as pl
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
from torch import optim
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
import numpy as np

np.random.seed(0)
X1 = np.random.random((EXAMPLES, NIN))
X2 = np.random.random((EXAMPLES, NIN))
DIST = np.random.random((EXAMPLES,))
class TableDistanceDataset(torch.utils.data.Dataset):
    def __init__(self, x1, x2, dist):
        self.dist = torch.Tensor(dist).float()
        self.X1 = torch.Tensor(x1).float()
        self.X2 = torch.Tensor(x2).float()

    def __len__(self):
        return self.X1.shape[0]

    def __getitem__(self, index):
        return self.X1[index, :], self.X2[index, :], self.dist[index]

    def cuda(self):
        # Move every tensor in the dataset to GPU memory up front
        self.dist = self.dist.to('cuda')
        self.X1 = self.X1.to('cuda')
        self.X2 = self.X2.to('cuda')

tableDistanceDataset = TableDistanceDataset(X1, X2, DIST)
class Table2Representation(pl.LightningModule):
    def __init__(self):
        super().__init__()
        # build model
        self.__build_model()

    def __build_model(self):
        self.fc1 = nn.Linear(NIN, NHID)
        self.do1 = nn.Dropout(0.2)
        self.out = nn.Linear(NHID, NOUT)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.do1(x)
        x = self.out(x)
        return x
# Based upon https://github.com/PyTorchLightning/Siamese-Neural-Networks/blob/master/model.py
class TableDistanceModule(pl.LightningModule):
    def __init__(self, tableDistanceDataset):
        super().__init__()
        self.dataset = tableDistanceDataset
        self.table2Representation = Table2Representation()
        # build model
        self.__build_model()

    def __build_model(self):
        pass

    def forward(self, x1, x2):
        # Embed both tables with the shared network and compare the embeddings
        z1 = self.table2Representation.forward(x1)
        z2 = self.table2Representation.forward(x2)
        dis = torch.mean(torch.abs(z1 - z2), dim=1)
        return dis

    def loss(self, pred_dists, true_dists):
        loss_val = F.mse_loss(pred_dists, true_dists)
        return loss_val

    def _step(self, batch, batch_idx, name, training_step=False):
        X1, X2, dist = batch
        pred = self.forward(X1, X2)
        loss_val = self.loss(pred, dist)
        # in DP mode (default), if the result is a scalar, add another dim at the beginning
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
        tqdm_dict = OrderedDict({name: loss_val})
        if training_step:
            return OrderedDict({
                'loss': loss_val,
                'progress_bar': tqdm_dict,
                'log': tqdm_dict
            })
        else:
            return tqdm_dict

    def training_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="train_loss", training_step=True)

    def _epoch_end(self, outputs, name):
        # With DP training the per-replica values may need to be averaged individually;
        # see the pytorch-lightning siamese network example.
        # if self.trainer.use_dp or self.trainer.use_ddp2:
        #     val_acc = torch.mean(val_acc)
        avg_loss = torch.stack([x[name] for x in outputs]).mean()
        tqdm_dict = {name: avg_loss}
        result = OrderedDict({name: avg_loss, 'progress_bar': tqdm_dict, 'log': tqdm_dict})
        return result
    # ---------------------
    # TRAINING SETUP
    # ---------------------
    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(),
                              lr=0.01, momentum=0.90)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=10)
        return [optimizer], [scheduler]

    def __dataloader(self, train, dataset):
        # when using multi-node (ddp) we need to add the datasampler
        train_sampler = None
        batch_size = BATCH_SIZE
        if self.use_ddp:
            train_sampler = DistributedSampler(dataset)
        should_shuffle = train and train_sampler is None
        loader = DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=should_shuffle,
            sampler=train_sampler,
            num_workers=0,
            drop_last=True
        )
        return loader

    @pl.data_loader
    def train_dataloader(self):
        logging.info('training data loader called')
        return self.__dataloader(train=True, dataset=self.dataset)
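# Time fit() with the dataset in CPU memory, then preload it onto the GPU and
# time it again.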
def fit():
    trainer_gpu.fit(model_gpu)

model_gpu = TableDistanceModule(tableDistanceDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print("Don't load", timeit.timeit(fit, number=1))

tableDistanceDataset.cuda()
model_gpu = TableDistanceModule(tableDistanceDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print("Load", timeit.timeit(fit, number=1))