Last active
May 6, 2023 16:50
-
-
Save MasanoriYamada/9c12a3854836b7cedc970ab7350af219 to your computer and use it in GitHub Desktop.
torchopt lr scheduler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "07c385dd", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import torch\n", | |
"import torch.nn as nn\n", | |
"import torch.nn.functional as F\n", | |
"import torch.optim as optim\n", | |
"import torch.optim.lr_scheduler\n", | |
"import torchvision\n", | |
"import torchvision.transforms as transforms\n", | |
"#from torch.nn.utils._stateless import functional_call\n", | |
"from torch.func import functional_call\n", | |
"from collections import OrderedDict\n", | |
"\n", | |
"import torchopt\n", | |
"import functorch\n", | |
"from copy import deepcopy\n", | |
"\n", | |
"# Define a simple CNN model\n", | |
"class SimpleCNN(nn.Module):\n", | |
" def __init__(self):\n", | |
" super(SimpleCNN, self).__init__()\n", | |
" self.conv1 = nn.Conv2d(1, 6, 5)\n", | |
" self.pool = nn.MaxPool2d(2, 2)\n", | |
" self.conv2 = nn.Conv2d(6, 16, 5)\n", | |
" self.fc1 = nn.Linear(16 * 4 * 4, 120)\n", | |
" self.fc2 = nn.Linear(120, 84)\n", | |
" self.fc3 = nn.Linear(84, 10)\n", | |
"\n", | |
" def forward(self, x):\n", | |
" x = self.pool(F.relu(self.conv1(x)))\n", | |
" x = self.pool(F.relu(self.conv2(x)))\n", | |
" x = x.view(-1, 16 * 4 * 4)\n", | |
" x = F.relu(self.fc1(x))\n", | |
" x = F.relu(self.fc2(x))\n", | |
" x = self.fc3(x)\n", | |
" return x\n", | |
"\n", | |
"# Load the MNIST test split (train=False) and reuse it as the training data\n", | |
"transform = transforms.Compose(\n", | |
" [transforms.ToTensor(),\n", | |
" transforms.Normalize((0.5,), (0.5,))])\n", | |
"\n", | |
"trainset = torchvision.datasets.MNIST(root='./data', train=False,\n", | |
" download=True, transform=transform)\n", | |
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=100,\n", | |
" shuffle=False, num_workers=0)\n", | |
"trainloader2 = torch.utils.data.DataLoader(trainset, batch_size=100,\n", | |
" shuffle=False, num_workers=0)\n", | |
"\n", | |
"\n", | |
"# Initialize the models\n", | |
"model = SimpleCNN()\n", | |
"model_ref = deepcopy(model)\n", | |
"model_refref = deepcopy(model)\n", | |
"\n", | |
"# Set device and dtype\n", | |
"device = 'cpu'\n", | |
"dtype = torch.float32\n", | |
"\n", | |
"# Set parameters for the test\n", | |
"lr = 1\n", | |
"total_iters = 10\n", | |
"optimizers = (torchopt.sgd, torch.optim.SGD, {})\n", | |
"inplace = False\n", | |
"weight_decay = 0.9\n", | |
"use_chain_flat = True" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bbb332db", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"model.to(device)\n", | |
"model_ref.to(device)\n", | |
"\n", | |
"\n", | |
"torchopt_optimizer, torch_optimizer, optimizer_kwargs = optimizers\n", | |
"\n", | |
"fmodel, params, buffers = functorch.make_functional_with_buffers(model)\n", | |
"lr_schedule = torchopt.schedule.linear_schedule(\n", | |
" init_value=lr,\n", | |
" end_value=0.1 * lr,\n", | |
" transition_steps=total_iters,\n", | |
" transition_begin=0,\n", | |
" )\n", | |
"optim = torchopt_optimizer(\n", | |
" lr_schedule,\n", | |
" weight_decay=weight_decay,\n", | |
" **optimizer_kwargs,\n", | |
")\n", | |
"optim_state = optim.init(params)\n", | |
"optim_ref = torch_optimizer(\n", | |
" model_ref.parameters(),\n", | |
" lr,\n", | |
" weight_decay=weight_decay,\n", | |
" **optimizer_kwargs,\n", | |
")\n", | |
"torch_scheduler = torch.optim.lr_scheduler.LinearLR(\n", | |
" optim_ref,\n", | |
" start_factor=1.0,\n", | |
" end_factor=0.1,\n", | |
" total_iters=total_iters,\n", | |
")\n", | |
"\n", | |
"for i, (xs, ys) in enumerate(trainloader):\n", | |
" if i >= total_iters:\n", | |
" break\n", | |
" xs = xs.to(device=device, dtype=dtype)\n", | |
" ys = ys.to(device=device)\n", | |
"\n", | |
" pred = fmodel(params, buffers, xs)\n", | |
" pred_ref = model_ref(xs)\n", | |
" loss = F.cross_entropy(pred, ys)\n", | |
" loss_ref = F.cross_entropy(pred_ref, ys)\n", | |
" print(loss, loss == loss_ref)\n", | |
" grads = torch.autograd.grad(loss, params, allow_unused=True)\n", | |
" #print(grads[0][0]) \n", | |
" updates, optim_state = optim.update(grads, optim_state, params=params, inplace=inplace)\n", | |
" params = torchopt.apply_updates(params, updates, inplace=inplace)\n", | |
"\n", | |
" optim_ref.zero_grad()\n", | |
" loss_ref.backward()\n", | |
" optim_ref.step()\n", | |
" print('lr', optim_ref.param_groups[0]['lr'])\n", | |
" print('lr', lr_schedule(i)) \n", | |
" torch_scheduler.step()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "2e689e2c", | |
"metadata": {}, | |
"source": [ | |
"# functional_callで書き直す" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "eb4417d0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lr_schedule = torchopt.schedule.linear_schedule(\n", | |
" init_value=lr,\n", | |
" end_value=0.1 * lr,\n", | |
" transition_steps=total_iters,\n", | |
" transition_begin=0,\n", | |
" )\n", | |
"\n", | |
"optim = torchopt_optimizer(\n", | |
" lr_schedule,\n", | |
" weight_decay=weight_decay,\n", | |
" **optimizer_kwargs,\n", | |
")\n", | |
"params = model_refref.state_dict()\n", | |
"optim_state = optim.init(params)\n", | |
"\n", | |
"for i, (xs, ys) in enumerate(trainloader2):\n", | |
" if i >= total_iters:\n", | |
" break\n", | |
" xs = xs.to(device=device, dtype=dtype)\n", | |
" ys = ys.to(device=device)\n", | |
" for key in params:\n", | |
" params[key] = params[key].detach() # leaf\n", | |
" params[key].requires_grad = True\n", | |
" params[key].grad = None # optimizer.zero_grad()\n", | |
" pred = functional_call(model_refref, params, xs)\n", | |
" loss = F.cross_entropy(pred, ys)\n", | |
" print(loss)\n", | |
" loss.backward()\n", | |
" grads = OrderedDict()\n", | |
" for key in params:\n", | |
" if params[key].grad is None:\n", | |
" grads[key] = torch.zeros_like(params[key])\n", | |
" else:\n", | |
" grads[key] = params[key].grad\n", | |
" #print(grads['conv1.weight'][0])\n", | |
" updates, optim_state = optim.update(grads, optim_state, params=params, inplace=False)\n", | |
" params = torchopt.apply_updates(params, updates, inplace=False)\n", | |
" print('lr', lr_schedule(i))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "17345c29", | |
"metadata": {}, | |
"source": [ | |
"# custom functionで実現\n", | |
"\n", | |
"- CosineLRScheduler" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"id": "1686eba2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import torch\n", | |
"import torch.nn as nn\n", | |
"import torch.nn.functional as F\n", | |
"import torch.optim as optim\n", | |
"import torch.optim.lr_scheduler\n", | |
"import torchvision\n", | |
"import torchvision.transforms as transforms\n", | |
"#from torch.nn.utils._stateless import functional_call\n", | |
"from torch.func import functional_call\n", | |
"from collections import OrderedDict\n", | |
"\n", | |
"import torchopt\n", | |
"import functorch\n", | |
"from copy import deepcopy\n", | |
"\n", | |
"# Define a simple CNN model\n", | |
"class SimpleCNN(nn.Module):\n", | |
" def __init__(self):\n", | |
" super(SimpleCNN, self).__init__()\n", | |
" self.conv1 = nn.Conv2d(1, 6, 5)\n", | |
" self.pool = nn.MaxPool2d(2, 2)\n", | |
" self.conv2 = nn.Conv2d(6, 16, 5)\n", | |
" self.fc1 = nn.Linear(16 * 4 * 4, 120)\n", | |
" self.fc2 = nn.Linear(120, 84)\n", | |
" self.fc3 = nn.Linear(84, 10)\n", | |
"\n", | |
" def forward(self, x):\n", | |
" x = self.pool(F.relu(self.conv1(x)))\n", | |
" x = self.pool(F.relu(self.conv2(x)))\n", | |
" x = x.view(-1, 16 * 4 * 4)\n", | |
" x = F.relu(self.fc1(x))\n", | |
" x = F.relu(self.fc2(x))\n", | |
" x = self.fc3(x)\n", | |
" return x\n", | |
"\n", | |
"# Load the MNIST test split (train=False) and reuse it as the training data\n", | |
"transform = transforms.Compose(\n", | |
" [transforms.ToTensor(),\n", | |
" transforms.Normalize((0.5,), (0.5,))])\n", | |
"\n", | |
"trainset = torchvision.datasets.MNIST(root='./data', train=False,\n", | |
" download=True, transform=transform)\n", | |
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=100,\n", | |
" shuffle=False, num_workers=0)\n", | |
"trainloader2 = torch.utils.data.DataLoader(trainset, batch_size=100,\n", | |
" shuffle=False, num_workers=0)\n", | |
"\n", | |
"\n", | |
"# Initialize the models\n", | |
"model = SimpleCNN()\n", | |
"model_ref = deepcopy(model)\n", | |
"\n", | |
"# Set device and dtype\n", | |
"device = 'cpu'\n", | |
"dtype = torch.float32\n", | |
"\n", | |
"# Set parameters for the test\n", | |
"lr = 1\n", | |
"total_epoch = 10\n", | |
"optimizers = (torchopt.sgd, torch.optim.SGD, {})\n", | |
"inplace = False\n", | |
"weight_decay = 0.9\n", | |
"use_chain_flat = True" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"id": "d9248512", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from timm.scheduler import CosineLRScheduler\n", | |
"\n", | |
"optim_ref = torch.optim.SGD(\n", | |
" model_ref.parameters(),\n", | |
" lr,\n", | |
" weight_decay=weight_decay,\n", | |
")\n", | |
"\n", | |
"warmup_epoch = 1\n", | |
"scheduler = CosineLRScheduler(optim_ref, t_initial=total_epoch, lr_min=1e-6, warmup_t=warmup_epoch,\n", | |
" warmup_lr_init=1e-6, warmup_prefix=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"id": "81cf504e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"torchopt_optimizer = torchopt.sgd\n", | |
"steps_per_epoch = len(trainloader)\n", | |
"\n", | |
"def get_custom_schedule(scheduler, steps_per_epoch):\n", | |
" def custom_schedule(step: int) -> float:\n", | |
" epoch = step // steps_per_epoch\n", | |
" return scheduler.get_epoch_values(epoch)[0]\n", | |
" return custom_schedule\n", | |
"\n", | |
"custom_schedule = get_custom_schedule(scheduler, steps_per_epoch)\n", | |
"\n", | |
"optim = torchopt_optimizer(\n", | |
" custom_schedule,\n", | |
" weight_decay=weight_decay,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"id": "36f4fb78", | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2977, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2990, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3096, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3079, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3103, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3096, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3059, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2983, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2942, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3105, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3069, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3125, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2931, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2991, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3091, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3071, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3094, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2977, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2997, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3062, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3129, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3111, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2973, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3053, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3085, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2933, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3083, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2919, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2975, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2985, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2996, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2996, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2943, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2992, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3114, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3099, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3073, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2959, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3096, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3065, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3074, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2899, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2993, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3093, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2961, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2981, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3091, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2959, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3078, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2992, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 1e-06\n", | |
"lr2: 1e-06\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3057, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3055, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3053, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3089, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3078, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3114, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3085, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3057, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3071, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2974, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3079, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2977, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3071, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 1.0\n", | |
"lr2: 1.0\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3057, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3087, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3075, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3059, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3113, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3083, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3058, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2976, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3077, grad_fn=<NllLossBackward0>) tensor(False)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2978, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2995, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2995, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3060, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"lr1: 0.9755282826193187\n", | |
"lr2: 0.9755282826193187\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2987, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3080, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3068, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3055, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3110, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3079, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3062, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2986, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3072, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2980, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2997, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2997, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3055, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3010, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 0.9045085926789765\n", | |
"lr2: 0.9045085926789765\n", | |
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2979, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2990, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3070, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3058, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3104, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3073, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3062, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2990, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2985, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3061, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 0.7938928322536104\n", | |
"lr2: 0.7938928322536104\n", | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3051, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2981, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3059, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3094, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3066, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2988, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2994, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3058, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2991, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3054, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 0.6545088426789766\n", | |
"lr2: 0.6545088426789766\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2984, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3042, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3082, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3056, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3050, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3049, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2993, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3043, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 0.5000005\n", | |
"lr2: 0.5000005\n", | |
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2988, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3001, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3037, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3067, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3047, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3045, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3000, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2999, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3003, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3046, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3002, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3039, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3013, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3038, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 0.3454921573210235\n", | |
"lr2: 0.3454921573210235\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3033, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2993, grad_fn=<NllLossBackward0>) tensor(False)\n", | |
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3044, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3052, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3005, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3008, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3004, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3010, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3030, grad_fn=<NllLossBackward0>) tensor(False)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 0.20610816774638963\n", | |
"lr2: 0.20610816774638963\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tensor(2.3006, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3036, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.2998, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3011, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3007, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3040, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3014, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3041, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3034, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3048, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3035, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3009, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3012, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3032, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3019, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3023, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3020, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3031, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3018, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3022, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3029, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3028, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3015, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3027, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3025, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3021, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3024, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3026, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3016, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"tensor(2.3017, grad_fn=<NllLossBackward0>) tensor(True)\n", | |
"lr1: 0.09549240732102351\n", | |
"lr2: 0.09549240732102351\n" | |
] | |
} | |
], | |
"source": [ | |
"model.to(device)\n", | |
"model_ref.to(device)\n", | |
"\n", | |
"\n", | |
"fmodel, params, buffers = functorch.make_functional_with_buffers(model)\n", | |
"optim_state = optim.init(params)\n", | |
"\n", | |
"\n", | |
"for epoch in range(total_epoch):\n", | |
" scheduler.step(epoch)\n", | |
" for i, (xs, ys) in enumerate(trainloader):\n", | |
" step = len(trainloader) * epoch + i\n", | |
" xs = xs.to(device=device, dtype=dtype)\n", | |
" ys = ys.to(device=device)\n", | |
"\n", | |
" pred = fmodel(params, buffers, xs)\n", | |
" pred_ref = model_ref(xs)\n", | |
" loss = F.cross_entropy(pred, ys)\n", | |
" loss_ref = F.cross_entropy(pred_ref, ys)\n", | |
" print(loss, loss == loss_ref)\n", | |
" grads = torch.autograd.grad(loss, params, allow_unused=True)\n", | |
" #print(grads[0][0]) \n", | |
" updates, optim_state = optim.update(grads, optim_state, params=params, inplace=inplace)\n", | |
" params = torchopt.apply_updates(params, updates, inplace=inplace)\n", | |
"\n", | |
" optim_ref.zero_grad()\n", | |
" loss_ref.backward()\n", | |
" optim_ref.step()\n", | |
" print('lr1:', optim_ref.param_groups[0]['lr'])\n", | |
" print('lr2:', custom_schedule(step))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "3c22cbad", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.16" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
torchoptのlrスケジューリングの正しさのチェック
https://github.com/metaopt/torchopt/blob/4701569cf12cf4be3787371390bd66c647594cdb/tests/test_schedule.py#L117
custom functionでちゃんと動くようになった