Visualising creases in a ReLU network.
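A rough usage sketch (the script filename and the image path are placeholders, not part of the gist; the flags are the ones defined by the argparse block below):

    python visualise_creases.py --inputimage /path/to/input.png --layer_neurons 10 --draw_interval 500

Each written frame shows the detected crease/polytope boundaries on the left half and the network's current approximation of the input image on the right half.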
from PIL import Image, ImageOps, ImageDraw
import numpy as np
import pandas as pd
import os, sys
import logging

logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S')
# Load and process the image
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--inputimage", help="Input PNG image to approximate", type=str,
                    default="/home/thomasdullien/Downloads/black_circle.png")
parser.add_argument("--layer_neurons", help="How many neurons in the first layer",
                    type=int, default=10)
parser.add_argument("--number_of_layers", help="How many layers?", type=int, default=1)
parser.add_argument("--draw_size", help="How big the output drawing should be", type=int, default=300)
parser.add_argument("--draw_interval", help="How often (in epochs) should the boundaries be drawn?", type=int, default=500)
parser.add_argument("--epochs", help="How many epochs to train", type=int, default=300000)
parser.add_argument("--random_seed", help="Random seed", type=int, default=1)
# Step 1: Load an image from a file
def load_image(file_path):
    return Image.open(file_path)

# Step 2: Grayscale the PNG file
def grayscale_image(img):
    gray_img = ImageOps.grayscale(img)
    return gray_img

# Step 3: Truncate the PNG file so it is square (center-crop to the shorter side)
def truncate_image(img):
    min_side = min(img.size)
    left = (img.width - min_side) // 2
    top = (img.height - min_side) // 2
    right = (img.width + min_side) // 2
    bottom = (img.height + min_side) // 2
    square_img = img.crop((left, top, right, bottom))
    return square_img

# Step 4: Convert the PNG file to (x, y, z) triples, with coordinates and
# grayscale values normalized to [0, 1]
def image_to_triples(img):
    img = np.array(img)
    height, width = img.shape
    triples = []
    for y in range(height):
        for x in range(width):
            z = img[y, x] / 255.0
            triples.append((x / width, y / height, z))
    return np.array(triples)
args = parser.parse_args()
g_file_path = args.inputimage
g_layer_neurons = args.layer_neurons
g_epochs = args.epochs
g_draw_size = args.draw_size
g_draw_interval = args.draw_interval
g_rand = args.random_seed

# Load the input graphics.
filename = os.path.split(g_file_path)[1]
img = load_image(g_file_path)
grayscale_img = grayscale_image(img)
# Make sure the image is square.
square_img = truncate_image(grayscale_img)
# Convert the image to triples.
triples = image_to_triples(square_img)

import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(g_rand)
# Make newly created tensors default to CUDA floats so the data and the model
# live on the GPU (requires a CUDA-capable device).
torch.set_default_tensor_type("torch.cuda.FloatTensor")
# Step 5: Create and train a 1-layer ReLU network
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc = nn.Linear(2, g_layer_neurons)
        self.relu = nn.ReLU()
        self.out = nn.Linear(g_layer_neurons, 1)

    def forward(self, x):
        x = self.fc(x)
        x = self.relu(x)
        x = self.out(x)
        return x
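# Because the network above is piecewise linear (affine maps separated by ReLUs),
# its gradient with respect to the input (x, y) is constant within each linear
# region ("polytope") and changes only across region boundaries. Those boundaries
# are the "creases" that calculate_derivative() and create_new_image() visualise below.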
def train_network(triples):
    model = SimpleNN()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    x = torch.tensor(triples[:, :2], dtype=torch.float32)
    y = torch.tensor(triples[:, 2], dtype=torch.float32).unsqueeze(1)
    for epoch in range(g_epochs):
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        logging.info("Epoch %d: Training loss is now %f" % (epoch, loss))
        if epoch % g_draw_interval == 0:
            write_model_and_decision_boundaries(model, epoch, loss, optimizer.param_groups[0]['lr'])
    return model
def write_model_and_decision_boundaries(model, train_step, loss, learn_rate):
    logging.info("Writing the model and polytopes: Beginning calculation of derivatives.")
    derivative_image, value_image = calculate_derivative(model)
    logging.info("Creating new image.")
    create_new_image(derivative_image, value_image,
                     "./%s-%d-%d-step-%08.08d.png" % (filename, g_rand, g_layer_neurons, train_step),
                     loss, train_step, learn_rate)
    logging.info("Done writing image.")
# Step 6: Calculate the derivative of the model
def calculate_derivative(model):
    global g_points, g_points_int
    float_draw_size = float(g_draw_size)
    # The image where the squared gradient norm of the model at each point is stored.
    derivative_image = np.zeros((g_draw_size, g_draw_size))
    # The image where the model outputs at each point are stored.
    value_image = np.zeros((g_draw_size, g_draw_size))
    # The points at which to evaluate the model (built once and cached globally).
    if 'g_points' not in globals():
        g_points = torch.tensor(
            [[x / float_draw_size, y / float_draw_size]
             for x in range(g_draw_size) for y in range(g_draw_size)],
            dtype=torch.float32)
        g_points_int = [[x, y] for x in range(g_draw_size) for y in range(g_draw_size)]
        g_points.requires_grad = True
    # Evaluate the model on all image points. This should entirely happen on GPU.
    output = model(g_points)
    loss = output.mean()
    loss.backward()
    # Copy the gradients back to CPU so we can build the derivative image. Is this even a
    # sensible thing to do? Would it not make *much* more sense to run this on GPU too?
    gradients = g_points.grad.cpu().detach().numpy()
    g_points.grad.data.zero_()
    output_cpu = output.cpu().detach().numpy()
    for index in range(len(g_points_int)):
        x, y = g_points_int[index]
        value_image[x, y] = output_cpu[index]
        derivative_image[x, y] = gradients[index][0]**2 + gradients[index][1]**2
    return (derivative_image, value_image)
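# A possible answer to the question above (a sketch only, not used by this script):
# because g_points was built with x as the outer loop and y as the inner loop, the
# flat arrays can simply be reshaped instead of filled pixel by pixel, and the
# squared gradient norm can be computed on the GPU before a single copy to the CPU:
#
#   gradients = g_points.grad.detach()
#   sq_norm = (gradients ** 2).sum(dim=1)            # still on the GPU
#   derivative_image = sq_norm.reshape(g_draw_size, g_draw_size).cpu().numpy()
#   value_image = output.detach().reshape(g_draw_size, g_draw_size).cpu().numpy()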
# Step 7: Create a new PNG file from the data points
def create_new_image(derivative_image, value_image, filename, loss, epoch, learn_rate):
    new_image = Image.new("RGB", (g_draw_size * 2, g_draw_size))
    for x in range(g_draw_size - 1):
        for y in range(g_draw_size - 1):
            # Clamp the (unbounded) model output into [0, 1] before converting to a
            # grayscale value, so out-of-range predictions do not wrap around.
            pixel_color = int(max(0.0, min(1.0, value_image[x, y])) * 255)
            # Right half: the network's current approximation of the image.
            new_image.putpixel((x + g_draw_size, y), (pixel_color, pixel_color, pixel_color))
            # Left half: mark a pixel red if the squared gradient norm differs from the
            # neighbor to the right or below, i.e. the pixel lies on a crease between
            # two linear regions of the network.
            if derivative_image[x, y] != derivative_image[x+1, y] or derivative_image[x, y] != derivative_image[x, y+1]:
                new_image.putpixel((x, y), (255, 0, 0))
            else:
                new_image.putpixel((x, y), (pixel_color, pixel_color, pixel_color))
    draw = ImageDraw.Draw(new_image)
    draw.text((g_draw_size + 5, 5), "Loss: %f\nEpoch: %d\nLR %f" % (loss, epoch, learn_rate), fill=(255, 0, 0, 255))
    new_image.save(filename)
# Train the network; the derivative image is recomputed and drawn during training.
print("About to train the network.")
model = train_network(triples)
#print("Done training the network. Beginning calculation of derivatives.")
#derivative_image, value_image = calculate_derivative(model)
#print("Creating new image.")
#create_new_image(derivative_image, value_image)
# Intermediate images are saved every --draw_interval epochs as
# <inputname>-<seed>-<neurons>-step-<epoch>.png in the current directory.