Reinforcement learning on multiple CPUs with a genetic algorithm, using PyGAD, PyTorch, OpenAI Gym (CartPole) and multiprocessing.Pool
import time

import gym
import numpy as np
import pygad.torchga
import pygad
import torch
import torch.nn as nn
from multiprocessing import Pool
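# Note on assumed library versions: the fitness function signature
# (solution, sol_idx) follows PyGAD 2.x (PyGAD 3.x passes the GA instance as
# the first argument), and env.reset()/env.step() follow the pre-0.26 Gym API
# (reset returns only the observation, step returns a 4-tuple).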
def fitness_func(solution, sol_idx):
    global model, observation_space_size, env

    # Load the GA solution (a flat weight vector) into the network.
    model_weights_dict = pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution)
    model.load_state_dict(model_weights_dict)

    # Play one episode; the accumulated reward is the fitness of this solution.
    observation = env.reset()
    sum_reward = 0
    done = False
    while (not done) and (sum_reward < 1000):
        # env.render()
        ob_tensor = torch.tensor(observation.copy(), dtype=torch.float)
        q_values = model(ob_tensor)
        action = torch.argmax(q_values).item()  # greedy action
        observation_next, reward, done, info = env.step(action)
        observation = observation_next
        sum_reward += reward

    return sum_reward
def callback_generation(ga_instance):
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1]))
def fitness_wrapper(solution):
    # Pool.map passes a single argument per call, so wrap fitness_func and
    # supply a dummy solution index.
    return fitness_func(solution, 0)
class PooledGA(pygad.GA):

    def cal_pop_fitness(self):
        global pool
        pop_fitness = pool.map(fitness_wrapper, self.population)
        print(pop_fitness)
        pop_fitness = np.array(pop_fitness)
        return pop_fitness
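# Note: pygad.GA.cal_pop_fitness normally evaluates the population serially,
# calling the fitness function once per solution. The override above replaces
# that loop with pool.map, so each worker process runs one full CartPole
# episode in parallel. Illustrative sketch of the same pattern (the names
# below are made up, not part of this gist):
#
#     with Pool(processes=4) as p:
#         fitnesses = p.map(fitness_wrapper, population)  # one solution per worker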
env = gym.make("CartPole-v1")
observation_space_size = env.observation_space.shape[0]
action_space_size = env.action_space.n

# Weights are evolved by the GA rather than trained by backpropagation,
# so gradients are not needed.
torch.set_grad_enabled(False)

model = nn.Sequential(
    nn.Linear(observation_space_size, 16),
    nn.ReLU(),
    nn.Linear(16, 16),
    nn.ReLU(),
    nn.Linear(16, action_space_size)
)

torch_ga = pygad.torchga.TorchGA(model=model, num_solutions=10)
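# The GA genome is the flattened vector of all network parameters.
# pygad.torchga.model_weights_as_vector / model_weights_as_dict convert
# between that 1-D vector and the model's state_dict. Quick sanity check
# (illustrative only, not part of the original gist):
#
#     vec = pygad.torchga.model_weights_as_vector(model=model)
#     restored = pygad.torchga.model_weights_as_dict(model=model, weights_vector=vec)
#     model.load_state_dict(restored)  # round-trips without error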
# Prepare the PyGAD parameters. Check the documentation for more information:
# https://pygad.readthedocs.io/en/latest/README_pygad_ReadTheDocs.html#pygad-ga-class
num_generations = 50  # Number of generations.
num_parents_mating = 5  # Number of solutions to be selected as parents in the mating pool.
initial_population = torch_ga.population_weights  # Initial population of network weights.
parent_selection_type = "sss"  # Type of parent selection.
crossover_type = "single_point"  # Type of the crossover operator.
mutation_type = "random"  # Type of the mutation operator.
mutation_percent_genes = 10  # Percentage of genes to mutate. Has no effect if mutation_num_genes is set.
keep_parents = -1  # Number of parents to keep in the next population. -1 keeps all parents; 0 keeps none.
start_time = time.time()

# Single-process reference version using the stock pygad.GA class:
#
# ga_instance = pygad.GA(num_generations=num_generations,
#                        num_parents_mating=num_parents_mating,
#                        initial_population=initial_population,
#                        fitness_func=fitness_func,
#                        parent_selection_type=parent_selection_type,
#                        crossover_type=crossover_type,
#                        mutation_type=mutation_type,
#                        mutation_percent_genes=mutation_percent_genes,
#                        keep_parents=keep_parents,
#                        on_generation=callback_generation)
#
# ga_instance.run()
ga_instance = PooledGA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       initial_population=initial_population,
                       fitness_func=fitness_func,
                       parent_selection_type=parent_selection_type,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       mutation_percent_genes=mutation_percent_genes,
                       keep_parents=keep_parents,
                       on_generation=callback_generation)
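# Portability note (an assumption about the target platform, not part of the
# original gist): creating a multiprocessing.Pool at module level like this
# works with the "fork" start method (Linux). On platforms that default to
# "spawn" (Windows, and macOS since Python 3.8), the Pool creation and
# ga_instance.run() would need to sit under an `if __name__ == "__main__":` guard.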
with Pool(processes=10) as pool:
    ga_instance.run()

    solution, solution_fitness, solution_idx = ga_instance.best_solution()
    print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
    print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))
    print("--- %s seconds ---" % (time.time() - start_time))
# Load the best evolved weights back into the model and play one rendered episode.
model_weights_dict = pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution)
model.load_state_dict(model_weights_dict)

observation = env.reset()
sum_reward = 0
done = False
while not done:
    env.render()
    ob_tensor = torch.tensor(observation.copy(), dtype=torch.float)
    q_values = model(ob_tensor)
    action = torch.argmax(q_values).item()  # greedy action
    observation_next, reward, done, info = env.step(action)
    observation = observation_next
    sum_reward += reward

print("Sum reward: " + str(sum_reward))
# After the generations complete, plot how the fitness values evolved over the generations.
ga_instance.plot_result(title="PyGAD & PyTorch - Iteration vs. Fitness", linewidth=4)