Last active
May 19, 2019 11:24
Revisions
-
zoq revised this gist
May 19, 2019. 1 changed file with 10 additions and 10 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,10 +8,10 @@ class Task // This is just a super simple example that shows how to set the input // values. genome.Input() = arma::vec("1 2 0.3"); // Calculate the fitness using the output. genome.Output(); double loss = ..; return loss } @@ -26,13 +26,13 @@ class Task Task(EnvironmentType& environment) : environment(environment) { } double Evalaute(Genome& genome) { // Set the initial state. State state = environment.InitialSample(); genome.Input() = state.Data(); double loss = 0; while (!environment.IsTerminal()) { @@ -43,16 +43,16 @@ class Task // number of actions we could search for the maximum // value and return the index of that value in the vector. Action action = static_cast<Action>(genome.Output()[0]); // Use the current action to get the next state. loss += environment.Sample(state, action, state); // Update the state of the genome for the next step. genome.Input() = state.Data(); } return loss; } private: EnvironmentType environment; } -
zoq revised this gist
May 19, 2019. 1 changed file with 10 additions and 10 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,18 +3,18 @@ */ class Task { double Evalaute(Genome& genome) { // This is just a super simple example that shows how to set the input // values. genome.Input() = arma::vec("1 2 0.3"); // Calculate the fitness using the output. genome.Output(); double loss = ..; return loss } } /** -
zoq created this gist
May 19, 2019. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,58 @@ /** * Main task idea. */ class Task { double Evalaute(Genome& genome) { // This is just a super simple example that shows how to set the input // values. genome.Input() = arma::vec("1 2 0.3"); // Calculate the fitness using the output. genome.Output(); double loss = ..; return loss } } /** * Task idea that wraps the RL environment. */ template<class EnvironmentType> class Task { Task(EnvironmentType& environment) : environment(environment) { } double Evalaute(Genome& genome) { // Set the initial state. State state = environment.InitialSample(); genome.Input() = state.Data(); double loss = 0; while (!environment.IsTerminal()) { // In this case we expect that genome.Output() // returns a vector of length 1, that does represent // the action to take at this state. // In case genome will return a vector of length // number of actions we could search for the maximum // value and return the index of that value in the vector. Action action = static_cast<Action>(genome.Output()[0]); // Use the current action to get the next state. loss += environment.Sample(state, action, state); // Update the state of the genome for the next step. genome.Input() = state.Data(); } return loss; } private: EnvironmentType environment; }