Created
December 20, 2018 05:10
-
-
Save tomwwright/184b5ce92f75f8c7d706a9a1476f7a2f to your computer and use it in GitHub Desktop.
Medium : AWS DeepRacer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Weight applied to the unused fraction of throttle (1 - throttle).
THROTTLE_PENALTY_FACTOR = 0.5
# Weight applied to the magnitude of the steering command.
STEERING_PENALTY_FACTOR = 0.5


def reward_function(on_track, x, y, distance_from_center, car_orientation,
                    progress, steps, throttle, steering, track_width,
                    waypoints, closest_waypoint):
    '''
    Reward staying near the centre line, penalising low throttle and steering.

    Only distance_from_center, track_width, throttle and steering are read;
    the remaining arguments are accepted to keep the expected signature but
    do not influence the result.

    @on_track (boolean) :: The vehicle is off-track if the front of the
                           vehicle is outside of the white lines
    @x (float range: [0, 1]) :: Fraction of where the car is along the
                           x-axis. 1 indicates max 'x' value in the
                           coordinate system.
    @y (float range: [0, 1]) :: Fraction of where the car is along the
                           y-axis. 1 indicates max 'y' value in the
                           coordinate system.
    @distance_from_center (float [0, track_width/2]) :: Displacement from
                           the center line of the track as defined by
                           way points
    @car_orientation (float: [-3.14, 3.14]) :: yaw of the car with respect
                           to the car's x-axis in radians
    @progress (float: [0,1]) :: % of track complete
    @steps (int) :: number of steps completed
    @throttle (float) :: 0 to 1 (0 indicates stop, 1 max throttle)
    @steering (float) :: -1 to 1 (-1 is right, 1 is left)
    @track_width (float) :: width of the track (> 0)
    @waypoints (ordered list) :: list of waypoints in order; each waypoint
                           is a set of coordinates (x,y,yaw) that define a
                           turning point
    @closest_waypoint (int) :: index of the closest waypoint (0-indexed)
                           given the car's x,y position as measured by the
                           Euclidean distance

    @@output: @reward (float [-1e5, 1e5])
    '''
    MAX_REWARD = 1

    # Base reward: 1 on the centre line, falling linearly to 0 at the edge
    # (distance_from_center expressed as a fraction of the half-width).
    percent_towards_edge = distance_from_center / (track_width / 2)
    reward = MAX_REWARD * (1 - percent_towards_edge)

    # Penalise whatever fraction of full throttle is not being used.
    reward -= (1 - throttle) * THROTTLE_PENALTY_FACTOR

    # Penalise steering in either direction in proportion to its magnitude.
    reward -= abs(steering) * STEERING_PENALTY_FACTOR

    return float(reward)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment