tomwwright · December 20, 2018 05:10
diff --git a/deepracer.py b/deepracer.py
 # Weight of the throttle penalty: reward_function subtracts
 # (1 - throttle) * THROTTLE_PENALTY_FACTOR from the reward.
 THROTTLE_PENALTY_FACTOR = 0.5
 # Weight of the steering penalty: reward_function subtracts
 # abs(steering) * STEERING_PENALTY_FACTOR from the reward.
 STEERING_PENALTY_FACTOR = 0.5

 def reward_function(on_track, x, y, distance_from_center, car_orientation, progress, steps, throttle, steering, track_width, waypoints, closest_waypoint):
     '''
     Compute the reward for a single step: highest on the center line at full
     throttle with no steering, reduced linearly by distance from the center,
     by unused throttle and by steering magnitude.

     Only distance_from_center, track_width, throttle and steering are read by
     this implementation; the remaining parameters are accepted but ignored.

     @on_track (boolean) :: The vehicle is off-track if the front of the vehicle is outside of the white
     lines (not used here)

     @x (float range: [0, 1]) :: Fraction of where the car is along the x-axis. 1 indicates
     max 'x' value in the coordinate system. (not used here)

     @y (float range: [0, 1]) :: Fraction of where the car is along the y-axis. 1 indicates
     max 'y' value in the coordinate system. (not used here)

     @distance_from_center (float [0, track_width/2]) :: Displacement from the center line of the track
     as defined by waypoints

     @car_orientation (float: [-3.14, 3.14]) :: yaw of the car with respect to the car's x-axis in
     radians (not used here)

     @progress (float: [0,1]) :: % of track complete (not used here)

     @steps (int) :: number of steps completed (not used here)

     @throttle :: (float) 0 to 1 (0 indicates stop, 1 max throttle)

     @steering :: (float) -1 to 1 (-1 is right, 1 is left)

     @track_width (float) :: width of the track (> 0); a value of 0 would raise
     ZeroDivisionError

     @waypoints (ordered list) :: list of waypoints in order; each waypoint is a set of coordinates
     (x,y,yaw) that define a turning point (not used here)

     @closest_waypoint (int) :: index of the closest waypoint (0-indexed) given the car's x,y
     position as measured by the Euclidean distance (not used here)

     @@output: @reward (float [-1e5, 1e5]) :: with inputs in their documented ranges and
     both penalty factors at 0.5, the value lies in [-1, 1]
     '''

     MAX_REWARD = 1

     # Base reward: MAX_REWARD on the center line, falling linearly to 0 at the
     # track edge (distance_from_center == track_width / 2).
     percent_towards_edge = distance_from_center / (track_width / 2)
     reward = MAX_REWARD * (1 - percent_towards_edge)

     # Throttle penalty: nothing at full throttle, THROTTLE_PENALTY_FACTOR at a stop.
     reward -= (1 - throttle) * THROTTLE_PENALTY_FACTOR

     # Steering penalty: symmetric for left/right, STEERING_PENALTY_FACTOR at full lock.
     reward -= abs(steering) * STEERING_PENALTY_FACTOR

     return float(reward)
	# Weight of the throttle penalty: reward_function subtracts
	# (1 - throttle) * THROTTLE_PENALTY_FACTOR from the reward.
	THROTTLE_PENALTY_FACTOR = 0.5
	# Weight of the steering penalty: reward_function subtracts
	# abs(steering) * STEERING_PENALTY_FACTOR from the reward.
	STEERING_PENALTY_FACTOR = 0.5

	def reward_function(on_track, x, y, distance_from_center, car_orientation, progress, steps, throttle, steering, track_width, waypoints, closest_waypoint):
		'''
		Compute the reward for a single step: highest on the center line at full
		throttle with no steering, reduced linearly by distance from the center,
		by unused throttle and by steering magnitude.

		Only distance_from_center, track_width, throttle and steering are read by
		this implementation; the remaining parameters are accepted but ignored.

		@on_track (boolean) :: The vehicle is off-track if the front of the vehicle is outside of the white
		lines (not used here)

		@x (float range: [0, 1]) :: Fraction of where the car is along the x-axis. 1 indicates
		max 'x' value in the coordinate system. (not used here)

		@y (float range: [0, 1]) :: Fraction of where the car is along the y-axis. 1 indicates
		max 'y' value in the coordinate system. (not used here)

		@distance_from_center (float [0, track_width/2]) :: Displacement from the center line of the track
		as defined by waypoints

		@car_orientation (float: [-3.14, 3.14]) :: yaw of the car with respect to the car's x-axis in
		radians (not used here)

		@progress (float: [0,1]) :: % of track complete (not used here)

		@steps (int) :: number of steps completed (not used here)

		@throttle :: (float) 0 to 1 (0 indicates stop, 1 max throttle)

		@steering :: (float) -1 to 1 (-1 is right, 1 is left)

		@track_width (float) :: width of the track (> 0); a value of 0 would raise
		ZeroDivisionError

		@waypoints (ordered list) :: list of waypoints in order; each waypoint is a set of coordinates
		(x,y,yaw) that define a turning point (not used here)

		@closest_waypoint (int) :: index of the closest waypoint (0-indexed) given the car's x,y
		position as measured by the Euclidean distance (not used here)

		@@output: @reward (float [-1e5, 1e5]) :: with inputs in their documented ranges and
		both penalty factors at 0.5, the value lies in [-1, 1]
		'''

		MAX_REWARD = 1

		# Base reward: MAX_REWARD on the center line, falling linearly to 0 at the
		# track edge (distance_from_center == track_width / 2).
		percent_towards_edge = distance_from_center / (track_width / 2)
		reward = MAX_REWARD * (1 - percent_towards_edge)

		# Throttle penalty: nothing at full throttle, THROTTLE_PENALTY_FACTOR at a stop.
		reward -= (1 - throttle) * THROTTLE_PENALTY_FACTOR

		# Steering penalty: symmetric for left/right, STEERING_PENALTY_FACTOR at full lock.
		reward -= abs(steering) * STEERING_PENALTY_FACTOR

		return float(reward)