pierrelux · July 19, 2019 17:59
diff --git a/accumulate_discounted.py b/accumulate_discounted.py
 import autograd.numpy as np
 from scipy.signal import lfilter
 from autograd.extend import primitive, defvjp

 @primitive
 def accumulate_discounted(rewards, discount=1.):
     """Compute the discounted sum of rewards from each step onward.

     Along the time axis the result satisfies
     ``out[t] = rewards[t] + discount * out[t + 1]``. It is obtained by
     running a first-order recursive filter over the time-reversed rewards
     and reversing the filtered signal back.

     Args:
       rewards (np.ndarray): 1D array of rewards. A 2D array is processed
         row by row, with time running along axis 1.
       discount (float): Scalar discount factor

     Returns:
         np.ndarray: 2D array of discounted sums of rewards onward. A 1D
         input is promoted to a single row, so it comes back with shape
         ``(1, len(rewards))``.
     """
     # Promote a single trajectory to a one-row batch so the column slicing
     # below is valid.
     batch = np.expand_dims(rewards, 0) if rewards.ndim == 1 else rewards
     backwards = batch[:, ::-1]
     # y[n] = x[n] + discount * y[n - 1], applied to the reversed sequence.
     filtered = lfilter([1], [1, -discount], x=backwards)
     return filtered[:, ::-1]


 def accumulate_discounted_vjp(ans, rewards, discount=1.):
     """Build the vector-Jacobian product wrt. the "rewards" vector.

     The output is $y_n = \\sum_{i=0}^n \\bar{x}_i \\gamma^{n-i}$ (a convolution),
     i.e. the same first-order filter as the forward pass but run forward in
     time instead of backward.

     Args:
       ans: Output of the forward pass (unused).
       rewards (np.ndarray): Rewards given to the forward pass. Only its
         shape is used.
       discount (float): Scalar discount factor

     Returns:
         callable: Function mapping the incoming gradient ``xbar`` to the
         gradient wrt. ``rewards``.
     """
     del ans
     # The forward pass promotes a 1D `rewards` to shape (1, n), so `xbar`
     # arrives as (1, n) too; remember the input shape so the gradient can be
     # handed back in the shape of `rewards`.
     input_was_1d = np.ndim(rewards) == 1
     input_shape = np.shape(rewards)

     def _vjp(xbar):
         grad = lfilter([1], [1, -discount], x=xbar)
         if input_was_1d:
             return np.reshape(grad, input_shape)
         return grad
     return _vjp


 # Attach the hand-written VJP to the primitive. Only a VJP for `rewards` is
 # supplied; none is given for `discount`.
 defvjp(accumulate_discounted, accumulate_discounted_vjp)
	import autograd.numpy as np
	from scipy.signal import lfilter
	from autograd.extend import primitive, defvjp

	@primitive
	def accumulate_discounted(rewards, discount=1.):
	    """Behaves like `accumulate` but where each array element gets discounted.

	    Along the time axis the result satisfies
	    ``out[t] = rewards[t] + discount * out[t + 1]``.

	    Args:
	      rewards (np.ndarray): 1D array of rewards. A 2D array is processed
	        row by row, with time running along axis 1.
	      discount (float): Scalar discount factor

	    Returns:
	        np.ndarray: 2D array where each element is the discounted sum of
	        rewards onward. A 1D input is promoted to a single row, so it
	        comes back with shape ``(1, len(rewards))``.
	    """
	    # Promote a single trajectory to a one-row batch so the column slicing
	    # below is valid.
	    if rewards.ndim == 1:
	        rewards = np.expand_dims(rewards, 0)
	    # Filter the time-reversed rewards (y[n] = x[n] + discount * y[n - 1]),
	    # then reverse back so each entry accumulates the rewards after it.
	    return lfilter([1], [1, -discount], x=rewards[:, ::-1])[:, ::-1]


	def accumulate_discounted_vjp(ans, rewards, discount=1.):
	    """Build the vector-Jacobian product wrt. the "rewards" vector.

	    The output is $y_n = \\sum_{i=0}^n \\bar{x}_i \\gamma^{n-i}$ (a convolution),
	    i.e. the same first-order filter as the forward pass but run forward in
	    time instead of backward.

	    Args:
	      ans: Output of the forward pass (unused).
	      rewards (np.ndarray): Rewards given to the forward pass. Only its
	        shape is used.
	      discount (float): Scalar discount factor

	    Returns:
	        callable: Function mapping the incoming gradient ``xbar`` to the
	        gradient wrt. ``rewards``.
	    """
	    del ans
	    # The forward pass promotes a 1D `rewards` to shape (1, n), so `xbar`
	    # arrives as (1, n) too; remember the input shape so the gradient can be
	    # handed back in the shape of `rewards`.
	    input_was_1d = np.ndim(rewards) == 1
	    input_shape = np.shape(rewards)

	    def _vjp(xbar):
	        grad = lfilter([1], [1, -discount], x=xbar)
	        if input_was_1d:
	            return np.reshape(grad, input_shape)
	        return grad
	    return _vjp


	# Attach the hand-written VJP to the primitive. Only a VJP for `rewards` is
	# supplied; none is given for `discount`.
	defvjp(accumulate_discounted, accumulate_discounted_vjp)