brenns10 · December 12, 2024 14:26 · brenns10 · Oct 14, 2015
diff --git a/gambler.py b/gambler.py
 """
 Finds optimal strategy for gambling chip game.

 Problem: two players (Alice and Bob) play a game where gambling chips (with
 numeric values) are laid out in a line.  They alternate turns.  During their
 turn, a player picks up a chip on either end of the line, but not from anywhere
 in the middle, and adds that chip to their collection.  At the end, the player
 with the higher valued collection of chips wins.

 This is a dynamic programming algorithm for solving the problem.  It runs in
 O(n^2) time, where n is the length of the line of chips.  It finds the optimal
 strategy for the first player, assuming that the second player also uses the
 same, optimal strategy.

 This problem was originally presented in my EECS 477 (Advanced Algorithms,
 taught by Vincenzo Liberatore) class, as a homework problem.  The dynamic
 programming algorithm and implementation are both by Stephen Brennan, and as
 far as I'm concerned they're in the public domain.
 """

 from __future__ import print_function
 import numpy as np


 def gambler(chips):
    """
    Return a solution to the gambling chip game described above.

    The algorithm's "objective function" is the difference between the two
    players' scores.  This algorithm fills up a table containing the objective
    value of the best strategies for each sublist of the chips (from i to j).
    Once it completes, the best strategy is located at (0, len(chips)-1).  The
    function returns the best objective value along with the complete game
    table.

    :param chips: A list of numeric chip values.
    :returns: (optimal score, game table)
    """
    A = np.zeros((len(chips), len(chips)))

    for base in range(len(chips)):
        for offset in range(len(chips) - base):

            # We iterate through successive diagonals, because each table entry
            # depends on the entry down and to the left of it.
            i, j = offset, base + offset
            if i == j:
                # Base case is to choose the last available chip.
                A[i, j] = chips[i]
            else:
                # Otherwise, your objective value will be the chip you choose
                # minus your opponent's objective value.  Choose the chip that
                # maximizes your value.
                A[i, j] = max([chips[i] - A[i+1, j], chips[j] - A[i, j-1]])

    return A[0, len(chips) - 1], A


 def print_gambler_solution(chips, A, start=None):
    """
    Print to stdout the the solution for the gambler problem.

    :param chips: The list of chip values given to gambler().
    :param A: The game table returned by gambler().
    :param start: Where in the game to start from.  Defaults to 0, n-1.
    """
    if start is None:
        start = (0, len(chips) - 1)
    i, j = start

    my_move = True

    while i <= j:
        print('Game State: %r' % chips[i:j+1])

        prefix = 'ME' if my_move else 'OP'
        if A[i, j] == chips[i] - A[i+1, j]:
            print('%s: take left chip (%r)' % (prefix, chips[i]))
            i = i + 1
        else:
            print('%s: take right chip (%r)' % (prefix, chips[j]))
            j = j - 1

        my_move = not my_move

    print('Final ME - OP score: %r' % A[start])
	"""
	Finds optimal strategy for gambling chip game.

	Problem: two players (Alice and Bob) play a game where gambling chips (with
	numeric values) are laid out in a line. They alternate turns. During their
	turn, a player picks up a chip on either end of the line, but not from anywhere
	in the middle, and adds that chip to their collection. At the end, the player
	with the higher valued collection of chips wins.

	This is a dynamic programming algorithm for solving the problem. It runs in
	O(n^2) time, where n is the length of the line of chips. It finds the optimal
	strategy for the first player, assuming that the second player also uses the
	same, optimal strategy.

	This problem was originally presented in my EECS 477 (Advanced Algorithms,
	taught by Vincenzo Liberatore) class, as a homework problem. The dynamic
	programming algorithm and implementation are both by Stephen Brennan, and as
	far as I'm concerned they're in the public domain.
	"""

	from __future__ import print_function
	import numpy as np


	def gambler(chips):
	"""
	Return a solution to the gambling chip game described above.

	The algorithm's "objective function" is the difference between the two
	players' scores. This algorithm fills up a table containing the objective
	value of the best strategies for each sublist of the chips (from i to j).
	Once it completes, the best strategy is located at (0, len(chips)-1). The
	function returns the best objective value along with the complete game
	table.

	:param chips: A list of numeric chip values.
	:returns: (optimal score, game table)
	"""
	A = np.zeros((len(chips), len(chips)))

	for base in range(len(chips)):
	for offset in range(len(chips) - base):

	# We iterate through successive diagonals, because each table entry
	# depends on the entry down and to the left of it.
	i, j = offset, base + offset
	if i == j:
	# Base case is to choose the last available chip.
	A[i, j] = chips[i]
	else:
	# Otherwise, your objective value will be the chip you choose
	# minus your opponent's objective value. Choose the chip that
	# maximizes your value.
	A[i, j] = max([chips[i] - A[i+1, j], chips[j] - A[i, j-1]])

	return A[0, len(chips) - 1], A


	def print_gambler_solution(chips, A, start=None):
	"""
	Print to stdout the the solution for the gambler problem.

	:param chips: The list of chip values given to gambler().
	:param A: The game table returned by gambler().
	:param start: Where in the game to start from. Defaults to 0, n-1.
	"""
	if start is None:
	start = (0, len(chips) - 1)
	i, j = start

	my_move = True

	while i <= j:
	print('Game State: %r' % chips[i:j+1])

	prefix = 'ME' if my_move else 'OP'
	if A[i, j] == chips[i] - A[i+1, j]:
	print('%s: take left chip (%r)' % (prefix, chips[i]))
	i = i + 1
	else:
	print('%s: take right chip (%r)' % (prefix, chips[j]))
	j = j - 1

	my_move = not my_move

	print('Final ME - OP score: %r' % A[start])