codefever · October 4, 2019 09:21
diff --git a/bfs.py b/bfs.py
 #!/usr/bin/env python

 from enum import IntEnum
 import math
 import random
 import logging

 logging.basicConfig(level=logging.INFO)


 # Sample maze: 0 is an open cell, any value > 0 is a wall (neighbors() skips
 # those).  bfs() and qlearning() search from the top-left cell (0, 0) to the
 # bottom-right cell.
 GRID = [
    [0,0,0,0,0],
    [1,1,1,0,1],
    [1,0,0,0,1],
    [0,0,1,1,1],
    [1,0,0,0,0],
 ]


 class Direction(IntEnum):
    """The four moves on the grid.

    The integer value doubles as the index into a cell's Q-table row in
    qlearning(), and the first letter of the name is used to spell paths.
    """
    LEFT = 0
    RIGHT = 1
    TOP = 2
    DOWN = 3


 # (row delta, column delta) added to a (y, x) cell to move one step in each
 # direction; rows grow downwards, columns grow to the right.
 DIRS = {
    Direction.LEFT: (0,-1),
    Direction.RIGHT: (0,1),
    Direction.TOP: (-1,0),
    Direction.DOWN: (1,0),
 }


 def _get_dir(src, dst):
    """Return the Direction of the step that leads from src to dst.

    Both arguments are (y, x) cells.  A column change is classified first:
    LEFT when the column decreases by exactly one, RIGHT otherwise.  With
    an unchanged column the row decides: TOP when the row decreases by
    exactly one, DOWN otherwise.
    """
    if src[1] == dst[1]:
        # Same column, so the step is vertical.
        if src[0] - dst[0] == 1:
            return Direction.TOP
        return Direction.DOWN
    # The column differs, so the step is horizontal.
    if src[1] - dst[1] == 1:
        return Direction.LEFT
    return Direction.RIGHT


 def neighbors(grid, y, x):
    """Yield the open cells directly adjacent to (y, x) as (row, column) pairs.

    Candidates are produced in the iteration order of DIRS.  A candidate is
    dropped when it lies outside the grid or when its grid value is > 0.
    """
    for dy, dx in DIRS.values():
        ny, nx = y + dy, x + dx
        if not (0 <= ny < len(grid) and 0 <= nx < len(grid[0])):
            continue  # outside the grid
        if grid[ny][nx] > 0:
            continue  # wall
        yield ny, nx


 def bfs(grid):
    """Return the length of the shortest path from top-left to bottom-right.

    The search expands level by level over the open cells produced by
    neighbors().  The start cell itself is never tested against the wall
    rule; it is always treated as the origin of the search.

    Args:
        grid: rectangular list of rows; cells with a value > 0 are walls.

    Returns:
        The number of steps on a shortest path (0 when start and end are
        the same cell), or -1 when the end cannot be reached or the grid
        has no cells.
    """
    if not grid or not grid[0]:
        # An empty grid has neither a start nor an end cell.
        return -1
    rows, cols = len(grid), len(grid[0])
    start = (0, 0)
    end = (rows-1, cols-1)
    visited = [[False]*cols for _ in range(rows)]
    visited[start[0]][start[1]] = True
    frontier = [start]
    dist = 0
    while frontier:
        # Every cell in `frontier` is exactly `dist` steps from the start;
        # iterating the list avoids re-slicing the queue on each pop.
        next_frontier = []
        for origin in frontier:
            if origin == end:
                return dist
            for nn in neighbors(grid, origin[0], origin[1]):
                if visited[nn[0]][nn[1]]:
                    continue
                visited[nn[0]][nn[1]] = True
                next_frontier.append(nn)
        frontier = next_frontier
        dist += 1
    return -1


 def qlearning(grid, num_episodes=20, alpha=0.5, gamma=0.777, max_steps=100,
               rng=None):
    """Estimate the start-to-end path length with tabular Q-learning.

    An agent walks the grid from the top-left cell towards the bottom-right
    cell for ``num_episodes`` episodes, updating one Q-value per move.  The
    path obtained by always following the highest Q-value is then measured.
    Moves are sampled randomly, so the result can differ between runs and
    is not guaranteed to be the shortest path.

    Args:
        grid: rectangular list of rows; cells with a value > 0 are walls.
        num_episodes: number of training episodes.
        alpha: learning rate of the Q-value update.
        gamma: discount applied to the best Q-value of the next cell.
        max_steps: maximum number of moves per training episode.
        rng: object providing ``random()`` and ``choice()``; defaults to
            the ``random`` module.  Pass a seeded ``random.Random`` for
            reproducible runs.

    Returns:
        The number of moves on the learned greedy path (0 when start and
        end are the same cell), or -1 when that path does not reach the
        end or the grid has no cells.
    """
    if not grid or not grid[0]:
        # An empty grid has neither a start nor an end cell.
        return -1
    if rng is None:
        rng = random
    m, n = len(grid), len(grid[0])
    start = (0, 0)
    end = (m-1, n-1)
    # qtable[y][x][direction] is the learned value of leaving cell (y, x)
    # in that direction.
    qtable = [[[0.0]*len(Direction) for _ in range(n)] for _ in range(m)]

    def reward(node, todir):
        """Return the immediate reward for leaving node in direction todir."""
        d = DIRS[todir]
        if (node[0] + d[0], node[1] + d[1]) == end:
            # Stepping onto the end cell earns a large bonus.
            return m*n
        return 1.0

    def best_move(node):
        """Return (neighbor, direction) with the highest Q-value from node.

        Ties keep the first neighbor produced by neighbors(); the result is
        (None, None) when node has no open neighbor.
        """
        best_val = -float('infinity')
        best_nn = None
        best_dir = None
        for nn in neighbors(grid, node[0], node[1]):
            d = _get_dir(node, nn)
            val = qtable[node[0]][node[1]][d.value]
            if val > best_val:
                best_val, best_nn, best_dir = val, nn, d
        return best_nn, best_dir

    def get_path():
        """Follow the greedy policy from start and spell the moves taken.

        Each move is recorded as the first letter of its Direction name.
        Returns None when the walk hits a dead end or reaches m*n moves.
        """
        node = start
        path = []
        while node != end:
            nn, todir = best_move(node)
            if nn is None:
                return None
            path.append(todir.name[0])
            node = nn
            if len(path) >= m*n:
                # A loop-free walk needs fewer than m*n moves, so the
                # greedy policy must be cycling.
                return None
        return ''.join(path)

    for i in range(num_episodes):
        node = start
        steps = 0
        # Probability of taking a random (exploratory) move; it decays as
        # the episodes progress.
        eprate = math.exp(-i*0.1)
        while steps < max_steps and node != end:
            if rng.random() > eprate:  # exploitation
                nn, todir = best_move(node)
            else:  # exploration
                valid_nn = list(neighbors(grid, node[0], node[1]))
                nn = rng.choice(valid_nn) if valid_nn else None
                todir = _get_dir(node, nn) if valid_nn else None
            if nn is None:
                # No open neighbor: the agent cannot move any further.
                break

            # Best value obtainable from the next cell; 0.0 when that cell
            # has no open neighbor of its own.
            future = max(
                (qtable[nn[0]][nn[1]][_get_dir(nn, tmp).value]
                 for tmp in neighbors(grid, nn[0], nn[1])),
                default=0.0)
            cell = qtable[node[0]][node[1]]
            cell[todir.value] += alpha * (reward(node, todir) + gamma * future - cell[todir.value])
            node = nn
            steps += 1

        # Extracting the greedy path is only needed for the debug message.
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            path = get_path()
            logging.debug('episode[{}] exploit=[{}] reach_end=[{}], path=[{}], steps=[{}]'.format(i, eprate, node==end, path, len(path) if path is not None else 'INF'))

    path = get_path()
    # An empty path is a valid answer (start == end); only None is failure.
    return len(path) if path is not None else -1


 if __name__ == '__main__':
    # Print the BFS answer first, then the Q-learning estimate, both for the
    # sample grid.
    for solver in (bfs, qlearning):
        print(solver(GRID))
	#!/usr/bin/env python

	from enum import IntEnum
	import math
	import random
	import logging

	logging.basicConfig(level=logging.INFO)


	# Sample maze: 0 is an open cell, any value > 0 is a wall (neighbors() skips
	# those).  bfs() and qlearning() search from the top-left cell (0, 0) to the
	# bottom-right cell.
	GRID = [
	[0,0,0,0,0],
	[1,1,1,0,1],
	[1,0,0,0,1],
	[0,0,1,1,1],
	[1,0,0,0,0],
	]


	class Direction(IntEnum):
	    """The four moves on the grid.

	    The integer value doubles as the index into a cell's Q-table row in
	    qlearning(), and the first letter of the name is used to spell paths.
	    """
	    LEFT = 0
	    RIGHT = 1
	    TOP = 2
	    DOWN = 3


	# (row delta, column delta) added to a (y, x) cell to move one step in each
	# direction; rows grow downwards, columns grow to the right.
	DIRS = {
	Direction.LEFT: (0,-1),
	Direction.RIGHT: (0,1),
	Direction.TOP: (-1,0),
	Direction.DOWN: (1,0),
	}


	def _get_dir(src, dst):
	    """Return the Direction of the step that leads from src to dst.

	    Both arguments are (y, x) cells.  A column change is classified first:
	    LEFT when the column decreases by exactly one, RIGHT otherwise.  With
	    an unchanged column the row decides: TOP when the row decreases by
	    exactly one, DOWN otherwise.
	    """
	    if src[1] != dst[1]:
	        # The column differs, so the step is horizontal.
	        return Direction.LEFT if src[1] - dst[1] == 1 else Direction.RIGHT
	    # Same column, so the step is vertical.
	    return Direction.TOP if src[0] - dst[0] == 1 else Direction.DOWN


	def neighbors(grid, y, x):
	    """Yield the open cells directly adjacent to (y, x) as (row, column) pairs.

	    Candidates are produced in the iteration order of DIRS.  A candidate is
	    dropped when it lies outside the grid or when its grid value is > 0.
	    """
	    for d in DIRS.values():
	        yy = y + d[0]
	        xx = x + d[1]
	        if yy >= 0 and xx >= 0 and yy < len(grid) and xx < len(grid[0]):
	            if grid[yy][xx] > 0:
	                continue  # wall
	            yield yy, xx


	def bfs(grid):
	    """Return the length of the shortest path from top-left to bottom-right.

	    The search expands level by level over the open cells produced by
	    neighbors().  The start cell itself is never tested against the wall
	    rule; it is always treated as the origin of the search.

	    Args:
	        grid: rectangular list of rows; cells with a value > 0 are walls.

	    Returns:
	        The number of steps on a shortest path (0 when start and end are
	        the same cell), or -1 when the end cannot be reached or the grid
	        has no cells.
	    """
	    if not grid or not grid[0]:
	        # An empty grid has neither a start nor an end cell.
	        return -1
	    rows, cols = len(grid), len(grid[0])
	    start = (0, 0)
	    end = (rows-1, cols-1)
	    visited = [[False]*cols for _ in range(rows)]
	    visited[start[0]][start[1]] = True
	    frontier = [start]
	    dist = 0
	    while frontier:
	        # Every cell in `frontier` is exactly `dist` steps from the start;
	        # iterating the list avoids re-slicing the queue on each pop.
	        next_frontier = []
	        for origin in frontier:
	            if origin == end:
	                return dist
	            for nn in neighbors(grid, origin[0], origin[1]):
	                if visited[nn[0]][nn[1]]:
	                    continue
	                visited[nn[0]][nn[1]] = True
	                next_frontier.append(nn)
	        frontier = next_frontier
	        dist += 1
	    return -1


	def qlearning(grid, num_episodes=20, alpha=0.5, gamma=0.777, max_steps=100,
	              rng=None):
	    """Estimate the start-to-end path length with tabular Q-learning.

	    An agent walks the grid from the top-left cell towards the bottom-right
	    cell for ``num_episodes`` episodes, updating one Q-value per move.  The
	    path obtained by always following the highest Q-value is then measured.
	    Moves are sampled randomly, so the result can differ between runs and
	    is not guaranteed to be the shortest path.

	    Args:
	        grid: rectangular list of rows; cells with a value > 0 are walls.
	        num_episodes: number of training episodes.
	        alpha: learning rate of the Q-value update.
	        gamma: discount applied to the best Q-value of the next cell.
	        max_steps: maximum number of moves per training episode.
	        rng: object providing ``random()`` and ``choice()``; defaults to
	            the ``random`` module.  Pass a seeded ``random.Random`` for
	            reproducible runs.

	    Returns:
	        The number of moves on the learned greedy path (0 when start and
	        end are the same cell), or -1 when that path does not reach the
	        end or the grid has no cells.
	    """
	    if not grid or not grid[0]:
	        # An empty grid has neither a start nor an end cell.
	        return -1
	    if rng is None:
	        rng = random
	    m, n = len(grid), len(grid[0])
	    start = (0, 0)
	    end = (m-1, n-1)
	    # qtable[y][x][direction] is the learned value of leaving cell (y, x)
	    # in that direction.
	    qtable = [[[0.0]*len(Direction) for _ in range(n)] for _ in range(m)]

	    def reward(node, todir):
	        """Return the immediate reward for leaving node in direction todir."""
	        d = DIRS[todir]
	        if (node[0] + d[0], node[1] + d[1]) == end:
	            # Stepping onto the end cell earns a large bonus.
	            return m*n
	        return 1.0

	    def best_move(node):
	        """Return (neighbor, direction) with the highest Q-value from node.

	        Ties keep the first neighbor produced by neighbors(); the result is
	        (None, None) when node has no open neighbor.
	        """
	        best_val = -float('infinity')
	        best_nn = None
	        best_dir = None
	        for nn in neighbors(grid, node[0], node[1]):
	            d = _get_dir(node, nn)
	            val = qtable[node[0]][node[1]][d.value]
	            if val > best_val:
	                best_val, best_nn, best_dir = val, nn, d
	        return best_nn, best_dir

	    def get_path():
	        """Follow the greedy policy from start and spell the moves taken.

	        Each move is recorded as the first letter of its Direction name.
	        Returns None when the walk hits a dead end or reaches m*n moves.
	        """
	        node = start
	        path = []
	        while node != end:
	            nn, todir = best_move(node)
	            if nn is None:
	                return None
	            path.append(todir.name[0])
	            node = nn
	            if len(path) >= m*n:
	                # A loop-free walk needs fewer than m*n moves, so the
	                # greedy policy must be cycling.
	                return None
	        return ''.join(path)

	    for i in range(num_episodes):
	        node = start
	        steps = 0
	        # Probability of taking a random (exploratory) move; it decays as
	        # the episodes progress.
	        eprate = math.exp(-i*0.1)
	        while steps < max_steps and node != end:
	            if rng.random() > eprate:  # exploitation
	                nn, todir = best_move(node)
	            else:  # exploration
	                valid_nn = list(neighbors(grid, node[0], node[1]))
	                nn = rng.choice(valid_nn) if valid_nn else None
	                todir = _get_dir(node, nn) if valid_nn else None
	            if nn is None:
	                # No open neighbor: the agent cannot move any further.
	                break

	            # Best value obtainable from the next cell; 0.0 when that cell
	            # has no open neighbor of its own.
	            future = max(
	                (qtable[nn[0]][nn[1]][_get_dir(nn, tmp).value]
	                 for tmp in neighbors(grid, nn[0], nn[1])),
	                default=0.0)
	            cell = qtable[node[0]][node[1]]
	            cell[todir.value] += alpha * (reward(node, todir) + gamma * future - cell[todir.value])
	            node = nn
	            steps += 1

	        # Extracting the greedy path is only needed for the debug message.
	        if logging.getLogger().isEnabledFor(logging.DEBUG):
	            path = get_path()
	            logging.debug('episode[{}] exploit=[{}] reach_end=[{}], path=[{}], steps=[{}]'.format(i, eprate, node==end, path, len(path) if path is not None else 'INF'))

	    path = get_path()
	    # An empty path is a valid answer (start == end); only None is failure.
	    return len(path) if path is not None else -1


	if __name__ == '__main__':
	    # Print the BFS answer first, then the Q-learning estimate, both for the
	    # sample grid.
	    print(bfs(GRID))
	    print(qlearning(GRID))