Commit 5e11cfcc authored by Julian Rudolf
Browse files

finished functions to calculate state space

started with qlearning implementations
parent 079ac989
from snake_logic import step, reset, render, init_game
from state import State
class QLearning:
    """Tabular Q-learning bookkeeping.

    Attributes:
        alpha: learning rate.
        gamma: discount factor.
        qTable: dict mapping ``(state, action)`` to the learned value.
            Entries that were never written are treated as 0.0.
    """

    def __init__(self, alpha_, r_, g_):
        """Store the learning parameters and start with an empty table.

        ``r_`` is accepted so existing callers keep working; it is not stored.
        """
        self.alpha = alpha_
        self.gamma = g_
        self.qTable = {}

    def update(self, state, next_state, action, reward):
        """Write the new table entry for ``(state, action)``.

        The new value blends the previous entry (0.0 if unseen) with the
        reward plus the discounted best value reachable from ``next_state``.
        """
        # .get() instead of [] so the very first visit of a pair does not
        # raise KeyError
        old_value = self.qTable.get((state, action), 0.0)
        self.qTable[(state, action)] = (1 - self.alpha) * old_value + \
            self.alpha * reward + \
            self.alpha * self.gamma * self.max_val(next_state)

    def max_val(self, state):
        """Return the largest table value over the actions possible in ``state``.

        Unseen ``(state, action)`` pairs count as 0.0; when no action is
        possible at all the result is 0.0 as well.
        """
        # built-in max(): a plain list has no .max() method
        return max(
            (self.qTable.get((state, action), 0.0)
             for action in possible_actions(state)),
            default=0.0,
        )
def possible_actions(state):
    """Return the action names whose kill flag on ``state`` is not set.

    The result keeps the fixed order left, right, up, down.
    """
    flag_by_action = (
        ("left", state.kill_l),
        ("right", state.kill_r),
        ("up", state.kill_u),
        ("down", state.kill_d),
    )
    return [name for name, blocked in flag_by_action if not blocked]
# manual smoke test of the game logic; these statements run whenever the
# module is executed or imported
init_game()
render()
step("right")
# step() yields three values; presumably (state, reward, round-over flag) --
# TODO confirm against snake_logic.step
s, r, round_over = step("down")
render()
# dump the features carried by s: the four df_* values, then the four kill_* flags
print("dfl, dfr, dfu, dfd, killl, killr, killu, killd")
print(s.df_l, s.df_r, s.df_u, s.df_d, " ", s.kill_l, s.kill_r, s.kill_u, s.kill_d)
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import Agent.snake_logic_old as logic
import Agent.snake_logic as logic
# presumably the dimensions of the observation grid -- TODO confirm against
# the code that consumes it (not visible here)
observation_map = [30, 30]
......
This diff is collapsed.
import random
import Game.maps as maps
# active maze grid; get_surround indexes it as map[y][x], rejects cells equal
# to 1 as "not path" and treats neighbours equal to 0 as open
# NOTE: this name shadows the builtin map() inside this module
map = maps.map3
# divisor calc_state_space applies to snake.pos values to obtain grid indices
block_size = 30
# enemy head position and heading; overwritten by calc_state_space on every
# call and read by search
head_e = 0
dir_e = 0
class State:
    """Feature vector describing the agent's situation.

    Attributes:
        df_l, df_r, df_u, df_d: rank of the direction (left, right, up,
            down) by distance to the nearest apple, from 0 (nearest) to
            3 (furthest).
        kill_l, kill_r, kill_u, kill_d: whether a crash would be inevitable
            in that direction; binary, 0 no crash, 1 crash.

    Two states with the same eight values compare equal and hash alike, so a
    freshly computed State finds the table entries written for an earlier,
    identical one when used as (part of) a dictionary key. Do not mutate a
    State while it is stored as a key.
    """

    def __init__(self, df_l_, df_r_, df_u_, df_d_, kill_l_, kill_r_, kill_u_, kill_d_):
        self.df_l = df_l_
        self.df_r = df_r_
        self.df_d = df_d_
        self.df_u = df_u_
        self.kill_l = kill_l_
        self.kill_r = kill_r_
        self.kill_d = kill_d_
        self.kill_u = kill_u_

    def _key(self):
        """Return all features as one tuple (used for equality and hashing)."""
        return (self.df_l, self.df_r, self.df_u, self.df_d,
                self.kill_l, self.kill_r, self.kill_u, self.kill_d)

    def __eq__(self, other):
        if not isinstance(other, State):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return "State(df_l={}, df_r={}, df_u={}, df_d={}, kill_l={}, kill_r={}, kill_u={}, kill_d={})".format(
            *self._key())
def get_surround(x, y, grid=None):
    """Report which of the four neighbouring cells of (x, y) can be entered.

    Args:
        x: column index; truncated with ``int()``.
        y: row index; truncated with ``int()``.
        grid: maze as a list of rows, indexed ``grid[y][x]``; a cell equal
            to 1 is not a path, a neighbour equal to 0 is open. Defaults to
            the module-level ``map``.

    Returns:
        Tuple ``(l, r, u, d)`` of 0/1 flags for left, right, up and down.
        On the outermost row/column the outward direction is always
        reported as open (the maze wraps around there).

    Raises:
        AssertionError: if (x, y) itself is a cell equal to 1.
    """
    if grid is None:
        grid = map
    x = int(x)
    y = int(y)
    # explicit raise rather than `assert`, so the check survives `python -O`
    if grid[y][x] == 1:
        raise AssertionError('position not path')

    last_x = len(grid[y]) - 1
    last_y = len(grid) - 1
    l, r, u, d = 0, 0, 0, 0

    if x < last_x:
        if grid[y][x + 1] == 0:
            r = 1
    elif x == last_x:
        r = 1

    if y < last_y:
        if grid[y + 1][x] == 0:
            d = 1
    elif y == last_y:
        d = 1

    if x > 0:
        if grid[y][x - 1] == 0:
            l = 1
    elif x == 0:
        l = 1

    if y > 0:
        if grid[y - 1][x] == 0:
            u = 1
    elif y == 0:
        u = 1

    return l, r, u, d
# for every walking direction: the heading an enemy must have to be coming
# straight at the walker, and the side corridors to branch into (in this order)
_SEARCH_RULES = {
    "up": ("down", ("left", "right")),
    "down": ("up", ("left", "right")),
    "left": ("right", ("down", "up")),
    "right": ("left", ("down", "up")),
}


def _advance(dir, x, y, last=29):
    """Return the cell one step from (x, y) in ``dir``, wrapping past index 0/``last``."""
    if dir == "up":
        return x, (last if y == 0 else y - 1)
    if dir == "down":
        return x, (0 if y == last else y + 1)
    if dir == "left":
        return (last if x == 0 else x - 1), y
    return (0 if x == last else x + 1), y


def search(dir, head, apples, steps, max_steps=50, kill=0, crossing=False):
    """Walk from ``head`` in direction ``dir`` looking for the nearest apple.

    The walk moves one cell at a time and, from every visited cell,
    recurses into the open side corridors, passing on the best step count
    found so far so that longer paths are abandoned early.

    Args:
        dir: "up", "down", "left" or "right".
        head: start cell as ``(y, x)``.
        apples: iterable of apple positions, compared against ``(x, y)``.
        steps: number of steps already taken to reach ``head``.
        max_steps: best (smallest) step count known so far; the walk stops
            once ``steps`` exceeds it.
        kill: kill flag accumulated so far (0 or 1).
        crossing: True once a cell with more than two open neighbours has
            been passed on this path.

    Returns:
        Tuple ``(max_steps, kill)``: the step count at which an apple was
        reached (or the unchanged bound when none was found), and 1 if the
        enemy head (module globals ``head_e`` / ``dir_e``) was met heading
        towards the walker before any crossing, else the incoming ``kill``.

    Raises:
        AssertionError: if ``dir`` is not one of the four directions.
    """
    x = head[1]
    y = head[0]
    # explicit raise rather than `assert`, so the check survives `python -O`
    if dir not in _SEARCH_RULES:
        raise AssertionError('direction unknown')
    oncoming, side_dirs = _SEARCH_RULES[dir]

    while True:
        x, y = _advance(dir, x, y)
        steps += 1

        # an enemy coming the other way only counts while no crossing has
        # been passed on this path
        if head_e == (y, x) and dir_e == oncoming and not crossing:
            kill = 1

        for pos in apples:
            if pos == (x, y):
                return steps, kill
        if steps > max_steps:
            return max_steps, kill

        l, r, u, d = get_surround(x, y)
        is_open = {"left": l, "right": r, "up": u, "down": d}
        if l + r + u + d > 2:
            crossing = True

        # branch into the side corridors of the current cell before moving on
        for side in side_dirs:
            if is_open[side] == 1:
                max_steps, kill = search(side, (y, x), apples, steps, max_steps, kill, crossing)

        # corridor ends here: nothing further straight ahead
        if is_open[dir] == 0:
            return max_steps, kill
def calc_state_space(snake_agent, snake_enemy, apples):
    """Build the State for the agent snake.

    For every direction the agent can take, the distance to the nearest
    apple is searched and the directions are ranked by it; in addition a
    kill flag is set for every direction that is blocked, points backwards,
    or on which ``search`` reports the enemy approaching.

    Side effect: overwrites the module globals ``head_e`` and ``dir_e`` with
    the enemy's head cell and heading, which ``search`` reads.

    Args:
        snake_agent: object with ``pos`` (``pos[0]`` is used as x,
            ``pos[1]`` as y, both divided by ``block_size``) and
            ``direction``.
        snake_enemy: object with the same two attributes.
        apples: apple positions, passed through to ``search``.

    Returns:
        ``State(dfl, dfr, dfu, dfd, killl, killr, killu, killd)``.

    Raises:
        AssertionError: if the agent's direction is not one of "up",
            "down", "left", "right".
    """
    global head_e
    global dir_e

    head = snake_agent.pos[1] / block_size, snake_agent.pos[0] / block_size
    heading = snake_agent.direction
    head_e = snake_enemy.pos[1] / block_size, snake_enemy.pos[0] / block_size
    dir_e = snake_enemy.direction

    l, r, u, d = get_surround(head[1], head[0])
    is_open = {"left": l, "right": r, "up": u, "down": d}

    backwards = {"up": "down", "down": "up", "left": "right", "right": "left"}
    # explicit raise rather than `assert`, so the check survives `python -O`
    if heading not in backwards:
        raise AssertionError('direction unknown')

    # distinct fallbacks (all larger than any search result) for directions
    # that are never searched
    steps = {"left": 100, "right": 101, "up": 102, "down": 103}
    kill = {}
    for direction in ("left", "right", "up", "down"):
        # turning back onto itself or into a wall counts as a certain crash
        if direction != backwards[heading] and is_open[direction]:
            steps[direction], kill[direction] = search(direction, head, apples, 0)
        else:
            kill[direction] = 1

    # rank 0 = nearest apple; sorted() is stable, so ties are resolved in the
    # order left, right, down, up
    ranking = sorted(("left", "right", "down", "up"), key=steps.get)
    rank = {direction: position for position, direction in enumerate(ranking)}

    return State(rank["left"], rank["right"], rank["up"], rank["down"],
                 kill["left"], kill["right"], kill["up"], kill["down"])
# head = (5,6)
# enemy = (8,6)
# app = ((17,28), (20,9), (17,29))
# f = calc_state_space(head, enemy, app)
# print("dfl, dfr, dfu, dfd, killl, killr, killu, killd")
# print(f.df_l, f.df_r, f.df_u, f.df_d, " ", f.kill_l, f.kill_r, f.kill_u, f.kill_d)
......@@ -145,6 +145,37 @@ map3 = [[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1],
[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1],
[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1]]
# map3 = [[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1],
# [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1],
# [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,A,0,0,0,0,0,0,0,0,0,0,0,A],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,o,0,0,0,0,0,0,S,0,0,0,0,0,0,0,0,0,0,A,0,0,0,0,0,0,0,0,1],
# [1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1],
# [1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1],
# [1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1],
# [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,A,0,0,0,0,0,0,0,0,0],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,0,0,0,0,0,0,0,0,A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],
# [1,1,1,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1],
# [1,1,0,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,1,1],
# [1,0,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,0,1],
# [1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,0,1],
# [1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1],
# [1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],
# [1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1],
# [1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1],
# [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
# [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1],
# [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,A,1,1,1,1,1,1,1,1,1,1,1,1]]
# startpos4 = [[840, 420], [30, 420]]
# map4 = [[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],
# [1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment