Commit 583c7dc5 authored by Julian Rudolf
Browse files

working agent (tries to collect all apples) with updated rewards

parent 4f810ade
......@@ -233,7 +233,7 @@ alpha = 0.1
gamma = 0.5
epsilon = 0.6
rounds = 800
agent, q0, poss_dirs0 = learning(alpha, gamma, epsilon, rounds)
#agent, q0, poss_dirs0 = learning(alpha, gamma, epsilon, rounds)
#save_qtable(agent.qTable)
#testing(agent, 50, q0, poss_dirs0, True)
#testing(agent, 50, q0, poss_dirs0)
......
......@@ -15,22 +15,17 @@ class State:
# kill_l, kill_r, kill_u, kill_d :
# describes in which direction a crash would be inevitable
# binary: 0 no crash || 1 crash
def __init__(self, df_l_, df_r_, df_u_, df_d_, kill_l_, kill_r_, kill_u_, kill_d_):
def __init__(self, df_l_, df_r_, df_u_, df_d_):
self.df_l = df_l_
self.df_r = df_r_
self.df_d = df_d_
self.df_u = df_u_
self.kill_l = kill_l_
self.kill_r = kill_r_
self.kill_d = kill_d_
self.kill_u = kill_u_
def __hash__(self):
return hash((self.df_l, self.df_r, self.df_d, self.df_u, self.kill_l, self.kill_r, self.kill_d, self.kill_u))
return hash((self.df_l, self.df_r, self.df_d, self.df_u))
def __eq__(self, other):
return (self.df_l, self.df_r, self.df_d, self.df_u, self.kill_l, self.kill_r, self.kill_d, self.kill_u) == \
(other.df_l, other.df_r, other.df_d, other.df_u, other.kill_l, other.kill_r, other.kill_d, other.kill_u)
return (self.df_l, self.df_r, self.df_d, self.df_u) == (other.df_l, other.df_r, other.df_d, other.df_u)
# checks which directions are possible
......@@ -255,7 +250,7 @@ def calc_state_space(snake_agent, snake_enemy, apples):
if dfl == -1 or dfd == -1 or dfu == -1 or dfr == -1:
assert False, 'directions were not set'
return State(dfl, dfr, dfu, dfd, killl, killr, killu, killd)
return State(dfl, dfr, dfu, dfd)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment