Commit 4f810ade authored by Julian Rudolf's avatar Julian Rudolf
Browse files

agent with kill direction in state (doesn't really work)

parent 1946e957
......@@ -29,13 +29,8 @@ class QLearning:
def update(self, state, next_state, action, reward):
old_val = self.qTable[(state, action)]
self.qTable[(state, action)] = (1 - self.alpha) * self.qTable[(state, action)] + \
self.alpha * reward + \
self.alpha * self.gamma * self.max_val(next_state)
#print(state, ", ", action, " = ", old_val, "->", self.qTable[(state, action)])
#print(self.qTable[(state, action)], "->", (1 - self.alpha), "*", old_val, " + ", self.alpha, "*", reward, " + ", self.alpha, "*", self.gamma, "*", self.max_val(next_state))
#print(action)
#print("----------------------------------")
self.alpha * reward + \
self.alpha * self.gamma * self.max_val(next_state)
# calculates the max qTable value for a given state
def max_val(self, state):
......@@ -54,18 +49,19 @@ class QLearning:
best_val = -100000
best_dir = "none"
for dir in poss_actions:
#print(dir, "-> ", end='')
if (state, dir) in self.qTable:
val = self.qTable[(state, dir)]
if val > best_val:
best_dir = dir
best_val = val
else:
if (state, dir) not in self.qTable:
self.qTable[(state, dir)] = 0
val = self.qTable[(state, dir)]
#print(dir, " -> ", val)
if val > best_val:
best_dir = dir
best_val = val
if best_val == -100000:
print("new state -> random")
# print("dfl, dfr, dfu, dfd, killl, killr, killu, killd")
# print(state.df_l, state.df_r, state.df_u, state.df_d)#, " ", state.kill_l, state.kill_r, state.kill_u, state.kill_d)
# print("Action choosen: ", best_dir)
# print(self.epsilon)
# print("-----------------------------------------------------")
return best_dir
# function to choose which direction to travel
......@@ -73,10 +69,8 @@ class QLearning:
poss_actions = self.possible_directions()
rand_action = random.choice(poss_actions)
best_direction = self.best_direction(state, poss_actions)
if best_direction == "none":
best_direction = rand_action
self.epsilon = self.epsilon - 0.0001
self.epsilon = self.epsilon - 0.00005
if random.random() > self.epsilon:
#print("agent chose ", best_direction)
return best_direction
......@@ -146,8 +140,6 @@ def learning(al, ga, ep, games):
# print("Epsilon = ", qagent.epsilon)
# #render()
# print("-------------------------------------------")
# qagent.print_table()
# print(qagent.qTable)
print("Score after learing: ", score)
return qagent, q_0, poss_dirs_0
......@@ -200,6 +192,7 @@ def testing(qtable, games, q_0, poss_dirs_0, rand=False):
print("Starting testing process!")
for i in range(max_games):
playerwin = play_game_testing(qtable, q_0, poss_dirs_0, rand)
q_0, poss_dirs_0 = reset()
print("Testing Game ", i+1, "/", max_games, " finished!")
if playerwin == 1:
score[0] += 1
......@@ -238,10 +231,10 @@ def load_qtable():
alpha = 0.1
gamma = 0.5
epsilon = 0.3
rounds = 600
epsilon = 0.6
rounds = 800
agent, q0, poss_dirs0 = learning(alpha, gamma, epsilon, rounds)
save_qtable(agent.qTable)
#save_qtable(agent.qTable)
#testing(agent, 50, q0, poss_dirs0, True)
#testing(agent, 50, q0, poss_dirs0)
......@@ -510,14 +510,12 @@ def step(action):
# print("Snake 1 wins")
# print(overall_score)
# print("Apples eaten: ", snake1.getscore())
reward += 100
if playerwin == 2 or (playerwin == 3 and snake2.getscore() > snake1.getscore()):
overall_score[1] += 1
# print("-------------------------------------------")
# print("Snake 2 wins")
# print(overall_score)
# print("Apples eaten: ", snake1.getscore())
reward -= 100
if playerwin == 3 and snake1.getscore() == snake2.getscore():
# print("Tie")
reward = 0
......@@ -576,8 +574,6 @@ def step(action):
playerwin = 2
if check_if_crossing(snake1.pos[0] / block_size, snake1.pos[1] / block_size):
if not init:
reward -= 1
agent_apples = []
for a, id in apples:
for apple in a:
......
......@@ -258,6 +258,7 @@ def calc_state_space(snake_agent, snake_enemy, apples):
return State(dfl, dfr, dfu, dfd, killl, killr, killu, killd)
# head = (5,6)
# enemy = (8,6)
# app = ((17,28), (20,9), (17,29))
......
......@@ -1075,10 +1075,10 @@ def game_loop():
while apple_count > len(a):
apple = gen_rand_apple(img)
a.add(apple)
for b, img in bombs:
while bomb_count > len(b):
bomb = gen_rand_apple(img)
b.add(bomb)
# for b, img in bombs:
# while bomb_count > len(b):
# bomb = gen_rand_apple(img)
# b.add(bomb)
# initialize shield
init_shield(map_filename)
......@@ -1199,15 +1199,15 @@ def game_loop():
if agent_control and not action_taken:
if check_if_crossing(snake2.pos[0] / block_size, snake2.pos[1] / block_size):
agent_apples = []
for a, id in apples:
for a, img in apples:
for apple in a:
if id == 0:
if img == purple_apple:
agent_apples.append((int(apple.pos[0] / block_size), int(apple.pos[1] / block_size)))
state = calc_state_space(snake2, snake1, agent_apples)
# print("l: ", state.df_l, " r: ", state.df_r, " u: ", state.df_u, " d: ", state.df_d)
# print("l: ", state.kill_l, " r: ", state.kill_r, " u: ", state.kill_u, " d: ", state.kill_d)
#print("l: ", state.df_l, " r: ", state.df_r, " u: ", state.df_u, " d: ", state.df_d)
#print("l: ", state.kill_l, " r: ", state.kill_r, " u: ", state.kill_u, " d: ", state.kill_d)
action = snake2.choose_best_action(qtable, state)
print(action)
#print(action)
snake2.key_event(action)
action_taken = True
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment