Commit 6892e5b4 authored by Alexander Palmisano's avatar Alexander Palmisano
Browse files

Added more ways for the game to end (step limit and repeated-action limit).

parent 49860496
......@@ -19,18 +19,19 @@ from tf_agents.replay_buffers import reverb_replay_buffer, reverb_utils
from tf_agents.trajectories import trajectory
from tf_agents.specs import tensor_spec
from tf_agents.utils import common
from tf_agents.networks.q_network import QNetwork
# DQN training hyperparameters (@param tags kept for notebook forms).
# The diff left the pre-change values (20000 / 1e-3 / 10) as dead duplicate
# assignments; only the updated values below are kept.
num_iterations = 50000  # @param {type:"integer"}
initial_collect_steps = 100  # @param {type:"integer"}
collect_steps_per_iteration = 1  # @param {type:"integer"}
replay_buffer_max_length = 100000  # @param {type:"integer"}
batch_size = 64  # @param {type:"integer"}
learning_rate = 1e-5  # @param {type:"number"}
log_interval = 200  # @param {type:"integer"}
num_eval_episodes = 5  # @param {type:"integer"}
eval_interval = 1000  # @param {type:"integer"}
env = SnakeEnv()
......@@ -164,6 +165,7 @@ agent.train_step_counter.assign(0)
# Baseline: average return of the (still untrained) policy, recorded as the
# first entry of the returns history before training begins.
avg_return = compute_avg_return(eval_tf_env, agent.policy, num_eval_episodes)
# print(avg_return)
returns = [avg_return]
# returns = []
# Reset the environment.
time_step = train_py_env.reset()
......
......@@ -8,6 +8,6 @@ env.reset()
# Play one episode with uniformly random actions, rendering before each step.
# The diff left the stale 3-value unpack (`state, reward, done = ...`) in
# place; it would raise ValueError against the 4-tuple now returned by
# env.step, so only the 4-value form is kept.
done = False
while not done:
    print(env.render())
    # env.step returns (state, direction flags, reward, done).
    state, directions, reward, done = env.step(random.randrange(4))
    print(reward, done)
print(env.render())
\ No newline at end of file
......@@ -18,8 +18,8 @@ class SnakeEnv(py_environment.PyEnvironment):
# NOTE(review): interior of SnakeEnv.__init__; the diff interleaved the old
# 30*30 spec/state lines with the new 30*30+4 ones, leaving an orphan
# `shape=(...)` kwarg line that is syntactically invalid. Only the
# post-change versions are kept.
# Action: one of 4 directions (0..3).
self._action_spec = array_spec.BoundedArraySpec(
    shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
# Observation: flattened 30x30 board plus 4 appended direction flags.
self._observation_spec = array_spec.BoundedArraySpec(
    shape=(30*30+4,), dtype=np.int32, minimum=0, maximum=255,
    name='observation')
self._state = np.zeros((30*30+4,), dtype=np.int32)
self._episode_ended = False
self._current_time_step = None
init_game()
......@@ -35,7 +35,7 @@ class SnakeEnv(py_environment.PyEnvironment):
def _reset(self):
    """Reset the underlying game and return the initial restart TimeStep.

    The diff left the stale pre-change zero-state line
    (``np.zeros((30*30,))``) immediately shadowed by the 30*30+4 version;
    only the latter is kept.
    """
    reset()
    # 30*30 board cells plus 4 direction flags (appended by _step).
    self._state = np.zeros((30*30+4,), dtype=np.int32)
    self._episode_ended = False
    self._current_time_step = ts.restart(self._state)
    return self._current_time_step
......@@ -44,8 +44,9 @@ class SnakeEnv(py_environment.PyEnvironment):
if self._episode_ended:
return self._reset()
state, reward, done = step(action)
state, directions, reward, done = step(action)
self._state = state.flatten().reshape(30*30,)
self._state = np.append(state, directions)
if done:
self._episode_ended = True
self._current_time_step = ts.termination(self._state, reward)
......
......@@ -52,7 +52,9 @@ apples = [(set([]), 0), (set([]), 1)]
# Cumulative score per player across rounds.
overall_score = [0, 0]
# Snake objects; presumably created/linked by init_game — TODO confirm.
snake1, snake2 = None, None
# Counter for repeated impossible direction choices (used by the old
# step() logic; may be superseded by same_action_count — verify).
count_wrong_dir = 0
# Steps taken in the current episode (episode ends at a step limit).
steps = 0
# Last action chosen; compared against the next to detect repeats.
last_action = None
# How many times the same action was chosen in a row.
same_action_count = 0
# Winner flag; 0 means the game is still ongoing.
playerwin = 0
gameExit = False
rounds = 0
......@@ -481,7 +483,7 @@ def init_game(max_steps=15, length=15, filename=False):
snake2.set_enemy_norm_snake(snake1)
# snake needs to be on crossing for shield
# but step function thinks it needs action on first crossing
state, _, _ = step(-1)
state, _, _, _ = step(-1)
return state
# translate action from int to string
......@@ -506,36 +508,36 @@ def translate_action(action):
# step function
# inputs action and steps snake to next crossing
# game is also lost if 100 times a wrong action is chosen at the same crossing
# lose game if the same action is performed 50 times in a row
# reward: +10 if apple eaten
#         +100 if game won
#         -100 if game lost
#         -50 if tied
#         -20 if action is not possible
# returns state, reward and if game is over
def step(action):
global snake1, snake2, count_wrong_dir
game_exit = False
global snake1, snake2
global steps, last_action, same_action_count
steps += 1
playerwin = 0
action_done = False
reward = 0
action = translate_action(action)
same_action_count += 1 if last_action == action else 0
last_action = action
if steps == 1000 or same_action_count == 50:
reward = -100
return get_state(), [snake1.get_surround(True)], reward, True
if action != "init":
left, right, up, down = snake1.get_surround(True)
if action == "left" and not left or action == "right" and not right \
or action == "up" and not up or action == "down" and not down:
count_wrong_dir += 1
if count_wrong_dir < 100:
reward = -5
return get_state(), reward, False
else:
reward = -100
return get_state(), reward, True
else:
count_wrong_dir = 0
while not game_exit:
reward = -20
return get_state(), [snake1.get_surround(True)], reward, False
while True:
# if win screen
if playerwin != 0:
if playerwin == 1 or (playerwin == 3 and snake1.getscore() > snake2.getscore()):
......@@ -558,7 +560,7 @@ def step(action):
# left, right, up, down = snake1.get_surround(True)
# poss_dirs = (left, right, up, down)
return get_state(), reward, True
return get_state(), [snake1.get_surround(True)], reward, True
# agent controlled snake
if not action_done:
......@@ -595,7 +597,7 @@ def step(action):
playerwin = snake_crash([snake1, snake2]) if playerwin == 0 else playerwin
if snake1.eat():
reward += 5
reward += 10
snake2.eat()
if snake1.getscore() >= apple_win_count:
......@@ -606,10 +608,7 @@ def step(action):
if check_if_crossing(snake1.pos[0] / block_size, snake1.pos[1] / block_size):
# left, right, up, down = snake1.get_surround(True)
# poss_dirs = (left, right, up, down)
return get_state(), reward, False
exit_game()
return get_state(), [snake1.get_surround(True)], reward, False
def get_state():
global snake1, snake2
......@@ -672,6 +671,7 @@ def reset(length=15):
global snake1, snake2
global gameExit
global playerwin
global steps, same_action_count, last_action
for a, id in apples:
a.clear()
while apple_count > len(a):
......@@ -688,11 +688,8 @@ def reset(length=15):
snake2.set_enemy_norm_snake(snake1)
gameExit = False
playerwin = 0
state, _, _ = step(-1)
return state
# exit game
def exit_game():
quit()
steps = 0
same_action_count = 0
last_action = None
state, _, _, _= step(-1)
return state
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment