From afaa4a050d97197170c3aba7908282010ad83593 Mon Sep 17 00:00:00 2001
From: Florian Jung
Date: Thu, 7 Jan 2016 00:09:20 +0100
Subject: configurable number of episodes

---
[Note: line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; verify whitespace against the
repository before applying.]

 sol.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sol.py b/sol.py
index 0dac104..84c533e 100644
--- a/sol.py
+++ b/sol.py
@@ -11,6 +11,7 @@ maze = [
     [0, 0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 1, 2] ]
 start=(1,1)
+n_episodes = 1000000
 theta = 0.01
 gamma=0.9 # discount
 epsilon = 0.1
@@ -63,7 +64,9 @@ if arg['-h']:
         " --epsilon NUM # value for the epsilon-policy used in q-learning\n" +
         " # default: %f\n" % epsilon +
         " --epsred NUM # reduction of epsilon per episode\n" +
-        " # default: %f\n\n" % epsilon_reduction +
+        " # default: %f\n" % epsilon_reduction +
+        " --episodes NUM # maximum number of episodes\n"+
+        " # default: %i\n\n" % n_episodes +
         " --qfunc TYPE # type of the Q function's representation\n" +
         " arr / array -> plain standard array\n" +
         " nn -> neural network representation\n" +
@@ -78,6 +81,9 @@ if arg['-h']:
 if arg['-q'] or arg['--quiet']:
     visu = False
 
+if arg['--episodes']:
+    n_episodes = int(arg['--episodes'])
+
 if arg['--theta']:
     theta = float(arg['--theta'])
 
@@ -268,7 +274,8 @@ else:
 i=0
 stopstate = -1
 total_reward = 0.
-for i in range(1000000):
+
+for i in range(n_episodes):
     s = start
     maxdiff=0.
     for j in range(100):
-- 
cgit v1.2.3