diff options
author | Florian Jung <flo@windfisch.org> | 2016-01-07 00:09:20 +0100 |
---|---|---|
committer | Florian Jung <flo@windfisch.org> | 2016-01-07 00:09:20 +0100 |
commit | afaa4a050d97197170c3aba7908282010ad83593 (patch) | |
tree | a3bf9d9d53b93ae3253e06c233c024e010031a7c | |
parent | 5c22ac4f841d993a15ec3460a2945e328ee7f726 (diff) |
configurable number of episodes
-rw-r--r-- | sol.py | 11 |
1 files changed, 9 insertions, 2 deletions
```diff
@@ -11,6 +11,7 @@ maze = [
  [0, 0, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 0, 1, 2] ]
 start=(1,1)
+n_episodes = 1000000
 theta = 0.01
 gamma=0.9 # discount
 epsilon = 0.1
@@ -63,7 +64,9 @@ if arg['-h']:
  " --epsilon NUM # value for the epsilon-policy used in q-learning\n" +
  " # default: %f\n" % epsilon +
  " --epsred NUM # reduction of epsilon per episode\n" +
- " # default: %f\n\n" % epsilon_reduction +
+ " # default: %f\n" % epsilon_reduction +
+ " --episodes NUM # maximum number of episodes\n"+
+ " # default: %i\n\n" % n_episodes +
  " --qfunc TYPE # type of the Q function's representation\n" +
  " arr / array -> plain standard array\n" +
  " nn -> neural network representation\n" +
@@ -78,6 +81,9 @@ if arg['-h']:
 if arg['-q'] or arg['--quiet']:
     visu = False
 
+if arg['--episodes']:
+    n_episodes = int(arg['--episodes'])
+
 if arg['--theta']:
     theta = float(arg['--theta'])
 
@@ -268,7 +274,8 @@ else:
 i=0
 stopstate = -1
 total_reward = 0.
-for i in range(1000000):
+
+for i in range(n_episodes):
     s = start
     maxdiff=0.
     for j in range(100):
```