Make the number of episodes configurable (new --episodes option)

author: Florian Jung <flo@windfisch.org> 2016-01-07 00:09:20 +0100
committer: Florian Jung <flo@windfisch.org> 2016-01-07 00:09:20 +0100
commit: afaa4a050d97197170c3aba7908282010ad83593 (patch)
tree: a3bf9d9d53b93ae3253e06c233c024e010031a7c
parent: 5c22ac4f841d993a15ec3460a2945e328ee7f726 (diff)
1 files changed, 9 insertions, 2 deletions
diff --git a/sol.py b/sol.py
index 0dac104..84c533e 100644
--- a/sol.py
+++ b/sol.py
@@ -11,6 +11,7 @@ maze = [ [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 1, 2] ]
 
 start=(1,1)
+n_episodes = 1000000
 theta = 0.01
 gamma=0.9 # discount
 epsilon = 0.1
@@ -63,7 +64,9 @@ if arg['-h']:
           "                --epsilon NUM   # value for the epsilon-policy used in q-learning\n" +
           "                                # default: %f\n" % epsilon +
           "                --epsred NUM    # reduction of epsilon per episode\n" +
-          "                                # default: %f\n\n" % epsilon_reduction +
+          "                                # default: %f\n" % epsilon_reduction +
+          "                --episodes NUM  # maximum number of episodes\n"+
+          "                                # default: %i\n\n" % n_episodes +
           "                --qfunc TYPE    # type of the Q function's representation\n" +
           "                                  arr / array -> plain standard array\n" +
           "                                  nn          -> neural network representation\n" +
@@ -78,6 +81,9 @@ if arg['-h']:
 if arg['-q'] or arg['--quiet']:
     visu = False
 
+if arg['--episodes']:
+    n_episodes = int(arg['--episodes'])
+
 if arg['--theta']:
     theta = float(arg['--theta'])
 
@@ -268,7 +274,8 @@ else:
 i=0
 stopstate = -1
 total_reward = 0.
-for i in range(1000000):
+
+for i in range(n_episodes):
     s = start
     maxdiff=0.
     for j in range(100):
author	Florian Jung <flo@windfisch.org>	2016-01-07 00:09:20 +0100
committer	Florian Jung <flo@windfisch.org>	2016-01-07 00:09:20 +0100
commit	afaa4a050d97197170c3aba7908282010ad83593 (patch)
tree	a3bf9d9d53b93ae3253e06c233c024e010031a7c
parent	5c22ac4f841d993a15ec3460a2945e328ee7f726 (diff)