summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Jung <flo@windfisch.org>2016-01-06 17:13:23 +0100
committerFlorian Jung <flo@windfisch.org>2016-01-06 17:13:23 +0100
commit5c22ac4f841d993a15ec3460a2945e328ee7f726 (patch)
treeee9f52a170d3f9088c35482fdc284aedaa5f364a
parent7fd1cc0e18ba0635c176f8bb912da17f5014c27c (diff)
episode no-op
-rw-r--r--sol.py8
1 files changed, 8 insertions, 0 deletions
diff --git a/sol.py b/sol.py
index 20e9fa9..0dac104 100644
--- a/sol.py
+++ b/sol.py
@@ -212,14 +212,18 @@ class QArray:
def __init__(self):
self.Q = [ [ [0. for k in range(4)] for i in range(a.xlen) ] for j in range(a.ylen) ]
+ # looks up the stored Q entry for state (x,y); the state may also be passed as a single (x,y) pair
def eval(self,x,y = None):
if y==None: x,y = x
return self.Q[y][x]
+
def change(self, s, action, diff):
self.Q[s[1]][s[0]][action] += diff
+ def episode(self):
+ pass
# implements the Q function not through an array, but through a neural network instead.
class QNN:
@@ -250,6 +254,9 @@ class QNN:
self.NN.train(list(s), [x/10. for x in newval])
+ def episode(self):
+ pass
+
a = World(maze, start)
Q = None
@@ -283,6 +290,7 @@ for i in range(1000000):
s = ss
if a.is_final(ss):
break
+ Q.episode()
if (i % (frameskip+1) == 0):
print("iteration %.3d, alpha=%.3e, epsilon=%.3e maxdiff=%.7f"%(i,alpha,epsilon,maxdiff))