diff options
author | Florian Jung <flo@windfisch.org> | 2016-01-06 17:13:23 +0100 |
---|---|---|
committer | Florian Jung <flo@windfisch.org> | 2016-01-06 17:13:23 +0100 |
commit | 5c22ac4f841d993a15ec3460a2945e328ee7f726 (patch) | |
tree | ee9f52a170d3f9088c35482fdc284aedaa5f364a | |
parent | 7fd1cc0e18ba0635c176f8bb912da17f5014c27c (diff) |
episode no-op
-rw-r--r-- | sol.py | 8 |
1 file changed, 8 insertions, 0 deletions
@@ -212,14 +212,18 @@ class QArray: def __init__(self): self.Q = [ [ [0. for k in range(4)] for i in range(a.xlen) ] for j in range(a.ylen) ] + # calculates Q(x,y) def eval(self,x,y = None): if y==None: x,y = x return self.Q[y][x] + def change(self, s, action, diff): self.Q[s[1]][s[0]][action] += diff + def episode(self): + pass # implements the Q function not through an array, but through a neuronal network instead. class QNN: @@ -250,6 +254,9 @@ class QNN: self.NN.train(list(s), [x/10. for x in newval]) + def episode(self): + pass + a = World(maze, start) Q = None @@ -283,6 +290,7 @@ for i in range(1000000): s = ss if a.is_final(ss): break + Q.episode() if (i % (frameskip+1) == 0): print("iteration %.3d, alpha=%.3e, epsilon=%.3e maxdiff=%.7f"%(i,alpha,epsilon,maxdiff)) |