 sol.py | 54 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 26 deletions(-)
diff --git a/sol.py b/sol.py
index c4d8458..ac3c35c 100644
--- a/sol.py
+++ b/sol.py
@@ -2,7 +2,10 @@ import random
from itertools import product
from collections import defaultdict
from sys import argv
-from fann2 import libfann
+import keras
+from keras.models import Sequential
+from keras.layers.core import Dense, Dropout, Activation
+import numpy as np
maze = [ [0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
@@ -288,30 +291,27 @@ class QNN (QCommon):
num_output = 4
learning_rate = 0.7
- self.NN = libfann.neural_net()
- #self.NN.set_training_algorithm(libfann.TRAIN_BATCH)
- #self.NN.set_training_algorithm(libfann.TRAIN_RPROP)
- #self.NN.set_training_algorithm(libfann.TRAIN_QUICKPROP)
- self.NN.create_sparse_array(connection_rate, (num_input,)+hidden+(num_output,))
- self.NN.randomize_weights(-1,1)
- self.NN.set_learning_rate(learning_rate)
- self.NN.set_activation_function_hidden(libfann.SIGMOID_SYMMETRIC_STEPWISE)
- self.NN.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)
- #self.NN.set_activation_function_output(libfann.LINEAR)
+ self.NN = keras.models.Sequential()
+ self.NN.add(keras.layers.core.Dense(50, input_dim=num_input, init='glorot_normal', activation='tanh'))
+ self.NN.add(keras.layers.core.Dense(4, init='glorot_normal', activation='tanh'))
+
+ print("compiling model")
+ self.NN.compile(optimizer='sgd', loss='mse')
+ print("done")
def eval(self,x,y = None):
#print ("eval "+str(x)+", "+str(y))
if y==None: x,y = x
- #print ("self.NN.run("+str([x/7.,y/5.])+")")
- return self.NN.run([x/7.,y/5.])
+ return self.NN.predict(np.array([[x/7.,y/5.]]))[0].tolist()
def change(self, s, action, diff):
- oldval = self.eval(s)
- newval = list(oldval) # copy list
- newval[action] += diff
+ raise NotImplementedError("change() is not supported by the Keras-based QNN")
+ #oldval = self.eval(s)
+ #newval = list(oldval) # copy list
+ #newval[action] += diff
- self.NN.train([s[0]/7.,s[1]/5.], newval)
+ #self.NN.train([s[0]/7.,s[1]/5.], newval)
# learn a transition "from oldstate by action into newstate with reward `reward`"
# this does not necessarily mean that the action is instantly trained into the function
@@ -319,12 +319,13 @@ class QNN (QCommon):
def learn(self, oldstate, action, newstate, reward):
diff,_ = self.value_update(oldstate,action,newstate,reward)
if self.dumbtraining == True:
- self.change(oldstate,action,diff)
+ #self.change(oldstate,action,diff)
+ raise NotImplementedError("dumbtraining is not supported by the Keras-based QNN")
else:
self.learnbuffer += [(oldstate, action, newstate, reward)]
self.learnbuffer = self.learnbuffer[-20000:]
- self.train_on_minibatch()
+ #self.train_on_minibatch()
def train_on_minibatch(self):
n = min(300, len(self.learnbuffer))
@@ -341,15 +342,16 @@ class QNN (QCommon):
#print("training minibatch of size %i:\n%s\n%s\n\n"%(n, str(inputs), str(outputs)))
- training_data = libfann.training_data()
- training_data.set_train_data(inputs, outputs)
- self.NN.train_epoch(training_data)
- #self.NN.train_on_data(training_data, 5, 0, 0)
- #print(".")
+ self.NN.fit(np.array(inputs), np.array(outputs), batch_size=n, validation_split=0., verbose=0, nb_epoch=25)
+
# must be called on every end-of-episode. might trigger batch-training or whatever.
- #def episode(self):
- # pass
+ def episode(self):
+ self.train_on_minibatch()
+ self.train_on_minibatch()
+ self.train_on_minibatch()
+ self.train_on_minibatch()
+ return 42
a = World(maze, start)
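
The hunks above replace the libfann network with a two-layer Keras model, but the change is spread across several methods. Below is a minimal, self-contained sketch of that Q-network and of the prediction path used by eval(), written against the current Keras argument names (kernel_initializer=, epochs=) rather than the Keras 1.x spellings (init=, nb_epoch=) used in the commit. The 50-unit tanh hidden layer, the sgd/mse compile settings and the (x/7, y/5) state normalisation are taken from the diff; the tanh output keeps the same symmetric-sigmoid range as the old SIGMOID_SYMMETRIC_STEPWISE output layer. Everything else is illustrative.

# Minimal sketch of the Q-network introduced above (current Keras API).
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

num_input, num_output = 2, 4          # (x, y) in, one Q-value per action out

model = Sequential()
model.add(Dense(50, input_dim=num_input,
                kernel_initializer='glorot_normal', activation='tanh'))
model.add(Dense(num_output,
                kernel_initializer='glorot_normal', activation='tanh'))
model.compile(optimizer='sgd', loss='mse')

def q_values(x, y):
    # mirrors eval(): scale the maze coordinates into [0, 1] and predict
    state = np.array([[x / 7., y / 5.]])
    return model.predict(state, verbose=0)[0].tolist()

print(q_values(3, 2))                 # four Q-values for cell (3, 2)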
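
The training side of the change swaps libfann's training_data/train_epoch pair for Sequential.fit over minibatches drawn from a replay buffer, and moves the actual fitting from learn() into episode(). The sketch below reconstructs that loop under stated assumptions: the 20000-transition cap, the 300-sample minibatch, the 25 epochs per fit() call and the four fits per episode come from the diff, while build_target is a hypothetical stand-in for the target vectors that value_update()/eval() compute elsewhere in sol.py.

# Replay-buffer training loop replacing libfann's train_epoch() (sketch).
import random
import numpy as np

learnbuffer = []                       # (oldstate, action, newstate, reward)

def learn(oldstate, action, newstate, reward):
    learnbuffer.append((oldstate, action, newstate, reward))
    del learnbuffer[:-20000]           # keep only the newest 20000 transitions

def train_on_minibatch(model, build_target):
    n = min(300, len(learnbuffer))
    if n == 0:
        return
    batch = random.sample(learnbuffer, n)
    inputs  = [[s[0] / 7., s[1] / 5.] for (s, a, s2, r) in batch]
    outputs = [build_target(s, a, s2, r) for (s, a, s2, r) in batch]
    # one fit() call replaces training_data.set_train_data() + train_epoch()
    model.fit(np.array(inputs), np.array(outputs),
              batch_size=n, epochs=25, verbose=0)

def episode(model, build_target):
    # called at every end of episode, as the new episode() method does
    for _ in range(4):
        train_on_minibatch(model, build_target)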