-rw-r--r--	sol.py | 54 ++++++++++++++++++++++++++++--------------------------
1 file changed, 28 insertions(+), 26 deletions(-)
diff --git a/sol.py b/sol.py
--- a/sol.py
+++ b/sol.py
@@ -2,7 +2,10 @@
 import random
 from itertools import product
 from collections import defaultdict
 from sys import argv
-from fann2 import libfann
+import keras
+from keras.models import Sequential
+from keras.layers.core import Dense, Dropout, Activation
+import numpy as np
 maze = [ [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 1, 0, 0, 0, 0],
@@ -288,30 +291,27 @@ class QNN (QCommon):
 		num_output = 4
 		learning_rate = 0.7
 
-		self.NN = libfann.neural_net()
-		#self.NN.set_training_algorithm(libfann.TRAIN_BATCH)
-		#self.NN.set_training_algorithm(libfann.TRAIN_RPROP)
-		#self.NN.set_training_algorithm(libfann.TRAIN_QUICKPROP)
-		self.NN.create_sparse_array(connection_rate, (num_input,)+hidden+(num_output,))
-		self.NN.randomize_weights(-1,1)
-		self.NN.set_learning_rate(learning_rate)
-		self.NN.set_activation_function_hidden(libfann.SIGMOID_SYMMETRIC_STEPWISE)
-		self.NN.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)
-		#self.NN.set_activation_function_output(libfann.LINEAR)
+		self.NN = keras.models.Sequential()
+		self.NN.add(keras.layers.core.Dense(50, input_dim=num_input, init='glorot_normal', activation='tanh'))
+		self.NN.add(keras.layers.core.Dense(4, init='glorot_normal', activation='tanh'))
+
+		print("compiling model")
+		self.NN.compile(optimizer='sgd', loss='mse')
+		print("done")
 
 	def eval(self,x,y = None):
 		#print ("eval "+str(x)+", "+str(y))
 		if y==None:
 			x,y = x
-		#print ("self.NN.run("+str([x/7.,y/5.])+")")
-		return self.NN.run([x/7.,y/5.])
+		return self.NN.predict(np.array([[x/7.,y/5.]]))[0].tolist()
 
 	def change(self, s, action, diff):
-		oldval = self.eval(s)
-		newval = list(oldval) # copy list
-		newval[action] += diff
+		UNSUPPORTED
+		#oldval = self.eval(s)
+		#newval = list(oldval) # copy list
+		#newval[action] += diff
 
-		self.NN.train([s[0]/7.,s[1]/5.], newval)
+		#self.NN.train([s[0]/7.,s[1]/5.], newval)
 
 	# learn a transition "from oldstate by action into newstate with reward `reward`"
 	# this does not necessarily mean that the action is instantly trained into the function
@@ -319,12 +319,13 @@ class QNN (QCommon):
 	def learn(self, oldstate, action, newstate, reward):
 		diff,_ = self.value_update(oldstate,action,newstate,reward)
 		if self.dumbtraining == True:
-			self.change(oldstate,action,diff)
+			#self.change(oldstate,action,diff)
+			UNSUPPORTED
 		else:
 			self.learnbuffer += [(oldstate, action, newstate, reward)]
 			self.learnbuffer = self.learnbuffer[-20000:]
 
-			self.train_on_minibatch()
+			#self.train_on_minibatch()
 
 	def train_on_minibatch(self):
 		n = min(300, len(self.learnbuffer))
@@ -341,15 +342,16 @@ class QNN (QCommon):
 
 		#print("training minibatch of size %i:\n%s\n%s\n\n"%(n, str(inputs), str(outputs)))
 
-		training_data = libfann.training_data()
-		training_data.set_train_data(inputs, outputs)
-		self.NN.train_epoch(training_data)
-		#self.NN.train_on_data(training_data, 5, 0, 0)
-		#print(".")
+		self.NN.fit(np.array(inputs), np.array(outputs), batch_size=n, validation_split=0., verbose=0, nb_epoch=25)
+
 	# must be called on every end-of-episode. might trigger batch-training or whatever.
-	#def episode(self):
-	#	pass
+	def episode(self):
+		self.train_on_minibatch()
+		self.train_on_minibatch()
+		self.train_on_minibatch()
+		self.train_on_minibatch()
+		return 42
 
 a = World(maze, start)
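For context, a minimal self-contained sketch (not part of this commit) of how the Keras pieces introduced above fit together: a 2-50-4 tanh network that maps a maze position, normalized to the 7x5 grid, onto one Q-value per action, and is fitted on minibatches with SGD and a mean-squared-error loss. It assumes the Keras 1.x API the commit targets (init=, nb_epoch=); newer Keras releases spell these kernel_initializer= and epochs=. The helper names q_values and train_minibatch are only for illustration.

import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense

num_input, num_output = 2, 4   # (x, y) state in, one Q-value per action out

# 2-50-4 tanh network, as built in QNN.__init__ above
model = Sequential()
model.add(Dense(50, input_dim=num_input, init='glorot_normal', activation='tanh'))
model.add(Dense(num_output, init='glorot_normal', activation='tanh'))
model.compile(optimizer='sgd', loss='mse')

def q_values(x, y):
    # Q-values for one state, with the same 7x5 normalization as eval()
    return model.predict(np.array([[x/7., y/5.]]))[0].tolist()

def train_minibatch(states, targets):
    # one full-batch update over the sampled transitions, as in train_on_minibatch()
    model.fit(np.array(states), np.array(targets),
              batch_size=len(states), nb_epoch=25, verbose=0)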