summaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
authorFlorian Jung <flo@windfisch.org>2016-01-08 18:09:36 +0100
committerFlorian Jung <flo@windfisch.org>2016-01-08 18:09:36 +0100
commitc77a636c82fcf5211beadab90371719f175ef954 (patch)
tree33668c37acfc367015d8f93e6e0d863558f07977 /doc
parent2483f393d9a3740b35606ca6acb6cb2df8ffdcd2 (diff)
more doc
Diffstat (limited to 'doc')
-rw-r--r--doc/array_naive_vs_episodic.gnuplot4
-rw-r--r--doc/array_naive_vs_episodic_deriv.gnuplot4
-rw-r--r--doc/doc.tex17
3 files changed, 22 insertions, 3 deletions
diff --git a/doc/array_naive_vs_episodic.gnuplot b/doc/array_naive_vs_episodic.gnuplot
index 3f7ea0e..ebb7408 100644
--- a/doc/array_naive_vs_episodic.gnuplot
+++ b/doc/array_naive_vs_episodic.gnuplot
@@ -1,6 +1,8 @@
set xrange [0: 1000]
plot 'logs/old.log' with lines,\
- 'logs/new.log' with lines
+ 'logs/new.log' with lines,\
+ 'logs/nn.log' using ($1,$2*20) with lines,\
+ 'logs/nn2.log' using ($1,$2*20) with lines
pause -1
diff --git a/doc/array_naive_vs_episodic_deriv.gnuplot b/doc/array_naive_vs_episodic_deriv.gnuplot
index 0900f2e..de66989 100644
--- a/doc/array_naive_vs_episodic_deriv.gnuplot
+++ b/doc/array_naive_vs_episodic_deriv.gnuplot
@@ -1,5 +1,7 @@
set xrange [0: 1000]
plot 'derivs/old.log' with lines,\
- 'derivs/new.log' with lines
+ 'derivs/new.log' with lines,\
+ 'derivs/nn.log' using ($1, $2*20) with lines,\
+ 'derivs/nn2.log' using ($1, $2*20) with lines
pause -1
diff --git a/doc/doc.tex b/doc/doc.tex
index 6eef2cb..71e5e1e 100644
--- a/doc/doc.tex
+++ b/doc/doc.tex
@@ -26,10 +26,25 @@ Using the \verb+QArray+ function approximator, different training strategies can
In the graphs, which show the total reward earned as a function of the episodes run, we see that the second ("new") approach converges to an equally good training result; however, it reaches good results faster.
+\begin{itemize}
+ \item new: second approach as above
+ \item old: first approach as above
+ \item nn2: neural network with a hidden layer of 50 neurons and a sigmoidal-symmetric-stepwise-linear activation function
+ \item nn: same as nn2, but with a friendliness of $1$ instead of $0.7$
+\end{itemize}
+
+Note that the nn and nn2 runs get a reward of only $0.5$ for reaching the goal, while the old and new runs get $10$. Therefore, the nn/nn2 results have been multiplied by $20$ to make them comparable.
+
\gnuplotloadfile[terminal=pdf]{array_naive_vs_episodic.gnuplot}
\gnuplotloadfile[terminal=pdf]{array_naive_vs_episodic_deriv.gnuplot}
-Code used: \verb+f9a9a51884aadef97b8952b2807541d31b7e9917+ for the "new" plots, and the same code with the line 254 (\verb+self.flush_learnbuffer() # TODO TRYME+) enabled for the "old" plots. Use \verb+sol.py+, or the \verb+test.sh+ script.
+Code used:
+\begin{itemize}
+ \item \verb+f9a9a51884aadef97b8952b2807541d31b7e9917+ for the "new" plots
+ \item the same code with the line 254 (\verb+self.flush_learnbuffer() # TODO TRYME+) enabled for the "old" plots
+ \item \verb+2483f393d9a3740b35606ca6acb6cb2df8ffdcd2+ for the nn/nn2 plots
+\end{itemize}
+Use \verb+sol.py+, or the \verb+test.sh+ script.
\end{document}