From c77a636c82fcf5211beadab90371719f175ef954 Mon Sep 17 00:00:00 2001 From: Florian Jung Date: Fri, 8 Jan 2016 18:09:36 +0100 Subject: more doc --- doc/doc.tex | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'doc/doc.tex') diff --git a/doc/doc.tex b/doc/doc.tex index 6eef2cb..71e5e1e 100644 --- a/doc/doc.tex +++ b/doc/doc.tex @@ -26,10 +26,25 @@ Using the \verb+QArray+ function approximator, different training strategies can In the graphs, which show the total reward earned as a function of the episodes ran, we see, that the second ("new") approach converges to an equally good training result, however it gets good faster. +\begin{itemize} + \item new: second approach as above + \item old: first approach as above + \item nn2: neural network with a hidden layer with 50 neurons and sigmoidal-symmetric-stepwise-linear activation function + \item nn: same as nn2, but with a friendlyness of $1$ instead of $0.7$ +\end{itemize} + +Note that the nn and nn2 runs get a reward of only $0.5$ for reaching the goal, while the old and new runs get $10$. Therefore, the nn/nn2 results have been multiplied by $20$ to make them comparable. + \gnuplotloadfile[terminal=pdf]{array_naive_vs_episodic.gnuplot} \gnuplotloadfile[terminal=pdf]{array_naive_vs_episodic_deriv.gnuplot} -Code used: \verb+f9a9a51884aadef97b8952b2807541d31b7e9917+ for the "new" plots, and the same code with the line 254 (\verb+self.flush_learnbuffer() # TODO TRYME+) enabled for the "old" plots. Use \verb+sol.py+, or the \verb+test.sh+ script. +Code used: +\begin{itemize} + \item \verb+f9a9a51884aadef97b8952b2807541d31b7e9917+ for the "new" plots + \item the same code with the line 254 (\verb+self.flush_learnbuffer() # TODO TRYME+) enabled for the "old" plots + \item \verb+2483f393d9a3740b35606ca6acb6cb2df8ffdcd2+ for the nn/nn2 plots +\end{itemize} +Use \verb+sol.py+, or the \verb+test.sh+ script. \end{document} -- cgit v1.2.1