From c383083c3e41ae252f8e66d0b13795c5536b7b01 Mon Sep 17 00:00:00 2001
From: Florian Jung <flo@windfisch.org>
Date: Sun, 30 Aug 2015 21:08:57 +0200
Subject: analyze_{virus_sizes,remerge}() functions

---
 analyze.py |  5 ++++-
 stats.py   | 22 ++++++++++++++++++++--
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/analyze.py b/analyze.py
index 0477583..a0537f7 100644
--- a/analyze.py
+++ b/analyze.py
@@ -11,7 +11,7 @@ s = Stats.load(files[0])
 for f in files[1:]:
     s.merge(f)
 
-s.analyze_speed()
+#s.analyze_speed()
 print("\n" + "-"*40 + "\n")
 #s.analyze_visible_window(True)
 for i in ["split cell", "ejected mass", "virus"]:
@@ -19,3 +19,6 @@ for i in ["split cell", "ejected mass", "virus"]:
 print("")
 for i in ["split cell", "ejected mass", "virus"]:
     s.analyze_distances(i)
+
+s.analyze_virus_sizes()
+s.analyze_remerge()
diff --git a/stats.py b/stats.py
index 4827d8a..42ee0ea 100644
--- a/stats.py
+++ b/stats.py
@@ -388,13 +388,17 @@ class Stats:
 
     def analyze_distances(self, celltype):
         ds = [v[0] for v in self.data.eject_distlogs[celltype]]
+        ns = [v[4] for v in self.data.eject_distlogs[celltype]]
 
         try:
             mean, stddev = fit_gaussian(ds)
+            meann, stddevn = fit_gaussian(ns)
         except:
             mean, stddev = "???", "???"
+            meann, stddevn = "???", "???"
 
-        print(celltype+" eject/split distances: mean = "+str(mean)+", stddev="+str(stddev)+", ndata="+str(len(ds)))
+        print(celltype+" eject/split distances: mean  = "+str(mean) +", stddev  ="+str(stddev) +", ndata="+str(len(ds)))
+        print(celltype+"                        meann = "+str(meann)+", stddevn ="+str(stddevn))
         
         #a,b = numpy.histogram(ds, bins=100)
         #midpoints = list(map(lambda x : (x[0]+x[1])/2, zip(b, b[1:])))
@@ -408,5 +412,19 @@ class Stats:
         #quant = quantile(list(map(lambda v : abs(v-midpoints[maxidx]), ds)), q/100)
         #print("\t"+str(q)+"% of values lie have a distance of at most "+str(quant)+" from the maximum")
         
-        print("\t75%% of the values lie in the interval %.2f plusminus %.2f" % find_smallest_q_confidence_area(ds, 0.75))
+        print("\t75%% of the distances lie in the interval %.2f plusminus %.2f" % find_smallest_q_confidence_area(ds, 0.75))
+        print("\t75%% of the flight lengths lie in the interval %.2f plusminus %.2f" % find_smallest_q_confidence_area(ns, 0.75))
         print("")
+
+    def analyze_virus_sizes(self):  # print one row per entry of self.data.observed_virus_sizes
+        print("\nI've seen the following %d virus sizes:" % len(self.data.observed_virus_sizes))  # header line with the number of entries
+        for size, ndata in sorted(self.data.observed_virus_sizes.items(), key=lambda x:x[0]):  # ascending by the first item element (the size)
+            print("\t%4d: %7d times" % (size, ndata))  # ndata is the value stored under that size
+
+    def analyze_remerge(self):
+        """Print fit_gaussian() of remerge durations, then of wait times, for parent/child entries only."""
+        pairs = [r for r in self.data.remerging.values() if r.is_parent_child]
+        print(fit_gaussian([r.end_time - r.begin_time for r in pairs]))
+        # wait time = begin_time minus the later of the two birth timestamps
+        print(fit_gaussian([r.begin_time - max(r.birth1, r.birth2) for r in pairs]))
+
-- 
cgit v1.2.3