author     Florian Jung <flo@windfisch.org>    2015-09-01 20:47:11 +0200
committer  Florian Jung <flo@windfisch.org>    2015-09-01 20:48:03 +0200
commit     7a836f4a0a68188a1486b669c4cd437b5f592a5d (patch)
tree       666d7867967e8d6fdc04aca6b18d8a95a077f33c /stats.py
parent     f99e10cff97afdc3e6ef07db22cf5f7fd442e067 (diff)
parent     7c1180a7b58e7b8c17c8dab297058d0c001386c6 (diff)
Merge branch 'master' into pathfinding
Diffstat (limited to 'stats.py')
-rw-r--r--    stats.py    479
1 file changed, 459 insertions(+), 20 deletions(-)
@@ -1,36 +1,475 @@
 import time
+import math
+import random
+from collections import defaultdict
+import pickle
+from functools import reduce
+import mechanics
+import geometry
+#import numpy
+
+def fit_gaussian(l):
+    mean = sum(l) / len(l)
+    stddev = math.sqrt(sum(map(lambda v : (v-mean)**2, l)) / len(l))
+    return mean, stddev
+
+def flatten(l):
+    return reduce(lambda a,b:a+b, l)
+
+def quantile(values, q):
+    if isinstance(values, dict):
+        return quantile(flatten(map(lambda x : [x[0]]*x[1], sorted(values.items(),key=lambda x:x[0]))), q)
+    else:
+        try:
+            return sorted(values)[ int(len(values)*q) ]
+        except:
+            return 0
+
+def find_smallest_q_confidence_area(values, q):
+    """Calculates the (mid, delta) with the smallest delta, such that at least q * len(values)
+    lie within the interval [mid-delta, mid+delta]."""
+    try:
+        mid = min(values, key = lambda value : quantile(list(map(lambda x : abs(x-value), values)), q))
+        deviation = quantile(list(map(lambda x : abs(x-mid), values)),q)
+        #print(list(map(lambda x : abs(x-mid), values)))
+        return mid,deviation
+    except:
+        return 0,0
+
+def get_delta_confidence(values, mid, delta):
+    #"""Calculates which fraction of the values lie within [mid-delta, mid+delta]"""
+    #try:
+    return len(list(filter(lambda v : (mid-delta <= v and v <= mid+delta), values))) / len(values)
+    #except:
+    #    raise
+    #    return 0
+
+def avg(values):
+    if not isinstance(values, dict):
+        return sum(values)/len(values)
+    else:
+        return int(sum(map( lambda x : x[0]*x[1], values.items() )) / sum(map(lambda x : x[1], values.items())))
+
+def stddev(values):
+    a=avg(values)
+    return avg(list(map(lambda v : (v-a)**2, values)))
+
+def normalize(values):
+    a=avg(values)
+    return [x/a for x in values]
+
+class StatData():
+    pass
+
+def return_empty_list():
+    return []
+
+def return_defaultdict_with_empty_list():
+    return defaultdict(return_empty_list)
+
+def return_zero():
+    return 0
+
+def return_defaultdict_with_zeros():
+    return defaultdict(return_zero)
+
+class ReMerging:
+    def __init__(self, size1, size2, birth1, birth2, is_parent_child, begin_time):
+        self.size1 = size1
+        self.size2 = size2
+        self.birth1 = birth1
+        self.birth2 = birth2
+        self.is_parent_child = is_parent_child
+        self.begin_time = begin_time
+        self.end_time = None
 
 class Stats:
-    def __init__(self):
-        self.min_mass = 0
-        self.max_mass = 0
-        self.current_mass = 0
+    def __init__(self,c,data=None):
+        self.c = c
+
+        self.countdown = 27*20
+
+        if data == None:
+            self.data = StatData()
+            self.data.version = 4
+
+            self.data.min_mass = 0
+            self.data.max_mass = 0
+            self.data.current_mass = 0
+
+            self.data.mass_history = []
+            self.data.pos_history = []
+            self.data.cell_aggressivity = {}
+            self.data.cell_split_frequency = {}
+            self.data.cell_defensiveness = {}
+
+            self.data.size_vs_speed = defaultdict(return_defaultdict_with_zeros)
+            self.data.size_vs_visible_window = defaultdict(return_defaultdict_with_empty_list)
+            self.data.mass_vs_visible_window = defaultdict(return_defaultdict_with_empty_list)
+
+            self.data.eject_distlogs = {"virus" : [], "split cell" : [], "ejected mass" : []}
+            self.data.eject_deviations = {"virus" : [], "virus2" : [], "virus3" : [], "split cell" : [], "ejected mass" : []}
+
+            self.data.observed_virus_sizes = defaultdict(return_zero)
+            self.data.remerging = {}
+        else:
+            self.data = data
 
-        self.mass_history = []
-        self.pos_history = []
-        self.cell_aggressivity = {}
-        self.cell_split_frequency = {}
-        self.cell_defensiveness = {}
+    def save(self,filename):
+        pickle.dump(self.data, open(filename,"wb"))
+
+    def load(filename):
+        return Stats(None, pickle.load(open(filename,"rb")))
+
+    def merge(self, filename):
+        data2 = pickle.load(open(filename,"rb"))
+        self.data.min_mass = min(self.data.min_mass, data2.min_mass)
+        self.data.max_mass = max(self.data.max_mass, data2.max_mass)
+        for i in data2.size_vs_visible_window:
+            for j in data2.size_vs_visible_window[i]:
+                self.data.size_vs_visible_window[i][j] += data2.size_vs_visible_window[i][j]
+        for i in data2.mass_vs_visible_window:
+            for j in data2.mass_vs_visible_window[i]:
+                self.data.mass_vs_visible_window[i][j] += data2.mass_vs_visible_window[i][j]
+
+        for i in data2.size_vs_speed:
+            for j in data2.size_vs_speed[i]:
+                self.data.size_vs_speed[i][j] += data2.size_vs_speed[i][j]
+
+        for i in data2.eject_deviations:
+            self.data.eject_deviations[i] += data2.eject_deviations[i]
+
+        for i in data2.eject_distlogs:
+            self.data.eject_distlogs[i] += data2.eject_distlogs[i]
+
+        for i in self.data.observed_virus_sizes:
+            self.data.observed_virus_sizes[i] += data2.observed_virus_sizes[i]
+
+        self.data.remerging.update(data2.remerging)
+
     def log_mass(self, mass):
-        self.mass_history.append((time.time(), mass))
-        self.current_mass = mass
-        if mass > self.max_mass:
-            self.max_mass = mass
-        if mass < self.min_mass:
-            self.min_mass = mass
+        self.data.mass_history.append((time.time(), mass))
+        self.data.current_mass = mass
+        if mass > self.data.max_mass:
+            self.data.max_mass = mass
+        if mass < self.data.min_mass:
+            self.data.min_mass = mass
 
     def log_pos(self, pos):
-        self.pos_history.append((time.time(), (pos[0], pos[1])))
+        self.data.pos_history.append((time.time(), (pos[0], pos[1])))
 
     def update_cell_aggressivity(self, cell, value):
-        self.cell_aggressivity[cell] = value
+        self.data.cell_aggressivity[cell] = value
 
     def update_cell_split_frequency(self, cell, value):
-        self.cell_split_frequency[cell] = value
+        self.data.cell_split_frequency[cell] = value
 
     def update_cell_defensiveness(self, cell, value):
-        self.cell_defensiveness[cell] = value
+        self.data.cell_defensiveness[cell] = value
 
     def get_last_steps(self, list, steps = 10):
-        return list[-steps:]
\ No newline at end of file
+        return list[-steps:]
+
+    def process_frame(self):
+        self.countdown -= 1
+        if False and (self.countdown <= 0):
+            quick_followup = (random.random() < 0.1)
+
+            if quick_followup:
+                self.countdown = 7
+            else:
+                self.countdown = int(27* (random.random() * 15))
+
+            what_to_do = random.random()
+            if what_to_do < 0.2:
+                self.c.send_split()
+            else:
+                self.c.send_shoot()
+
+        self.log_pos(self.c.player.center)
+        self.log_mass(self.c.player.total_mass)
+
+        cells = self.c.world.cells.values()
+        own_cells = list(self.c.player.own_cells)
+
+        own_total_size = sum( map(lambda cell : cell.size, own_cells) )
+        own_total_mass = sum( map(lambda cell : cell.mass, own_cells) )
+        n_own_cells = len(own_cells)
+
+        n = 3
+        for cell in filter(lambda cell : not cell.is_food and not cell.is_virus and not cell.is_ejected_mass, cells):
+            if hasattr(cell,'poslog') and len(cell.poslog) > n+1:
+                cellspeed = 0
+                for i in range(1,n+1):
+                    cellspeed += (cell.poslog[-i] - cell.poslog[-i-1]).len() / n
+
+                cellspeed = int(cellspeed*10)/10
+                self.data.size_vs_speed[cell.size][cellspeed] += 1
+
+        visible_width = max( map(lambda cell : cell.pos.x - cell.size, cells) ) - min( map(lambda cell : cell.pos.x + cell.size, cells) )
+        visible_height = max( map(lambda cell : cell.pos.y - cell.size, cells) ) - min( map(lambda cell : cell.pos.y + cell.size, cells) )
+
+        self.data.size_vs_visible_window[n_own_cells][own_total_size].append((visible_width,visible_height))
+        self.data.mass_vs_visible_window[n_own_cells][own_total_mass].append((visible_width,visible_height))
+
+
+        # log virus sizes
+        for cell in cells:
+            if cell.is_virus:
+                self.data.observed_virus_sizes[cell.size] += 1
+
+        # detect re-merging cells
+        for cell in own_cells:
+            for cell2 in own_cells:
+                if cell2 != cell:
+                    dist = (cell.pos - cell2.pos).len()
+                    expected_dist = cell.size + cell2.size
+                    min_dist = max(cell.size, cell2.size)
+
+                    if (dist < (0.9 * expected_dist + 0.1 * min_dist)):
+                        is_parent_child = (cell == cell2.parent or cell2 == cell.parent)
+                        print("cells seem to be merging! they are "+ ("" if is_parent_child else "NOT ") + "parent and child")
+                        pair_id = (min(cell.cid,cell2.cid), max(cell.cid,cell2.cid))
+
+                        if pair_id not in self.data.remerging:
+                            self.data.remerging[pair_id] = ReMerging(cell.size, cell2.size, cell.spawntime, cell2.spawntime, is_parent_child, self.c.world.time)
+                        else:
+                            self.data.remerging[pair_id].end_time = self.c.world.time
+
+
+
+        # find ejected mass, split cells or viruses that have come to rest
+        for cell in cells:
+            if hasattr(cell,"parent") and cell.parent != None and not cell.calmed_down:
+                # we're only interested in cells with a parent set, because
+                # this also implies that we have tracked them since their
+                # creation.
+                # also, we're only interested in cells that are still flying
+                # as a result of being ejected/split.
+
+                if not cell.is_food and not cell.is_ejected_mass and not cell.is_virus:
+                    expected_speed = mechanics.speed(cell.size)
+                    celltype = "split cell"
+                elif cell.is_virus:
+                    expected_speed = 1
+                    celltype = "virus"
+                elif cell.is_ejected_mass:
+                    expected_speed = 1
+                    celltype = "ejected mass"
+
+
+                if cell.movement.len() < expected_speed * 1.1:
+                    print(celltype+" has come to rest, nframes="+str(len(cell.poslog)))
+                    cell.calmed_down = True
+                    # TODO: speed log
+
+                    # distance is calculated naively
+                    distance = (cell.spawnpoint - cell.pos).len()
+
+                    # distance2 is calculated along the cell's path (will differ if the flight was not colinear)
+                    poslog = list(cell.poslog)
+                    speeds = list(map(lambda vecs : (vecs[0]-vecs[1]).len(), zip(poslog, poslog[1:])))
+                    distance2 = sum(speeds)
+
+                    distance_from_parent = (cell.parentpos_when_spawned - cell.pos).len()
+
+                    self.data.eject_distlogs[celltype] += [(distance, distance2, distance_from_parent, cell.parentsize_when_spawned, len(cell.poslog), speeds)]
+                    print(" flown distance = %.2f / %.2f"%(distance,distance2))
+
+                if len(cell.poslog) == 5:
+                    # calculate movement direction from the first 5 samples
+
+                    # first check whether they're on a straight line
+                    if geometry.is_colinear(cell.poslog) and cell.shoot_vec != None:
+                        print(celltype+" direction available!")
+                        fly_direction = cell.poslog[-1] - cell.poslog[0]
+                        fly_angle = math.atan2(fly_direction.y, fly_direction.x)
+
+                        shoot_angle = math.atan2(cell.shoot_vec.y, cell.shoot_vec.x)
+
+
+                        deviation = (fly_angle - shoot_angle) % (2*math.pi)
+                        if deviation > math.pi: deviation -= 2*math.pi
+                        print(" deviation = "+str(deviation*180/math.pi))
+
+                        self.data.eject_deviations[celltype] += [deviation]
+
+                        if (celltype == 'virus'):
+                            # FIXME so ugly
+                            try:
+                                shoot_angle = math.atan2(cell.shoot_vec2.y, cell.shoot_vec2.x)
+
+                                deviation = (fly_angle - shoot_angle) % (2*math.pi)
+                                if deviation > math.pi: deviation -= 2*math.pi
+                                print(" deviation2= "+str(deviation*180/math.pi))
+
+                                self.data.eject_deviations['virus2'] += [deviation]
+                            except AttributeError:
+                                print("virus2 not available, wtf?!")
+
+                            try:
+                                shoot_angle = math.atan2(cell.shoot_vec3.y, cell.shoot_vec3.x)
+
+                                deviation = (fly_angle - shoot_angle) % (2*math.pi)
+                                if deviation > math.pi: deviation -= 2*math.pi
+                                print(" deviation3= "+str(deviation*180/math.pi))
+
+                                self.data.eject_deviations['virus3'] += [deviation]
+                            except AttributeError:
+                                print("virus3 not available")
+
+                    else:
+                        print(celltype+" did NOT fly in a straight line, ignoring...")
+
+
+
+    def analyze_speed(self):
+        results=[]
+        for size, values in sorted(self.data.size_vs_speed.items(), key=lambda x : x[0]):
+            minimum = quantile(values, 0.2)
+            average = quantile(values, 0.5)
+            maximum = quantile(values, 0.8)
+
+            results += [(size,maximum,average,minimum,False,False,False,sum(values.values()))]
+
+        # mark outliers
+        for i in range(1, len(results)-1):
+            for j in range(1,4):
+                if abs(results[i][j] - results[i-1][j]) > 2 and abs(results[i][j] - results[i+1][j]) > 2:
+                    tmp = list(results[i])
+                    tmp[j+3] = True
+                    results[i] = tuple(tmp)
+
+        coeff_vs_stddev = []
+        for coeff in [x/100 for x in range(10,100,1)]:
+            products = []
+            for size, maximum, average, minimum, maxoutlier, avgoutlier, minoutlier, ndata in results:
+                if not maxoutlier:
+                    products += [size**coeff * maximum]
+
+            coeff_vs_stddev += [(coeff, avg(products), stddev(normalize(products)))]
+
+        best = min(coeff_vs_stddev, key=lambda v:v[2])
+
+        print("size\tcalc\tmax\tavg\tmin\t\tndata")
+        for size, maximum, average, minimum, maxoutlier, avgoutlier, minoutlier, ndata in results:
+            print(str(size) + ":\t" + "%.1f" % (best[1] / size**best[0]) + "\t" + ("*" if maxoutlier else "") + str(maximum) + "\t" + ("*" if avgoutlier else "") + str(average) + "\t" + ("*" if minoutlier else "") + str(minimum) + "\t\t" + str(ndata))
+
+        print("size**"+str(best[0])+" * speed = "+str(best[1]) )
+
+    def analyze_visible_window_helper(self, foo_vs_visible_window, verbose=False):
+        svw = {}
+        ratios = []
+        if verbose: print("size\tdiag")
+        for size, rects in sorted(foo_vs_visible_window.items(), key=lambda x:x[0]):
+            maxwidth = quantile(sorted(map(lambda x:x[0], rects)), 0.75)
+            maxheight = quantile(sorted(map(lambda x:x[1], rects)), 0.75)
+
+            if math.sqrt(maxwidth**2+maxheight**2) < 4000:
+                # TODO FIXME
+                svw[size] = (maxwidth,maxheight)
+                ratios += [maxwidth/maxheight]
+
+            if verbose: print(str(size)+"\t"+str(math.sqrt(maxwidth**2+maxheight**2))+"\t\t"+str(len(rects)))
+
+        print ("median ratio = "+str(quantile(sorted(ratios),0.5)))
+
+        coeff_vs_stddev=[]
+        for coeff in [x/100 for x in range(0,100,1)]:
+            quotients = []
+            for size, rect in svw.items():
+                if size != 0:
+                    diag = math.sqrt(rect[0]**2+rect[1]**2)
+                    quotients += [diag / size**coeff]
+
+            coeff_vs_stddev += [(coeff, avg(quotients), stddev(normalize(quotients)))]
+
+        best = min(coeff_vs_stddev, key=lambda v:v[2])
+
+        print("diag / size**"+str(best[0])+" = "+str(best[1]))
+
+    def analyze_visible_window(self, verbose=False):
+        for ncells in sorted(self.data.size_vs_visible_window.keys()):
+            if len(self.data.size_vs_visible_window[ncells]) > 0:
+                print("\nwith "+str(ncells)+" cells, depending on sum(size)")
+                try:
+                    self.analyze_visible_window_helper(self.data.size_vs_visible_window[ncells], verbose)
+                except ZeroDivisionError:
+                    print("\toops.")
+        for ncells in sorted(self.data.mass_vs_visible_window.keys()):
+            if len(self.data.mass_vs_visible_window[ncells]) > 0:
+                print("\nwith "+str(ncells)+" cells, depending on sum(mass)")
+                try:
+                    self.analyze_visible_window_helper(self.data.mass_vs_visible_window[ncells], verbose)
+                except ZeroDivisionError:
+                    print("\toops.")
+
+    def analyze_deviations(self, celltype):
+        ds = self.data.eject_deviations[celltype]
+
+        try:
+            mean, stddev = fit_gaussian(ds)
+        except:
+            mean, stddev = "???", "???"
+
+
+        quant = quantile(list(map(abs, ds)), 0.75)
+
+        print(celltype+" eject/split direction deviations: mean = "+str(mean)+", stddev="+str(stddev)+", ndata="+str(len(ds)))
+        print("\t75%% of the splits had a deviation smaller than %.2f rad = %.2f deg" % (quant, quant*180/math.pi))
+        print("")
+
+
+        #a,b = numpy.histogram(ds, bins=100)
+        #midpoints = map(lambda x : (x[0]+x[1])/2, zip(b, b[1:]))
+        #for n,x in zip(a,midpoints):
+        #    print(str(n) + "\t" + str(x))
+
+    def analyze_distances(self, celltype):
+        ds = [v[0] for v in self.data.eject_distlogs[celltype]]
+        ns = [v[4] for v in self.data.eject_distlogs[celltype]]
+
+        try:
+            mean, stddev = fit_gaussian(ds)
+            meann, stddevn = fit_gaussian(ns)
+        except:
+            mean, stddev = "???", "???"
+            meann, stddevn = "???", "???"
+
+        print(celltype+" eject/split distances: mean = "+str(mean) +", stddev ="+str(stddev) +", ndata="+str(len(ds)))
+        print(celltype+" meann = "+str(meann)+", stddevn ="+str(stddevn))
+
+        #a,b = numpy.histogram(ds, bins=100)
+        #midpoints = list(map(lambda x : (x[0]+x[1])/2, zip(b, b[1:])))
+        #for n,x in zip(a,midpoints):
+        #    print(str(n) + "\t" + str(x))
+
+        #maxidx = max(range(0,len(a)), key = lambda i : a[i])
+        #print("\tmaximum at "+str(midpoints[maxidx]))
+
+        #q = 75 if celltype == "ejected mass" else 75
+        #quant = quantile(list(map(lambda v : abs(v-midpoints[maxidx]), ds)), q/100)
+        #print("\t"+str(q)+"% of values lie have a distance of at most "+str(quant)+" from the maximum")
+
+        mid, delta = find_smallest_q_confidence_area(ds, 0.75)
+        print("\t75%% of the distances lie in the interval %.2f plusminus %.2f" % (mid,delta))
+        print("\t%2d%% of the distances lie in the interval %.2f plusminus %.2f" % (100*get_delta_confidence(ds, mid, delta*1.2), mid, delta*1.2) )
+        print("\tmax = %.2f" % (max(ds)))
+        mid, delta = find_smallest_q_confidence_area(ns, 0.75)
+        print("\t75%% of the flight lengths lie in the interval %.2f plusminus %.2f" % (mid,delta))
+        print("\t%2d%% of the flight lengths lie in the interval %.2f plusminus %.2f" % (100*get_delta_confidence(ns,mid,delta*1.2),mid,delta*1.2))
+        print("")
+
+    def analyze_virus_sizes(self):
+        print("\nI've seen the following %d virus sizes:" % len(self.data.observed_virus_sizes))
+        for size, ndata in sorted(self.data.observed_virus_sizes.items(), key=lambda x:x[0]):
+            print("\t%4d: %7d times" % (size, ndata))
+
+    def analyze_remerge(self):
+        relevant = list(filter(lambda r : r.is_parent_child, self.data.remerging.values()))
+        durations = list(map(lambda r : r.end_time - r.begin_time, relevant))
+        print("75%% of the remerge durations lie at %.2f plusminus %.2f frames" % find_smallest_q_confidence_area(durations,0.75))
+        waittimes = list(map(lambda r : r.begin_time - max(r.birth1, r.birth2), relevant))
+        print("75%% of the remerges were started after %.2f plusminus %.2f frames" % find_smallest_q_confidence_area(waittimes,0.75))
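Editor's note: for orientation, below is a minimal sketch of how the Stats API introduced by this commit appears intended to be driven. It is not part of the diff. The `client` object, the frame loop, and the pickle file names are assumptions; the diff only shows that `process_frame()` expects a client exposing `player`, `world`, `send_split()` and `send_shoot()`, and that `Stats(c)`, `save()`, `load()`, `merge()` and the `analyze_*` methods exist as used here.

    from stats import Stats

    def record_session(client, nframes):
        # `client` is a hypothetical bot/client object; see note above.
        stats = Stats(client)          # data=None -> fresh StatData (version 4)
        for _ in range(nframes):
            # ... advance the client/world by one frame here ...
            stats.process_frame()      # logs mass/pos, speeds, visible windows, remerges
        stats.save("session1.pickle")  # hypothetical file name

    def analyze_sessions():
        # load() is declared without `self`, so it is called on the class itself
        stats = Stats.load("session1.pickle")
        stats.merge("session2.pickle")     # fold a second recording into this one
        stats.analyze_speed()
        stats.analyze_visible_window()
        for celltype in ("virus", "split cell", "ejected mass"):
            stats.analyze_deviations(celltype)
            stats.analyze_distances(celltype)
        stats.analyze_virus_sizes()
        stats.analyze_remerge()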