"""Load tagged hexdumps from a text file and analyse how each byte column changes.

The input file is a sequence of optional tag lines followed by hexdump lines
(26 space-separated two-character byte values per line).  For every byte
position ("column") the script computes the relative frequency of each
difference (mod 256) between consecutive dumps, hides the columns that never
change, and prints the remaining bytes plus their difference statistics.
"""
import re
import sys
from collections import defaultdict

# One hexdump line: exactly 26 two-character tokens separated by single
# spaces, optionally followed by trailing spaces.
# NOTE: the character class is A-Z, not A-F, so a token such as "ZZ" passes
# this pattern and then raises ValueError in int(token, 16) inside load().
_HEXLINE_RE = re.compile(r"[0-9A-Z]{2}( [0-9A-Z]{2}){25} *$")


class Dumps:
    """Collection of hexdumps stored as ``(number, tag, bytes)`` tuples."""

    def __init__(self):
        # List of (sequence number, tag string or None, list of int byte values).
        self.dumps = []

    def load(self, f):
        """Parse the text file at path *f* and append its dumps to ``self.dumps``.

        Consecutive hexdump lines are concatenated into a single dump.  Any
        other line terminates the dump collected so far; if that line is not
        blank it becomes the tag of the next dump (a message is printed when
        it replaces a tag that has not been used yet).  Sequence numbers
        start at 0 on every call.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if a line matches the hexdump pattern but contains a
                token that is not valid hexadecimal.
        """
        dump = []
        tag = None
        no = 0
        # The context manager guarantees the file handle is closed.
        with open(f, "r") as handle:
            for line in handle:
                if _HEXLINE_RE.match(line):
                    dump += [int(s, 16) for s in line.split()]
                    continue
                # A non-hexdump line ends the dump in progress, if any.
                if dump:
                    self.dumps.append((no, tag, dump))
                    dump = []
                    tag = None
                    no += 1
                if line.strip() != "":
                    if tag is not None:
                        print("overriding tag %s with %s" % (tag, line))
                    tag = line.replace("\n", "")
                # blank lines are ignored
        # Flush a dump that runs up to end-of-file; without this the last
        # dump is lost whenever the file does not end with a non-hex line.
        if dump:
            self.dumps.append((no, tag, dump))

    def show(self, xcols, xrows):
        """Print every dump as hex bytes followed by its tag.

        *xcols* and *xrows* are the byte positions and dump numbers to hide;
        ``None`` means "hide nothing".
        """
        if xcols is None:
            xcols = []
        if xrows is None:
            xrows = []
        for _, tag, dump in self.filter(xcols, xrows):
            # TODO (carried over from the original source; intent not stated)
            hex_part = "".join("%02X " % byte for byte in dump)
            print(hex_part, tag, sep="")

    def filter(self, xcols, xrows):
        """Return the dumps without the excluded columns and rows.

        Args:
            xcols: byte positions to drop from every dump (``None`` = none).
            xrows: dump sequence numbers to drop entirely (``None`` = none).

        Returns:
            A new list of ``(number, tag, bytes)`` tuples; ``self.dumps`` is
            left untouched.
        """
        # Sets make the per-byte membership test cheap.
        skip_cols = frozenset(xcols or ())
        skip_rows = frozenset(xrows or ())
        return [
            (no, tag, [byte for pos, byte in enumerate(data) if pos not in skip_cols])
            for no, tag, data in self.dumps
            if no not in skip_rows
        ]


def mirror(dumps):
    """Transpose a list of dumps into a list of byte columns.

    ``result[j][i]`` is byte *j* of dump *i*.  The number of columns is taken
    from the first dump; a later dump that is shorter raises ``IndexError``.
    An empty *dumps* list yields an empty result.
    """
    if not dumps:
        return []
    width = len(dumps[0][2])
    return [[entry[2][j] for entry in dumps] for j in range(width)]


def stats(cols):
    """Compute difference statistics for each column.

    For every column, returns a ``defaultdict`` mapping ``(b - a) % 256`` of
    each pair of consecutive values ``a, b`` to the fraction of pairs that
    show this difference.  Columns with fewer than two values produce an
    empty mapping.
    """
    result = [defaultdict(int) for _ in cols]
    for col, stat in zip(cols, result):
        if len(col) < 2:
            # No consecutive pairs; also avoids dividing by zero below.
            continue
        weight = 1. / (len(col) - 1)
        for a, b in zip(col, col[1:]):
            stat[(b - a) % 256] += weight
    return result


def getconstcols(stats):
    """Return the indices of columns whose consecutive difference is (almost) always 0.

    The 0.9999 threshold tolerates rounding error from summing the fractions.
    ``.get`` is used so that the lookup does not insert a 0 key into the
    caller's defaultdicts.
    """
    return [col for col, stat in enumerate(stats) if stat.get(0, 0) > 0.9999]


def showstats(stats):
    """Print each column index followed by its statistics as a plain dict."""
    for pos, stat in enumerate(stats):
        print("%i:\t%s" % (pos, str(dict(stat))))


def prettydict(d):
    """Print one statistics mapping on a single line, most frequent difference first.

    Each entry is shown as ``<difference in hex>: <percentage> | ``.
    """
    for k, v in sorted(d.items(), key=lambda item: -item[1]):
        print("%02X: %2.0f | " % (k, 100 * v), end="")
    print("")


def main(path="mydump.txt"):
    """Load *path*, print the non-constant columns and their statistics."""
    d = Dumps()
    d.load(path)
    cc = getconstcols(stats(mirror(d.filter([], []))))
    d.show(cc, [])
    for column_stat in stats(mirror(d.filter(cc, []))):
        prettydict(column_stat)


if __name__ == "__main__":
    # Optional first argument overrides the default input file.
    main(sys.argv[1] if len(sys.argv) > 1 else "mydump.txt")