#!/usr/bin/env python
# Program that reads arbitrary whitespace-separated values from standard
# input and prints them as two-dimensional text tables.
# 2010 thomas@deselaers.de
#
# Options:
#   -head   the first row of the data specifies the variable names
#   -n X    there are X output (result) variables per row
#   -avg    additionally print averages over rows and columns
#
# Internally the data is held in a tree of maps: every leading column that is
# not needed for a table becomes one level of the tree (keyed by the value in
# that column).  The innermost level keeps the remaining rows, i.e. the two
# columns that span the table followed by the 'X' (see -n) result columns.
#
# The code deliberately avoids print statements/functions, dict.has_key and
# the builtin reduce so that the same file runs under Python 2 and Python 3.

import os
import sys


def _emit(text=""):
    """Write *text* followed by a newline to standard output."""
    sys.stdout.write(text + "\n")


def _axis_value(text):
    """Return *text* as a float when it parses as one, else unchanged."""
    try:
        return float(text)
    except ValueError:
        return text


def _axis_sort_key(value):
    """Sort key placing numeric axis values before textual ones.

    Python 2 ordered mixed float/str lists this way implicitly; Python 3
    refuses to compare the two types, so the order is made explicit.
    """
    return (not isinstance(value, float), value)


class TableTree(object):
    """Tree of tables built from rows of whitespace-separated fields.

    Attributes:
        children: maps the value of the leading column to a sub-tree.
        data: rows stored at this node (used when the node is a leaf).
        varname: name of the column this node splits on ("" if unknown).
        datavarnames: header names for the rows stored in ``data``.
        nResults: number of result columns at the end of every row.
        doAverages: when true, ``formatTable`` also prints averages.
    """

    def __init__(self):
        self.children = {}
        self.data = []
        self.varname = ""
        self.datavarnames = ["", "", ""]
        self.nResults = 1
        self.doAverages = 0

    def insert(self, entry, headers=None):
        """Insert one row into the tree.

        Rows longer than ``nResults + 2`` fields are split on their first
        field and handed to the matching child; shorter rows are stored in
        this node.

        Args:
            entry: list of string fields for one row.
            headers: optional list of column names aligned with *entry*.
        """
        if headers is None:
            headers = []

        if len(entry) > self.nResults + 2:
            key = entry[0]
            if headers:
                self.varname = headers[0]
            child = self.children.get(key)
            if child is None:
                # New sub-trees inherit the settings of their parent.
                child = TableTree()
                child.doAverages = self.doAverages
                child.nResults = self.nResults
                self.children[key] = child
            child.insert(entry[1:], headers[1:])
        else:
            self.data.append(entry)
            if len(headers) == 2 + self.nResults:
                self.datavarnames = headers

    def formatTable(self):
        """Print the rows stored in this node as one table on stdout.

        The first field of every row selects the table row, the second field
        selects the table column, and the remaining fields form the cell.
        Nothing is printed when the node holds no usable rows.  Rows with
        fewer than two fields cannot be placed in the table; they are skipped
        and reported on stderr.

        Raises:
            ValueError: if averages are requested and a cell holds a value
                that is not a number.
        """
        rows = []
        for row in self.data:
            if len(row) < 2:
                sys.stderr.write(
                    "warning: ignoring row with fewer than two fields: %r\n"
                    % (row,))
                continue
            rows.append(row)
        if not rows:
            return

        # maxlen has one entry per possible field and records the largest
        # string length seen in that column.
        maxlen = [0] * max(len(row) for row in rows)
        row_keys = set()
        col_keys = set()
        data = {}
        for row in rows:
            for col, field in enumerate(row):
                maxlen[col] = max(maxlen[col], len(field))
            axis = (_axis_value(row[0]), _axis_value(row[1]))
            for col, value in enumerate(axis):
                # Numeric keys are printed via str(float), which may be wider
                # than the text that was read (e.g. "1" becomes "1.0").
                if isinstance(value, float):
                    maxlen[col] = max(maxlen[col], len(str(value)))
            row_keys.add(axis[0])
            col_keys.add(axis[1])
            data.setdefault(axis[0], {})[axis[1]] = " ".join(row[2:])

        # Width of one cell: the widths of the result columns plus one
        # separating blank per result.
        flength = sum(maxlen[-self.nResults:]) + self.nResults
        steps0sorted = sorted(row_keys, key=_axis_sort_key)
        steps1sorted = sorted(col_keys, key=_axis_sort_key)
        cell_fmt = "%%%ds" % flength

        # Items of one output line are joined with single blanks, which is
        # exactly what consecutive "print item," statements produced.
        header = [("%%%ds|" % (maxlen[0] + 1)) % ""]
        header.extend(cell_fmt % j for j in steps1sorted)
        header.append(self.datavarnames[1])
        _emit(" ".join(header))
        _emit((maxlen[0] + 1) * "-" + "+"
              + (flength + 1) * len(steps1sorted) * "-")

        averages0 = {}
        averages1 = {}
        counts0 = {}
        counts1 = {}
        row_fmt = "%%%ds |" % maxlen[0]
        for i in steps0sorted:
            line = [row_fmt % i]
            for j in steps1sorted:
                cell = data[i].get(j)
                if cell is None:
                    line.append(cell_fmt % "-")
                    continue
                line.append(cell_fmt % cell)
                if not self.doAverages:
                    continue

                tok = [float(t) for t in cell.split()]
                if i in averages0:
                    # Row sums keep the length of the first cell of the row.
                    for d in range(len(averages0[i])):
                        if len(tok) > d:
                            averages0[i][d] += tok[d]
                            counts0[i][d] += 1
                else:
                    averages0[i] = list(tok)
                    counts0[i] = [1] * len(tok)

                if j in averages1:
                    # Column sums grow when a later cell has more values.
                    for d in range(len(tok)):
                        while len(averages1[j]) <= d:
                            averages1[j].append(0)
                            counts1[j].append(0)
                        averages1[j][d] += tok[d]
                        counts1[j][d] += 1
                else:
                    averages1[j] = list(tok)
                    counts1[j] = [1] * len(tok)
            if self.doAverages:
                line.extend("%1.4f" % (total / count)
                            for total, count in zip(averages0[i], counts0[i]))
            _emit(" ".join(line))

        if self.doAverages:
            line = [("%%%ds | " % maxlen[0]) % "AV"]
            for j in steps1sorted:
                line.extend("%1.4f" % (total / count)
                            for total, count in zip(averages1[j], counts1[j]))
                line.append("")  # empty item: extra blank between columns
            _emit(" ".join(line))
        _emit(self.datavarnames[0])
        _emit()

    def write(self):
        """Print this node: its table if it is a leaf, else every sub-tree.

        Each sub-tree is preceded by a line naming the value it was split
        on ("name=value" when the column name is known).
        """
        if not self.children:
            self.formatTable()
            return
        for key in self.children:
            if self.varname != "":
                _emit("%s=%s" % (self.varname, key))
            else:
                _emit(key)
            self.children[key].write()


def main():
    """Read rows from stdin, build the table tree and print it.

    Recognised arguments are -head, -n X and -avg (see the file header).
    A -w option is tolerated on the command line but has no effect; its
    value was never used by the formatting code.
    """
    argv = sys.argv
    lines = sys.stdin.readlines()

    headers = []
    if "-head" in argv and lines:
        headers = lines.pop(0).split()

    nResults = 1
    if "-n" in argv:
        nResults = int(argv[argv.index("-n") + 1])

    table = TableTree()
    table.nResults = nResults
    table.doAverages = "-avg" in argv

    for line in lines:
        tok = line.split()
        if not tok:
            # Blank (or whitespace-only) lines carry no data.
            continue
        table.insert(tok, headers)
    table.write()


if __name__ == "__main__":
    main()