# Source code for woo.batch (header line of the Sphinx-generated HTML page this file was extracted from)

# encoding: utf-8

# for use with globals() when reading table: table cells are eval'ed, so 'nan'
# must resolve to a value (see the eval() calls in runPreprocessor)

nan=float('nan')
from math import * 
from minieigen import *
import warnings
import sys
from . import utils

# module-level logger, created through the project's own factory
log=utils.makeLog(__name__)

# batch-related command-line options; this module reads wooOptions.batchTable,
# wooOptions.batchLine and wooOptions.batchResults
from wooMain import options as wooOptions


try:
    from lockfile import FileLock
except ImportError:
    class FileLock:
        '''Dummy drop-in replacement used when the `lockfile <https://pypi.python.org/pypi/lockfile>`_
        module is not importable: performs no locking at all, only warns once at
        construction time.'''
        def __init__(self,f):
            # warn so the user knows concurrent access to *f* is unprotected
            warnings.warn("The 'lockfile' module is not importable, '%s' will not be locked. If this file is concurrently accessed by several simulations, it may get corrupted!"%f)
        def __enter__(self): pass
        def __exit__(self,*args): pass
        def is_locked(self):
            # mayHaveStaleLock() calls this; without real locking there can
            # never be a (stale) lock — previously this method was missing and
            # that call raised AttributeError
            return False

# increase every time the db format changes, to avoid errors;
# applies to both sqlite and hdf5 dbs (checked by _checkHdf5sim for hdf5)
dbFormatVersion=3

## timeout [s] for opening the database if locked; passed as the *timeout*
## argument of sqlite3.connect in dbReadResults / dbToSpread
sqliteTimeout=1000

def wait():
    """If running inside a batch, start the master simulation (unless it is
    already running) and block until it stops by itself. Typically used at the
    end of a script so that it does not finish prematurely in batch mode (the
    execution would be ended in such a case). Does nothing outside of batch."""
    import woo
    scene=woo.master.scene
    if not inBatch():
        return
    if not scene.running:
        scene.run()
    # wait for Master (in case the scene object gets re-assigned meanwhile)
    woo.master.waitForScenes()
def inBatch():
    """Tell whether we are running inside the batch or separately.

    :return: True when a batch table was given or a batch line number was set
        (``batchLine>=0`` alone covers batches run without an external table).
    """
    # the original body contained an unused `import os`; removed
    return bool(wooOptions.batchTable) or wooOptions.batchLine>=0
def hasBatchTable():
    """Tell whether an external batch table is given or not (scripts may be run
    in batch also without batch tables).

    :return: True when :obj:`wooOptions.batchTable` is a non-empty string.
    """
    # `is not None` instead of the original `!=None` comparison
    return wooOptions.batchTable is not None and len(wooOptions.batchTable)>0
def mayHaveStaleLock(db):
    """Report whether the HDF5 database *db* currently appears locked (which may
    indicate a stale lock left behind by a crashed run).

    :return: result of ``FileLock(db).is_locked()`` for HDF5 files; None for
        files without an HDF5 extension, where this check does not apply.
    """
    import os.path
    # only HDF5 databases use the lockfile-based protection
    if os.path.splitext(db)[-1] not in ('.h5','.hdf5','.he5','.hdf'): return
    return FileLock(db).is_locked()
# writeResults(scene,...): serialize results of *scene* into the batch results
# database. Determines the db name from batch options (or *defaultDb*), detects
# the format from the extension (the sqlite branch now raises RuntimeError:
# "SQLite is no longer supported"), then — under FileLock — appends a new HDF5
# group named by the scene id (suffixed ~1, ~2, ... on collision) carrying
# formatVersion, finished timestamp, batch table/line, tags, plots, labels,
# engines, the **kw "misc" attributes and the data *series*; finally, with
# *syncXls*, regenerates <db>.xlsx via dbToSpread() and calls each *postHooks*
# entry with the db name.
# NOTE(review): the three physical lines below were flattened by HTML
# extraction (whole function on one line, a log string split across lines);
# kept byte-identical rather than risking an incorrect reconstruction.
[docs]def writeResults(scene,defaultDb='woo-results.hdf5',syncXls=True,dbFmt=None,series=None,quiet=False,postHooks=[],**kw): ''' Write results to batch database. With *syncXls*, corresponding excel-file is re-generated. Series is a dicionary of 1d arrays written to separate sheets in the XLS. If *series* is `None` (default), `S.plot.data` are automatically added. All other ``**kw`` arguments are serialized in the misc field, which then appears in the main XLS sheet. All data are serialized using json so that they can be read back in a language-independent manner. *postHooks* is list of functions (taking a single argument - the database name) which will be called once the database has been updated. They can be used in conjunction with :obj:`woo.batch.dbReadResults` to write aaggregate results from all records in the database. ''' import woo, woo.plot, woo.core import os, os.path, datetime import numpy import json import logging S=scene if inBatch() and hasBatchTable(): table,line,db=wooOptions.batchTable,wooOptions.batchLine,wooOptions.batchResults else: table,line,db='',-1,(defaultDb if not wooOptions.batchResults else wooOptions.batchResults) if not db: raise ValueError('No database to write results to (forgot to pass --batch-results?).') newDb=not os.path.exists(db) if not quiet: log.info('Writing results to the database %s (%s)'%(db,'new' if newDb else 'existing')) if dbFmt==None: ext=os.path.splitext(db)[-1] if ext in ('.sqlite','.db',b'.sqlite',b'.db',u'.sqlite',u'.db'): dbFmt='sqlite' elif ext in ('.h5','.hdf5','.he5','.hdf',b'.h5',b'.hdf5',b'.he4',b'.hdf',u'.h5',u'.hdf5',u'.he5',u'.hdf'): dbFmt='hdf5' else: raise ValueError("Unable to determine database format from '"+db+"' (extension '"+ext+"'): must be *.h5, *.hdf5, *.he5, *.hdf.") # make sure keys are unicode objects (which is what json converts to!) 
unicodeTags=dict(S.tags) # make sure series are 1d arrays if series==None: series={}; series.update([('plot/'+k,v) for k,v in S.plot.data.items()]) for k,v in series.items(): if isinstance(v,numpy.ndarray): if dbFmt=='sqlite': series[k]=v.tolist() # sqlite needs lists, hdf5 is fine with numpy arrays elif not hasattr(v,'__len__'): raise ValueError('series["%s"] not a sequence (__len__ not defined).'%k) if dbFmt=='sqlite': raise RuntimeError('SQLite is no longer supported.') elif dbFmt=='hdf5': import h5py try: hdf=h5py.File(db,('w' if newDb else 'a'),libver='latest') except IOError: import warnings warnings.warn("Error opening HDF5 file %s, moving to %s~~corrupt and creating a new one"%(db,db)) import shutil shutil.move(db,db+'~~corrupt') hdf=h5py.File(db,'a',libver='latest') with FileLock(db): wooJSON=woo.core.WooJSONEncoder(indent=None,oneway=True) i=0 while True: sceneId=S.tags['id']+('' if i==0 else '~%d'%i) if sceneId not in hdf: break i+=1 # group for our Scene G=hdf.create_group(sceneId) G.attrs['formatVersion']=dbFormatVersion G.attrs['finished']=datetime.datetime.now().replace(microsecond=0).isoformat('_') G.attrs['batchTable']=table G.attrs['batchTableLine']=line G.attrs['sceneId']=S.tags['id'] G.attrs['title']=S.tags['title'] G.attrs['duration']=S.duration G.attrs['pre']=S.pre.dumps(format='json') if S.pre else '' G.attrs['tags']=json.dumps(unicodeTags) G.attrs['plots']=json.dumps(S.plot.plots) G.attrs['labels']=wooJSON.encode(dict(S.labels)) G.attrs['engines']=wooJSON.encode(list(S.engines)) G_misc=G.create_group('misc') for k,v in kw.items(): G_misc.attrs[k]=wooJSON.encode(v) G_series=G.create_group('series') for k,v in series.items(): # hdf5 is smart enough to create sub-groups automatically if the name contains slashes G_series[k]=v hdf.close() else: raise ValueError('*fmt* must be one of "sqlite", "hdf5", None (autodetect based on suffix)') if syncXls: import re xls=db+'.xlsx' if not quiet: log.info('Converting %s to 
file://%s'%(db,os.path.abspath(xls))) dbToSpread(db,out=xls,dialect='xlsx') for ph in postHooks: ph(db)
def _checkHdf5sim(sim):
    '''Check that the HDF5 group *sim* (one simulation record) declares the
    expected database format version.

    :param sim: h5py group of one simulation inside the results database.
    :return: True when the check passes.
    :raises RuntimeError: when ``formatVersion`` is missing or differs from
        :obj:`dbFormatVersion`.
    '''
    if 'formatVersion' not in sim.attrs:
        # was: the message contained '%s' placeholders but no arguments at all
        raise RuntimeError('database %s: simulation %s does not define formatVersion?!'%(sim.file.filename,sim.name))
    if sim.attrs['formatVersion']!=dbFormatVersion:
        # was: the message referenced an undefined local name `db`, so a
        # mismatch raised NameError instead of the intended RuntimeError;
        # the filename is taken from the h5py object itself instead
        raise RuntimeError('database format mismatch: %s: %s/formatVersion==%s, should be %s'%(sim.file.filename,sim.name,sim.attrs['formatVersion'],dbFormatVersion))
    return True
# dbReadResults(db,basicTypes=False): return the whole results database as a
# list of per-simulation dicts. Tries HDF5 first (h5py.h5f.is_hdf5), falling
# back to sqlite3 on ImportError/IOError; JSON-encoded fields ('pre','tags',
# 'plots','engines','labels','misc') are decoded through WooJSONDecoder unless
# *basicTypes* is set, and series become numpy arrays (plain dicts/lists with
# *basicTypes*). Each HDF5 record is validated with _checkHdf5sim().
# NOTE(review): the two physical lines below were flattened by HTML extraction
# (an inline traceback comment is embedded mid-line); kept byte-identical
# rather than risking an incorrect reconstruction.
[docs]def dbReadResults(db,basicTypes=False): '''Return list of dictionaries, representing database contents. :param basicTypes: don't reconstruct Woo objects from JSON (keep those as dicts) and don't return data series as numpy arrays. .. todo:: Nested (grouped) series are not read correctly from HDF5. Should be fixed either by flattening the hiearchy (like we do in :obj:`dbToSpread` and stuffing it into returned dict; or by reflecting the hierarchy in the dict returned. ''' import numpy, sqlite3, json, woo.core try: import h5py, h5py.h5f if not h5py.h5f.is_hdf5(bytes(db,'utf-8')): raise IOError('Not a HDF5 file.') hdf=h5py.File(db,'r',libver='latest') except (ImportError,IOError): # connect always succeeds, as it seems, even if the type is not sqlite3 db # in that case, it will fail at conn.execute below conn=sqlite3.connect(db,timeout=sqliteTimeout,detect_types=sqlite3.PARSE_DECLTYPES) hdf=None if hdf: with FileLock(db): ret=[] # iterate over simulations for simId in hdf: sim=hdf[simId] _checkHdf5sim(sim) rowDict={} for att in sim.attrs: if att in ('pre','tags','plots','engines','labels'): val=sim.attrs[att] if hasattr(val,'__len__') and len(val)==0: continue rowDict[att]=woo.core.WooJSONDecoder(onError='warn').decode(val) else: rowDict[att]=sim.attrs[att] rowDict['misc'],rowDict['series']={},{} for misc in sim['misc'].attrs: rowDict['misc'][misc]=woo.core.WooJSONDecoder(onError='warn').decode(sim['misc'].attrs[misc]) for s in sim['series']: try: # This was sometimes causing trouble (why?) 
# # File "/usr/lib/python2.7/dist-packages/h5py/_hl/group.py", line 153, in __getitem__ # oid = h5o.open(self.id, self._e(name), lapl=self._lapl) # File "/usr/lib/python2.7/dist-packages/h5py/_hl/base.py", line 113, in _e # name = name.encode('ascii') # AttributeError: 'int' object has no attribute 'encode' rowDict['series'][s]=numpy.array(sim['series'][s]) except AttributeError: # should we fail, do the conversion indirectly, through list (perhaps slower) rowDict['series'][s]=numpy.array(list(sim['series'][s])) ret.append(rowDict) ## hdf.close() return ret else: # sqlite conn.row_factory=sqlite3.Row ret=[] for i,row in enumerate(conn.execute('SELECT * FROM batch ORDER BY finished')): rowDict={} for key in row.keys(): # json-encoded fields if key in ('pre','tags','plots','misc'): if basicTypes: val=json.loads(row[key]) else: val=woo.core.WooJSONDecoder(onError='warn').decode(row[key]) elif key=='series': series=json.loads(row[key]) assert type(series)==dict if basicTypes: val=series else: val=dict([(k,numpy.array(v)) for k,v in series.items()]) else: val=row[key] if basicTypes and key=='finished': val=val.isoformat(sep='_') rowDict[key]=val ret.append(rowDict) conn.close() # don't occupy the db longer than necessary return ret
def dbToJSON(db,**kw):
    '''Return simulation database as JSON string.

    :param kw: additional arguments passed to `json.dumps <http://docs.python.org/3/library/json.html#json.dumps>`_.
    '''
    import woo.core
    # read everything as plain types, then serialize with the project encoder
    encoder=woo.core.WooJSONEncoder(indent=None,oneway=True)
    return encoder.encode(dbReadResults(db,basicTypes=True),**kw)
# dbToSpread(db,...): flatten every simulation record in *db* (HDF5 or sqlite)
# into path-keyed columns (nested dicts/lists joined with '.'), sort field
# names naturally with *sortFirst* ones first, and write them out as CSV
# (csv module), XLS (xlwt) or XLSX (xlsxwriter); the 'series' field of each
# record goes to a per-simulation extra sheet (xls/xlsx only; XLS row/column
# limits are enforced with warnings, hyperlinks and datetimes get dedicated
# cell styles, sheet names are truncated/sanitized in fixSheetname).
# NOTE(review): the physical lines below were flattened by HTML extraction
# (one comment and one log string are even split across lines); kept
# byte-identical rather than risking an incorrect reconstruction.
[docs]def dbToSpread(db,out=None,dialect='xls',rows=False,series=True,ignored=('plotData','tags'),sortFirst=('title','batchtable','batchTableLine','finished','sceneId','duration'),selector=None): ''' Select simulation results (using *selector*) stored in batch database *db*, flatten data for each simulation, and dump the data in the CSV format (using *dialect*: 'excel', 'excel-tab', 'xls') into file *out* (standard output if not given). If *rows*, every simulation is saved into one row of the CSV file (i.e. attributes are in columns), otherwise each simulation corresponds to one column and each attribute is in one row. If *out* ends with '.xls', the 'xls' dialect is forced regardless of the value given. The 'xls' format will refuse to write to standard output (*out* must be given). *ignored* fields are used to exclude large data from the dump: either database column of that name, or any attribute of that name. Attributes are flattened and path separated with '.'. *series* determines whether the `series` field will be written to a separate sheet, named by the sceneId. This is only supported with the `xls` dialect and raises error otherwise (unless `series` field is empty). Fields are sorted in their natural order (i.e. alphabetically, but respecting numbers), with *sortFirst* fields coming at the beginning. ''' def flatten(obj,path='',sep='.',ret=None): '''Flatten possibly nested structure of dictionaries and lists (such as data decoded from JSON). Returns dictionary, where each object is denoted by its path, paths being separated by *sep*. Unicode strings are encoded to utf-8 strings (with encoding errors ignored) so that the result can be written with the csv module. Adapted from http://stackoverflow.com/questions/8477550/flattening-a-list-of-dicts-of-lists-of-dicts-etc-of-unknown-depth-in-python-n . 
''' if ret is None: ret={} if isinstance(obj,list): for i,item in enumerate(obj): flatten(item,(path+sep if path else '')+str(i),ret=ret) elif isinstance(obj,dict): for key,value in list(obj.items()): flatten(value,(path+sep if path else '')+str(key),ret=ret) elif isinstance(obj,str): ret[path]=str(obj) else: # other values passed as they are ret[path]=obj return ret def natural_key(string_): '''Return key for natural sorting (recognizing consecutive numerals as numbers): http://www.codinghorror.com/blog/archives/001018.html http://stackoverflow.com/a/3033342/761090 ''' import re return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)] def fixSheetname(n): # truncate the name to 31 characters, otherwise there would be exception # see https://groups.google.com/forum/?fromgroups=#!topic/python-excel/QK4iJrPDSB8 if len(n)>30: n=u'…'+n[-29:] # invald characters (is that documented somewhere?? those are the only ones I found manually) n=n.replace('[','_').replace(']','_').replace('*','_').replace(':','_').replace('/','_') return n import sqlite3,json,sys,csv,warnings,numpy,operator allData={} # lowercase ignored=[i.lower() for i in ignored] sortFirst=[sf.lower() for sf in sortFirst] seriesData={} # open db and get rows try: import h5py, h5py.h5f if isinstance(db,str): dbBytes=bytes(db,'utf-8') else: dbBytes=db # check first, to avoid warning from h5py.File in stderr if not h5py.h5f.is_hdf5(dbBytes): raise IOError('Not a HDF5 file.') hdf=h5py.File(db,'r',libver='latest') except (ImportError,IOError): # connect always succeeds, as it seems, even if the type is not sqlite3 db # in that case, it will fail at conn.execute below conn=sqlite3.connect(db,timeout=sqliteTimeout,detect_types=sqlite3.PARSE_DECLTYPES) hdf=None if hdf: with FileLock(db): ret=[] if selector: warnings.warn('selector parameter ignored, since the file is HDF5 (not SQLite)') # first loop: sort by batchTable, batchTableLine, finished # for i,simId in enumerate(hdf): 
sortAttrs=('batchTable','batchTableLine','finished') # sort simulation ids by attribute tuples: # http://stackoverflow.com/a/6620187/761090 simIds=list(zip(*sorted(zip([s for s in hdf if _checkHdf5sim(hdf[s])],[tuple([natural_key(str(hdf[s].attrs[a])) for a in sortAttrs if a in hdf[s].attrs]) for s in hdf if _checkHdf5sim(hdf[s])]),key=operator.itemgetter(1))))[0] # print simIds # iterate over simulations for i,simId in enumerate(simIds): sim=hdf[simId] _checkHdf5sim(sim) rowDict={} for att in sim.attrs: val=sim.attrs[att] try: val=json.loads(val) except: pass rowDict[att]=val rowDict['misc']={} for att in sim['misc'].attrs: val=sim['misc'].attrs[att] try: val=json.loads(val) except: pass rowDict['misc'][att]=val series={} # we have to flatten series, since it may contain nested hdf5 groups # http://stackoverflow.com/a/6036037/761090 def flat_group_helper(prefix,g): if prefix: prefix+='/' for name,sub in g.items(): if isinstance(sub,h5py.Group): for j in flat_group_helper(prefix+name,sub): yield j else: yield (prefix+name,sub) def flat_group(g): import collections return collections.OrderedDict(flat_group_helper('',g)) # print flat_group(sim['series']).keys() for sName,sVal in flat_group(sim['series']).items(): # print sName series[sName]=numpy.array(sVal) # attributes are bytes, so '_' must be b'_' to avoid TypeError (can't concat bytes to str) seriesData[str(sim.attrs['title'])+str('_')+str(sim.attrs['sceneId'])]=series # same as for sqlite3 below flat=flatten(rowDict) for key,val in flat.items(): if key.lower() in ignored: continue if key not in allData: allData[key]=[None]*i+[val] else: allData[key]+=[None]*(i-len(allData[key]))+[val] ## hdf.close() else: conn=sqlite3.connect(db,timeout=sqliteTimeout,detect_types=sqlite3.PARSE_DECLTYPES) conn.row_factory=sqlite3.Row for i,row in enumerate(conn.execute(selector if selector!=None else 'SELECT * FROM batch ORDER BY title')): rowDict={} for key in row.keys(): if key.lower() in ignored: continue val=row[key] # 
decode val from json, if it fails, leave it alone try: val=json.loads(val) except: pass if key!='series': rowDict[key]=val elif series: seriesData[row['title']+'_'+row['sceneId']]=val # set only if allowed flat=flatten(rowDict) for key,val in flat.items(): if key.lower() in ignored: continue if key not in allData: allData[key]=[None]*i+[val] else: allData[key]+=[None]*(i-len(allData[key]))+[val] conn.close() # don't occupy the db longer than necessary fields=sorted(allData.keys(),key=natural_key) # apply sortFirst fieldsLower=[f.lower() for f in fields]; fields0=fields[:] # these two have always same order for sf in reversed(sortFirst): # reverse so that the order of sortFirst is respected if sf in fieldsLower: # lowercased name should be put to the front field=fields0[fieldsLower.index(sf)] # get the case-sensitive one fields=[field]+[f for f in fields if f!=field] # rearrange xls=dialect.lower()=='xls' or (out and out.endswith('.xls')) xlsx=dialect.lower()=='xlsx' or (out and out.endswith('.xlsx')) if xls or xlsx: if out==None: raise ValueError('The *out* parameter must be given when using the xls/xlsx dialects (refusing to write binary to standard output).') # http://scienceoss.com/write-excel-files-with-python-using-xlwt/ # http://www.youlikeprogramming.com/2011/04/examples-generating-excel-documents-using-pythons-xlwt/ import urllib.parse import datetime if xls: import xlwt wbk=xlwt.Workbook('utf-8') sheet=wbk.add_sheet(fixSheetname(db)) # truncate if too long, see below # datetime style datetimeStyle=xlwt.XFStyle() datetimeStyle.num_format_str='yyyy-mm-dd_hh:mm:ss' # http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx # header style font=xlwt.Font() font.bold=True headStyle=xlwt.XFStyle() headStyle.font=font hrefStyle=xlwt.easyxf('font: underline single') # default style defaultStyle=xlwt.Style.default_style else: import xlsxwriter wbk=xlsxwriter.Workbook(out) sheet=wbk.add_worksheet(fixSheetname(db)) 
headStyle=wbk.add_format({'bold':True}) datetimeStyle=wbk.add_format({'num_format':'yyyy-mm-dd_hh:mm:ss'}) hrefStyle=wbk.add_format({'underline':1}) defaultStyle=None # cell styling styleDict={datetime.datetime:datetimeStyle} # add styles for other custom types here # normal and transposed setters if rows: setCell=lambda s,r,c,data,style: write_cell(s,r,c,data,style) else: setCell=lambda s,r,c,data,style: write_cell(s,c,r,data,style) def write_cell(s,c,r,data,style): hyperlink=isinstance(data,(str,str)) and (data.startswith('file://') or data.startswith('http://') or data.startswith('https://')) if hyperlink: if xls: data=data.replace('"',"'") data=xlwt.Formula('HYPERLINK("%s","%s")'%(urllib.parse.quote(data,safe=':/'),data)) style=hrefStyle s.write(c,r,data,style) else: s.write_url(c,r,data,string=data) else: if isinstance(data,numpy.int64): data=int(data) elif isinstance(data,numpy.float64): data=float(data) # XLSX does not handle NaN/Inf (yet) if not xls and isinstance(data,float) and (isinf(data) or isnan(data)): s.write(c,r,str(data),style) else: s.write(c,r,data,style) for col,field in enumerate(fields): # headers setCell(sheet,0,col,field,headStyle) # data for row,val in enumerate(allData[field]): style=styleDict.get(type(val),defaultStyle) setCell(sheet,row+1,col,val,style) # print row,type(val),val # save data series if seriesData: for sheetName,dic in seriesData.items(): if xls: sheet=wbk.add_sheet(fixSheetname(sheetName)) else: sheet=wbk.add_worksheet(fixSheetname(sheetName)) # perhaps write some header here for col,colName in enumerate(sorted(dic.keys())): if xls and col>255: log.warning('Rhe data being converted to XLS (%s) contain %d columns, which is more than 255, the limit of the XLS format. Extra data will be discarded from the XLS output. 
Use .xlsx to overcome this limitation.'%(out,len(dic))) break sheet.write(0,col,colName,headStyle) rowOffset=1 # length of header for row in range(0,len(dic[colName])): if xls and row+rowOffset>65535: log.warning('the data being converted to XLS (%s) contain %d rows (with %d header rows), which is more than 65535, the limit of the XLS file format. Extra data will be discarded from the XLS output. Use .xlsx to overcome this limitation.'%(out,len(dic[colName]),rowOffset)) break val=dic[colName][row] if xlsx and (isnan(val) or isinf(val)): val=str(val) sheet.write(row+rowOffset,col,val) if xls: wbk.save(out) else: wbk.close() else: if seriesData: raise RuntimeError('Data series can only be written with the *xls* dialect') outt=(open(out,'w') if out else sys.stdout) import datetime def asStr(x): 'Customize string conversions for some types' if type(x)==datetime.datetime: return x.strftime('%Y-%m-%d_%H:%M:%S') # as ISO, but without microseconds return x # write into CSV if rows: # one attribute per column writer=csv.DictWriter(outt,fieldnames=fields,dialect=dialect) writer.writeheader() for i in range(0,len(allData[fields[0]])): writer.writerow(dict([(k,asStr(allData[k][i])) for k in allData.keys()])) else: # one attribute per row writer=csv.writer(outt,dialect=dialect) for a in fields: writer.writerow([a]+[asStr(b) for b in allData[a]])
# readParamsFromTable(scene,...): when running inside a batch with a table,
# read the table line selected by the batch options (via TableParamReader),
# eval string cells in a woo+math namespace, and expose each column as an
# attribute of the S.lab.<under> pseudo-module; **kw supplies defaults for
# columns that are absent, '*' or '-'. Also sets S.tags['line'], ['title'],
# ['params'-like] summaries via pseudoMod.explicitParams_/defaultParams_.
# NOTE(review): the two physical lines below were flattened by HTML
# extraction; kept byte-identical rather than risking an incorrect
# reconstruction.
[docs]def readParamsFromTable(scene,under='table',noTableOk=True,unknownOk=False,**kw): """ Read parameters from a file and assign them to :obj:`woo.core.Scene.lab` under the ``under`` pseudo-module (e.g. ``Scene.lab.table.foo`` and so on. This function is used for scripts (as opposed to preprocessors) running in a batch. The file format is described in :obj:`TableParamReader` (CSV or XLS). Assigned tags (the ``title`` column is synthesized if absent,see :obj:`woo.utils.TableParamReader`):: S=woo.master.scene S.tags['title']=… # assigns the title column; might be synthesized S.tags['params']="name1=val1,name2=val2,…" # all explicitly assigned parameters S.tags['defaultParams']="unassignedName1=defaultValue1,…" # parameters that were left at their defaults S.tags['d.id']=s.tags['id']+'.'+s.tags['title'] S.tags['id.d']=s.tags['title']+'.'+s.tags['id'] :param tableFile: text file (with one value per blank-separated columns) :param under: name of pseudo-module under ``S.lab`` to save all values to (``table`` by default) :param int tableLine: number of line where to get the values from :param bool noTableOk: if False, raise exception if the file cannot be open; use default values otherwise :param bool unknownOk: do not raise exception if unknown column name is found in the file, and assign it as well :return: None """ tagsParams=[] # dictParams is what eventually ends up in S.lab.table.* (default+specified values) dictDefaults,dictParams={},{} import os, builtins,re,math,woo # create the S.lab.table pseudo-module S=scene S.lab._newModule(under) pseudoMod=getattr(S.lab,under) if not inBatch() or not hasBatchTable(): if not noTableOk: raise EnvironmentError("Batch options not defined (and required; pass noTableOk=True if they are not)") S.tags['line']='l!' 
else: tableFile,tableLine=wooOptions.batchTable,wooOptions.batchLine if tableFile=='': if not noTableOk: raise RuntimeError("No table specified in batch options, but noTableOk was not given.") else: return allTab=TableParamReader(tableFile).paramDict() if tableLine not in allTab: raise RuntimeError("Table %s doesn't contain valid line number %d"%(tableFile,tableLine)) vv=allTab[tableLine] S.tags['line']='l%d'%tableLine S.tags['title']=str(vv['title']) #S.tags['idt']=S.tags['id']+'.'+S.tags['title']; #S.tags['tid']=S.tags['title']+'.'+S.tags['id'] # assign values specified in the table to python vars # !something cols are skipped, those are env vars we don't treat at all (they are contained in title, though) for col in vv.keys(): if col=='title' or col[0]=='!': continue if col not in kw.keys() and (not unknownOk): raise NameError("Parameter `%s' has no default value assigned"%col) if vv[col]=='*': vv[col]=kw[col] # use default value for * in the table elif vv[col]=='-': continue # skip this column elif col in kw.keys(): kw.pop(col) # remove the var from kw, so that it contains only those that were default at the end of this loop #print 'ASSIGN',col,vv[col] tagsParams+=['%s=%s'%(col,vv[col])]; # when reading from XLS, data might be numbers; use eval only for strings, otherwise use the thing itself dictParams[col]=eval(vv[col],dict(woo=woo,**math.__dict__)) if isinstance(vv[col],str) else vv[col] # assign remaining (default) keys to python vars defaults=[] for k in kw.keys(): dictDefaults[k]=kw[k] defaults+=["%s=%s"%(k,kw[k])]; pseudoMod.defaultParams_=",".join(defaults) pseudoMod.explicitParams_=",".join(tagsParams) # save all vars to the pseudo-module dictDefaults.update(dictParams) for k,v in dictDefaults.items(): setattr(pseudoMod,k,v) return None
# runPreprocessor(pre,preFile): run Preprocessor *pre*; outside of batch it is
# simply called (title tag set from pre.title). In batch mode the selected
# table line first overrides preprocessor attributes via nestedSetattr (dotted
# paths with optional [index] components); '%'-prefixed columns force a reload
# of *preFile* with overrideHashPercent, and values starting with '=' (or
# "'=") are evaluated after the plain ones so they may reference other
# parameters through `self`. Sets S.tags['line']/['title'] on the result.
# NOTE(review): the three physical lines below were flattened by HTML
# extraction; kept byte-identical rather than risking an incorrect
# reconstruction. Also note `eval` on table cells: table content must be
# trusted input.
[docs]def runPreprocessor(pre,preFile=None): """Execute given :obj:`Preprocessor <woo.core.Preprocessor>`, modifying its attributes from batch (if running in batch). Each column from the batch table (except of environment variables starting with ``!``) must correspond to a preprocessor's attribute. Nested attributes are allowed, e.g. with :obj:`woo.pre.horse.FallingHorse`, a column named ``mat.tanPhi`` will modify horse's material's friction angle, using the default material object. """ def nestedSetattr(obj,attr,val): import re attrs=attr.split(".") indexRE=r'([a-zA-Z_0-9]+)\s*\[\s*(-?[0-9]+)\s*\]' for i in attrs[:-1]: #if i.strip().endswith(']') m=re.match(indexRE,i.strip()) if m: obj=getattr(obj,m.group(1))[int(m.group(2))] else: obj=getattr(obj,i) m=re.match(indexRE,attrs[-1].strip()) if m: getattr(obj,m.group(1))[int(m.group(2))]=val else: setattr(obj,attrs[-1],val) #if not hasattr(obj,attrs[-1]): raise AttributeError('%s: no such attribute: %s.'%(obj.__module__+'.'+type(obj).__name__,attrs[-1])) #setattr(obj,attrs[-1],val) # just run preprocessor in this case, plus set title, if given in Preprocessor if not inBatch(): S=pre() if pre.title: S.tags['title']=pre.title return S import os import woo,math,numpy tableFileLine=wooOptions.batchTable,wooOptions.batchLine evalParams=[] if wooOptions.batchTable and wooOptions.batchLine>=0: allTab=TableParamReader(wooOptions.batchTable).paramDict() if not wooOptions.batchLine in allTab: raise RuntimeError("Table %s doesn't contain valid line number %d"%(wooOptions.batchTable,wooOptions.batchLine)) vv=allTab[wooOptions.batchLine] # overriding things set in the #: lines of preprocessor from the table using %varName=... 
if len([v for v in vv.keys() if v.startswith('%')]): if preFile is None: raise RuntimeError('Unable to re-load preprocessor due to %something columns: preFile was left empty by the caller.') overrideHashPercent={} for v in list(vv.keys()): # copy list so that dictionary does not change while iterating over it if not v.startswith('%'): continue val=vv[v] if isinstance(val,str): val=eval(val,dict(woo=woo,**math.__dict__)) overrideHashPercent[v[1:]]=val log.info('Re-assigning #%% variable %s = %s'%(str(v[1:]),str(val))) vv.pop(v) # print(vv) pre=woo.core.Object.load(preFile,overrideHashPercent=overrideHashPercent) # set preprocessor parameters first for name,val in vv.items(): if name[0]=='!': continue # pseudo-variables such as !SCRIPT, !THREADS and so on if name=='title': continue if val in ('*','-',''): continue # postponed, computed later # postpone evaluation of parameters starting with = so that they can use other params if isinstance(val,str) and val.startswith('='): evalParams.append((name,val[1:])) elif isinstance(val,str) and val.startswith("'="): evalParams.append((name,val[2:])) else: log.info('OVERRIDING FROM TABLE: %s = %s'%(name,val)) nestedSetattr(pre,name,eval(val,globals(),dict(woo=woo,math=math,numpy=numpy))) # woo.unit # postponed evaluation of computable params for name,val in evalParams: log.info('OVERRIDING FROM TABLE (delayed): %s = %s'%(name,val)) nestedSetattr(pre,name,eval(val,globals(),dict(woo=woo,math=math,numpy=numpy,self=pre))) # check types, if this is a python preprocessor if hasattr(pre,'checkAttrTypes'): pre.checkAttrTypes() # run preprocessor if wooOptions.batchTable: pre.title=str(vv['title']) S=pre() # set tags from batch if wooOptions.batchTable: S.tags['line']='l%d'%wooOptions.batchLine S.tags['title']=str(vv['title']) else: S.tags['line']='default' S.tags['title']=str(preFile if preFile else '[no file]') #S.tags['idt']=(S.tags['id']+'.'+S.tags['title']).replace('/','_') 
#S.tags['tid']=(S.tags['title']+'.'+S.tags['id']).replace('/','_') return S
class TableParamReader(object):
    r"""Class for reading simulation parameters from text file.

    Each parameter is represented by one column, each parameter set by one line. Colums are separated by blanks (no quoting).

    First non-empty line contains column titles (without quotes). You may use special column named 'title' to describe this parameter set; if such colum is absent, title will be built by concatenating column names and corresponding values (``param1=34,param2=12.22,param4=foo``)

    * from columns ending in ``!`` (the ``!`` is not included in the column name)
    * from all columns, if no columns end in ``!``.
    * columns containing literal - (minus) will be ignored

    Empty lines within the file are ignored (although counted); ``#`` starts comment till the end of line.

    Number of blank-separated columns must be the same for all non-empty lines.

    A special value ``=`` can be used instead of parameter value; value from the previous non-empty line will be used instead (works recursively); in XLS, *empty* cell is treated the same as ``=``.

    This class is used by :obj:`woo.utils.readParamsFromTable`.

    >>> tryData=[
    ...     ['head1','important2!','head3','...','...','...','!OMP_NUM_THREADS!','abcd'],
    ...     [1,1.1, '1','.','1','5', 1.2,1.3,],
    ...     ['a','b','HE','AD','_','3','c','d','###','comment'],
    ...     ['# empty line'],
    ...     [1,'=','=','=','=','=','=','g']
    ... ]
    >>> import woo
    >>> tryFile=woo.master.tmpFilename()
    >>> # write text
    >>> f1=tryFile+'.txt'
    >>> txt=open(f1,'w')
    >>> for ll in tryData: n=txt.write(' '.join([str(l) for l in ll])+'\n') # set n to suppress output in doctest under py3k
    >>> txt.close()
    >>>
    >>> # write xls
    >>> import xlwt,itertools
    >>> f2=tryFile+'.xls'
    >>> xls=xlwt.Workbook(); sheet=xls.add_sheet('test')
    >>> for r in range(len(tryData)):
    ...     for c in range(len(tryData[r])):
    ...         sheet.write(r,c,tryData[r][c])
    >>> xls.save(f2)
    >>>
    >>> from pprint import *
    >>> pprint(TableParamReader(f1).paramDict())
    {2: {'!OMP_NUM_THREADS': '1.2',
         'abcd': '1.3',
         'head1': '1',
         'head3': '1.15',
         'important2': '1.1',
         u'title': u'important2=1.1,OMP_NUM_THREADS=1.2'},
     3: {'!OMP_NUM_THREADS': 'c',
         'abcd': 'd',
         'head1': 'a',
         'head3': 'HEAD_3',
         'important2': 'b',
         u'title': u'important2=b,OMP_NUM_THREADS=c'},
     5: {'!OMP_NUM_THREADS': 'c',
         'abcd': 'g',
         'head1': '1',
         'head3': 'HEAD_3',
         'important2': 'b',
         u'title': u'important2=b,OMP_NUM_THREADS=c__line=5__'}}
    >>> pprint(TableParamReader(f2).paramDict())
    {2: {u'!OMP_NUM_THREADS': '1.2',
         u'abcd': '1.3',
         u'head1': '1',
         u'head3': '1.15',
         u'important2': '1.1',
         u'title': u'important2=1.1,OMP_NUM_THREADS=1.2'},
     3: {u'!OMP_NUM_THREADS': 'c',
         u'abcd': 'd',
         u'head1': 'a',
         u'head3': 'HEAD_3',
         u'important2': 'b',
         u'title': u'important2=b,OMP_NUM_THREADS=c'},
     5: {u'!OMP_NUM_THREADS': 'c',
         u'abcd': 'g',
         u'head1': '1',
         u'head3': 'HEAD_3',
         u'important2': 'b',
         u'title': u'important2=b,OMP_NUM_THREADS=c__line=5__'}}
    """
    def __init__(self,file,firstLine=-1):
        """Setup the reader class, read data into memory.

        *firstLine* determines the number of the first line; if negative, 1 is used for XLS
        files and 0 for text files. The reason is that spreadsheets number lines from 1 whereas
        text editors number lines from zero, and having the numbering the same as the usual UI
        for editing that format is convenient.
        """
        import re
        contHead=('...',u'...',u'…') # headers which merge their column into the previous one
        if file.lower().endswith('.xls'):
            if firstLine<0: firstLine=1
            import xlrd
            xls=xlrd.open_workbook(file)
            sheet=xls.sheet_by_index(0)
            maxCol=0
            rows=[] # rows actually containing data (filled in the loop)
            for row in range(sheet.nrows):
                # find the last non-empty cell; a comment cell discards the rest of the row
                lastDataCol=-1
                for col in range(sheet.ncols):
                    c=sheet.cell(row,col)
                    empty=(c.ctype in (xlrd.XL_CELL_EMPTY,xlrd.XL_CELL_BLANK) or (c.ctype==xlrd.XL_CELL_TEXT and c.value.strip()==''))
                    comment=(c.ctype==xlrd.XL_CELL_TEXT and re.match(r'^\s*(#.*)?$',c.value))
                    if comment: break # comment cancels all remaining cells on the line
                    if not empty: lastDataCol=max(col,lastDataCol)
                if lastDataCol>=0:
                    rows.append(row)
                    maxCol=max(maxCol,lastDataCol)
            cols=list(range(maxCol+1))
            # the first data row contains column headings
            headings=[sheet.cell(rows[0],c).value for c in cols]
            values={}
            for r in rows[1:]:
                vv={}
                for c in cols:
                    v=str(sheet.cell(r,c).value)
                    # XLS does not know ints: represent numbers with zero fractional part as
                    # ints, without trailing ".0" or such
                    # http://stackoverflow.com/questions/8825681/integers-from-excel-files-become-floats
                    try:
                        f=float(v)
                        if f==int(f): v=str(int(f))
                    except ValueError: pass
                    vv[c]=v
                values[r+firstLine]=[vv[c] for c in cols]
        else:
            if firstLine<0: firstLine=0
            # text file, space separated
            # read file in memory, remove newlines and comments; the [''] makes lines 1-indexed
            # (rstrip('\n') instead of l[:-1]: the latter ate the last character of a final
            # line lacking a trailing newline)
            with open(file,'r') as f:
                ll=[re.sub(r'\s*#.*','',l.rstrip('\n')) for l in ['']+f.readlines()]
            # usable lines are those that contain something else than just spaces
            usableLines=[i for i in range(len(ll)) if not re.match(r'^\s*(#.*)?$',ll[i])]
            headings=ll[usableLines[0]].split()
            usableLines=usableLines[1:] # remove headings from usableLines; the rest is data
            values={}
            for l in usableLines:
                lSplit=ll[l].split()
                values[l+firstLine]=[str(lSplit[i]) for i in range(len(headings))]
        #
        # from here on, both formats have defined:
        #    values={lineNumber:[val,val,...],...}  # values ordered the same as headings
        #    headings=['col1title!','col2title',...] # as in the file
        #
        # replace empty cells or '=' by the previous value of the parameter
        lines=sorted(values.keys())
        for i,l in enumerate(lines):
            for col,val in enumerate(values[l]):
                if val in ('=',''):
                    # FIX: the original read 'except IndexError as KeyError', which merely
                    # bound the IndexError to the name KeyError and never caught a KeyError
                    try: values[l][col]=values[lines[i-1]][col]
                    except (IndexError,KeyError):
                        raise ValueError("The = specifier on line %d, column %d, refers to nonexistent value on previous line?"%(l,col))
        # merge '...' continuation columns into the column on their left
        nCollapsed=0
        for ih,h in enumerate(headings): # index of headings (headings are pruned after the loop)
            iv=ih-nCollapsed # index of values: changes as values are being removed
            if h in contHead:
                nCollapsed+=1
                if ih<1: raise ValueError("The ... header continuation is not allowed in the first column.")
                # merge adjacent cols contents
                for l in lines:
                    vv=values[l]
                    values[l]=vv[:iv-1]+[str(vv[iv-1])+str(vv[iv])]+vv[iv+1:]
        headings=[h for h in headings if h not in contHead]
        # prune headings with trailing bangs (endswith is safe for empty headings, h[-1] was not)
        rawHeadings=headings
        headings=[(h[:-1] if h.endswith('!') else h) for h in headings]
        # copy to dictionary; that is the way results are supposed to be returned
        dvalues={}
        for l in lines:
            dvalues[l]=dict([(headings[c],str(values[l][c])) for c in range(len(headings))])
        # add descriptions (unless a 'title' column is given): built from bang-columns if there
        # are any, from all columns otherwise; repeated titles get the line number appended
        if 'title' not in headings:
            hasBangs=any(h.endswith('!') for h in rawHeadings)
            descs=set()
            for l in lines:
                ddd=[]
                for col,head in enumerate(rawHeadings):
                    if hasBangs and not head.endswith('!'): continue
                    val=values[l][col]
                    if isinstance(val,str) and val.strip() in ('','-','*'): continue # default value used
                    ddd.append(head.replace('!','')+'='+('%g'%val if isinstance(val,float) else str(val)))
                dd=','.join(ddd).replace("'",'').replace('"','')
                if dd in descs: dd+='__line=%d__'%l
                dvalues[l][u'title']=dd.replace('/','_').replace('[','').replace(']','').replace('*woo.unit','')
                descs.add(dd)
        self.values=dvalues

    def paramDict(self):
        """Return dictionary containing data from file given to constructor. Keys are line
        numbers (which might be non-contiguous and refer to real line numbers that one can see in
        text editors), values are dictionaries mapping parameter names to their values given in
        the file. The special value '=' has already been interpreted, ``!`` (bangs) (if any) were
        already removed from column titles, ``title`` column has already been added (if absent)."""
        return self.values
def cartProdParamTable(params,out,same=''):
    '''Write parameter table (as XLS) where all parameters in pp (which is a dictionary, or :obj:`python:collections.OrderedDict`) are traversed.

    :param same: content of repeated cells; if ``None``, repeated cells are filled with the repeated value. Other useful values are ``'='`` and ``''`` (empty cell)
    :param out: XLS file to write to
    :param params: dictionary-like with parameter values; keys may be n-tuples, which will span multiple columns -- in that case, values must also be n-tuples, and will also span those columns
    :return: total number of lines written

    >>> import collections, woo.batch
    >>> pp=collections.OrderedDict() # use OrderedDict for predictable column ordering
    >>> pp['pattern']=['ortho','hexa']
    >>> pp['radius','...']=[(r,'*woo.unit["mm"]') for r in (1,2,3)] # use continuation columns for unit specification
    >>> pp['gravity','...','...']=[('(0,0,',g,')') for g in (9.81,20)] # use continuation columns for concatenation of expression
    >>> xls=woo.master.tmpFilename()+'.xls'
    >>> woo.batch.cartProdParamTable(params=pp,out=xls)
    12
    >>> import pprint
    >>> pprint.pprint(TableParamReader(xls).paramDict())
    {2: {u'gravity': '(0,0,9.81)',
         u'pattern': 'ortho',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=1mm,gravity=(0,0,9.81)'},
     3: {u'gravity': '(0,0,20)',
         u'pattern': 'ortho',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=1mm,gravity=(0,0,20)'},
     4: {u'gravity': '(0,0,9.81)',
         u'pattern': 'ortho',
         u'radius': '2*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=2mm,gravity=(0,0,9.81)'},
     5: {u'gravity': '(0,0,20)',
         u'pattern': 'ortho',
         u'radius': '2*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=2mm,gravity=(0,0,20)'},
     6: {u'gravity': '(0,0,9.81)',
         u'pattern': 'ortho',
         u'radius': '3*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=3mm,gravity=(0,0,9.81)'},
     7: {u'gravity': '(0,0,20)',
         u'pattern': 'ortho',
         u'radius': '3*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=3mm,gravity=(0,0,20)'},
     8: {u'gravity': '(0,0,9.81)',
         u'pattern': 'hexa',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=hexa,radius=1mm,gravity=(0,0,9.81)'},
     9: {u'gravity': '(0,0,20)',
         u'pattern': 'hexa',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=hexa,radius=1mm,gravity=(0,0,20)'},
     10: {u'gravity': '(0,0,9.81)',
          u'pattern': 'hexa',
          u'radius': '2*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=2mm,gravity=(0,0,9.81)'},
     11: {u'gravity': '(0,0,20)',
          u'pattern': 'hexa',
          u'radius': '2*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=2mm,gravity=(0,0,20)'},
     12: {u'gravity': '(0,0,9.81)',
          u'pattern': 'hexa',
          u'radius': '3*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=3mm,gravity=(0,0,9.81)'},
     13: {u'gravity': '(0,0,20)',
          u'pattern': 'hexa',
          u'radius': '3*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=3mm,gravity=(0,0,20)'}}

    .. csv-table:: Generated cartesian product parameter table (XLS)
       :header: pattern,radius,...,gravity,...,...

       ortho,1,"*woo.unit[""mm""]","(0,0,",9.81,)
       ,,,,20,
       ,2,,,9.81,
       ,,,,20,
       ,3,,,9.81,
       ,,,,20,
       hexa,1,,,9.81,
       ,,,,20,
       ,2,,,9.81,
       ,,,,20,
       ,3,,,9.81,
       ,,,,20,
    '''
    import xlwt,itertools
    xls=xlwt.Workbook()
    sheet=xls.add_sheet('product')
    bold=xlwt.easyxf('font: bold on')
    # header row: an n-tuple key spans n columns
    col=0
    for k in params.keys():
        for kPart in (k if isinstance(k,tuple) else (k,)):
            sheet.write(0,col,kPart,style=bold)
            col+=1
    def _writeCell(r,c,value,prevValue,firstRow):
        # write the value itself on the first data row, when it differs from the row above,
        # or always if same is None; otherwise write the (non-empty) placeholder
        if same is None or firstRow or prevValue!=value: sheet.write(r,c,str(value))
        elif same!='': sheet.write(r,c,same)
    prevVV=None
    # FIX: without this initialization, an empty cartesian product (some value list empty)
    # left 'row' unbound and 'return row+1' raised NameError; now 0 is returned instead
    row=-1
    for row,vv in enumerate(itertools.product(*params.values())):
        col=0
        for i,v in enumerate(vv):
            if isinstance(v,tuple):
                # tuple value spans several columns, mirroring the tuple key
                for ii,w in enumerate(v):
                    _writeCell(row+1,col,w,(prevVV[i][ii] if prevVV else None),not prevVV)
                    col+=1
            else:
                _writeCell(row+1,col,v,(prevVV[i] if prevVV else None),not prevVV)
                col+=1
        prevVV=vv
    xls.save(out)
    return row+1
# The doctests above still carry Python-2 u'' literal prefixes; strip them so the rendered
# documentation shows plain strings. Guard against -OO runs, where docstrings are stripped
# and __doc__ is None (the unguarded .replace raised AttributeError there).
if TableParamReader.__doc__: TableParamReader.__doc__=TableParamReader.__doc__.replace("u'","'")
if cartProdParamTable.__doc__: cartProdParamTable.__doc__=cartProdParamTable.__doc__.replace("u'","'")

if __name__=="__main__":
    ## this is now in the doctest as well
    tryData=[
        ['head1','important2!','!OMP_NUM_THREADS!','abcd'],
        [1,1.1,1.2,1.3,],
        ['a','b','c','d','###','comment'],
        ['# empty line'],
        [1,'=','=','g']
    ]
    import os.path, tempfile
    # use the platform temp dir rather than the hard-coded (non-portable) /tmp
    tryFile=os.path.join(tempfile.gettempdir(),'try-tbl')
    # write text
    f1=tryFile+'.txt'
    with open(f1,'w') as txt:
        for ll in tryData:
            txt.write(' '.join([str(l) for l in ll])+'\n')
    # write xls
    import xlwt
    f2=tryFile+'.xls'
    xls=xlwt.Workbook(); sheet=xls.add_sheet('test')
    for r in range(len(tryData)):
        for c in range(len(tryData[r])):
            sheet.write(r,c,tryData[r][c])
    xls.save(f2)
    from pprint import pprint
    pprint(TableParamReader(f1).paramDict())
    pprint(TableParamReader(f2).paramDict())