# encoding: utf-8
# for use with globals() when reading table
nan=float('nan')
from math import *
from minieigen import *
import warnings
import sys
from . import utils
log=utils.makeLog(__name__)
from wooMain import options as wooOptions
try:
    from lockfile import FileLock
except ImportError:
    class FileLock:
        '''Dummy class if the `lockfile <https://pypi.python.org/pypi/lockfile>`_ module is not importable.

        It performs no locking at all; it only offers the subset of the ``lockfile.FileLock``
        interface used in this module (context manager protocol and :obj:`is_locked`).
        '''
        def __init__(self,f):
            warnings.warn("The 'lockfile' module is not importable, '%s' will not be locked. If this file is concurrently accessed by several simulations, it may get corrupted!"%f)
        def is_locked(self):
            'Always return False: the dummy never creates any lock, so there can be no (stale) lock to report.'
            return False
        def __enter__(self): return self
        def __exit__(self,*args): pass
# Version of the on-disk results database layout; increase every time the db format
# changes, to avoid errors. It is stored with each simulation written to the database
# and compared when simulations are read back.
# Applies to both sqlite and hdf5 dbs.
dbFormatVersion=3
# Timeout for opening the database if locked; passed as the *timeout* argument
# to sqlite3.connect.
sqliteTimeout=1000
def wait():
    'If running inside a batch, start the master simulation (if not already running) and block until it stops by itself. Typically used at the end of script so that it does not finish prematurely in batch mode (the execution would be ended in such a case). Does nothing outside of batch.'
    import woo
    if not inBatch(): return
    S=woo.master.scene
    if not S.running: S.run()
    # wait for Master (in case scene is re-assigned)
    woo.master.waitForScenes()
def inBatch():
    'Tell whether we are running inside the batch or separately: true if a batch table is set or the batch line number is non-negative.'
    return bool(wooOptions.batchTable) or wooOptions.batchLine>=0
def hasBatchTable():
    'Tell whether an external batch table is given or not (scripts may be run in batch also without batch tables).'
    return wooOptions.batchTable is not None and len(wooOptions.batchTable)>0
def mayHaveStaleLock(db):
    '''Tell whether the lock associated with the database file *db* is currently held.

    Only HDF5 databases (recognized by file extension) are checked; for any other
    extension the function returns ``None`` without looking at the lock.
    '''
    import os.path
    if os.path.splitext(db)[-1] not in ('.h5','.hdf5','.he5','.hdf'): return
    return FileLock(db).is_locked()
def writeResults(scene,defaultDb='woo-results.hdf5',syncXls=True,dbFmt=None,series=None,quiet=False,postHooks=(),**kw):
    '''
    Write results to batch database. With *syncXls*, corresponding excel-file is re-generated.
    Series is a dictionary of 1d arrays written to separate sheets in the XLS. If *series* is `None`
    (default), `S.plot.data` are automatically added. All other ``**kw``
    arguments are serialized in the misc field, which then appears in the main XLS sheet.
    All data are serialized using json so that they can be read back in a language-independent manner.
    *postHooks* is list of functions (taking a single argument - the database name) which will be called
    once the database has been updated. They can be used in conjunction with :obj:`woo.batch.dbReadResults`
    to write aggregate results from all records in the database.

    :param scene: scene of which results are stored.
    :param defaultDb: database file used when no results file was given in batch options.
    :param dbFmt: ``'hdf5'`` or ``None`` (autodetect from the file extension); ``'sqlite'`` is recognized but no longer supported.
    :param quiet: suppress informative log messages.
    :raises ValueError: no database name, unknown format/extension, or a series item which is not a sequence.
    :raises RuntimeError: the SQLite format was requested.
    '''
    import woo, woo.plot, woo.core
    import os, os.path, datetime
    import numpy
    import json
    S=scene
    if inBatch() and hasBatchTable(): table,line,db=wooOptions.batchTable,wooOptions.batchLine,wooOptions.batchResults
    else: table,line,db='',-1,(defaultDb if not wooOptions.batchResults else wooOptions.batchResults)
    if not db: raise ValueError('No database to write results to (forgot to pass --batch-results?).')
    newDb=not os.path.exists(db)
    if not quiet: log.info('Writing results to the database %s (%s)'%(db,'new' if newDb else 'existing'))
    if dbFmt is None:
        ext=os.path.splitext(db)[-1]
        if ext in ('.sqlite','.db',b'.sqlite',b'.db'): dbFmt='sqlite'
        elif ext in ('.h5','.hdf5','.he5','.hdf',b'.h5',b'.hdf5',b'.he5',b'.hdf'): dbFmt='hdf5'
        else: raise ValueError("Unable to determine database format from '"+db+"' (extension '"+ext+"'): must be *.h5, *.hdf5, *.he5, *.hdf.")
    # plain dict copy of the tags, so that it can be passed to json.dumps
    unicodeTags=dict(S.tags)
    # by default, store everything from S.plot.data under the plot/ prefix
    if series is None:
        series={'plot/'+k:v for k,v in S.plot.data.items()}
    # make sure series are sequences (numpy arrays are stored in hdf5 as they are)
    for k,v in series.items():
        if not isinstance(v,numpy.ndarray) and not hasattr(v,'__len__'): raise ValueError('series["%s"] not a sequence (__len__ not defined).'%k)
    if dbFmt=='sqlite':
        raise RuntimeError('SQLite is no longer supported.')
    elif dbFmt=='hdf5':
        import h5py
        try:
            hdf=h5py.File(db,('w' if newDb else 'a'),libver='latest')
        except IOError:
            # there is nothing to move away if the file does not exist: report the real error
            if not os.path.exists(db): raise
            import shutil, warnings
            warnings.warn("Error opening HDF5 file %s, moving to %s~~corrupt and creating a new one"%(db,db))
            shutil.move(db,db+'~~corrupt')
            hdf=h5py.File(db,'a',libver='latest')
        with FileLock(db):
            # close (and flush) the file while the lock is still held, even if writing fails
            try:
                wooJSON=woo.core.WooJSONEncoder(indent=None,oneway=True)
                # find the first unused group name: id, id~1, id~2, ...
                i=0
                while True:
                    sceneId=S.tags['id']+('' if i==0 else '~%d'%i)
                    if sceneId not in hdf: break
                    i+=1
                # group for our Scene
                G=hdf.create_group(sceneId)
                G.attrs['formatVersion']=dbFormatVersion
                G.attrs['finished']=datetime.datetime.now().replace(microsecond=0).isoformat('_')
                G.attrs['batchTable']=table
                G.attrs['batchTableLine']=line
                G.attrs['sceneId']=S.tags['id']
                G.attrs['title']=S.tags['title']
                G.attrs['duration']=S.duration
                G.attrs['pre']=S.pre.dumps(format='json') if S.pre else ''
                G.attrs['tags']=json.dumps(unicodeTags)
                G.attrs['plots']=json.dumps(S.plot.plots)
                G.attrs['labels']=wooJSON.encode(dict(S.labels))
                G.attrs['engines']=wooJSON.encode(list(S.engines))
                G_misc=G.create_group('misc')
                for k,v in kw.items(): G_misc.attrs[k]=wooJSON.encode(v)
                G_series=G.create_group('series')
                for k,v in series.items():
                    # hdf5 is smart enough to create sub-groups automatically if the name contains slashes
                    G_series[k]=v
            finally:
                hdf.close()
    else: raise ValueError('*fmt* must be one of "sqlite", "hdf5", None (autodetect based on suffix)')
    if syncXls:
        xls=db+'.xlsx'
        if not quiet: log.info('Converting %s to file://%s'%(db,os.path.abspath(xls)))
        dbToSpread(db,out=xls,dialect='xlsx')
    for ph in postHooks: ph(db)
def _checkHdf5sim(sim):
if not 'formatVersion' in sim.attrs: raise RuntimeError('database %s: simulation %s does not define formatVersion?!')
if sim.attrs['formatVersion']!=dbFormatVersion: raise RuntimeError('database format mismatch: %s: %s/formatVersion==%s, should be %s'%(db,sim,sim.attrs['formatVersion'],dbFormatVersion))
return True
# return all series stored in the database
def dbReadResults(db,basicTypes=False):
    '''Return list of dictionaries, representing database contents.

    :param db: name of the database file (HDF5; anything which is not readable as HDF5 is opened as SQLite).
    :param basicTypes: don't reconstruct Woo objects from JSON (keep those as dicts) and don't return data series as numpy arrays.
    :return: list with one dictionary per stored simulation.

    .. note:: *basicTypes* is currently only honored when reading SQLite databases.

    .. todo:: Nested (grouped) series are not read correctly from HDF5. Should be fixed either by flattening the hierarchy (like we do in :obj:`dbToSpread`) and stuffing it into returned dict; or by reflecting the hierarchy in the dict returned.
    '''
    import numpy, sqlite3, json, woo.core
    hdf=None
    try:
        import h5py, h5py.h5f
        dbBytes=bytes(db,'utf-8') if isinstance(db,str) else db
        # check first, to avoid warning from h5py.File in stderr
        if not h5py.h5f.is_hdf5(dbBytes): raise IOError('Not a HDF5 file.')
        hdf=h5py.File(db,'r',libver='latest')
    except (ImportError,IOError):
        hdf=None
    if hdf is not None:
        # everything returned is copied out of the file, so it can be closed when done
        try:
            with FileLock(db):
                ret=[]
                # iterate over simulations
                for simId in hdf:
                    sim=hdf[simId]
                    _checkHdf5sim(sim)
                    rowDict={}
                    for att in sim.attrs:
                        val=sim.attrs[att]
                        if att in ('pre','tags','plots','engines','labels'):
                            # JSON-encoded attributes; empty ones (such as no preprocessor) are skipped
                            if hasattr(val,'__len__') and len(val)==0: continue
                            rowDict[att]=woo.core.WooJSONDecoder(onError='warn').decode(val)
                        else: rowDict[att]=val
                    rowDict['misc'],rowDict['series']={},{}
                    for misc in sim['misc'].attrs: rowDict['misc'][misc]=woo.core.WooJSONDecoder(onError='warn').decode(sim['misc'].attrs[misc])
                    for s in sim['series']:
                        try:
                            # direct conversion was sometimes failing inside h5py with
                            # AttributeError: 'int' object has no attribute 'encode'
                            rowDict['series'][s]=numpy.array(sim['series'][s])
                        except AttributeError:
                            # should we fail, do the conversion indirectly, through list (perhaps slower)
                            rowDict['series'][s]=numpy.array(list(sim['series'][s]))
                    ret.append(rowDict)
                return ret
        finally:
            hdf.close()
    # sqlite
    # connect always succeeds, as it seems, even if the type is not sqlite3 db
    # in that case, it will fail at conn.execute below
    conn=sqlite3.connect(db,timeout=sqliteTimeout,detect_types=sqlite3.PARSE_DECLTYPES)
    try:
        conn.row_factory=sqlite3.Row
        ret=[]
        for row in conn.execute('SELECT * FROM batch ORDER BY finished'):
            rowDict={}
            for key in row.keys():
                # json-encoded fields
                if key in ('pre','tags','plots','misc'):
                    if basicTypes: val=json.loads(row[key])
                    else: val=woo.core.WooJSONDecoder(onError='warn').decode(row[key])
                elif key=='series':
                    seriesDict=json.loads(row[key])
                    if not isinstance(seriesDict,dict): raise ValueError('The series field must decode to a dictionary (not %s).'%type(seriesDict).__name__)
                    if basicTypes: val=seriesDict
                    else: val={k:numpy.array(v) for k,v in seriesDict.items()}
                else:
                    val=row[key]
                    if basicTypes and key=='finished': val=val.isoformat(sep='_')
                rowDict[key]=val
            ret.append(rowDict)
        return ret
    finally:
        conn.close() # don't occupy the db longer than necessary
def dbToJSON(db,**kw):
    '''Return simulation database as JSON string.

    The database is read with :obj:`dbReadResults` (using ``basicTypes=True``) and the result is encoded with :obj:`woo.core.WooJSONEncoder`.

    :param kw: additional arguments passed to `json.dumps <http://docs.python.org/3/library/json.html#json.dumps>`_.
    '''
    import woo.core
    results=dbReadResults(db,basicTypes=True)
    encoder=woo.core.WooJSONEncoder(indent=None,oneway=True)
    # NOTE(review): **kw is handed to encode() rather than to the encoder constructor;
    # confirm that WooJSONEncoder.encode accepts keyword arguments.
    return encoder.encode(results,**kw)
def dbToSpread(db,out=None,dialect='xls',rows=False,series=True,ignored=('plotData','tags'),sortFirst=('title','batchtable','batchTableLine','finished','sceneId','duration'),selector=None):
    '''
    Select simulation results (using *selector*) stored in batch database *db*, flatten data for each simulation,
    and dump the data in the CSV format (using *dialect*: 'excel', 'excel-tab', 'xls', 'xlsx') into file *out* (standard output
    if not given). If *rows*, every simulation is saved into one row of the CSV file (i.e. attributes are in columns),
    otherwise each simulation corresponds to one column and each attribute is in one row.
    If *out* ends with '.xls' (or '.xlsx'), the 'xls' (or 'xlsx') dialect is forced regardless of the value given. The 'xls' and 'xlsx' formats will refuse to write to standard output (*out* must be given).
    *ignored* fields are used to exclude large data from the dump: either database column of that name, or any attribute
    of that name. Attributes are flattened and path separated with '.'.
    *series* determines whether the `series` field will be written to a separate sheet, named by the sceneId. This is only supported with the `xls`/`xlsx` dialects and raises error otherwise (unless `series` field is empty).
    Fields are sorted in their natural order (i.e. alphabetically, but respecting numbers), with *sortFirst* fields coming at the beginning.

    *selector* is an SQL statement used instead of the default query; it is executed as-is and is only used for SQLite databases (ignored, with a warning, for HDF5).

    :raises ValueError: spreadsheet dialect requested without *out*.
    :raises RuntimeError: data series are to be written with a non-spreadsheet dialect.
    '''
    import sqlite3,json,sys,csv,warnings,numpy,re,math,datetime

    def flatten(obj,path='',sep='.',ret=None):
        '''Flatten possibly nested structure of dictionaries and lists (such as data decoded from JSON).
        Returns dictionary, where each object is denoted by its path, paths being separated by *sep*.
        Adapted from http://stackoverflow.com/questions/8477550/flattening-a-list-of-dicts-of-lists-of-dicts-etc-of-unknown-depth-in-python-n .
        '''
        if ret is None: ret={}
        prefix=(path+sep if path else '')
        if isinstance(obj,list):
            for i,item in enumerate(obj): flatten(item,prefix+str(i),sep=sep,ret=ret)
        elif isinstance(obj,dict):
            for key,value in list(obj.items()): flatten(value,prefix+str(key),sep=sep,ret=ret)
        elif isinstance(obj,str):
            # convert str subclasses to plain str
            ret[path]=str(obj)
        else:
            # other values passed as they are
            ret[path]=obj
        return ret

    def natural_key(string_):
        '''Return key for natural sorting (recognizing consecutive numerals as numbers):
        http://www.codinghorror.com/blog/archives/001018.html
        http://stackoverflow.com/a/3033342/761090
        '''
        return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)]

    def fixSheetname(n):
        # truncate the name (the sheet name limit is 31 characters), otherwise there would be exception
        # see https://groups.google.com/forum/?fromgroups=#!topic/python-excel/QK4iJrPDSB8
        if len(n)>30: n=u'…'+n[-29:]
        # invalid characters (is that documented somewhere?? those are the only ones I found manually)
        n=n.replace('[','_').replace(']','_').replace('*','_').replace(':','_').replace('/','_')
        return n

    def tryJson(val):
        'Decode *val* from JSON; if that is not possible (not a string, not valid JSON), return it unchanged.'
        try: return json.loads(val)
        except (TypeError,ValueError): return val

    def flatGroup(prefix,g):
        'Yield (path,dataset) for all datasets under HDF5 group *g*, descending into nested groups (http://stackoverflow.com/a/6036037/761090).'
        if prefix: prefix+='/'
        for name,sub in g.items():
            if isinstance(sub,h5py.Group): yield from flatGroup(prefix+name,sub)
            else: yield (prefix+name,sub)

    allData={}
    seriesData={}
    # lowercase
    ignored=[i.lower() for i in ignored]
    sortFirst=[sf.lower() for sf in sortFirst]

    def addRow(i,rowDict):
        'Store flattened *rowDict* as values of the *i*-th simulation, padding fields which were missing in earlier simulations with None.'
        for key,val in flatten(rowDict).items():
            if key.lower() in ignored: continue
            column=allData.setdefault(key,[])
            column+=[None]*(i-len(column))+[val]

    # open db and get rows
    hdf=None
    try:
        import h5py, h5py.h5f
        if isinstance(db,str): dbBytes=bytes(db,'utf-8')
        else: dbBytes=db
        # check first, to avoid warning from h5py.File in stderr
        if not h5py.h5f.is_hdf5(dbBytes): raise IOError('Not a HDF5 file.')
        hdf=h5py.File(db,'r',libver='latest')
    except (ImportError,IOError):
        hdf=None
    if hdf is not None:
        # all data are copied out of the file, which can therefore be closed when done
        try:
            with FileLock(db):
                if selector: warnings.warn('selector parameter ignored, since the file is HDF5 (not SQLite)')
                # sort simulations by batchTable, batchTableLine, finished (whichever are present)
                sortAttrs=('batchTable','batchTableLine','finished')
                def simSortKey(s):
                    attrs=hdf[s].attrs
                    return tuple(natural_key(str(attrs[a])) for a in sortAttrs if a in attrs)
                simIds=sorted([s for s in hdf if _checkHdf5sim(hdf[s])],key=simSortKey)
                # iterate over simulations
                for i,simId in enumerate(simIds):
                    sim=hdf[simId]
                    rowDict={att:tryJson(sim.attrs[att]) for att in sim.attrs}
                    rowDict['misc']={att:tryJson(sim['misc'].attrs[att]) for att in sim['misc'].attrs}
                    if series:
                        # we have to flatten series, since it may contain nested hdf5 groups
                        simSeries={sName:numpy.array(sVal) for sName,sVal in flatGroup('',sim['series'])}
                        seriesData[str(sim.attrs['title'])+str('_')+str(sim.attrs['sceneId'])]=simSeries
                    addRow(i,rowDict)
        finally:
            hdf.close()
    else:
        # connect always succeeds, as it seems, even if the type is not sqlite3 db
        # in that case, it will fail at conn.execute below
        conn=sqlite3.connect(db,timeout=sqliteTimeout,detect_types=sqlite3.PARSE_DECLTYPES)
        try:
            conn.row_factory=sqlite3.Row
            # NOTE: *selector* is executed verbatim; never pass untrusted input here
            query=(selector if selector is not None else 'SELECT * FROM batch ORDER BY title')
            for i,row in enumerate(conn.execute(query)):
                rowDict={}
                for key in row.keys():
                    if key.lower() in ignored: continue
                    # decode val from json, if it fails, leave it alone
                    val=tryJson(row[key])
                    if key!='series': rowDict[key]=val
                    elif series: seriesData[row['title']+'_'+row['sceneId']]=val # set only if allowed
                addRow(i,rowDict)
        finally:
            conn.close() # don't occupy the db longer than necessary

    fields=sorted(allData.keys(),key=natural_key)
    # apply sortFirst
    fieldsLower=[f.lower() for f in fields]; fields0=fields[:] # these two have always same order
    for sf in reversed(sortFirst): # reverse so that the order of sortFirst is respected
        if sf in fieldsLower: # lowercased name should be put to the front
            field=fields0[fieldsLower.index(sf)] # get the case-sensitive one
            fields=[field]+[f for f in fields if f!=field] # rearrange

    # output file extension (if recognized) wins over *dialect*
    outName=(out or '')
    if outName.endswith('.xlsx'): xls,xlsx=False,True
    elif outName.endswith('.xls'): xls,xlsx=True,False
    else:
        xls=(dialect.lower()=='xls')
        xlsx=(dialect.lower()=='xlsx')

    if xls or xlsx:
        if out is None: raise ValueError('The *out* parameter must be given when using the xls/xlsx dialects (refusing to write binary to standard output).')
        # http://scienceoss.com/write-excel-files-with-python-using-xlwt/
        # http://www.youlikeprogramming.com/2011/04/examples-generating-excel-documents-using-pythons-xlwt/
        import urllib.parse
        if xls:
            import xlwt
            wbk=xlwt.Workbook('utf-8')
            sheet=wbk.add_sheet(fixSheetname(db)) # truncated if too long
            # datetime style
            datetimeStyle=xlwt.XFStyle()
            datetimeStyle.num_format_str='yyyy-mm-dd_hh:mm:ss' # http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx
            # header style
            font=xlwt.Font()
            font.bold=True
            headStyle=xlwt.XFStyle()
            headStyle.font=font
            hrefStyle=xlwt.easyxf('font: underline single')
            # default style
            defaultStyle=xlwt.Style.default_style
        else:
            import xlsxwriter
            wbk=xlsxwriter.Workbook(out)
            sheet=wbk.add_worksheet(fixSheetname(db))
            headStyle=wbk.add_format({'bold':True})
            datetimeStyle=wbk.add_format({'num_format':'yyyy-mm-dd_hh:mm:ss'})
            hrefStyle=wbk.add_format({'underline':1})
            defaultStyle=None
        # cell styling
        styleDict={datetime.datetime:datetimeStyle} # add styles for other custom types here

        def write_cell(s,r,c,data,style):
            'Write *data* to row *r*, column *c* of sheet *s*, turning URLs into hyperlinks and converting values the backend cannot store.'
            hyperlink=isinstance(data,str) and data.startswith(('file://','http://','https://'))
            if hyperlink:
                if xls:
                    data=data.replace('"',"'")
                    data=xlwt.Formula('HYPERLINK("%s","%s")'%(urllib.parse.quote(data,safe=':/'),data))
                    s.write(r,c,data,hrefStyle)
                else:
                    s.write_url(r,c,data,string=data)
            else:
                if isinstance(data,numpy.int64): data=int(data)
                elif isinstance(data,numpy.float64): data=float(data)
                # XLSX does not handle NaN/Inf (yet)
                if not xls and isinstance(data,float) and (math.isinf(data) or math.isnan(data)): s.write(r,c,str(data),style)
                else: s.write(r,c,data,style)

        # normal and transposed setters
        if rows: setCell=lambda s,r,c,data,style: write_cell(s,r,c,data,style)
        else: setCell=lambda s,r,c,data,style: write_cell(s,c,r,data,style)

        for col,field in enumerate(fields):
            # headers
            setCell(sheet,0,col,field,headStyle)
            # data
            for row,val in enumerate(allData[field]):
                style=styleDict.get(type(val),defaultStyle)
                setCell(sheet,row+1,col,val,style)
        # save data series, one sheet per simulation
        for sheetName,dic in seriesData.items():
            if xls: sheet=wbk.add_sheet(fixSheetname(sheetName))
            else: sheet=wbk.add_worksheet(fixSheetname(sheetName))
            for col,colName in enumerate(sorted(dic.keys())):
                if xls and col>255:
                    log.warning('The data being converted to XLS (%s) contain %d columns, which is more than 255, the limit of the XLS format. Extra data will be discarded from the XLS output. Use .xlsx to overcome this limitation.'%(out,len(dic)))
                    break
                sheet.write(0,col,colName,headStyle)
                rowOffset=1 # length of header
                for row in range(0,len(dic[colName])):
                    if xls and row+rowOffset>65535:
                        log.warning('the data being converted to XLS (%s) contain %d rows (with %d header rows), which is more than 65535, the limit of the XLS file format. Extra data will be discarded from the XLS output. Use .xlsx to overcome this limitation.'%(out,len(dic[colName]),rowOffset))
                        break
                    val=dic[colName][row]
                    if xlsx and (math.isnan(val) or math.isinf(val)): val=str(val)
                    sheet.write(row+rowOffset,col,val)
        if xls: wbk.save(out)
        else: wbk.close()
    else:
        if seriesData: raise RuntimeError('Data series can only be written with the *xls* dialect')
        def asStr(x):
            'Customize string conversions for some types'
            if isinstance(x,datetime.datetime): return x.strftime('%Y-%m-%d_%H:%M:%S') # as ISO, but without microseconds
            return x
        # newline='' as required by the csv module for files it writes to
        outt=(open(out,'w',newline='') if out else sys.stdout)
        try:
            # write into CSV
            if rows:
                # one attribute per column
                writer=csv.DictWriter(outt,fieldnames=fields,dialect=dialect)
                writer.writeheader()
                # columns may be shorter than the number of simulations (fields missing in later simulations)
                nRows=max((len(column) for column in allData.values()),default=0)
                for i in range(nRows):
                    writer.writerow({k:(asStr(allData[k][i]) if i<len(allData[k]) else None) for k in fields})
            else:
                # one attribute per row
                writer=csv.writer(outt,dialect=dialect)
                for a in fields: writer.writerow([a]+[asStr(b) for b in allData[a]])
        finally:
            # never close standard output
            if out: outt.close()
def readParamsFromTable(scene,under='table',noTableOk=True,unknownOk=False,**kw):
    """
    Read parameters from the batch table and assign them to :obj:`woo.core.Scene.lab` under the ``under`` pseudo-module (e.g. ``Scene.lab.table.foo`` and so on). This function is used for scripts (as opposed to preprocessors) running in a batch. The table file and line are taken from the batch options; the file format is described in :obj:`TableParamReader` (CSV or XLS).

    Keyword arguments ``**kw`` define parameter names with their default values; values are taken from the table where given, defaults are used otherwise (and when running without a batch table).

    Besides the parameters themselves, the pseudo-module receives ``explicitParams_`` (``"name1=val1,name2=val2,…"``, values coming from the table) and ``defaultParams_`` (``"name1=default1,…"``, parameters left at their defaults). Scene tags ``line`` and (when reading from the table) ``title`` are set as well.

    String values from the table are evaluated as python expressions (with ``woo`` and the contents of the ``math`` module in scope); the value ``*`` means to use the default, ``-`` means to skip the column. Columns ``title`` and those starting with ``!`` are not treated as parameters.

    :param scene: scene where the parameters are stored
    :param under: name of pseudo-module under ``S.lab`` to save all values to (``table`` by default)
    :param bool noTableOk: if False, raise exception if not running with a batch table; use default values otherwise
    :param bool unknownOk: do not raise exception if unknown column name is found in the file, and assign it as well
    :return: None
    """
    import math, woo
    tagsParams=[]
    # dictParams is what eventually ends up in S.lab.table.* (default+specified values)
    dictDefaults,dictParams={},{}
    # create the S.lab.table pseudo-module
    S=scene
    S.lab._newModule(under)
    pseudoMod=getattr(S.lab,under)
    if not inBatch() or not hasBatchTable():
        if not noTableOk: raise EnvironmentError("Batch options not defined (and required; pass noTableOk=True if they are not)")
        S.tags['line']='l!'
    else:
        tableFile,tableLine=wooOptions.batchTable,wooOptions.batchLine
        allTab=TableParamReader(tableFile).paramDict()
        if tableLine not in allTab: raise RuntimeError("Table %s doesn't contain valid line number %d"%(tableFile,tableLine))
        vv=allTab[tableLine]
        S.tags['line']='l%d'%tableLine
        S.tags['title']=str(vv['title'])
        # assign values specified in the table to python vars
        # !something cols are skipped, those are env vars we don't treat at all (they are contained in title, though)
        for col in vv.keys():
            if col=='title' or col[0]=='!': continue
            if col not in kw and (not unknownOk): raise NameError("Parameter `%s' has no default value assigned"%col)
            if vv[col]=='*': vv[col]=kw[col] # use default value for * in the table
            elif vv[col]=='-': continue # skip this column
            elif col in kw: kw.pop(col) # remove the var from kw, so that it contains only those that were default at the end of this loop
            tagsParams.append('%s=%s'%(col,vv[col]))
            # when reading from XLS, data might be numbers; use eval only for strings, otherwise use the thing itself
            # NOTE: table cells are evaluated as python code, the table must come from a trusted source
            dictParams[col]=eval(vv[col],dict(woo=woo,**math.__dict__)) if isinstance(vv[col],str) else vv[col]
    # assign remaining (default) keys to python vars
    defaults=[]
    for k,v in kw.items():
        dictDefaults[k]=v
        defaults.append("%s=%s"%(k,v))
    pseudoMod.defaultParams_=",".join(defaults)
    pseudoMod.explicitParams_=",".join(tagsParams)
    # save all vars to the pseudo-module (values from the table override defaults)
    dictDefaults.update(dictParams)
    for k,v in dictDefaults.items():
        setattr(pseudoMod,k,v)
    return None
def runPreprocessor(pre,preFile=None):
    """Execute given :obj:`Preprocessor <woo.core.Preprocessor>`, modifying its attributes from batch (if running in batch). Each column from the batch table (except of environment variables starting with ``!``) must correspond to a preprocessor's attribute.
    Nested attributes are allowed, e.g. with :obj:`woo.pre.horse.FallingHorse`, a column named ``mat.tanPhi`` will modify horse's material's friction angle, using the default material object.

    Columns starting with ``%`` override variables of the preprocessor file, which is then re-loaded from *preFile*. Values starting with ``=`` are evaluated only after all other values were assigned, with the preprocessor accessible as ``self``.

    :param pre: the preprocessor to run
    :param preFile: file the preprocessor was loaded from; required if the table contains ``%`` columns
    :return: scene created by the preprocessor, with ``line`` and ``title`` tags set when running in batch
    :raises RuntimeError: the batch line is not in the table, or ``%`` columns are used without *preFile*
    """
    def nestedSetattr(obj,attr,val):
        'Set attribute given by dotted path *attr* (components may be indexed, like ``foo[2]``) of *obj* to *val*.'
        import re
        attrs=attr.split(".")
        indexRE=r'([a-zA-Z_0-9]+)\s*\[\s*(-?[0-9]+)\s*\]'
        # descend to the object holding the last component
        for i in attrs[:-1]:
            m=re.match(indexRE,i.strip())
            if m: obj=getattr(obj,m.group(1))[int(m.group(2))]
            else: obj=getattr(obj,i)
        m=re.match(indexRE,attrs[-1].strip())
        if m: getattr(obj,m.group(1))[int(m.group(2))]=val
        else: setattr(obj,attrs[-1],val)
    # just run preprocessor in this case, plus set title, if given in Preprocessor
    if not inBatch():
        S=pre()
        if pre.title:
            S.tags['title']=pre.title
        return S
    import woo,math,numpy
    evalParams=[]
    # row of the batch table for this simulation; stays None when there is no table or no valid line number
    vv=None
    if wooOptions.batchTable and wooOptions.batchLine>=0:
        allTab=TableParamReader(wooOptions.batchTable).paramDict()
        if wooOptions.batchLine not in allTab: raise RuntimeError("Table %s doesn't contain valid line number %d"%(wooOptions.batchTable,wooOptions.batchLine))
        vv=allTab[wooOptions.batchLine]
        # NOTE: table cells are evaluated as python code below, the table must come from a trusted source
        # overriding things set in the #: lines of preprocessor from the table using %varName=...
        if any(v.startswith('%') for v in vv.keys()):
            if preFile is None: raise RuntimeError('Unable to re-load preprocessor due to %something columns: preFile was left empty by the caller.')
            overrideHashPercent={}
            for v in list(vv.keys()): # copy list so that dictionary does not change while iterating over it
                if not v.startswith('%'): continue
                val=vv[v]
                if isinstance(val,str): val=eval(val,dict(woo=woo,**math.__dict__))
                overrideHashPercent[v[1:]]=val
                log.info('Re-assigning #%% variable %s = %s'%(str(v[1:]),str(val)))
                vv.pop(v)
            pre=woo.core.Object.load(preFile,overrideHashPercent=overrideHashPercent)
        # set preprocessor parameters first
        for name,val in vv.items():
            if name[0]=='!': continue # pseudo-variables such as !SCRIPT, !THREADS and so on
            if name=='title': continue
            if val in ('*','-',''): continue # use the preprocessor's own value
            # postpone evaluation of parameters starting with = so that they can use other params
            if isinstance(val,str) and val.startswith('='): evalParams.append((name,val[1:]))
            elif isinstance(val,str) and val.startswith("'="): evalParams.append((name,val[2:]))
            else:
                log.info('OVERRIDING FROM TABLE: %s = %s'%(name,val))
                nestedSetattr(pre,name,eval(val,globals(),dict(woo=woo,math=math,numpy=numpy))) # woo.unit
    # postponed evaluation of computable params
    for name,val in evalParams:
        log.info('OVERRIDING FROM TABLE (delayed): %s = %s'%(name,val))
        nestedSetattr(pre,name,eval(val,globals(),dict(woo=woo,math=math,numpy=numpy,self=pre)))
    # check types, if this is a python preprocessor
    if hasattr(pre,'checkAttrTypes'): pre.checkAttrTypes()
    # run preprocessor
    if vv is not None: pre.title=str(vv['title'])
    S=pre()
    # set tags from batch
    if vv is not None:
        S.tags['line']='l%d'%wooOptions.batchLine
        S.tags['title']=str(vv['title'])
    else:
        S.tags['line']='default'
        S.tags['title']=str(preFile if preFile else '[no file]')
    return S
[docs]class TableParamReader(object):
r"""Class for reading simulation parameters from text file.
Each parameter is represented by one column, each parameter set by one line. Colums are separated by blanks (no quoting).
First non-empty line contains column titles (without quotes).
You may use special column named 'title' to describe this parameter set;
if such colum is absent, title will be built by concatenating column names and corresponding values (``param1=34,param2=12.22,param4=foo``)
* from columns ending in ``!`` (the ``!`` is not included in the column name)
* from all columns, if no columns end in ``!``.
* columns containing literal - (minus) will be ignored
Empty lines within the file are ignored (although counted); ``#`` starts comment till the end of line. Number of blank-separated columns must be the same for all non-empty lines.
A special value ``=`` can be used instead of parameter value; value from the previous non-empty line will be used instead (works recursively); in XLS, *empty* cell is treated the same as ``=``.
This class is used by :obj:`woo.utils.readParamsFromTable`.
>>> tryData=[
... ['head1','important2!','head3','...','...','...','!OMP_NUM_THREADS!','abcd'],
... [1,1.1, '1','.','1','5', 1.2,1.3,],
... ['a','b','HE','AD','_','3','c','d','###','comment'],
... ['# empty line'],
... [1,'=','=','=','=','=','=','g']
... ]
>>> import woo
>>> tryFile=woo.master.tmpFilename()
>>> # write text
>>> f1=tryFile+'.txt'
>>> txt=open(f1,'w')
>>> for ll in tryData: n=txt.write(' '.join([str(l) for l in ll])+'\n') # set n to suppress output in doctest under py3k
>>> txt.close()
>>>
>>> # write xls
>>> import xlwt,itertools
>>> f2=tryFile+'.xls'
>>> xls=xlwt.Workbook(); sheet=xls.add_sheet('test')
>>> for r in range(len(tryData)):
... for c in range(len(tryData[r])):
... sheet.write(r,c,tryData[r][c])
>>> xls.save(f2)
>>>
>>> from pprint import *
>>> pprint(TableParamReader(f1).paramDict())
{2: {'!OMP_NUM_THREADS': '1.2',
'abcd': '1.3',
'head1': '1',
'head3': '1.15',
'important2': '1.1',
u'title': u'important2=1.1,OMP_NUM_THREADS=1.2'},
3: {'!OMP_NUM_THREADS': 'c',
'abcd': 'd',
'head1': 'a',
'head3': 'HEAD_3',
'important2': 'b',
u'title': u'important2=b,OMP_NUM_THREADS=c'},
5: {'!OMP_NUM_THREADS': 'c',
'abcd': 'g',
'head1': '1',
'head3': 'HEAD_3',
'important2': 'b',
u'title': u'important2=b,OMP_NUM_THREADS=c__line=5__'}}
>>> pprint(TableParamReader(f2).paramDict())
{2: {u'!OMP_NUM_THREADS': '1.2',
u'abcd': '1.3',
u'head1': '1',
u'head3': '1.15',
u'important2': '1.1',
u'title': u'important2=1.1,OMP_NUM_THREADS=1.2'},
3: {u'!OMP_NUM_THREADS': 'c',
u'abcd': 'd',
u'head1': 'a',
u'head3': 'HEAD_3',
u'important2': 'b',
u'title': u'important2=b,OMP_NUM_THREADS=c'},
5: {u'!OMP_NUM_THREADS': 'c',
u'abcd': 'g',
u'head1': '1',
u'head3': 'HEAD_3',
u'important2': 'b',
u'title': u'important2=b,OMP_NUM_THREADS=c__line=5__'}}
"""
def __init__(self,file,firstLine=-1):
    """Read the parameter table *file* into memory.

    A file whose name ends with ``.xls`` (case-insensitive) is read as a spreadsheet through ``xlrd`` (first sheet only); any other file is read as whitespace-separated text. In both formats ``#`` starts a comment, and lines which are blank or contain only a comment are skipped. The first remaining line holds the column headings, every following one is a data line.

    After reading, the constructor

    * replaces every ``=`` or empty value by the value in the same column of the previous data line;
    * merges every column headed ``...`` (or ``…``) into the column to its left, by concatenating the values;
    * strips one trailing ``!`` from every heading;
    * unless a ``title`` column exists, adds a ``title`` entry to every line; it is built from the columns marked with trailing ``!`` (from all columns if none is marked), and ``__line=N__`` is appended when the same title was already used by an earlier line.

    The result is stored in ``self.values`` as ``{lineNumber: {heading: value, ...}, ...}``; all values are strings.

    :param file: path of the table file
    :param firstLine: offset added to the 0-based sheet row index (XLS), or to the 1-based line number within the file (text), to obtain the line numbers used as keys; if negative, 1 is used for XLS files and 0 for text files.
    :raises ValueError: if ``=`` or an empty value is found on the first data line (there is no previous line to copy from), or if ``...`` is the heading of the first column.
    :raises IndexError: if the file contains no heading line, or if a text line has fewer columns than the heading line.
    """
    import re
    # matches text which is blank or holds nothing but a comment
    blankOrComment=re.compile(r'^\s*(#.*)?$')
    # headings marking a column as continuation of the column to its left
    continuations=('...',u'...',u'…')
    if file.lower().endswith('.xls'):
        if firstLine<0: firstLine=1
        import xlrd # imported only here, so that text tables can be read without xlrd
        sheet=xlrd.open_workbook(file).sheet_by_index(0)
        maxCol=0
        rows=[] # indices of rows actually containing data
        for row in range(sheet.nrows):
            lastDataCol=-1
            for col in range(sheet.ncols):
                c=sheet.cell(row,col)
                empty=(c.ctype in (xlrd.XL_CELL_EMPTY,xlrd.XL_CELL_BLANK) or (c.ctype==xlrd.XL_CELL_TEXT and c.value.strip()==''))
                comment=(c.ctype==xlrd.XL_CELL_TEXT and blankOrComment.match(c.value))
                if comment: break # comment cancels all remaining cells on the line
                if not empty: lastDataCol=max(col,lastDataCol)
            if lastDataCol>=0:
                rows.append(row)
                maxCol=max(maxCol,lastDataCol)
        cols=list(range(maxCol+1))
        # the first row with data holds the headings
        headings=[sheet.cell(rows[0],c).value for c in cols]
        values={}
        for r in rows[1:]:
            rowVals=[]
            for c in cols:
                v=str(sheet.cell(r,c).value)
                # XLS does not know ints: represent numbers with zero fractional part without the trailing ".0"
                # http://stackoverflow.com/questions/8825681/integers-from-excel-files-become-floats
                try:
                    f=float(v)
                    if f==int(f): v=str(int(f))
                # float() rejects non-numbers with ValueError; int() raises ValueError for NaN and OverflowError for infinity
                except (ValueError,OverflowError): pass
                rowVals.append(v)
            values[r+firstLine]=rowVals
    else:
        if firstLine<0: firstLine=0
        # text file, whitespace-separated: strip line ends and comments;
        # the leading '' makes indices into ll coincide with 1-based line numbers
        with open(file,'r') as f:
            # rstrip (rather than cutting the last character) keeps the last line intact when the file does not end with a newline
            ll=['']+[re.sub(r'\s*#.*','',l.rstrip('\r\n')) for l in f]
        # usable lines are those that contain something else than just spaces
        usableLines=[i for i,l in enumerate(ll) if not blankOrComment.match(l)]
        headings=ll[usableLines[0]].split()
        values={}
        for l in usableLines[1:]:
            fields=ll[l].split()
            if len(fields)<len(headings):
                raise IndexError("Line %d of %s has only %d columns, the heading line has %d."%(l+firstLine,file,len(fields),len(headings)))
            # columns beyond the last heading are ignored
            values[l+firstLine]=[str(x) for x in fields[:len(headings)]]
    #
    # each format has to define the following:
    #   values={lineNumber:[val,val,...],...} # values ordered the same as headings
    #   headings=['col1title!','col2title',...] # as in the file
    #
    # replace empty cells or '=' by the previous value of the parameter
    lines=sorted(values)
    for i,l in enumerate(lines):
        for col,val in enumerate(values[l]):
            if val not in ('=',''): continue
            try:
                # lines[-1] would silently wrap around to the last line, hence the explicit check
                if i==0: raise IndexError(i)
                values[l][col]=values[lines[i-1]][col]
            except (IndexError,KeyError):
                raise ValueError("The = specifier on line %d, column %d, refers to nonexistent value on previous line?"%(l,col))
    # merge continuation columns into the column to their left
    nCollapsed=0
    for ih,h in enumerate(headings): # index into headings (these are pruned only after the loop)
        if h not in continuations: continue
        if ih<1: raise ValueError("The ... header continuation is not allowed in the first column.")
        iv=ih-nCollapsed # index into values, which shrink as columns are being merged
        nCollapsed+=1
        for l in lines:
            vv=values[l]
            values[l]=vv[:iv-1]+[str(vv[iv-1])+str(vv[iv])]+vv[iv+1:]
    rawHeadings=[h for h in headings if h not in continuations]
    # prune trailing bangs; endswith (unlike h[-1]) also copes with empty headings
    headings=[(h[:-1] if h.endswith('!') else h) for h in rawHeadings]
    # copy to dictionary; that is the way results are supposed to be returned
    dvalues={}
    for l in lines:
        dvalues[l]=dict(zip(headings,[str(v) for v in values[l]]))
    # add descriptions, but if they repeat, append line number as well
    if 'title' not in headings:
        # with no heading marked by a bang, all columns contribute to the title
        hasBangs=any(h.endswith('!') for h in rawHeadings)
        descs=set()
        for l in lines:
            ddd=[]
            for col,head in enumerate(rawHeadings):
                if hasBangs and not head.endswith('!'): continue
                val=values[l][col]
                if isinstance(val,str) and val.strip() in ('','-','*'): continue # default value used
                ddd.append(head.replace('!','')+'='+('%g'%val if isinstance(val,float) else str(val)))
            dd=','.join(ddd).replace("'",'').replace('"','')
            if dd in descs: dd+='__line=%d__'%l
            dvalues[l][u'title']=dd.replace('/','_').replace('[','').replace(']','').replace('*woo.unit','')
            descs.add(dd)
    self.values=dvalues
def paramDict(self):
    """Give access to the table which the constructor read from the file.

    The returned dictionary is keyed by line number; line numbers refer to lines of the file and thus need not be contiguous. Every value is a dictionary mapping a parameter name to its value on that line. All processing was done by the constructor already: ``=`` was replaced by the value from the previous line, trailing ``!`` (bangs) were removed from column titles, and the ``title`` column was added if the file did not define it.

    :return: the dictionary stored by the constructor (the very same object, not a copy)
    """
    table=self.values
    return table
def cartProdParamTable(params,out,same=''):
    '''Write parameter table (as XLS) where all parameters in *params* (which is a dictionary, or :obj:`python:collections.OrderedDict`) are traversed: one table line is written for every element of the cartesian product of the parameter values. The first sheet row holds the (bold) column headings.

    :param same: content of repeated cells, i.e. cells with the same value as the cell above them; if ``None``, repeated cells are filled with the repeated value. Other useful values are ``'='`` and ``''`` (empty cell)
    :param out: XLS file to write to
    :param params: dictionary-like with parameter values; keys may be n-tuples, which will span multiple columns -- in that case, values must also be n-tuples, and will also span those columns
    :return: total number of lines written, not counting the heading line (0 if some parameter has no values at all)

    >>> import collections, woo.batch
    >>> pp=collections.OrderedDict() # use OrderedDict for predictable column ordering
    >>> pp['pattern']=['ortho','hexa']
    >>> pp['radius','...']=[(r,'*woo.unit["mm"]') for r in (1,2,3)] # use continuation columns for unit specification
    >>> pp['gravity','...','...']=[('(0,0,',g,')') for g in (9.81,20)] # use continuation columns for concatenation of expression
    >>> xls=woo.master.tmpFilename()+'.xls'
    >>> woo.batch.cartProdParamTable(params=pp,out=xls)
    12
    >>> import pprint
    >>> pprint.pprint(TableParamReader(xls).paramDict())
    {2: {u'gravity': '(0,0,9.81)',
         u'pattern': 'ortho',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=1mm,gravity=(0,0,9.81)'},
     3: {u'gravity': '(0,0,20)',
         u'pattern': 'ortho',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=1mm,gravity=(0,0,20)'},
     4: {u'gravity': '(0,0,9.81)',
         u'pattern': 'ortho',
         u'radius': '2*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=2mm,gravity=(0,0,9.81)'},
     5: {u'gravity': '(0,0,20)',
         u'pattern': 'ortho',
         u'radius': '2*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=2mm,gravity=(0,0,20)'},
     6: {u'gravity': '(0,0,9.81)',
         u'pattern': 'ortho',
         u'radius': '3*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=3mm,gravity=(0,0,9.81)'},
     7: {u'gravity': '(0,0,20)',
         u'pattern': 'ortho',
         u'radius': '3*woo.unit["mm"]',
         u'title': u'pattern=ortho,radius=3mm,gravity=(0,0,20)'},
     8: {u'gravity': '(0,0,9.81)',
         u'pattern': 'hexa',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=hexa,radius=1mm,gravity=(0,0,9.81)'},
     9: {u'gravity': '(0,0,20)',
         u'pattern': 'hexa',
         u'radius': '1*woo.unit["mm"]',
         u'title': u'pattern=hexa,radius=1mm,gravity=(0,0,20)'},
     10: {u'gravity': '(0,0,9.81)',
          u'pattern': 'hexa',
          u'radius': '2*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=2mm,gravity=(0,0,9.81)'},
     11: {u'gravity': '(0,0,20)',
          u'pattern': 'hexa',
          u'radius': '2*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=2mm,gravity=(0,0,20)'},
     12: {u'gravity': '(0,0,9.81)',
          u'pattern': 'hexa',
          u'radius': '3*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=3mm,gravity=(0,0,9.81)'},
     13: {u'gravity': '(0,0,20)',
          u'pattern': 'hexa',
          u'radius': '3*woo.unit["mm"]',
          u'title': u'pattern=hexa,radius=3mm,gravity=(0,0,20)'}}

    .. csv-table:: Generated cartesian product parameter table (XLS)
        :header: pattern,radius,...,gravity,...,...

        ortho,1,"*woo.unit[""mm""]","(0,0,",9.81,)
        ,,,,20,
        ,2,,,9.81,
        ,,,,20,
        ,3,,,9.81,
        ,,,,20,
        hexa,1,,,9.81,
        ,,,,20,
        ,2,,,9.81,
        ,,,,20,
        ,3,,,9.81,
        ,,,,20,
    '''
    import xlwt,itertools
    def asCells(item):
        'Return *item* as tuple of cells: a tuple spans several columns, anything else occupies one.'
        return item if isinstance(item,tuple) else (item,)
    xls=xlwt.Workbook(); sheet=xls.add_sheet('product')
    bold=xlwt.easyxf('font: bold on')
    # heading row
    col=0
    for k in params.keys():
        for name in asCells(k):
            sheet.write(0,col,name,style=bold)
            col+=1
    # data rows; counted explicitly so that the return value is defined even when the product is empty
    nRows=0
    prevCells=None # flattened cells of the previous row
    for vv in itertools.product(*params.values()):
        nRows+=1
        cells=[w for v in vv for w in asCells(v)]
        for col,w in enumerate(cells):
            changed=(same is None or prevCells is None or col>=len(prevCells) or prevCells[col]!=w)
            if changed: sheet.write(nRows,col,str(w))
            # with same=='', repeated cells are simply left unwritten
            elif same!='': sheet.write(nRows,col,same)
        prevCells=cells
    xls.save(out)
    return nRows
# Drop the Python 2 style u'' prefix from the reprs shown in the doctests of both docstrings
# (presumably so that the expected outputs match what Python 3 prints).
# Docstrings are None when running under ``python -OO``; without the guards, importing the module would fail.
if TableParamReader.__doc__ is not None: TableParamReader.__doc__=TableParamReader.__doc__.replace("u'","'")
if cartProdParamTable.__doc__ is not None: cartProdParamTable.__doc__=cartProdParamTable.__doc__.replace("u'","'")
if __name__=="__main__":
    ## this is now in the doctest as well
    # Self-test: write the same small table as text and as XLS, read both back and print the results.
    import os.path,shutil,tempfile
    from pprint import pprint
    import xlwt
    tryData=[
        ['head1','important2!','!OMP_NUM_THREADS!','abcd'],
        [1,1.1,1.2,1.3,],
        ['a','b','c','d','###','comment'],
        ['# empty line'],
        [1,'=','=','g']
    ]
    # private scratch directory instead of fixed names under /tmp: works on every platform and concurrent runs do not clash
    tmpDir=tempfile.mkdtemp(prefix='try-tbl-')
    try:
        # write text
        f1=os.path.join(tmpDir,'try-tbl.txt')
        with open(f1,'w') as txt:
            for ll in tryData: txt.write(' '.join([str(l) for l in ll])+'\n')
        # write xls
        f2=os.path.join(tmpDir,'try-tbl.xls')
        xls=xlwt.Workbook(); sheet=xls.add_sheet('test')
        for r,rowData in enumerate(tryData):
            for c,cell in enumerate(rowData):
                sheet.write(r,c,cell)
        xls.save(f2)
        pprint(TableParamReader(f1).paramDict())
        pprint(TableParamReader(f2).paramDict())
    finally:
        # the files are needed only for the duration of the test
        shutil.rmtree(tmpDir,ignore_errors=True)