# Source code for woo.batch

# encoding: utf-8

# for use with globals() when reading table

nan=float('nan')
from math import * 
from minieigen import *
import warnings
import sys
from . import utils

log=utils.makeLog(__name__)

from wooMain import options as wooOptions


try:
    from lockfile import FileLock
except ImportError:
    class FileLock:
        '''Dummy replacement used when the `lockfile <https://pypi.python.org/pypi/lockfile>`_ module is not importable.

        It offers the parts of the locking interface which this module uses (context manager
        and ``is_locked``), but never locks anything; a warning is issued on construction.
        '''
        def __init__(self,f):
            self.path=f
            warnings.warn("The 'lockfile' module is not importable, '%s' will not be locked. If this file is concurrently accessed by several simulations, it may get corrupted!"%f)
        def __enter__(self):
            # return the lock object, so that ``with FileLock(f) as lock`` works as expected
            return self
        def __exit__(self,*args):
            # nothing to release; exceptions from the with-block propagate
            pass
        def is_locked(self):
            'This dummy never locks, hence the file is never reported as locked.'
            return False
    

# Version of the layout of records in the results database; it is stored with every
# record as the ``formatVersion`` attribute and compared when the record is read back.
# increase every time the db format changes, to avoid errors
# applies to both sqlite and hdf5 dbs
dbFormatVersion=3

## timeout for opening the database if locked
## (passed as the ``timeout`` argument of sqlite3.connect, i.e. in seconds)
sqliteTimeout=1000

def wait():
    '''If running inside a batch, start the master simulation (if not already running) and block
    until it stops by itself. Typically used at the end of script so that it does not finish
    prematurely in batch mode (the execution would be ended in such a case). Does nothing outside
    of batch.'''
    import woo
    S=woo.master.scene
    if not inBatch(): return
    if not S.running: S.run()
    # wait for Master rather than for S (in case scene is re-assigned)
    woo.master.waitForScenes()

def inBatch():
    '''Tell whether we are running inside the batch or separately.

    We are in batch if a batch table was given, or if the batch line number is non-negative.
    '''
    if wooOptions.batchTable: return True
    return wooOptions.batchLine>=0

def hasBatchTable():
    'Tell whether an external batch table is given or not (scripts may be run in batch also without batch tables).'
    table=wooOptions.batchTable
    return table is not None and len(table)>0

def mayHaveStaleLock(db):
    '''Tell whether the database file *db* is currently locked.

    Only files with a HDF5 extension (``.h5``, ``.hdf5``, ``.he5``, ``.hdf``) are checked; ``None`` is
    returned for any other file. For HDF5 files, the result of ``FileLock(db).is_locked()`` is returned;
    if the lock object has no such method (a fallback lock class which does not lock at all), the file
    is reported as not locked.
    '''
    import os.path
    if os.path.splitext(db)[-1] not in ('.h5','.hdf5','.he5','.hdf'): return None
    isLocked=getattr(FileLock(db),'is_locked',None)
    return isLocked() if isLocked is not None else False

def writeResults(scene,defaultDb='woo-results.hdf5',syncXls=True,dbFmt=None,series=None,quiet=False,postHooks=(),**kw):
    '''
    Write results to batch database. With *syncXls*, corresponding excel-file is re-generated.
    Series is a dictionary of 1d arrays written to separate sheets in the XLS. If *series* is `None`
    (default), `S.plot.data` are automatically added. All other ``**kw``
    arguments are serialized in the misc field, which then appears in the main XLS sheet.

    All data are serialized using json so that they can be read back in a language-independent manner.

    *postHooks* is sequence of functions (taking a single argument - the database name) which will be called
    once the database has been updated. They can be used in conjunction with :obj:`woo.batch.dbReadResults`
    to write aggregate results from all records in the database.

    :param scene: scene of which the results are written.
    :param defaultDb: database file used when no results database was given in batch options.
    :param dbFmt: ``"hdf5"`` or `None` (detect from the extension of the database file); ``"sqlite"`` is recognized, but no longer supported.
    :param quiet: do not log informative messages.
    :raise ValueError: no database to write to, unknown database format, or a series which is not a sequence.
    :raise RuntimeError: the database format is SQLite.
    '''
    import woo, woo.plot, woo.core
    import os, os.path, datetime, shutil, warnings
    import json
    S=scene
    if inBatch() and hasBatchTable(): table,line,db=wooOptions.batchTable,wooOptions.batchLine,wooOptions.batchResults
    else: table,line,db='',-1,(wooOptions.batchResults if wooOptions.batchResults else defaultDb)
    if not db: raise ValueError('No database to write results to (forgot to pass --batch-results?).')
    # only informative (used for the log message); the file is opened later, with the lock held
    newDb=not os.path.exists(db)
    if not quiet: log.info('Writing results to the database %s (%s)'%(db,'new' if newDb else 'existing'))
    if dbFmt is None:
        ext=os.fsdecode(os.path.splitext(db)[-1])
        if ext in ('.sqlite','.db'): dbFmt='sqlite'
        elif ext in ('.h5','.hdf5','.he5','.hdf'): dbFmt='hdf5'
        else: raise ValueError("Unable to determine database format from '%s' (extension '%s'): must be *.h5, *.hdf5, *.he5, *.hdf."%(db,ext))

    # plain dict, so that tags can be serialized with json
    tags=dict(S.tags)
    # make sure series are sequences (numpy arrays always are)
    if series is None: series={'plot/'+k:v for k,v in S.plot.data.items()}
    for k,v in series.items():
        if not hasattr(v,'__len__'): raise ValueError('series["%s"] not a sequence (__len__ not defined).'%k)
    if dbFmt=='sqlite': raise RuntimeError('SQLite is no longer supported.')
    if dbFmt!='hdf5': raise ValueError('*fmt* must be one of "sqlite", "hdf5", None (autodetect based on suffix)')

    import h5py
    # The lock is taken *before* the file is opened: opening a file which another simulation is
    # just writing to may fail, and such a failure must not be mistaken for a corrupt file.
    with FileLock(db):
        try:
            # 'a' creates the file if it does not exist and never truncates an existing one
            # (the file might have been created by another simulation in the meantime)
            hdf=h5py.File(db,'a',libver='latest')
        except IOError:
            # a file which does not exist cannot be corrupt, the failure has another reason
            if not os.path.exists(db): raise
            warnings.warn("Error opening HDF5 file %s, moving to %s~~corrupt and creating a new one"%(db,db))
            shutil.move(db,db+'~~corrupt')
            hdf=h5py.File(db,'a',libver='latest')
        try:
            wooJSON=woo.core.WooJSONEncoder(indent=None,oneway=True)
            # find name not yet used in the database: id, id~1, id~2, ...
            i=0
            while True:
                sceneId=S.tags['id']+('' if i==0 else '~%d'%i)
                if sceneId not in hdf: break
                i+=1
            # group for our Scene
            G=hdf.create_group(sceneId)
            G.attrs['formatVersion']=dbFormatVersion
            G.attrs['finished']=datetime.datetime.now().replace(microsecond=0).isoformat('_')
            G.attrs['batchTable']=table
            G.attrs['batchTableLine']=line
            G.attrs['sceneId']=S.tags['id']
            G.attrs['title']=S.tags['title']
            G.attrs['duration']=S.duration
            G.attrs['pre']=S.pre.dumps(format='json') if S.pre else ''
            G.attrs['tags']=json.dumps(tags)
            G.attrs['plots']=json.dumps(S.plot.plots)
            G.attrs['labels']=wooJSON.encode(dict(S.labels))
            G.attrs['engines']=wooJSON.encode(list(S.engines))
            G_misc=G.create_group('misc')
            for k,v in kw.items(): G_misc.attrs[k]=wooJSON.encode(v)
            G_series=G.create_group('series')
            for k,v in series.items():
                # hdf5 is smart enough to create sub-groups automatically if the name contains slashes
                G_series[k]=v
        finally:
            # close the file even if writing fails, so that it does not stay open in this process
            hdf.close()

    if syncXls:
        xls=db+'.xlsx'
        if not quiet: log.info('Converting %s to file://%s'%(db,os.path.abspath(xls)))
        dbToSpread(db,out=xls,dialect='xlsx')
    for ph in postHooks: ph(db)

def _checkHdf5sim(sim):
    if not 'formatVersion' in sim.attrs: raise RuntimeError('database %s: simulation %s does not define formatVersion?!')
    if sim.attrs['formatVersion']!=dbFormatVersion: raise RuntimeError('database format mismatch: %s: %s/formatVersion==%s, should be %s'%(db,sim,sim.attrs['formatVersion'],dbFormatVersion))
    return True


# return all series stored in the database
def dbReadResults(db,basicTypes=False):
    '''Return list of dictionaries, representing database contents.

    The database is read as HDF5 if it is a HDF5 file (and ``h5py`` is importable), as SQLite otherwise.

    :param db: name of the database file.
    :param basicTypes: don't reconstruct Woo objects from JSON (keep those as dicts) and don't return data series as numpy arrays.
    :return: list with one dictionary per simulation record.

    .. todo:: Nested (grouped) series are not read correctly from HDF5. Should be fixed either by flattening the hiearchy (like we do in :obj:`dbToSpread` and stuffing it into returned dict; or by reflecting the hierarchy in the dict returned.
    '''
    import numpy, sqlite3, json, woo.core

    def decodeJson(txt):
        'Decode JSON text: to plain python types with *basicTypes*, to Woo objects otherwise.'
        if basicTypes: return json.loads(txt)
        return woo.core.WooJSONDecoder(onError='warn').decode(txt)
    def plain(val):
        'Convert numpy scalars and arrays to the corresponding python types, return other values unchanged.'
        if isinstance(val,numpy.generic): return val.item()
        if isinstance(val,numpy.ndarray): return val.tolist()
        return val

    try:
        import h5py, h5py.h5f
        dbBytes=(db if isinstance(db,bytes) else bytes(db,'utf-8'))
        # check first, to avoid warning from h5py.File in stderr
        if not h5py.h5f.is_hdf5(dbBytes): raise IOError('Not a HDF5 file.')
        hdf=h5py.File(db,'r',libver='latest')
    except (ImportError,IOError):
        # connect always succeeds, as it seems, even if the type is not sqlite3 db
        # in that case, it will fail at conn.execute below
        conn=sqlite3.connect(db,timeout=sqliteTimeout,detect_types=sqlite3.PARSE_DECLTYPES)
        hdf=None
    ret=[]
    if hdf is not None:
        try:
            with FileLock(db):
                # iterate over simulations
                for simId in hdf:
                    sim=hdf[simId]
                    _checkHdf5sim(sim)
                    rowDict={}
                    for att in sim.attrs:
                        val=sim.attrs[att]
                        if att in ('pre','tags','plots','engines','labels'):
                            # empty value (such as no preprocessor): the key is not set at all
                            if hasattr(val,'__len__') and len(val)==0: continue
                            val=decodeJson(val)
                        elif basicTypes: val=plain(val)
                        rowDict[att]=val
                    rowDict['misc'],rowDict['series']={},{}
                    for misc in sim['misc'].attrs: rowDict['misc'][misc]=decodeJson(sim['misc'].attrs[misc])
                    for s in sim['series']:
                        try:
                            # This was sometimes causing trouble (why?)
                            #
                            # File "/usr/lib/python2.7/dist-packages/h5py/_hl/group.py", line 153, in __getitem__
                            #    oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
                            #  File "/usr/lib/python2.7/dist-packages/h5py/_hl/base.py", line 113, in _e
                            #    name = name.encode('ascii')
                            # AttributeError: 'int' object has no attribute 'encode'
                            arr=numpy.array(sim['series'][s])
                        except AttributeError:
                            # should we fail, do the conversion indirectly, through list (perhaps slower)
                            arr=numpy.array(list(sim['series'][s]))
                        rowDict['series'][s]=(arr.tolist() if basicTypes else arr)
                    ret.append(rowDict)
        finally:
            # everything was copied out of the file above, so the file can (and should) be closed
            hdf.close()
        return ret
    # sqlite
    try:
        conn.row_factory=sqlite3.Row
        for row in conn.execute('SELECT * FROM batch ORDER BY finished'):
            rowDict={}
            for key in row.keys():
                # json-encoded fields
                if key in ('pre','tags','plots','misc'): val=decodeJson(row[key])
                elif key=='series':
                    series=json.loads(row[key])
                    assert isinstance(series,dict)
                    if basicTypes: val=series
                    else: val={k:numpy.array(v) for k,v in series.items()}
                else:
                    val=row[key]
                if basicTypes and key=='finished': val=val.isoformat(sep='_')
                rowDict[key]=val
            ret.append(rowDict)
    finally:
        conn.close() # don't occupy the db longer than necessary
    return ret

def dbToJSON(db,**kw):
    '''Return simulation database as JSON string.

    The database is read with :obj:`dbReadResults` (with ``basicTypes=True``) and encoded using
    ``woo.core.WooJSONEncoder``.

    :param db: name of the database file.
    :param kw: additional arguments passed to `json.dumps <http://docs.python.org/3/library/json.html#json.dumps>`_.
    '''
    import woo.core
    records=dbReadResults(db,basicTypes=True)
    encoder=woo.core.WooJSONEncoder(indent=None,oneway=True)
    # NOTE(review): kw is passed to encode(), not to the encoder constructor; this presumably relies
    # on WooJSONEncoder.encode accepting keyword arguments -- confirm against woo.core.
    return encoder.encode(records,**kw)



def dbToSpread(db,out=None,dialect='xls',rows=False,series=True,ignored=('plotData','tags'),sortFirst=('title','batchtable','batchTableLine','finished','sceneId','duration'),selector=None):
    '''
    Select simulation results (using *selector*) stored in batch database *db*, flatten data for each simulation,
    and dump the data in the CSV format (using *dialect*: 'excel', 'excel-tab') or as spreadsheet ('xls', 'xlsx')
    into file *out* (standard output if not given). If *rows*, every simulation is saved into one row of the CSV
    file (i.e. attributes are in columns), otherwise each simulation corresponds to one column and each attribute
    is in one row.

    If *out* ends with '.xls' or '.xlsx', the respective spreadsheet format is forced regardless of the *dialect*
    given. The spreadsheet formats will refuse to write to standard output (*out* must be given).

    *ignored* fields are used to exclude large data from the dump: either database column of that name, or any attribute
    of that name. Attributes are flattened and path separated with '.'.

    *series* determines whether the `series` field will be written to a separate sheet, named by the title and sceneId.
    This is only supported with the spreadsheet formats and raises error otherwise (pass ``series=False`` for CSV output).

    Fields are sorted in their natural order (i.e. alphabetically, but respecting numbers), with *sortFirst* fields coming at the beginning.

    *selector* is the SQL query used with SQLite databases; it is ignored (with a warning) for HDF5 databases.

    :raise ValueError: spreadsheet output requested without *out*.
    :raise RuntimeError: data series are to be written in the CSV format.
    '''
    import sqlite3,json,sys,csv,warnings,numpy,math,datetime,re

    def flatten(obj,path='',sep='.',ret=None):
        '''Flatten possibly nested structure of dictionaries and lists (such as data decoded from JSON).
        Returns dictionary, where each object is denoted by its path, paths being separated by *sep*.

        Adapted from http://stackoverflow.com/questions/8477550/flattening-a-list-of-dicts-of-lists-of-dicts-etc-of-unknown-depth-in-python-n .
        '''
        if ret is None: ret={}
        prefix=(path+sep if path else '')
        if isinstance(obj,list):
            for i,item in enumerate(obj): flatten(item,prefix+str(i),sep=sep,ret=ret)
        elif isinstance(obj,dict):
            for key,value in list(obj.items()): flatten(value,prefix+str(key),sep=sep,ret=ret)
        elif isinstance(obj,str):
            # make sure str subclasses become plain strings
            ret[path]=str(obj)
        else:
            # other values passed as they are
            ret[path]=obj
        return ret

    def natural_key(string_):
        '''Return key for natural sorting (recognizing consecutive numerals as numbers):
        http://www.codinghorror.com/blog/archives/001018.html
        http://stackoverflow.com/a/3033342/761090
        '''
        return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_)]

    def fixSheetname(n):
        'Make *n* usable as name of a sheet: shorten it and replace characters which are not allowed.'
        # truncate the name (the limit is 31 characters), otherwise there would be exception
        # see https://groups.google.com/forum/?fromgroups=#!topic/python-excel/QK4iJrPDSB8
        if len(n)>30: n=u'…'+n[-29:]
        # characters which are not allowed in sheet names
        for bad in '[]*:/\\?': n=n.replace(bad,'_')
        return n

    def tryJson(val):
        'Decode *val* from JSON; if that fails (not a JSON text), return *val* itself.'
        try: return json.loads(val)
        except (TypeError,ValueError): return val

    def flatGroup(g,prefix=''):
        'Yield (path,dataset) for all datasets in HDF5 group *g*, descending into nested groups; path is separated with /.'
        # http://stackoverflow.com/a/6036037/761090
        for name,sub in g.items():
            if isinstance(sub,h5py.Group): yield from flatGroup(sub,prefix+name+'/')
            else: yield (prefix+name,sub)

    def addRecord(i,rowDict):
        'Flatten *rowDict* and store it as the i-th record in allData; fields not present in previous records are padded with None.'
        for key,val in flatten(rowDict).items():
            if key.lower() in ignored: continue
            column=allData.setdefault(key,[])
            column+=[None]*(i-len(column))+[val]

    # field name -> list of values, one per record
    allData={}
    # lowercase
    ignored=[i.lower() for i in ignored]
    sortFirst=[sf.lower() for sf in sortFirst]
    # sheet name -> {series name: values}
    seriesData={}
    # number of records read
    nRecords=0
    # open db and get rows
    try:
        import h5py, h5py.h5f
        if isinstance(db,str): dbBytes=bytes(db,'utf-8')
        else: dbBytes=db
        # check first, to avoid warning from h5py.File in stderr
        if not h5py.h5f.is_hdf5(dbBytes): raise IOError('Not a HDF5 file.')
        hdf=h5py.File(db,'r',libver='latest')
    except (ImportError,IOError):
        # connect always succeeds, as it seems, even if the type is not sqlite3 db
        # in that case, it will fail at conn.execute below
        conn=sqlite3.connect(db,timeout=sqliteTimeout,detect_types=sqlite3.PARSE_DECLTYPES)
        hdf=None
    if hdf is not None:
        try:
            with FileLock(db):
                if selector: warnings.warn('selector parameter ignored, since the file is HDF5 (not SQLite)')
                # sort simulations by batchTable, batchTableLine, finished (those which are defined)
                sortAttrs=('batchTable','batchTableLine','finished')
                def sortKey(s):
                    attrs=hdf[s].attrs
                    return tuple(natural_key(str(attrs[a])) for a in sortAttrs if a in attrs)
                simIds=sorted([s for s in hdf if _checkHdf5sim(hdf[s])],key=sortKey)
                # iterate over simulations
                for i,simId in enumerate(simIds):
                    sim=hdf[simId]
                    rowDict={att:tryJson(sim.attrs[att]) for att in sim.attrs}
                    rowDict['misc']={att:tryJson(sim['misc'].attrs[att]) for att in sim['misc'].attrs}
                    if series: # set only if allowed
                        # we have to flatten series, since it may contain nested hdf5 groups
                        simSeries={sName:numpy.array(sVal) for sName,sVal in flatGroup(sim['series'])}
                        seriesData[str(sim.attrs['title'])+'_'+str(sim.attrs['sceneId'])]=simSeries
                    addRecord(i,rowDict)
                nRecords=len(simIds)
        finally:
            # all data were copied out of the file
            hdf.close()
    else:
        try:
            conn.row_factory=sqlite3.Row
            for i,row in enumerate(conn.execute(selector if selector is not None else 'SELECT * FROM batch ORDER BY title')):
                rowDict={}
                for key in row.keys():
                    if key.lower() in ignored: continue
                    # decode val from json, if it fails, leave it alone
                    val=tryJson(row[key])
                    if key!='series': rowDict[key]=val
                    elif series: seriesData[row['title']+'_'+row['sceneId']]=val # set only if allowed
                addRecord(i,rowDict)
                nRecords=i+1
        finally:
            conn.close() # don't occupy the db longer than necessary
    # fields which are missing in trailing records: pad, so that all fields have one value per record
    for column in allData.values(): column+=[None]*(nRecords-len(column))

    fields=sorted(allData.keys(),key=natural_key)
    # apply sortFirst
    fieldsLower=[f.lower() for f in fields]; fields0=fields[:] # these two have always same order
    for sf in reversed(sortFirst): # reverse so that the order of sortFirst is respected
        if sf in fieldsLower: # lowercased name should be put to the front
            field=fields0[fieldsLower.index(sf)] # get the case-sensitive one
            fields=[field]+[f for f in fields if f!=field] # rearrange

    # extension of the output file decides; the dialect is only used if the extension says nothing
    xlsx=bool(out) and out.endswith('.xlsx')
    xls=bool(out) and out.endswith('.xls')
    if not (xls or xlsx):
        xls=dialect.lower()=='xls'
        xlsx=dialect.lower()=='xlsx'
    if xls or xlsx:
        if not out: raise ValueError('The *out* parameter must be given when using the xls/xlsx dialects (refusing to write binary to standard output).')
        # http://scienceoss.com/write-excel-files-with-python-using-xlwt/
        # http://www.youlikeprogramming.com/2011/04/examples-generating-excel-documents-using-pythons-xlwt/
        import urllib.parse
        if xls:
            import xlwt
            wbk=xlwt.Workbook('utf-8')
            sheet=wbk.add_sheet(fixSheetname(db)) # truncate if too long, see fixSheetname
            # datetime style
            datetimeStyle=xlwt.XFStyle()
            datetimeStyle.num_format_str='yyyy-mm-dd_hh:mm:ss' # http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx
            # header style
            font=xlwt.Font()
            font.bold=True
            headStyle=xlwt.XFStyle()
            headStyle.font=font
            hrefStyle=xlwt.easyxf('font: underline single')
            # default style
            defaultStyle=xlwt.Style.default_style
        else:
            import xlsxwriter
            wbk=xlsxwriter.Workbook(out)
            sheet=wbk.add_worksheet(fixSheetname(db))
            headStyle=wbk.add_format({'bold':True})
            datetimeStyle=wbk.add_format({'num_format':'yyyy-mm-dd_hh:mm:ss'})
            hrefStyle=wbk.add_format({'underline':1})
            defaultStyle=None
        # cell styling
        styleDict={datetime.datetime:datetimeStyle} # add styles for other custom types here

        def write_cell(s,row,col,data,style):
            'Write *data* to the cell (row,col) of sheet *s*; URLs are written as hyperlinks.'
            if isinstance(data,str) and data.startswith(('file://','http://','https://')):
                if xls:
                    data=data.replace('"',"'")
                    s.write(row,col,xlwt.Formula('HYPERLINK("%s","%s")'%(urllib.parse.quote(data,safe=':/'),data)),hrefStyle)
                else:
                    s.write_url(row,col,data,string=data)
                return
            # numpy scalars to the corresponding python types
            if isinstance(data,numpy.generic): data=data.item()
            # XLSX does not handle NaN/Inf (yet)
            if not xls and isinstance(data,float) and (math.isinf(data) or math.isnan(data)): data=str(data)
            s.write(row,col,data,style)
        # normal and transposed setters
        if rows: setCell=write_cell
        else: setCell=lambda s,r,c,data,style: write_cell(s,c,r,data,style)

        for col,field in enumerate(fields):
            # headers
            setCell(sheet,0,col,field,headStyle)
            # data
            for row,val in enumerate(allData[field]):
                setCell(sheet,row+1,col,val,styleDict.get(type(val),defaultStyle))
        # save data series
        for sheetName,dic in seriesData.items():
            if xls: sheet=wbk.add_sheet(fixSheetname(sheetName))
            else: sheet=wbk.add_worksheet(fixSheetname(sheetName))
            # perhaps write some header here
            for col,colName in enumerate(sorted(dic.keys())):
                if xls and col>255:
                    log.warning('Rhe data being converted to XLS (%s) contain %d columns, which is more than 255, the limit of the XLS format. Extra data will be discarded from the XLS output. Use .xlsx to overcome this limitation.'%(out,len(dic)))
                    break
                sheet.write(0,col,colName,headStyle)
                rowOffset=1 # length of header
                values=dic[colName]
                for row in range(0,len(values)):
                    if xls and row+rowOffset>65535:
                        log.warning('the data being converted to XLS (%s) contain %d rows (with %d header rows), which is more than 65535, the limit of the XLS file format. Extra data will be discarded from the XLS output. Use .xlsx to overcome this limitation.'%(out,len(values),rowOffset))
                        break
                    val=values[row]
                    # numpy scalars to the corresponding python types
                    if isinstance(val,numpy.generic): val=val.item()
                    # only floats can be NaN/Inf; other values (such as strings) are written as they are
                    if xlsx and isinstance(val,float) and (math.isnan(val) or math.isinf(val)): val=str(val)
                    sheet.write(row+rowOffset,col,val)
        if xls: wbk.save(out)
        else: wbk.close()
    else:
        if seriesData: raise RuntimeError('Data series can only be written with the *xls* dialect')
        def asStr(x):
            'Customize string conversions for some types'
            if isinstance(x,datetime.datetime): return x.strftime('%Y-%m-%d_%H:%M:%S') # as ISO, but without microseconds
            return x
        # newline='' is what the csv module asks for (line ends are written by the csv writer)
        outt=(open(out,'w',newline='') if out else sys.stdout)
        try:
            # write into CSV
            if rows:
                # one attribute per column
                writer=csv.DictWriter(outt,fieldnames=fields,dialect=dialect)
                writer.writeheader()
                for i in range(0,nRecords):
                    writer.writerow({k:asStr(allData[k][i]) for k in fields})
            else:
                # one attribute per row
                writer=csv.writer(outt,dialect=dialect)
                for a in fields: writer.writerow([a]+[asStr(b) for b in allData[a]])
        finally:
            # close only what was opened here (never the standard output)
            if out: outt.close()

    

def readParamsFromTable(scene,under='table',noTableOk=True,unknownOk=False,**kw):
    """
    Read parameters from the batch table and assign them to :obj:`woo.core.Scene.lab` under the ``under`` pseudo-module (e.g. ``Scene.lab.table.foo`` and so on). This function is used for scripts (as opposed to preprocessors) running in a batch. The table file and the line to be read are taken from batch options; the file format is described in :obj:`TableParamReader` (CSV or XLS).

    Keyword arguments (``**kw``) define names and default values of parameters. Value of every column in the table is evaluated as python expression (with ``woo`` and contents of the ``math`` module available), unless it is not a string (such as numbers read from XLS). Special values are ``*`` (use the default value) and ``-`` (skip the column). Columns ``title`` and those starting with ``!`` are not treated as parameters.

    Besides parameters, the following is assigned::

        S.tags['line']='l%d'%line                        # 'l!' if no table is used
        S.tags['title']=…                                # the title column; only with the table
        S.lab.table.explicitParams_="name1=val1,…"       # parameters assigned from the table
        S.lab.table.defaultParams_="name2=default2,…"    # parameters that were left at their defaults

    :param scene: scene of which tags and ``lab`` are modified.
    :param under: name of pseudo-module under ``S.lab`` to save all values to (``table`` by default)
    :param bool noTableOk: if False, raise exception if not running in batch with a table; use default values otherwise
    :param bool unknownOk: do not raise exception if unknown column name is found in the file, and assign it as well
    :return: None
    :raise EnvironmentError: no table was given and *noTableOk* is False.
    :raise RuntimeError: the table does not contain the line requested.
    :raise NameError: column has no default value in ``**kw`` (unless *unknownOk*), or ``*`` is used for column without default.
    """
    import math,woo
    # "name=value" for parameters assigned from the table
    tagsParams=[]
    # dictParams is what was specified in the table; together with defaults, it ends up in S.lab.table.*
    dictParams={}
    # create the S.lab.table pseudo-module
    S=scene
    S.lab._newModule(under)
    pseudoMod=getattr(S.lab,under)
    if not inBatch() or not hasBatchTable():
        if not noTableOk: raise EnvironmentError("Batch options not defined (and required; pass noTableOk=True if they are not)")
        S.tags['line']='l!'
    else:
        tableFile,tableLine=wooOptions.batchTable,wooOptions.batchLine
        allTab=TableParamReader(tableFile).paramDict()
        if tableLine not in allTab: raise RuntimeError("Table %s doesn't contain valid line number %d"%(tableFile,tableLine))
        vv=allTab[tableLine]
        S.tags['line']='l%d'%tableLine
        S.tags['title']=str(vv['title'])
        # assign values specified in the table to python vars
        # !something cols are skipped, those are env vars we don't treat at all (they are contained in title, though)
        for col,val in vv.items():
            if col=='title' or col.startswith('!'): continue
            if col not in kw and not unknownOk: raise NameError("Parameter `%s' has no default value assigned"%col)
            if val=='-': continue # skip this column
            if val=='*':
                # use default value for * in the table: the parameter stays in kw and is treated as any
                # other default (in particular, the default value is not evaluated)
                if col not in kw: raise NameError("Parameter `%s' has no default value assigned"%col)
                continue
            # remove the var from kw, so that it contains only those that were default at the end of this loop
            kw.pop(col,None)
            tagsParams.append('%s=%s'%(col,val))
            # when reading from XLS, data might be numbers; use eval only for strings, otherwise use the thing itself
            # NOTE: eval runs arbitrary code from the table, which must therefore come from a trusted source
            dictParams[col]=eval(val,dict(woo=woo,**math.__dict__)) if isinstance(val,str) else val
    # remaining keys are those left at their default values
    pseudoMod.defaultParams_=",".join("%s=%s"%(k,v) for k,v in kw.items())
    pseudoMod.explicitParams_=",".join(tagsParams)
    # save all vars to the pseudo-module; values from the table take precedence over defaults
    allParams=dict(kw)
    allParams.update(dictParams)
    for k,v in allParams.items():
        setattr(pseudoMod,k,v)
    return None


def runPreprocessor(pre,preFile=None):
    """Execute given :obj:`Preprocessor <woo.core.Preprocessor>`, modifying its attributes from batch (if running in batch). Each column from the batch table (except of environment variables starting with ``!``) must correspond to a preprocessor's attribute.

    Nested attributes are allowed, e.g. with :obj:`woo.pre.horse.FallingHorse`, a column named ``mat.tanPhi`` will modify horse's material's friction angle, using the default material object. Items of sequences can be accessed as ``name[index]``.

    String values are evaluated as python expressions; those starting with ``=`` (or ``'=``) are evaluated only after all other values were assigned, with the preprocessor accessible as ``self``. Columns starting with ``%`` override variables when the preprocessor is re-loaded from *preFile*.

    :param pre: the preprocessor; when called, it returns the scene.
    :param preFile: file which the preprocessor was loaded from; required with ``%`` columns, and used as the title of the scene if there is no batch table.
    :return: the scene created by the preprocessor.
    :raise RuntimeError: the batch table does not contain the line requested, or *preFile* was not given with ``%`` columns.
    """
    import re
    import woo,math,numpy
    # name[index], with integer index
    indexRE=re.compile(r'([a-zA-Z_0-9]+)\s*\[\s*(-?[0-9]+)\s*\]')

    def nestedSetattr(obj,attr,val):
        'Set attribute of *obj* given by dotted path *attr* (possibly with integer indices in brackets) to *val*.'
        *parents,last=[a.strip() for a in attr.split('.')]
        for part in parents:
            m=indexRE.match(part)
            if m: obj=getattr(obj,m.group(1))[int(m.group(2))]
            else: obj=getattr(obj,part)
        m=indexRE.match(last)
        if m: getattr(obj,m.group(1))[int(m.group(2))]=val
        else: setattr(obj,last,val)

    # just run preprocessor in this case, plus set title, if given in Preprocessor
    if not inBatch():
        S=pre()
        if pre.title:
            S.tags['title']=pre.title
        return S

    # (name,expression) of parameters of which the evaluation is postponed
    evalParams=[]
    # values from the table line; stays None without table
    vv=None
    if wooOptions.batchTable:
        allTab=TableParamReader(wooOptions.batchTable).paramDict()
        if wooOptions.batchLine not in allTab: raise RuntimeError("Table %s doesn't contain valid line number %d"%(wooOptions.batchTable,wooOptions.batchLine))
        vv=allTab[wooOptions.batchLine]
        # overriding things set in the #: lines of preprocessor from the table using %varName=...
        percentCols=[v for v in vv.keys() if v.startswith('%')]
        if percentCols:
            if preFile is None: raise RuntimeError('Unable to re-load preprocessor due to %something columns: preFile was left empty by the caller.')
            overrideHashPercent={}
            for v in percentCols:
                # % columns are not attributes of the preprocessor: remove them from what is assigned below
                val=vv.pop(v)
                # NOTE: eval (here and below) runs arbitrary code from the table, which must come from a trusted source
                if isinstance(val,str): val=eval(val,dict(woo=woo,**math.__dict__))
                overrideHashPercent[v[1:]]=val
                log.info('Re-assigning #%% variable %s = %s'%(str(v[1:]),str(val)))
            pre=woo.core.Object.load(preFile,overrideHashPercent=overrideHashPercent)
        # set preprocessor parameters first
        for name,val in vv.items():
            if name[0]=='!': continue # pseudo-variables such as !SCRIPT, !THREADS and so on
            if name=='title': continue
            if val in ('*','-',''): continue # left at the default value
            # postpone evaluation of parameters starting with = so that they can use other params
            if isinstance(val,str) and val.startswith('='): evalParams.append((name,val[1:]))
            elif isinstance(val,str) and val.startswith("'="): evalParams.append((name,val[2:]))
            else:
                log.info('OVERRIDING FROM TABLE: %s = %s'%(name,val))
                # when reading from XLS, data might be numbers; use eval only for strings, otherwise use the thing itself
                if isinstance(val,str): val=eval(val,globals(),dict(woo=woo,math=math,numpy=numpy)) # woo.unit
                nestedSetattr(pre,name,val)
    # postponed evaluation of computable params
    for name,val in evalParams:
        log.info('OVERRIDING FROM TABLE (delayed): %s = %s'%(name,val))
        nestedSetattr(pre,name,eval(val,globals(),dict(woo=woo,math=math,numpy=numpy,self=pre)))
    # check types, if this is a python preprocessor
    if hasattr(pre,'checkAttrTypes'): pre.checkAttrTypes()
    # run preprocessor
    if vv is not None: pre.title=str(vv['title'])
    S=pre()
    # set tags from batch
    if vv is not None:
        S.tags['line']='l%d'%wooOptions.batchLine
        S.tags['title']=str(vv['title'])
    else:
        S.tags['line']='default'
        S.tags['title']=str(preFile if preFile else '[no file]')
    return S

class TableParamReader(object):
    r"""Class for reading simulation parameters from text file.

Each parameter is represented by one column, each parameter set by one line. Columns are separated by blanks (no quoting).

First non-empty line contains column titles (without quotes).
You may use special column named 'title' to describe this parameter set;
if such column is absent, title will be built by concatenating column names and corresponding values (``param1=34,param2=12.22,param4=foo``)

* from columns ending in ``!`` (the ``!`` is not included in the column name)
* from all columns, if no columns end in ``!``.
* columns containing literal - (minus) will be ignored

A column titled ``...`` (or ``…``) is a continuation column: its cells are appended (as strings, without any separator) to the cells of the column on its left, and the continuation column itself is then dropped.

Empty lines within the file are ignored (although counted); ``#`` starts comment till the end of line. Number of blank-separated columns must be the same for all non-empty lines; a text line with fewer columns than the heading line raises ``ValueError`` (surplus columns are ignored).

A special value ``=`` can be used instead of parameter value; value from the previous non-empty line will be used instead (works recursively); in XLS, *empty* cell is treated the same as ``=``. On the first data line there is no previous value to copy: ``=`` raises ``ValueError`` there, while an empty XLS cell is left empty.

This class is used by :obj:`woo.utils.readParamsFromTable`.

>>> tryData=[
...   ['head1','important2!','head3','...','...','...','!OMP_NUM_THREADS!','abcd'],
...   [1,1.1, '1','.','1','5', 1.2,1.3,],
...   ['a','b','HE','AD','_','3','c','d','###','comment'],
...   ['# empty line'],
...   [1,'=','=','=','=','=','=','g']
... ]
>>> import woo
>>> tryFile=woo.master.tmpFilename()
>>> # write text
>>> f1=tryFile+'.txt'
>>> txt=open(f1,'w')
>>> for ll in tryData: n=txt.write(' '.join([str(l) for l in ll])+'\n') # set n to suppress output in doctest under py3k
>>> txt.close()
>>> 
>>> # write xls
>>> import xlwt,itertools
>>> f2=tryFile+'.xls'
>>> xls=xlwt.Workbook(); sheet=xls.add_sheet('test')
>>> for r in range(len(tryData)):
...   for c in range(len(tryData[r])):
...     sheet.write(r,c,tryData[r][c])
>>> xls.save(f2)
>>> 
>>> from pprint import *
>>> pprint(TableParamReader(f1).paramDict())
{2: {'!OMP_NUM_THREADS': '1.2',
     'abcd': '1.3',
     'head1': '1',
     'head3': '1.15',
     'important2': '1.1',
     u'title': u'important2=1.1,OMP_NUM_THREADS=1.2'},
 3: {'!OMP_NUM_THREADS': 'c',
     'abcd': 'd',
     'head1': 'a',
     'head3': 'HEAD_3',
     'important2': 'b',
     u'title': u'important2=b,OMP_NUM_THREADS=c'},
 5: {'!OMP_NUM_THREADS': 'c',
     'abcd': 'g',
     'head1': '1',
     'head3': 'HEAD_3',
     'important2': 'b',
     u'title': u'important2=b,OMP_NUM_THREADS=c__line=5__'}}
>>> pprint(TableParamReader(f2).paramDict())
{2: {u'!OMP_NUM_THREADS': '1.2',
     u'abcd': '1.3',
     u'head1': '1',
     u'head3': '1.15',
     u'important2': '1.1',
     u'title': u'important2=1.1,OMP_NUM_THREADS=1.2'},
 3: {u'!OMP_NUM_THREADS': 'c',
     u'abcd': 'd',
     u'head1': 'a',
     u'head3': 'HEAD_3',
     u'important2': 'b',
     u'title': u'important2=b,OMP_NUM_THREADS=c'},
 5: {u'!OMP_NUM_THREADS': 'c',
     u'abcd': 'g',
     u'head1': '1',
     u'head3': 'HEAD_3',
     u'important2': 'b',
     u'title': u'important2=b,OMP_NUM_THREADS=c__line=5__'}}

    """

    # column titles which mark a continuation of the column on their left
    _continuationMarks=('...',u'...',u'…')

    def __init__(self,file,firstLine=-1):
        """Set up the reader and read the whole table into memory.

        :param file: path of the table; a name ending in ``.xls`` (case-insensitive) is read with ``xlrd``, anything else is read as blank-separated text.
        :param firstLine: offset added to the internal row index to obtain the reported line number. If negative, 1 is used for XLS (``xlrd`` row indices are 0-based) and 0 for text files (text lines are stored with one empty entry prepended, so their indices are already 1-based).
        :raises ValueError: if the table has no heading line, a heading is empty, a text line has fewer columns than the heading line, ``=`` is used on the first data line, or ``...`` is used as the first column title.
        """
        isXls=file.lower().endswith('.xls')
        if firstLine<0: firstLine=(1 if isXls else 0)
        # both readers return
        #   headings=['col1title!','col2title',...]   # as in the file
        #   values={lineNumber:[val,val,...],...}     # ordered the same as headings
        if isXls: headings,values=self._readXls(file,firstLine)
        else: headings,values=self._readText(file,firstLine)
        for col,h in enumerate(headings):
            # would otherwise fail later with a bare IndexError when looking at the last character
            if h=='': raise ValueError('Error in %s: column %d has an empty heading.'%(file,col))

        lines=sorted(values)
        self._fillRepeated(values,lines)
        rawHeadings=self._collapseContinuations(headings,values,lines)
        # parameter names are headings without the trailing bang
        headings=[(h[:-1] if h.endswith('!') else h) for h in rawHeadings]

        # copy to dictionary; that is the way results are supposed to be returned
        dvalues=dict((l,dict((h,str(v)) for h,v in zip(headings,values[l]))) for l in lines)
        if 'title' not in headings:
            for l,title in self._autoTitles(rawHeadings,values,lines).items():
                dvalues[l][u'title']=title
        self.values=dvalues

    @staticmethod
    def _readXls(file,firstLine):
        "Read the first sheet of XLS *file*; return ``(headings,values)`` with ``values`` keyed by row index plus *firstLine*."
        import re
        import xlrd
        sheet=xlrd.open_workbook(file).sheet_by_index(0)
        blankTypes=(xlrd.XL_CELL_EMPTY,xlrd.XL_CELL_BLANK)
        maxCol=0
        rows=[] # rows actually containing data (filled in the loop)
        for row in range(sheet.nrows):
            lastDataCol=-1
            for col in range(sheet.ncols):
                c=sheet.cell(row,col)
                isText=(c.ctype==xlrd.XL_CELL_TEXT)
                # comment cancels all remaining cells on the line
                # NOTE(review): the pattern also matches whitespace-only text cells, so such a cell ends the row just like a comment does
                if isText and re.match(r'^\s*(#.*)?$',c.value): break
                empty=(c.ctype in blankTypes or (isText and c.value.strip()==''))
                if not empty: lastDataCol=col
            if lastDataCol>=0:
                rows.append(row)
                maxCol=max(maxCol,lastDataCol)
        if not rows: raise ValueError('Error in %s: the table contains no heading line.'%file)
        cols=list(range(maxCol+1))

        def cellText(raw):
            "String form of a cell; numbers with zero fractional part lose the trailing '.0' (XLS does not know ints)."
            # http://stackoverflow.com/questions/8825681/integers-from-excel-files-become-floats
            v=str(raw)
            try:
                f=float(v)
                if f==int(f): v=str(int(f))
            # int() raises ValueError for nan and OverflowError for inf
            except (ValueError,OverflowError): pass
            return v

        headings=[sheet.cell(rows[0],c).value for c in cols]
        values=dict((r+firstLine,[cellText(sheet.cell(r,c).value) for c in cols]) for r in rows[1:])
        return headings,values

    @staticmethod
    def _readText(file,firstLine):
        "Read blank-separated text *file*; return ``(headings,values)`` with ``values`` keyed by 1-based line number plus *firstLine*."
        import re
        # read file in memory, remove newlines and comments; the [''] makes lines 1-indexed
        # only the newline is stripped (not blindly the last character), so that the last line is read correctly even without trailing newline
        with open(file,'r') as f: ll=['']+[re.sub(r'\s*#.*','',l.rstrip('\n')) for l in f]
        # usable lines are those that contain something else than just spaces
        usableLines=[i for i,l in enumerate(ll) if not re.match(r'^\s*(#.*)?$',l)]
        if not usableLines: raise ValueError('Error in %s: the table contains no heading line.'%file)
        headings=ll[usableLines[0]].split()
        values={}
        for i in usableLines[1:]:
            cells=ll[i].split()
            if len(cells)<len(headings): raise ValueError('Error in %s: line %d has only %d columns, should have %d.'%(file,i+firstLine,len(cells),len(headings)))
            # surplus columns are ignored
            values[i+firstLine]=[str(c) for c in cells[:len(headings)]]
        return headings,values

    @staticmethod
    def _fillRepeated(values,lines):
        "Replace ``=`` and empty cells in *values* (in-place) by the value from the previous line in sorted *lines*."
        for i,l in enumerate(lines):
            row=values[l]
            for col,val in enumerate(row):
                if val not in ('=',''): continue
                if i==0:
                    # nothing to copy from; lines[i-1] would silently wrap around to the last line
                    if val=='=': raise ValueError("The = specifier on line %d, column %d, refers to nonexistent value on previous line?"%(l,col))
                    continue # empty cell stays empty
                try:
                    row[col]=values[lines[i-1]][col]
                except (IndexError,KeyError):
                    raise ValueError("The = specifier on line %d, column %d, refers to nonexistent value on previous line?"%(l,col))

    @classmethod
    def _collapseContinuations(cls,headings,values,lines):
        "Merge cells of continuation columns into the column on their left (modifies *values*); return headings without the continuation marks."
        nCollapsed=0
        for ih,h in enumerate(headings): # index of headings (headings are pruned after the loop)
            if h not in cls._continuationMarks: continue
            if ih<1: raise ValueError("The ... header continuation is not allowed in the first column.")
            iv=ih-nCollapsed # index of values: changes as values are being removed
            nCollapsed+=1
            for l in lines:
                vv=values[l]
                values[l]=vv[:iv-1]+[str(vv[iv-1])+str(vv[iv])]+vv[iv+1:]
        return [h for h in headings if h not in cls._continuationMarks]

    @staticmethod
    def _autoTitles(rawHeadings,values,lines):
        "Return ``{line:title}`` built from columns with trailing ``!`` (or from all columns if there are none); repeated titles get the line number appended."
        hasBangs=any(h.endswith('!') for h in rawHeadings)
        titles={}
        descs=set()
        for l in lines:
            ddd=[]
            for head,val in zip(rawHeadings,values[l]):
                if hasBangs and not head.endswith('!'): continue
                if isinstance(val,str) and val.strip() in ('','-','*'): continue # default value used
                ddd.append(head.replace('!','')+'='+('%g'%val if isinstance(val,float) else str(val)))
            dd=','.join(ddd).replace("'",'').replace('"','')
            if dd in descs: dd+='__line=%d__'%l
            titles[l]=dd.replace('/','_').replace('[','').replace(']','').replace('*woo.unit','')
            descs.add(dd)
        return titles

    def paramDict(self):
        """Return dictionary containing data from file given to constructor. Keys are line numbers (which might be non-contiguous and refer to real line numbers that one can see in text editors), values are dictionaries mapping parameter names to their values given in the file. The special value '=' has already been interpreted, ``!`` (bangs) (if any) were already removed from column titles, ``title`` column has already been added (if absent)."""
        return self.values


def cartProdParamTable(params,out,same=''):
    '''Write parameter table (as XLS) where all combinations (cartesian product) of values in *params* (which is a dictionary, or :obj:`python:collections.OrderedDict`) are traversed, one combination per row.
    
:param same: content of repeated cells (those equal to the cell right above); if ``None``, repeated cells are filled with the repeated value. Other useful values are ``'='`` and ``''`` (empty cell)
:param out: XLS file to write to
:param params: dictionary-like with parameter values; keys may be n-tuples, which will span multiple columns -- in that case, values must also be n-tuples, and will also span those columns
:return: total number of lines written (the heading line is not counted); 0 if some parameter has no values

>>> import collections, woo.batch 
>>> pp=collections.OrderedDict() # use OrderedDict for predictable column ordering
>>> pp['pattern']=['ortho','hexa']
>>> pp['radius','...']=[(r,'*woo.unit["mm"]') for r in (1,2,3)]    # use continuation columns for unit specification
>>> pp['gravity','...','...']=[('(0,0,',g,')') for g in (9.81,20)] # use continuation columns for concatenation of expression
>>> xls=woo.master.tmpFilename()+'.xls'
>>> woo.batch.cartProdParamTable(params=pp,out=xls)
12
>>> import pprint
>>> pprint.pprint(TableParamReader(xls).paramDict())
{2: {u'gravity': '(0,0,9.81)',
     u'pattern': 'ortho',
     u'radius': '1*woo.unit["mm"]',
     u'title': u'pattern=ortho,radius=1mm,gravity=(0,0,9.81)'},
 3: {u'gravity': '(0,0,20)',
     u'pattern': 'ortho',
     u'radius': '1*woo.unit["mm"]',
     u'title': u'pattern=ortho,radius=1mm,gravity=(0,0,20)'},
 4: {u'gravity': '(0,0,9.81)',
     u'pattern': 'ortho',
     u'radius': '2*woo.unit["mm"]',
     u'title': u'pattern=ortho,radius=2mm,gravity=(0,0,9.81)'},
 5: {u'gravity': '(0,0,20)',
     u'pattern': 'ortho',
     u'radius': '2*woo.unit["mm"]',
     u'title': u'pattern=ortho,radius=2mm,gravity=(0,0,20)'},
 6: {u'gravity': '(0,0,9.81)',
     u'pattern': 'ortho',
     u'radius': '3*woo.unit["mm"]',
     u'title': u'pattern=ortho,radius=3mm,gravity=(0,0,9.81)'},
 7: {u'gravity': '(0,0,20)',
     u'pattern': 'ortho',
     u'radius': '3*woo.unit["mm"]',
     u'title': u'pattern=ortho,radius=3mm,gravity=(0,0,20)'},
 8: {u'gravity': '(0,0,9.81)',
     u'pattern': 'hexa',
     u'radius': '1*woo.unit["mm"]',
     u'title': u'pattern=hexa,radius=1mm,gravity=(0,0,9.81)'},
 9: {u'gravity': '(0,0,20)',
     u'pattern': 'hexa',
     u'radius': '1*woo.unit["mm"]',
     u'title': u'pattern=hexa,radius=1mm,gravity=(0,0,20)'},
 10: {u'gravity': '(0,0,9.81)',
      u'pattern': 'hexa',
      u'radius': '2*woo.unit["mm"]',
      u'title': u'pattern=hexa,radius=2mm,gravity=(0,0,9.81)'},
 11: {u'gravity': '(0,0,20)',
      u'pattern': 'hexa',
      u'radius': '2*woo.unit["mm"]',
      u'title': u'pattern=hexa,radius=2mm,gravity=(0,0,20)'},
 12: {u'gravity': '(0,0,9.81)',
      u'pattern': 'hexa',
      u'radius': '3*woo.unit["mm"]',
      u'title': u'pattern=hexa,radius=3mm,gravity=(0,0,9.81)'},
 13: {u'gravity': '(0,0,20)',
      u'pattern': 'hexa',
      u'radius': '3*woo.unit["mm"]',
      u'title': u'pattern=hexa,radius=3mm,gravity=(0,0,20)'}}


.. csv-table:: Generated cartesian product parameter table (XLS)
   :header: pattern,radius,...,gravity,...,...

   ortho,1,"*woo.unit[""mm""]","(0,0,",9.81,)
   ,,,,20,
   ,2,,,9.81,
   ,,,,20,
   ,3,,,9.81,
   ,,,,20,
   hexa,1,,,9.81,
   ,,,,20,
   ,2,,,9.81,
   ,,,,20,
   ,3,,,9.81,
   ,,,,20,

    '''
    import xlwt,itertools
    xls=xlwt.Workbook(); sheet=xls.add_sheet('product')
    bold=xlwt.easyxf('font: bold on')

    def flat(items):
        "Spread tuples over consecutive cells, keep everything else as a single cell."
        cells=[]
        for item in items: cells.extend(item if isinstance(item,tuple) else (item,))
        return cells

    # heading line: tuple keys span several columns
    for col,head in enumerate(flat(params.keys())):
        sheet.write(0,col,head,style=bold)
    # data lines, one per combination; the sheet row 0 is taken by headings
    nRows=0 # stays 0 when the product is empty (the loop variable would be undefined then)
    prevCells=None
    for row,vv in enumerate(itertools.product(*params.values()),1):
        cells=flat(vv)
        for col,w in enumerate(cells):
            # always write the value on the first data line, when it changed, or when repeated values are requested in full
            if same is None or prevCells is None or prevCells[col]!=w: sheet.write(row,col,str(w))
            # repeated value: write the placeholder, unless it is the empty cell (then write nothing at all)
            elif same!='': sheet.write(row,col,same)
        prevCells=cells
        nRows=row
    xls.save(out)
    return nRows



# Drop the u prefix from u'...' in the expected doctest output contained in these docstrings
# (presumably so that it matches reprs without the prefix -- TODO confirm).
# __doc__ is None when docstrings are stripped (python -OO), hence the guards.
if TableParamReader.__doc__: TableParamReader.__doc__=TableParamReader.__doc__.replace("u'","'")
if cartProdParamTable.__doc__: cartProdParamTable.__doc__=cartProdParamTable.__doc__.replace("u'","'")


if __name__=="__main__":
    ## this is now in the doctest as well
    # Demo: write the same small table as text and as XLS, then print what TableParamReader reads back from both.
    import os,tempfile
    import xlwt
    from pprint import pprint
    tryData=[
        ['head1','important2!','!OMP_NUM_THREADS!','abcd'],
        [1,1.1,1.2,1.3,],
        ['a','b','c','d','###','comment'],
        ['# empty line'],
        [1,'=','=','g']
    ]
    # system temporary directory rather than hard-coded /tmp, which does not exist on all platforms
    tryFile=os.path.join(tempfile.gettempdir(),'try-tbl')

    # write text
    f1=tryFile+'.txt'
    with open(f1,'w') as txt:
        for ll in tryData: txt.write(' '.join([str(l) for l in ll])+'\n')

    # write xls
    f2=tryFile+'.xls'
    xls=xlwt.Workbook(); sheet=xls.add_sheet('test')
    for r,rowData in enumerate(tryData):
        for c,cell in enumerate(rowData):
            sheet.write(r,c,cell)
    xls.save(f2)

    pprint(TableParamReader(f1).paramDict())
    pprint(TableParamReader(f2).paramDict())