File indexing completed on 2024-06-19 05:11:17 UTC
view on githubraw file Latest commit 95edcfd9 on 2024-06-18 17:05:14 UTC
ae10cb9ac4 Oliv*0001 import sys
0002 import re
0003 import glob
0004 import numpy as np
2436bf20b6 Oliv*0005 from operator import mul
ae10cb9ac4 Oliv*0006
0007 debug = False
0008
0009
0010
0011
0012
0013 try: next
0014 except NameError:
0015 def next ( obj ): return obj.next()
0016
0017 _currentline = ''
0018
0019 class ParseError(ValueError):
0020 def __str__(self):
0021 metafile = self.args[0]
0022 lines = self.args[1:]
0023 try:
0024 name = metafile.name
0025 except AttributeError:
0026 name = metafile
0027
0028 return '\n'.join(('in metafile: '+name,)
0029 + lines
0030 + ('in: ' + _currentline,))
0031
0032
0033
0034
0035 _comment_pattern = re.compile(
0036 r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
0037 re.DOTALL | re.MULTILINE
0038 )
0039
0040 def _comment_replacer(match):
0041 s = match.group(0)
0042 if s.startswith('/'):
0043 return ""
0044 else:
0045 return s
0046
0047 def strip_comments(text):
0048 """ strips C and C++ style comments from text """
0049 return re.sub(_comment_pattern, _comment_replacer, text)
0050
0051
0052 _string_pattern = re.compile(r"'(.*)'$")
0053
7621b5d564 Oliv*0054 def _parse1(s):
ae10cb9ac4 Oliv*0055 """ convert one item to appropriate type """
0056 m = _string_pattern.match(s)
0057 if m:
0058 s = m.group(1)
0059
0060 s = re.sub(r"''","'",s)
0061 return s
0062
0063 if '.' in s or 'e' in s.lower():
0064 return float(s)
0065 else:
0066 try:
0067 return int(s)
0068 except ValueError:
0069 raise ParseError("Cannot parse value: " + s)
0070
0071
0072 _closing = {'[':']',
0073 '{':'}',
0074 }
0075
0076 def parsemeta(metafile):
0077 """ parses metafile (file object or filename) into a dictionary of lists
0078 of floats, ints or strings
0079 """
0080 global _currentline
0081
0082 try:
0083 lines = open(metafile)
0084 except TypeError:
0085 lines = iter(metafile)
0086
0087 d = {}
0088 for line in lines:
0089 line = strip_comments(line)
0090
0091 if re.match(r'\s*$', line):
0092 continue
0093
0094 m = re.match(r' *(\w*) *= *(.*?) *$', line)
0095 if m:
0096 key,line = m.groups()
0097 else:
0098 raise ParseError(metafile,line)
0099
0100
0101 opening = line[0]
0102 try:
0103 closing = _closing[opening]
0104 except KeyError:
0105 raise ParseError(metafile,line,'Values must be enclosed in [] or {}.')
0106
0107
0108 while closing not in line:
0109 try:
0110 nextline = next(lines)
0111 except StopIteration:
0112 raise ParseError(metafile,line,'No closing ' + closing + ' found.')
0113
0114 line += ' ' + strip_comments(nextline).rstrip()
0115
0116 if line[-2:] != closing + ';':
0117 raise ParseError(metafile,line,
0118 'Values must be enclosed in "[ ];" or "{ };".')
0119
0120
0121 line = line[1:-2].strip(" ,")
0122 _currentline = line
0123
0124 if opening == '[':
0125
7621b5d564 Oliv*0126 val = [ _parse1(s) for s in re.split(r'[, ] *',line) ]
ae10cb9ac4 Oliv*0127 else:
0128
0129 val = [ s.rstrip() for s in re.split(r"' *'", line.strip("'")) ]
0130
0131 d[key] = val
0132
0133 return d
0134
0135
0136
0137 def message(*args):
d9f2883c13 Oliv*0138 sys.stdout.write(' '.join([str(s) for s in args]) + '\n')
0139
0140
0141 def warning(*args):
ae10cb9ac4 Oliv*0142 sys.stderr.write(' '.join([str(s) for s in args]) + '\n')
0143
0144
7621b5d564 Oliv*0145 def _aslist(i):
ae10cb9ac4 Oliv*0146 """ if iterable, turn into list, otherwise put into list """
0147 try:
0148 res = list(i)
0149 except TypeError:
0150 res = [i]
0151 return res
0152
0153
0154 def fromfileshape(filename,dtype,shape=None,**kwargs):
0155 return np.fromfile(filename, dtype, **kwargs).reshape(shape)
0156
0157
0158 def scanforfiles(fname):
0159 """ return list of iteration numbers for which metafiles with base fname exist """
0160 import glob
0161 allfiles = glob.glob(fname + '.' + 10*'[0-9]' + '.001.001.meta')
0162 if len(allfiles) == 0:
0163 allfiles = glob.glob(fname + '.' + 10*'[0-9]' + '.meta')
0164 off = -5
0165 else:
0166 off = -13
0167
0168 itrs = [ int(s[off-10:off]) for s in allfiles ]
0169 itrs.sort()
0170 return itrs
0171
0172
0173 def readmeta(f):
0174 """ read meta file and extract tile/timestep-specific parameters """
0175 meta = parsemeta(f)
0176 dimList = meta.pop('dimList')
0177
0178 gdims = tuple(dimList[-3::-3])
0179 i0s = [ i-1 for i in dimList[-2::-3] ]
0180 ies = dimList[-1::-3]
0181
0182 timeInterval = meta.pop('timeInterval', None)
0183 timeStepNumber = meta.pop('timeStepNumber', None)
0184 map2gl = meta.pop('map2glob', None)
0185
0186 meta['dimList'] = list(gdims[::-1])
0187 return gdims,i0s,ies,timeStepNumber,timeInterval,map2gl,meta
0188
0189
0190 _typeprefixes = {'ieee-be':'>',
0191 'b' :'>',
0192 '>' :'>',
0193 'ieee-le':'<',
0194 'l' :'<',
0195 '<' :'<',
0196 }
0197 _typesuffixes = {'float32':'f4',
0198 'float64':'f8',
0199 }
0200
0201 def rdmds(fnamearg,itrs=-1,machineformat='b',rec=None,fill_value=0,
0202 returnmeta=False,astype=float,region=None,lev=(),
f9fdb96e02 Oliv*0203 usememmap=False,mm=False,squeeze=True,verbose=False):
7621b5d564 Oliv*0204 """
ae10cb9ac4 Oliv*0205 Read meta-data files as written by MITgcm.
0206
7621b5d564 Oliv*0207 Call signatures:
b63a5f2e9d Oliv*0208
7621b5d564 Oliv*0209 a = rdmds(fname,...)
b63a5f2e9d Oliv*0210
7621b5d564 Oliv*0211 a,its,meta = rdmds(fname,...,returnmeta=True)
b63a5f2e9d Oliv*0212
7621b5d564 Oliv*0213 Parameters
0214 ----------
0215 fname : string
0216 name of file to read, without the '.data' or '.meta' suffix. If itrs is
0217 given, the iteration number is added to `fname` as well. `fname` may
0218 contain shell wildcards, which is useful for tile files organized into
0219 directories, e.g.,
0220
0221 T = rdmds('prefix*/T', 2880)
0222
0223 will read prefix0000/T.0000002880.*, prefix0001/T.0000002880.*, ...
0224 (and any others that match the wildcard, so be careful how you name things!)
0225
95edcfd937 Oliv*0226 itrs : int or list of ints or np.nan or np.inf
7621b5d564 Oliv*0227 Iteration number(s). With itrs=-1, will try to read
0228
0229 fname.meta or fname.001.001.meta, ...
0230
0231 If itrs is a list of integers of an integer, it will read the corresponding
0232
0233 fname.000000iter.meta, ...
0234
95edcfd937 Oliv*0235 If itrs is np.nan, it will read all iterations for which files are found.
0236 If itrs is np.inf, it will read the highest iteration found.
7621b5d564 Oliv*0237
0238 machineformat : int
0239 endianness ('b' or 'l', default 'b')
0240 rec : list of int or None
0241 list of records to read (default all)
0242 useful for pickups and multi-field diagnostics files
0243 fill_value : float
0244 fill value for missing (blank) tiles (default 0)
0245 astype : data type
0246 data type to return (default: double precision)
0247 None: keep data type/precision of file
0248 region : tuple of int
0249 (x0,x1,y0,y1) read only this region (default (0,nx,0,ny))
0250 lev : list of int or tuple of lists of int
0251 list of levels to read, or, for multiple dimensions
0252 (excluding x,y), tuple(!) of lists (see examples below)
0253 usememmap : bool
0254 if True, use a memory map for reading data (default False)
0255 recommended when using lev, or region with global files
0256 to save memory and, possibly, time
0257
0258 Returns
0259 -------
0260 a : array_like
0261 numpy array of the data read
0262 its : list of int
0263 list of iteration numbers read (only if returnmeta=True)
0264 meta : dict
0265 dictionary of metadata (only if returnmeta=True)
0266
0267 Examples
0268 --------
0269 >>> XC = rdmds('XC')
0270 >>> XC = rdmds('res_*/XC')
0271 >>> T = rdmds('T.0000002880')
0272 >>> T = rdmds('T',2880)
0273 >>> T2 = rdmds('T',[2880,5760])
95edcfd937 Oliv*0274 >>> T,its = rdmds('T',numpy.inf)
7621b5d564 Oliv*0275 >>> VVEL = rdmds('pickup',2880,rec=range(50,100))
0276 >>> a5 = rdmds('diags',2880,rec=0,lev=[5])
0277 >>> a = rdmds('diags',2880,rec=0,lev=([0],[0,1,5,6,7]))
0278 >>> from numpy import r_
0279 >>> a = rdmds('diags',2880,rec=0,lev=([0],r_[:2,5:8])) # same as previous
0280 >>> a = rdmds('diags',2880,rec=0)[0, [0,1,5,6,7], ...] # same, but less efficient
0281 >>> a = rdmds('diags',2880)[0, 0, [0,1,5,6,7], ...] # even less efficient
ae10cb9ac4 Oliv*0282 """
8db0c6507a Mart*0283 import functools
ae10cb9ac4 Oliv*0284 usememmap = usememmap or mm
0285 if usememmap:
0286 readdata = np.memmap
0287 else:
0288 readdata = fromfileshape
0289
0290
0291 additrs = itrs != -1
0292 if itrs is np.nan:
0293
0294 itrs = scanforfiles(fnamearg)
f9fdb96e02 Oliv*0295 if verbose: warning('Reading {0} time levels: '.format(len(itrs)), *itrs)
ae10cb9ac4 Oliv*0296 returnits = True
0297 itrsislist = True
0298 elif itrs is np.inf:
0299
0300 itrs = scanforfiles(fnamearg)
0301 if len(itrs):
f9fdb96e02 Oliv*0302 if verbose: warning('Found {0} time levels, reading'.format(len(itrs)), itrs[-1])
ae10cb9ac4 Oliv*0303 else:
f9fdb96e02 Oliv*0304 if verbose: warning('Found 0 time levels for {}'.format(fnamearg))
ae10cb9ac4 Oliv*0305 itrs = itrs[-1:]
0306 returnits = True
0307 itrsislist = False
0308 else:
0309 returnits = False
0310 itrsislist = np.iterable(itrs)
0311
0312
7621b5d564 Oliv*0313 itrs = _aslist(itrs)
ae10cb9ac4 Oliv*0314
0315 allrec = rec is None
7621b5d564 Oliv*0316 reclist = _aslist(rec)
ae10cb9ac4 Oliv*0317 if not isinstance(lev,tuple):
0318 lev = (lev,)
7621b5d564 Oliv*0319 levs = tuple( _aslist(l) for l in lev )
ae10cb9ac4 Oliv*0320 levdims = tuple(len(l) for l in levs)
0321 levinds = np.ix_(*levs)
0322 nlev = len(levdims)
0323
0324 if usememmap:
0325 recsatonce = True
0326 readdata = np.memmap
0327 else:
0328 recsatonce = allrec
0329 readdata = fromfileshape
0330
0331 try:
0332 typepre = _typeprefixes[machineformat]
0333 except KeyError:
0334 raise ValueError('Allowed machineformats: ' + ' '.join(_typeprefixes))
0335
0336 arr = None
0337 metaref = {}
0338 timeStepNumbers = []
0339 timeIntervals = []
0340 for iit,it in enumerate(itrs):
0341 if additrs:
0342 fname = fnamearg + '.{0:010d}'.format(int(it))
0343 else:
0344 fname = fnamearg
0345
0346 metafiles = glob.glob(fname + 2*('.'+3*'[0-9]') + '.meta') or glob.glob(fname+'.meta')
0347 if len(metafiles) == 0:
0348 raise IOError('No files found for ' + fname + '.meta')
0349
f9fdb96e02 Oliv*0350 if verbose: warning(metafiles[0])
ae10cb9ac4 Oliv*0351
d9f2883c13 Oliv*0352 if debug: warning('Found',len(metafiles),'metafiles for iteration',it)
ae10cb9ac4 Oliv*0353
0354 for metafile in metafiles:
0355 gdims,i0s,ies,timestep,timeinterval,map2gl,meta = readmeta(metafile)
0356 if arr is None:
0357
0358 try:
0359 dataprec, = meta['dataprec']
0360 except KeyError:
0361 dataprec, = meta['format']
0362 tp = typepre + _typesuffixes[dataprec]
0363 size = np.dtype(tp).itemsize
0364 if astype is None: astype = tp
0365 recshape = tuple( ie-i0 for i0,ie in zip(i0s,ies) )
8db0c6507a Mart*0366 count = functools.reduce(mul, recshape)
ae10cb9ac4 Oliv*0367 nrecords, = meta['nrecords']
0368 tileshape = (nrecords,) + recshape
0369 if allrec:
0370 reclist = range(nrecords)
0371 recinds = np.s_[:,] + levinds
0372 else:
0373 recinds = np.ix_(reclist, *levs)
0374
0375 if region is None:
0376 ri0,rie,rj0,rje = 0,gdims[-1],0,gdims[-2]
0377 else:
0378 ri0,rie,rj0,rje = region
0379 if ri0 < 0: ri0 += gdims[-1]
0380 if rie < 0: rie += gdims[-1]
0381 if rj0 < 0: rj0 += gdims[-2]
0382 if rje < 0: rje += gdims[-2]
0383
0384 assert nlev+2 <= len(gdims)
0385 rdims = levdims + gdims[len(levdims):-2] + (rje-rj0,rie-ri0)
0386
0387 arr = np.empty((len(itrs),len(reclist))+rdims, astype)
0388 arr[...] = fill_value
0389 metaref = meta
0390 else:
0391 if meta != metaref:
0392 raise ValueError('Meta files not compatible')
0393
0394 datafile = metafile[:-4] + 'data'
0395
0396 if region is not None:
0397 if map2gl is None:
0398
0399 i0 = min(rie, max(ri0, i0s[-1]))
0400 ie = min(rie, max(ri0, ies[-1]))
0401 j0 = min(rje, max(rj0, i0s[-2]))
0402 je = min(rje, max(rj0, ies[-2]))
0403
0404 I0 = i0 - i0s[-1]
0405 Ie = ie - i0s[-1]
0406 J0 = j0 - i0s[-2]
0407 Je = je - i0s[-2]
0408
0409 i0s[-1] = i0 - ri0
0410 ies[-1] = ie - ri0
0411 i0s[-2] = j0 - rj0
0412 ies[-2] = je - rj0
0413 else:
0414 raise NotImplementedError('Region selection is not implemented for map2glob != [0,1]')
0415
0416 sl = tuple( slice(i0,ie) for i0,ie in zip(i0s,ies) )
0417 if map2gl is None:
0418
0419 arrtile = arr[(iit,slice(None))+sl]
0420 else:
0421 ny,nx = arr.shape[-2:]
0422 i0 = i0s[-1]
0423 j0 = i0s[-2]
0424 ie = ies[-1]
0425 je = ies[-2]
0426
0427 jstride = map2gl[1]*nx + map2gl[0]
0428 n = (je-j0)*jstride
0429
0430 ii0 = min(i0+nx*j0, nx*ny-n)
0431
0432 ioff = nx*j0 - ii0
0433
0434 arrflat = arr.reshape(arr.shape[:-2]+(nx*ny,))
0435
0436 arrmap = arrflat[...,ii0:ii0+n].reshape(arr.shape[:-2]+(je-j0,jstride))[...,:,ioff+i0:ioff+ie]
0437
0438 arrtile = arrmap[(iit,slice(None))+sl[:-2]]
0439 del arrflat,arrmap
0440
0441 if recsatonce:
0442 if region is None:
0443 arrtile[...] = readdata(datafile, tp, shape=tileshape)[recinds]
0444 else:
0445 if Ie > I0 and Je > J0:
d9f2883c13 Oliv*0446 if debug: message(datafile, I0,Ie,J0,Je)
ae10cb9ac4 Oliv*0447 arrtile[...] = readdata(datafile, tp, shape=tileshape)[recinds + np.s_[...,J0:Je,I0:Ie]]
0448 else:
0449 f = open(datafile)
0450 for irec,recnum in enumerate(reclist):
0451 if recnum < 0: recnum += nrecords
0452 f.seek(recnum*count*size)
0453 if region is None:
0454 arrtile[irec] = np.fromfile(f, tp, count=count).reshape(recshape)[levinds]
0455 else:
0456 if Ie > I0 and Je > J0:
d9f2883c13 Oliv*0457 if debug: message(datafile, I0,Ie,J0,Je)
ae10cb9ac4 Oliv*0458 tilerec = np.fromfile(f, tp, count=count).reshape(recshape)
0459 arrtile[irec] = tilerec[levinds + np.s_[...,J0:Je,I0:Ie]]
0460 f.close()
0461
0462 if timestep is not None:
0463 timeStepNumbers.extend(timestep)
0464
0465 if timeinterval is not None:
0466 timeIntervals.append(timeinterval)
0467
0468
0469 if len(timeStepNumbers):
0470 metaref['timeStepNumber'] = timeStepNumbers
0471
0472 if len(timeIntervals):
0473 metaref['timeInterval'] = timeIntervals
0474
0475 if arr is None:
0476 arr = np.array([])
0477 else:
0478
0479 dims = (len(itrs),len(reclist)) + levdims
0480 if squeeze:
0481
0482 squeezed = tuple( d for d in dims if d > 1 )
0483 else:
0484
0485 keepers = [itrsislist, np.iterable(rec)] + [np.iterable(l) for l in lev]
0486 squeezed = tuple( d for d,keep in zip(dims, keepers) if keep )
0487
0488 arr = arr.reshape(squeezed+arr.shape[2+nlev:])
0489
0490 if returnmeta:
0491 meta = dict((k.lower(),v) for k,v in metaref.items())
0492 return arr,itrs,meta
0493
0494
0495 else:
0496 return arr
0497
0498
0499 def wrmds(fbase, arr, itr=None, dataprec='float32', ndims=None, nrecords=None,
0500 times=None, fields=None, simulation=None, machineformat='b',
0501 deltat=None, dimlist=None):
7621b5d564 Oliv*0502 '''Write an array to an mds meta/data file set.
ae10cb9ac4 Oliv*0503
7621b5d564 Oliv*0504 If itr is given, the files will be named fbase.0000000itr.data and
0505 fbase.0000000itr.meta, otherwise just fbase.data and fbase.meta.
ae10cb9ac4 Oliv*0506
0507 Parameters
0508 ----------
7621b5d564 Oliv*0509 fbase : string
0510 Name of file to write, without the '.data' or '.meta' suffixes,
0511 and without the iteration number if itr is give
0512 arr : array_like
0513 Numpy array to write
0514 itr : int or None
0515 If given, this iteration number will be appended to the file name
0516 dataprec : string
0517 precision of resulting file ('float32' or 'float64')
0518 ndims : int
0519 number of non-record dimensions; extra (leading) dimensions
0520 will be folded into 1 record dimension
0521 nrecords : int
0522 number of records; will fold as many leading dimensions as
0523 necessary (has to match shape!)
0524 times : float or list of floats
0525 times to write into meta file. Either a single float or a list
0526 of two for a time interval
0527 fields : list of strings
0528 list of fields
0529 simulation : string
0530 string describing the simulation
0531 machineformat : string
0532 'b' or 'l' for big or little endian
0533 deltat : float
0534 time step; provide in place of either times or itr to have one
0535 computed from the other
0536 dimlist : tuple
0537 dimensions as will be stored in file (only useful when passing
0538 meta data from an existing file to wrmds as keyword args)
ae10cb9ac4 Oliv*0539 '''
0540 if type(dataprec) == type([]): dataprec, = dataprec
0541 if type(ndims) == type([]): ndims, = ndims
0542 if type(nrecords) == type([]): nrecords, = nrecords
0543 if type(simulation) == type([]): simulation, = simulation
0544 if type(machineformat) == type([]): machineformat, = machineformat
0545 if type(deltat) == type([]): deltat, = deltat
0546
0547 tp = _typeprefixes[machineformat]
0548 try:
0549 tp = tp + _typesuffixes[dataprec]
0550 except KeyError:
0551 raise ValueError("dataprec must be 'float32' or 'float64'.")
0552
0553 if ndims is None:
0554 if nrecords is None:
0555 ndims = min(3,len(arr.shape))
0556 else:
0557
0558 dims = list(arr.shape[::-1])
0559 n = 1
0560 while n < nrecords:
0561 n *= dims.pop()
0562
0563 assert n == nrecords
0564 ndims = len(dims)
0565
0566 dims = arr.shape[-1:-ndims-1:-1]
0567 nrec = np.prod(arr.shape[:-ndims], dtype=int)
0568 if nrecords is not None and nrecords != nrec:
0569 raise ValueError('Shape/nrecords mismatch')
0570 if dimlist is not None and tuple(dimlist) != dims:
0571 raise ValueError('Shape/dimlist mismatch: {} vs {}'.format(dims, dimlist))
0572
0573 if arr.ndim > ndims + 1:
0574 sys.stderr.write("Warning: folding several dimensions into record dimension.\n")
0575
0576
0577
0578 if times is not None:
0579 try:
0580 iter(times)
0581 except TypeError:
0582 times = [ times ]
0583
0584 if deltat is not None:
0585 if itr is None:
0586 itr = int(times[-1]//deltat)
0587 elif times is None:
0588 times = [ deltat*itr ]
0589 else:
0590 sys.stderr.write('Warning: discarding deltat.\n')
0591
0592 if itr is not None:
0593 fbase = fbase + '.{:010d}'.format(itr)
0594
0595 with open(fbase + '.meta', 'w') as f:
0596 if simulation is not None:
0597 f.write(" simulation = { '" + simulation + "' };\n")
0598
0599 f.write(" nDims = [ {:3d} ];\n".format(ndims))
0600
0601 if max(dims) < 10000:
0602 fmt = '{:5d}'
0603 else:
0604 fmt = '{:10d}'
0605
0606 fmt = fmt + ',' + fmt + ',' + fmt
0607
0608 f.write(" dimList = [\n " +
0609 ",\n ".join(fmt.format(d,1,d) for d in dims) +
0610 "\n ];\n")
0611
0612
0613
0614 f.write(" dataprec = [ '" + dataprec + "' ];\n")
0615
0616 f.write(" nrecords = [ {:5d} ];\n".format(nrec))
0617
0618 if itr is not None:
0619 f.write(" timeStepNumber = [ {:10d} ];\n".format(itr))
0620
0621 if times is not None:
0622 f.write(" timeInterval = [" +
0623 "".join("{:20.12E}".format(t) for t in times) +
0624 " ];\n")
0625
0626 if fields is not None:
0627 nflds = len(fields)
0628 f.write(" nFlds = [ {:4d} ];\n".format(nflds))
0629 f.write(" fldList = {\n")
0630 for row in range((nflds+19)//20):
0631 for field in fields[20*row:20*(row+1)]:
0632 f.write(" '{:<8s}'".format(field))
0633 f.write("\n")
0634 f.write(" };\n")
0635
0636 arr.astype(tp).tofile(fbase + '.data')
0637