Warning, /utils/python/MITgcmutils/scripts/gluemncbig is written in an unsupported language. File is not indexed.
view on githubraw file Latest commit 9a1526c0 on 2021-01-11 22:38:19 UTC
ae10cb9ac4 Oliv*0001 #!/usr/bin/env python
0002 # -*- coding: utf-8 -*-
7621b5d564 Oliv*0003 """Usage: gluemncbig [-2] [-q] [--verbose] [--help] [--many] [-v <vars>] -o <outfile> <files>
ae10cb9ac4 Oliv*0004
0005 -v <vars> comma-separated list of variable names or glob patterns
3c7f3cce62 Oliv*0006 -2 write a NetCDF version 2 (64-Bit Offset) file allowing for large records
692071215d Oliv*0007 --many many tiles: assemble only along x in memory; less efficient
0008 on some filesystems, but opens fewer files simultaneously and
0009 uses less memory
ae10cb9ac4 Oliv*0010 -q suppress progress messages
0011 --verbose report variables
7621b5d564 Oliv*0012 --help show this help text
ae10cb9ac4 Oliv*0013
0014 All files must have the same variables.
0015 Each variable (or 1 record of it) must fit in memory.
692071215d Oliv*0016 With --many, only a row of tiles along x must fit in memory.
ae10cb9ac4 Oliv*0017
0018 Examples:
0019
0020 gluemncbig -o ptr.nc mnc_*/ptr_tave.*.nc
0021 gluemncbig -o BIO.nc -v 'BIO_*' mnc_*/ptr_tave.*.nc
0022 """
8b8576185f Mart*0023 from __future__ import print_function
ae10cb9ac4 Oliv*0024
0025 # NetCDF reader/writer module modified from pupynere,
0026 # https://bitbucket.org/robertodealmeida/pupynere/
0027 # to allow delayed reading/writing of variable data.
0028 # MIT license
0029
0030 moduledoc = u"""
0031 NetCDF reader/writer module.
0032
0033 This module is used to read and create NetCDF files. NetCDF files are
0034 accessed through the `netcdf_file` object. Data written to and from NetCDF
0035 files are contained in `netcdf_variable` objects. Attributes are given
0036 as member variables of the `netcdf_file` and `netcdf_variable` objects.
0037
0038 Notes
0039 -----
0040 NetCDF files are a self-describing binary data format. The file contains
0041 metadata that describes the dimensions and variables in the file. More
0042 details about NetCDF files can be found `here
0043 <http://www.unidata.ucar.edu/software/netcdf/docs/netcdf.html>`_. There
0044 are three main sections to a NetCDF data structure:
0045
0046 1. Dimensions
0047 2. Variables
0048 3. Attributes
0049
0050 The dimensions section records the name and length of each dimension used
0051 by the variables. The variables would then indicate which dimensions it
0052 uses and any attributes such as data units, along with containing the data
0053 values for the variable. It is good practice to include a
0054 variable that is the same name as a dimension to provide the values for
0055 that axes. Lastly, the attributes section would contain additional
0056 information such as the name of the file creator or the instrument used to
0057 collect the data.
0058
0059 When writing data to a NetCDF file, there is often the need to indicate the
0060 'record dimension'. A record dimension is the unbounded dimension for a
0061 variable. For example, a temperature variable may have dimensions of
0062 latitude, longitude and time. If one wants to add more temperature data to
0063 the NetCDF file as time progresses, then the temperature variable should
0064 have the time dimension flagged as the record dimension.
0065
0066 This module implements the Scientific.IO.NetCDF API to read and create
0067 NetCDF files. The same API is also used in the PyNIO and pynetcdf
0068 modules, allowing these modules to be used interchangeably when working
0069 with NetCDF files. The major advantage of this module over other
0070 modules is that it doesn't require the code to be linked to the NetCDF
0071 libraries.
0072
0073 In addition, the NetCDF file header contains the position of the data in
0074 the file, so access can be done in an efficient manner without loading
0075 unnecessary data into memory. It uses the ``mmap`` module to create
0076 Numpy arrays mapped to the data on disk, for the same purpose.
0077
0078 Examples
0079 --------
0080
0081 To create a NetCDF file:
0082
0083 >>> f = netcdf_file('simple.nc', 'w')
0084 >>> f.history = 'Created for a test'
0085 >>> f.location = u'北京'
0086 >>> f.createDimension('time', 10)
0087 >>> time = f.createVariable('time', 'i', ('time',))
0088 >>> time[:] = range(10)
0089 >>> time.units = u'µs since 2008-01-01'
0090 >>> f.close()
0091
0092 Note the assignment of ``range(10)`` to ``time[:]``. Exposing the slice
0093 of the time variable allows for the data to be set in the object, rather
0094 than letting ``range(10)`` overwrite the ``time`` variable.
0095
0096 To read the NetCDF file we just created:
0097
0098 >>> f = netcdf_file('simple.nc', 'r')
4d634188c7 Mart*0099 >>> print(f.history)
ae10cb9ac4 Oliv*0100 Created for a test
4d634188c7 Mart*0101 >>> print(f.location)
ae10cb9ac4 Oliv*0102 北京
0103 >>> time = f.variables['time']
4d634188c7 Mart*0104 >>> print(time.units)
ae10cb9ac4 Oliv*0105 µs since 2008-01-01
4d634188c7 Mart*0106 >>> print(time.shape)
ae10cb9ac4 Oliv*0107 (10,)
4d634188c7 Mart*0108 >>> print(time[-1])
ae10cb9ac4 Oliv*0109 9
0110 >>> f.close()
0111
0112 TODO:
0113 * properly implement ``_FillValue``.
0114 * implement Jeff Whitaker's patch for masked variables.
0115 * fix character variables.
0116 * implement PAGESIZE for Python 2.6?
0117 """
0118
0119 __all__ = ['netcdf_file']
0120
0121
0122 from operator import mul
0123 try:
0124 from collections import OrderedDict
0125 except ImportError:
0126 OrderedDict = dict
0127 from mmap import mmap, ACCESS_READ
0128
0129 import numpy as np
7621b5d564 Oliv*0130 from numpy import frombuffer, ndarray, dtype, empty, array, asarray
ae10cb9ac4 Oliv*0131 from numpy import little_endian as LITTLE_ENDIAN
0132
0133 import sys
0134
# the following are mostly from numpy.compat.py3k

if sys.version_info[0] >= 3:
    # Python 3: reduce moved to functools; provide bytes/str helpers that
    # round-trip header strings through latin-1 (a byte-transparent
    # encoding, safe for the names stored in NetCDF classic headers).
    from functools import reduce

    def asbytes(s):
        """Return `s` as bytes (latin-1 encoded if it is a str)."""
        if isinstance(s, bytes):
            return s
        return str(s).encode('latin1')

    def asstr(s):
        """Return `s` as str (latin-1 decoded if it is bytes)."""
        if isinstance(s, bytes):
            return s.decode('latin1')
        return str(s)

    # Aliases so the rest of the module can keep using Python-2 names.
    long = int
    unicode = str
    basestring = str
else:
    # python 2: str is already bytes, and long/unicode/basestring are builtins
    asbytes = str
    asstr = str
    long = long
    unicode = unicode
    basestring = basestring
ae10cb9ac4 Oliv*0159
# On-disk tags of the NetCDF classic-format header (big-endian 4-byte
# values; ABSENT is the 8-byte tag+count pair marking an empty list).
ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00'
ZERO = b'\x00\x00\x00\x00'
NC_BYTE = b'\x00\x00\x00\x01'
NC_CHAR = b'\x00\x00\x00\x02'
NC_SHORT = b'\x00\x00\x00\x03'
NC_INT = b'\x00\x00\x00\x04'
NC_FLOAT = b'\x00\x00\x00\x05'
NC_DOUBLE = b'\x00\x00\x00\x06'
NC_DIMENSION = b'\x00\x00\x00\n'
NC_VARIABLE = b'\x00\x00\x00\x0b'
NC_ATTRIBUTE = b'\x00\x00\x00\x0c'


# On-disk type tag -> (big-endian) numpy dtype used when reading.
TYPEMAP = { NC_BYTE: dtype(np.byte),
            NC_CHAR: dtype('c'),
            NC_SHORT: dtype(np.int16).newbyteorder('>'),
            NC_INT: dtype(np.int32).newbyteorder('>'),
            NC_FLOAT: dtype(np.float32).newbyteorder('>'),
            NC_DOUBLE: dtype(np.float64).newbyteorder('>'),
            }

# Native-order numpy dtype -> on-disk type tag used when writing.
REVERSE = { dtype(np.byte): NC_BYTE,
            dtype('c'): NC_CHAR,
            dtype(np.int16): NC_SHORT,
            dtype(np.int32): NC_INT,
            dtype(np.int64): NC_INT, # will be converted to int32
            dtype(np.float32): NC_FLOAT,
            dtype(np.float64): NC_DOUBLE,
            }
0189
0190
class NetCDFError(Exception):
    """Raised on misuse of the delayed-I/O API (accessing variable data
    through the arrays in delay mode, writing before write_metadata,
    or size mismatches in write_var)."""
    pass
0193
0194
class unmapped_array(object):
    """Shape/dtype placeholder used in delay mode instead of an ndarray.

    It carries just enough metadata (shape, dtype, size, nbytes) for the
    header bookkeeping; any attempt to index it raises NetCDFError so
    callers are steered to read_var/write_var and friends.
    """

    def __init__(self, shape, dtype_):
        self.shape = shape
        self.dtype = dtype(dtype_)

    @property
    def itemsize(self):
        return self.dtype.itemsize

    @property
    def size(self):
        # product of all extents; 1 for a 0-d shape, like ndarray.size
        n = 1
        for extent in self.shape:
            n *= extent
        return n

    @property
    def nbytes(self):
        return self.size * self.itemsize

    def __len__(self):
        return self.shape[0]

    def __getitem__(self, indx):
        raise NetCDFError('netcdf_file: delay is True, use read_var or read_recvar to read variable data')

    def __setitem__(self, indx, val):
        raise NetCDFError('netcdf_file: delay is True, use write_var or write_recvar to assign variable data')
0220
0221
0222 class netcdf_file(object):
0223 """
0224 A file object for NetCDF data.
0225
0226 A `netcdf_file` object has two standard attributes: `dimensions` and
0227 `variables`. The values of both are dictionaries, mapping dimension
0228 names to their associated lengths and variable names to variables,
0229 respectively. Application programs should never modify these
0230 dictionaries.
0231
0232 All other attributes correspond to global attributes defined in the
0233 NetCDF file. Global file attributes are created by assigning to an
0234 attribute of the `netcdf_file` object.
0235
0236 If delay is True, variable data is only read/written on demand.
0237 In this case, if mode="w", write_metadata() needs to be called after
0238 all dimensions, variables and attributes have been defined, but before
0239 any variable data is written.
0240
0241 Parameters
0242 ----------
0243 filename : string or file-like
0244 string -> filename
0245 mode : {'r', 'w'}, optional
0246 read-write mode, default is 'r'
0247 mmap : None or bool, optional
0248 Whether to mmap `filename` when reading. Default is True
0249 when `filename` is a file name, False when `filename` is a
0250 file-like object
0251 delay : bool, optional
0252 Whether to delay reading of variable data. Default is False.
0253 This is an alternative to mmap for more efficient reading.
0254 version : {1, 2}, optional
0255 version of netcdf to read / write, where 1 means *Classic
0256 format* and 2 means *64-bit offset format*. Default is 1. See
0257 `here <http://www.unidata.ucar.edu/software/netcdf/docs/netcdf/Which-Format.html>`_
0258 for more info.
0259
0260 """
0261 def __init__(self, filename, mode='r', mmap=None, version=1, delay=False):
0262 """Initialize netcdf_file from fileobj (str or file-like)."""
0263 if delay:
0264 if mmap is None:
0265 mmap = False
0266 else:
0267 raise ValueError('Cannot delay variables for mmap')
0268 if hasattr(filename, 'seek'): # file-like
0269 self.fp = filename
0270 self.filename = 'None'
0271 if mmap is None:
0272 mmap = False
0273 elif mmap and not hasattr(filename, 'fileno'):
0274 raise ValueError('Cannot use file object for mmap')
0275 else: # string?
0276 self.filename = filename
0277 self.fp = open(self.filename, '%sb' % mode)
0278 if mmap is None:
0279 mmap = True
0280 self.use_mmap = mmap
0281 self.version_byte = version
0282 self.delay = delay
0283
0284 if not mode in 'rw':
0285 raise ValueError("Mode must be either 'r' or 'w'.")
0286 self.mode = mode
0287
0288 self.dimensions = OrderedDict()
0289 self.variables = OrderedDict()
0290
0291 self._dims = []
0292 self._recs = 0
0293 self._recsize = 0
0294
0295 self._mapped = False
0296 self._begins = OrderedDict()
0297
0298 self._attributes = OrderedDict()
0299
0300 if mode == 'r':
0301 self._read()
0302
0303 def __setattr__(self, attr, value):
0304 # Store user defined attributes in a separate dict,
0305 # so we can save them to file later.
0306 try:
0307 self._attributes[attr] = value
0308 except AttributeError:
0309 pass
0310 self.__dict__[attr] = value
0311
    def close(self):
        """Closes the NetCDF file, flushing pending writes first."""
        try:
            # fp may never have been assigned if __init__ failed early.
            closed = self.fp.closed
        except AttributeError:
            pass
        else:
            if not closed:
                try:
                    self.flush()
                finally:
                    # close even if flush raised
                    self.fp.close()
    # flush-on-garbage-collection
    __del__ = close
0325
0326 def createDimension(self, name, length):
0327 """
0328 Adds a dimension to the Dimension section of the NetCDF data structure.
0329
0330 Note that this function merely adds a new dimension that the variables can
0331 reference. The values for the dimension, if desired, should be added as
0332 a variable using `createVariable`, referring to this dimension.
0333
0334 Parameters
0335 ----------
0336 name : str
0337 Name of the dimension (Eg, 'lat' or 'time').
0338 length : int
0339 Length of the dimension.
0340
0341 See Also
0342 --------
0343 createVariable
0344
0345 """
0346 self.dimensions[name] = length
0347 self._dims.append(name)
0348
0349 def createVariable(self, name, type, dimensions):
0350 """
0351 Create an empty variable for the `netcdf_file` object, specifying its data
0352 type and the dimensions it uses.
0353
0354 Parameters
0355 ----------
0356 name : str
0357 Name of the new variable.
0358 type : dtype or str
0359 Data type of the variable.
0360 dimensions : sequence of str
0361 List of the dimension names used by the variable, in the desired order.
0362
0363 Returns
0364 -------
0365 variable : netcdf_variable
0366 The newly created ``netcdf_variable`` object.
0367 This object has also been added to the `netcdf_file` object as well.
0368
0369 See Also
0370 --------
0371 createDimension
0372
0373 Notes
0374 -----
0375 Any dimensions to be used by the variable should already exist in the
0376 NetCDF data structure or should be created by `createDimension` prior to
0377 creating the NetCDF variable.
0378
0379 """
0380 shape = tuple([self.dimensions[dim] for dim in dimensions])
0381 shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for numpy
0382
0383 if not isinstance(type, dtype): type = dtype(type)
0384 if type.newbyteorder('=') not in REVERSE:
0385 raise ValueError("NetCDF 3 does not support type %s" % type)
0386
0387 if self.delay:
0388 data = unmapped_array(shape_, type)
0389 else:
0390 data = empty(shape_, type)
0391 self.variables[name] = netcdf_variable(data, type, shape, dimensions)
0392 return self.variables[name]
0393
    def flush(self):
        """
        Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.

        In delay mode this writes the header once (if not yet mapped) and
        patches the record count; otherwise it rewrites the whole file.

        See Also
        --------
        sync : Identical function

        """
        # getattr: the object may be half-constructed when called from __del__
        if getattr(self, 'mode', None) == 'w':
            if self.delay:
                if not self._mapped:
                    self._map()
                self.update_numrecs(self._recs)
            else:
                self._write()
    sync = flush
0411
0412 def write_metadata(self):
0413 '''This needs to be called before assigning any data to variables!'''
0414 if self.delay:
0415 self._map()
0416 else:
0417 raise UserWarning('write_metadata is void unless delay is True')
0418
    def _map(self):
        """Write the full header (delay mode) and compute every variable's
        data offset, without writing any variable data; afterwards
        write_var/write_recvar can seek-and-write in place."""
        self.fp.seek(0)
        self.fp.write(b'CDF')
        # version byte: 1 = classic format, 2 = 64-bit offset format
        self.fp.write(array(self.version_byte, '>b').tobytes())

        # Write headers
        self._write_numrecs()
        self._write_dim_array()
        self._write_gatt_array()
        self._map_var_array()
        self._mapped = True
0430
    def _map_var_array(self):
        """Write the var_array header section and assign file offsets.

        First emits each variable's metadata (which records where its
        'begin' field sits and its vsize), then walks the variables --
        non-record first, then record -- accumulating offsets and patching
        each 'begin' field in place.  Also sets _recsize to the total
        per-record byte count.
        """
        if self.variables:
            self.fp.write(NC_VARIABLE)
            self._pack_int(len(self.variables))

            # Separate record variables from non-record ones, keep order
            nonrec_vars = [ k for k,v in self.variables.items() if not v.isrec ]
            rec_vars = [ k for k,v in self.variables.items() if v.isrec ]

            # Set the metadata for all variables.
            for name in nonrec_vars + rec_vars:
                self._map_var_metadata(name)

            # Now that we have the metadata, we know the vsize of
            # each variable, so we can calculate their position in the file

            pos0 = pos = self.fp.tell()

            # set file pointers for all variables.
            for name in nonrec_vars:
                var = self.variables[name]
                # Set begin in file header.
                self.fp.seek(var._begin)
                self._pack_begin(pos)
                self._begins[name] = pos
                pos += var._vsize

            # record variables start here; they are interleaved per record
            recstart = pos

            for name in rec_vars:
                var = self.variables[name]
                # Set begin in file header.
                self.fp.seek(var._begin)
                self._pack_begin(pos)
                self._begins[name] = pos
                pos += var._vsize

            self.__dict__['_recsize'] = pos - recstart

            # leave the file positioned at the first variable's data
            self.fp.seek(pos0)
        else:
            self.fp.write(ABSENT)
0474
0475 def _map_var_metadata(self, name):
0476 var = self.variables[name]
0477
0478 self._pack_string(name)
0479 self._pack_int(len(var.dimensions))
0480 for dimname in var.dimensions:
0481 dimid = self._dims.index(dimname)
0482 self._pack_int(dimid)
0483
0484 self._write_att_array(var._attributes)
0485
0486 nc_type = REVERSE[var.dtype.newbyteorder('=')]
0487 self.fp.write(asbytes(nc_type))
0488
0489 if not var.isrec:
0490 vsize = var.data.size * var.data.itemsize
0491 vsize += -vsize % 4
0492 else: # record variable
0493 if 1: #var.data.shape[0]:
0494 size = reduce(mul, var.data.shape[1:], 1)
0495 vsize = size * var.data.itemsize
0496 else:
0497 vsize = 0
0498 rec_vars = len([var for var in self.variables.values()
0499 if var.isrec])
0500 if rec_vars > 1:
0501 vsize += -vsize % 4
0502 self.variables[name].__dict__['_vsize'] = vsize
0503 self._pack_int(vsize)
0504
0505 # Pack a bogus begin, and set the real value later.
0506 self.variables[name].__dict__['_begin'] = self.fp.tell()
0507 self._pack_begin(0)
0508
    @property
    def numrecs(self):
        """Current number of records along the record dimension."""
        return self._recs

    @property
    def attributes(self):
        """OrderedDict of the file's global attributes."""
        return self._attributes

    @property
    def begins(self):
        """List of (name, file_offset, isrec) tuples for variables whose
        data offset is known (populated in delay mode)."""
        return [(name,pos,self.variables[name].isrec) for name,pos in self._begins.items()]
0520
692071215d Oliv*0521 def write_var(self, name, val, j0=None, iY=-2):
ae10cb9ac4 Oliv*0522 if not self.delay:
0523 raise NetCDFError('netcdf_file: delay is False, need to assign to variables')
0524 if not self._mapped:
0525 raise NetCDFError('netcdf_file: need to call write_metadata first')
0526 pos = self._begins[name]
692071215d Oliv*0527 var = self.variables[name]
ae10cb9ac4 Oliv*0528 count = var.data.size * var.data.itemsize
692071215d Oliv*0529 if j0 is not None:
0530 if iY == 0:
0531 idxs = [()]
0532 else:
0533 idxs = np.ndindex(*var.data.shape[:iY])
0534 for idx in idxs:
0535 ii = idx + (j0,) + len(var.data.shape[iY+1:])*(0,)
0536 offset = np.ravel_multi_index(ii, var.data.shape)*var.data.itemsize
0537 end = offset + val[idx].size*var.data.itemsize
0538 if end > count:
0539 raise NetCDFError('array too large: {} > {}'.format(end, count))
0540 self.fp.seek(pos+offset)
0541 np.asanyarray(val[idx], var.data.dtype.newbyteorder('>')).tofile(self.fp)
0542 else:
0543 end = val.size*var.data.itemsize
0544 if end != count:
0545 raise NetCDFError('array too large: {} > {}'.format(end, count))
0546 self.fp.seek(pos)
0547 np.asanyarray(val, var.data.dtype.newbyteorder('>')).tofile(self.fp)
0548 # pad
e1e3780d86 Oliv*0549 self.fp.write(b'\x00' * (var._vsize - count))
ae10cb9ac4 Oliv*0550
    def write_recvar(self, name, rec, val, j0=None, iY=-2):
        """Write record `rec` of record variable `name` (delay mode only).

        When `j0` is given, `val` is written as a slab starting at index
        `j0` along axis `iY` (negative, relative to the full shape
        including the record axis -- confirm against callers).  Extends
        the record count when writing past the current last record.
        """
        if not self.delay:
            raise NetCDFError('netcdf_file: delay is False, need to assign to variables')
        if not self._mapped:
            raise NetCDFError('netcdf_file: need to call write_metadata first')
        # records of all record variables are interleaved: stride _recsize
        pos = self._begins[name] + rec*self._recsize
        var = self.variables[name]
        # bytes in one full record of this variable
        count = reduce(mul, var.data.shape[1:], 1) * var.data.itemsize
        if j0 is not None:
            if iY == 0:
                idxs = [()]
            else:
                idxs = np.ndindex(*var.data.shape[1:iY])
            for idx in idxs:
                # flat offset of element (idx..., j0, 0, ...) within the record
                ii = idx + (j0,) + len(var.data.shape[iY+1:])*(0,)
                offset = np.ravel_multi_index(ii, var.data.shape[1:])*var.data.itemsize
                end = offset + val[idx].size*var.data.itemsize
                if end > count:
                    raise NetCDFError('array too large: {} > {}'.format(end, count))
                self.fp.seek(pos+offset)
                np.asanyarray(val[idx], var.data.dtype.newbyteorder('>')).tofile(self.fp)
        else:
            end = val.size*var.data.itemsize
            # NOTE(review): raises ValueError here but NetCDFError in
            # write_var for the same check, and the message says "too
            # large" although any size mismatch is rejected.
            if end != count:
                raise ValueError('netcdf_file.write_recvar: array too large')
            self.fp.seek(pos)
            np.asanyarray(val, var.data.dtype.newbyteorder('>')).tofile(self.fp)
            # pad
            self.fp.write(b'\x00' * (var._vsize - count))
        if rec >= self._recs:
            self.__dict__['_recs'] = rec + 1
0582
    def _write(self):
        """Write the complete file (header plus all variable data) from
        the in-memory arrays; used by flush() when delay is False."""
        self.fp.seek(0)
        self.fp.write(b'CDF')
        # version byte: 1 = classic format, 2 = 64-bit offset format
        self.fp.write(array(self.version_byte, '>b').tobytes())

        # Write headers and data.
        self._write_numrecs()
        self._write_dim_array()
        self._write_gatt_array()
        self._write_var_array()
0593
    def _write_numrecs(self):
        """Write the numrecs header field, first syncing _recs to the
        longest record variable."""
        # Get highest record count from all record variables.
        for var in self.variables.values():
            if var.isrec and len(var.data) > self._recs:
                self.__dict__['_recs'] = len(var.data)
        # remember the field's offset so update_numrecs can patch it later
        self.__dict__['_numrecs_begin'] = self.fp.tell()
        self._pack_int(self._recs)
0601
    def update_numrecs(self, numrecs):
        """Patch the numrecs field in the already-written header (delay
        mode: called from flush once the true record count is known)."""
        self.__dict__['_recs'] = numrecs
        self.fp.seek(self._numrecs_begin)
        self._pack_int(numrecs)
0606
    def _write_dim_array(self):
        """Write the dim_array header section (name/length pairs in
        creation order), or ABSENT if no dimensions exist."""
        if self.dimensions:
            self.fp.write(NC_DIMENSION)
            self._pack_int(len(self.dimensions))
            for name in self._dims:
                self._pack_string(name)
                length = self.dimensions[name]
                self._pack_int(length or 0) # replace None with 0 for record dimension
        else:
            self.fp.write(ABSENT)
0617
    def _write_gatt_array(self):
        """Write the global-attribute section of the header."""
        self._write_att_array(self._attributes)
0620
    def _write_att_array(self, attributes):
        """Write one att_array section (used for both global and
        per-variable attributes), or ABSENT if `attributes` is empty."""
        if attributes:
            self.fp.write(NC_ATTRIBUTE)
            self._pack_int(len(attributes))
            for name, values in attributes.items():
                self._pack_string(name)
                self._write_values(values)
        else:
            self.fp.write(ABSENT)
0630
0631 def _write_var_array(self):
0632 if self.variables:
0633 self.fp.write(NC_VARIABLE)
0634 self._pack_int(len(self.variables))
0635
0636 # # Sort variables non-recs first, then recs. We use a DSU
0637 # # since some people use pupynere with Python 2.3.x.
0638 # deco = [ (v._shape and not v.isrec, k) for (k, v) in self.variables.items() ]
0639 # deco.sort()
0640 # variables = [ k for (unused, k) in deco ][::-1]
0641 # Separate record variables from non-record ones, keep order
0642 nonrec_vars = [ k for k,v in self.variables.items() if not v.isrec ]
0643 rec_vars = [ k for k,v in self.variables.items() if v.isrec ]
0644 variables = nonrec_vars + rec_vars
0645
0646 # Set the metadata for all variables.
0647 for name in variables:
0648 self._write_var_metadata(name)
0649 # Now that we have the metadata, we know the vsize of
0650 # each record variable, so we can calculate recsize.
0651 self.__dict__['_recsize'] = sum([
0652 var._vsize for var in self.variables.values()
0653 if var.isrec])
0654 # Set the data for all variables.
0655 for name in variables:
0656 self._write_var_data(name)
0657 else:
0658 self.fp.write(ABSENT)
0659
    def _write_var_metadata(self, name):
        """Write the header entry for variable `name` and record where its
        'begin' field sits so _write_var_data can patch it."""
        var = self.variables[name]

        self._pack_string(name)
        self._pack_int(len(var.dimensions))
        for dimname in var.dimensions:
            dimid = self._dims.index(dimname)
            self._pack_int(dimid)

        self._write_att_array(var._attributes)

        nc_type = REVERSE[var.dtype.newbyteorder('=')]
        self.fp.write(asbytes(nc_type))

        if not var.isrec:
            vsize = var.data.size * var.data.itemsize
            vsize += -vsize % 4  # round up to a 4-byte boundary
        else: # record variable: vsize is the size of one record
            try:
                vsize = var.data[0].size * var.data.itemsize
            except IndexError:
                # no records yet
                vsize = 0
            # records are padded to 4 bytes unless there is exactly one
            # record variable (classic-format special case)
            rec_vars = len([var for var in self.variables.values()
                            if var.isrec])
            if rec_vars > 1:
                vsize += -vsize % 4
        self.variables[name].__dict__['_vsize'] = vsize
        self._pack_int(vsize)

        # Pack a bogus begin, and set the real value later.
        self.variables[name].__dict__['_begin'] = self.fp.tell()
        self._pack_begin(0)
0692
    def _write_var_data(self, name):
        """Write the data of variable `name` and patch its 'begin' header
        field with the actual file offset."""
        var = self.variables[name]

        # Set begin in file header.
        the_beguine = self.fp.tell()
        self.fp.seek(var._begin)
        self._pack_begin(the_beguine)
        self.fp.seek(the_beguine)

        # Write data.
        if not var.isrec:
            # data must go out big-endian; swap little-endian arrays
            if (var.data.dtype.byteorder == '<' or
                    (var.data.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                var.data = var.data.byteswap()
            self.fp.write(var.data.tobytes())
            count = var.data.size * var.data.itemsize
            # pad to the 4-byte-aligned vsize
            self.fp.write(b'\x00' * (var._vsize - count))
        else: # record variable
            # Handle rec vars with shape[0] < nrecs.
            if self._recs > len(var.data):
                shape = (self._recs,) + var.data.shape[1:]
                var.data.resize(shape)

            # records of all record variables are interleaved, so step
            # forward by the whole-file _recsize after each record
            pos0 = pos = self.fp.tell()
            for rec in var.data:
                if (rec.dtype.byteorder == '<' or
                        (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                    rec = rec.byteswap()
                self.fp.write(rec.tobytes())
                # Padding
                count = rec.size * rec.itemsize
                self.fp.write(b'\x00' * (var._vsize - count))
                pos += self._recsize
                self.fp.seek(pos)
            # leave the pointer just past this variable's first record
            self.fp.seek(pos0 + var._vsize)
0728
    def _write_values(self, values):
        """Write one attribute value: type tag, element count, the values
        themselves, and zero padding up to a 4-byte boundary."""
        if hasattr(values, 'dtype'):
            # already a numpy array: derive the tag from its native-order dtype
            nc_type = REVERSE[values.dtype.newbyteorder('=')]
        else:
            # infer the NetCDF type from a sample element
            types = [
                    (int, NC_INT),
                    (long, NC_INT),
                    (float, NC_FLOAT),
                    (basestring, NC_CHAR),
                    ]
            try:
                sample = values[0]
            except (IndexError, TypeError):
                sample = values
            if isinstance(sample, unicode):
                if not isinstance(values, unicode):
                    raise ValueError(
                        "NetCDF requires that text be encoded as UTF-8")
                values = values.encode('utf-8')
            for class_, nc_type in types:
                if isinstance(sample, class_): break
            # NOTE(review): if no class matches, nc_type silently remains
            # NC_CHAR (the last entry) -- behavior inherited from pupynere.

            if nc_type == NC_CHAR:
                if len(values) == 0:
                    # only this can represent zero-length strings
                    dtype_ = dtype('c')
                else:
                    # this avoids double encoding in python 3
                    dtype_ = dtype('S')
            else:
                dtype_ = TYPEMAP[nc_type]

            values = asarray(values, dtype_)

        self.fp.write(asbytes(nc_type))

        # nelems: string length for 'S' values, element count otherwise
        if values.dtype.char == 'S':
            nelems = values.itemsize
        else:
            nelems = values.size
        self._pack_int(nelems)

        # NOTE(review): only 0-d values are byteswapped here; arrays built
        # above get a big-endian dtype from TYPEMAP, but pre-built numpy
        # arrays keep their own byte order -- confirm callers pass
        # big-endian arrays.
        if not values.shape and (values.dtype.byteorder == '<' or
                (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
            values = values.byteswap()
        self.fp.write(values.tobytes())
        count = values.size * values.itemsize
        self.fp.write(b'\x00' * (-count % 4)) # pad
ae10cb9ac4 Oliv*0777
    def _read(self):
        """Parse the header of an opened file, populating dimensions,
        global attributes and variables.

        Raises
        ------
        TypeError
            If the file does not start with the 'CDF' magic bytes.
        """
        # Check magic bytes and version
        magic = self.fp.read(3)
        if not magic == b'CDF':
            raise TypeError("Error: %s is not a valid NetCDF 3 file" %
                            self.filename)
        self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]

        # Read file headers and set data.
        self._read_numrecs()
        self._read_dim_array()
        self._read_gatt_array()
        self._read_var_array()
0791
    def _read_numrecs(self):
        """Read the numrecs header field into _recs."""
        self.__dict__['_recs'] = self._unpack_int()
0794
0795 def _read_dim_array(self):
0796 header = self.fp.read(4)
0797 if not header in [ZERO, NC_DIMENSION]:
0798 raise ValueError("Unexpected header.")
0799 count = self._unpack_int()
0800
0801 for dim in range(count):
0802 name = asstr(self._unpack_string())
0803 length = self._unpack_int() or None # None for record dimension
0804 self.dimensions[name] = length
0805 self._dims.append(name) # preserve order
0806
    def _read_gatt_array(self):
        """Read global attributes and install them via __setattr__ (which
        also records them in _attributes)."""
        for k, v in self._read_att_array().items():
            self.__setattr__(k, v)
0810
    def _read_att_array(self):
        """Read one att_array header section; return an OrderedDict
        mapping attribute names to their values."""
        header = self.fp.read(4)
        if not header in [ZERO, NC_ATTRIBUTE]:
            raise ValueError("Unexpected header.")
        count = self._unpack_int()

        attributes = OrderedDict()
        for attr in range(count):
            name = asstr(self._unpack_string())
            attributes[name] = self._read_values()
        return attributes
0822
    def _read_var_array(self):
        """Read the var_array header section and attach data to variables.

        Non-record variables are materialized immediately (mmap'd, read,
        or left as unmapped_array placeholders in delay mode).  Record
        variables are collected into one numpy record array spanning all
        records, then split per variable.
        """
        header = self.fp.read(4)
        if not header in [ZERO, NC_VARIABLE]:
            raise ValueError("Unexpected header.")

        begin = 0
        dtypes = {'names': [], 'formats': []}
        rec_vars = []
        count = self._unpack_int()
        rec_vsizes = []
        for var in range(count):
            name, dimensions, shape, attributes, type, begin_, vsize = self._read_var()
            # http://www.unidata.ucar.edu/software/netcdf/docs/netcdf.html
            # Note that vsize is the product of the dimension lengths
            # (omitting the record dimension) and the number of bytes
            # per value (determined from the type), increased to the
            # next multiple of 4, for each variable. If a record
            # variable, this is the amount of space per record. The
            # netCDF "record size" is calculated as the sum of the
            # vsize's of all the record variables.
            #
            # The vsize field is actually redundant, because its value
            # may be computed from other information in the header. The
            # 32-bit vsize field is not large enough to contain the size
            # of variables that require more than 2^32 - 4 bytes, so
            # 2^32 - 1 is used in the vsize field for such variables.
            if shape and shape[0] is None: # record variable
                rec_vars.append(name)
                # The netCDF "record size" is calculated as the sum of
                # the vsize's of all the record variables.
                self.__dict__['_recsize'] += vsize
                rec_vsizes.append(vsize)
                # all record data starts at the first record variable's begin
                if begin == 0: begin = begin_
                dtypes['names'].append(name)
                dtypes['formats'].append(str(shape[1:]) + '>' + type.char)

                # Handle padding with a virtual variable.
                if type.char in 'bch':
                    actual_size = reduce(mul, (1,) + shape[1:]) * type.itemsize
                    padding = -actual_size % 4
                    if padding:
                        dtypes['names'].append('_padding_%d' % var)
                        dtypes['formats'].append('(%d,)>b' % padding)

                # Data will be set later.
                if self.delay:
                    self._begins[name] = begin_
                    data = unmapped_array((self._recs,)+shape[1:], type)
                else:
                    data = None
            else: # not a record variable
                # Calculate size to avoid problems with vsize (above)
                a_size = reduce(mul, shape, 1) * type.itemsize
                pos = self.fp.tell()
                if self.use_mmap:
                    mm = mmap(self.fp.fileno(), begin_+a_size, access=ACCESS_READ)
                    data = ndarray.__new__(ndarray, shape, dtype=type,
                            buffer=mm, offset=begin_, order=0)
                elif self.delay:
                    self._begins[name] = begin_
                    data = unmapped_array(shape, type)
                else:
                    self.fp.seek(begin_)
                    data = frombuffer(self.fp.read(a_size), type)
                    data.shape = shape
                self.fp.seek(pos)

            # Add variable.
            self.variables[name] = netcdf_variable(data, type, shape, dimensions, attributes)

        if rec_vars and not self.delay:
            dtypes['formats'] = [f.replace('()', '').replace(' ', '') for f in dtypes['formats']]
            # Remove padding when only one record variable.
            if len(rec_vars) == 1:
                dtypes['names'] = dtypes['names'][:1]
                dtypes['formats'] = dtypes['formats'][:1]

            # Build rec array.
            pos = self.fp.tell()
            rec_arrays = []
            if self.use_mmap:
                mm = mmap(self.fp.fileno(), begin+self._recs*self._recsize, access=ACCESS_READ)
                if self._recsize >= 1<<31:
                    # need to work around limitation of numpy.dtype.itemsize to 32 bit
                    # split the variables into groups whose combined record
                    # size stays below 2 GiB and map each group separately
                    i = 0
                    while i < len(rec_vsizes):
                        ends = np.cumsum(rec_vsizes[i:])
                        n = np.searchsorted(ends, 1<<31)
                        dtype1 = dict(names=dtypes['names'][i:i+n], formats=dtypes['formats'][i:i+n])
                        rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=dtype1,
                                buffer=mm, offset=begin, order=0)
                        rec_arrays.append(rec_array)
                        begin += ends[n-1]
                        i += n
                else:
                    rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=dtypes,
                            buffer=mm, offset=begin, order=0)
                    rec_arrays = [ rec_array ]
            else:
                self.fp.seek(begin)
                rec_array = frombuffer(self.fp.read(self._recs*self._recsize), dtype=dtypes)
                rec_array.shape = (self._recs,)
                rec_arrays = [ rec_array ]
            self.fp.seek(pos)

            # hand each record variable its column of the record array
            for rec_array in rec_arrays:
                for var in rec_array.dtype.names:
                    self.variables[var].__dict__['data'] = rec_array[var]
ae10cb9ac4 Oliv*0931
0932 def read_var(self, name):
0933 var = self.variables[name]
0934 pos = self._begins[name]
0935 self.fp.seek(pos)
7621b5d564 Oliv*0936 data = frombuffer(self.fp.read(var.data.nbytes), dtype=var.data.dtype)
ae10cb9ac4 Oliv*0937 data.shape = var.data.shape
0938 return data
0939
0940 def read_recvar(self, name, rec):
0941 var = self.variables[name]
0942 pos = self._begins[name] + rec*self._recsize
0943 self.fp.seek(pos)
0944 count = reduce(mul, var.data.shape[1:], 1) * var.data.itemsize
7621b5d564 Oliv*0945 data = frombuffer(self.fp.read(count), dtype=var.data.dtype)
ae10cb9ac4 Oliv*0946 data.shape = var.data.shape[1:]
0947 return data
0948
0949 def _read_var(self):
0950 name = asstr(self._unpack_string())
0951 dimensions = []
0952 shape = []
0953 dims = self._unpack_int()
0954
0955 for i in range(dims):
0956 dimid = self._unpack_int()
0957 dimname = self._dims[dimid]
0958 dimensions.append(dimname)
0959 dim = self.dimensions[dimname]
0960 shape.append(dim)
0961 dimensions = tuple(dimensions)
0962 shape = tuple(shape)
0963
0964 attributes = self._read_att_array()
0965 nc_type = self.fp.read(4)
0966 vsize = self._unpack_int()
0967 begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
0968 type = TYPEMAP[nc_type]
0969
0970 return name, dimensions, shape, attributes, type, begin, vsize
0971
0972 def _read_values(self):
0973 nc_type = self.fp.read(4)
0974 n = self._unpack_int()
0975
0976 type = TYPEMAP[nc_type]
0977
0978 count = n*type.itemsize
0979 values = self.fp.read(int(count))
0980 self.fp.read(-count % 4) # read padding
0981
9a1526c006 Oliv*0982 if type.char != 'c':
7621b5d564 Oliv*0983 values = frombuffer(values, type)
ae10cb9ac4 Oliv*0984 if values.shape == (1,): values = values[0]
0985 else:
0986 ## text values are encoded via UTF-8, per NetCDF standard
e1e3780d86 Oliv*0987 values = values.rstrip(b'\x00').decode('utf-8', 'replace')
ae10cb9ac4 Oliv*0988 return values
0989
0990 def _pack_begin(self, begin):
0991 if self.version_byte == 1:
0992 self._pack_int(begin)
0993 elif self.version_byte == 2:
0994 self._pack_int64(begin)
0995
0996 def _pack_int(self, value):
9a1526c006 Oliv*0997 self.fp.write(array(value, '>i').tobytes())
ae10cb9ac4 Oliv*0998 _pack_int32 = _pack_int
0999
1000 def _unpack_int(self):
7621b5d564 Oliv*1001 return int(frombuffer(self.fp.read(4), '>i')[0])
ae10cb9ac4 Oliv*1002 _unpack_int32 = _unpack_int
1003
1004 def _pack_int64(self, value):
9a1526c006 Oliv*1005 self.fp.write(array(value, '>q').tobytes())
ae10cb9ac4 Oliv*1006
1007 def _unpack_int64(self):
7621b5d564 Oliv*1008 return frombuffer(self.fp.read(8), '>q')[0]
ae10cb9ac4 Oliv*1009
1010 def _pack_string(self, s):
1011 count = len(s)
1012 self._pack_int(count)
1013 self.fp.write(asbytes(s))
e1e3780d86 Oliv*1014 self.fp.write(b'\x00' * (-count % 4)) # pad
ae10cb9ac4 Oliv*1015
1016 def _unpack_string(self):
1017 count = self._unpack_int()
e1e3780d86 Oliv*1018 s = self.fp.read(count).rstrip(b'\x00')
ae10cb9ac4 Oliv*1019 self.fp.read(-count % 4) # read padding
1020 return s
1021
1022
class netcdf_variable(object):
    """
    A data object for the `netcdf` module.

    `netcdf_variable` objects are constructed by calling the method
    `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`
    objects behave much like array objects defined in numpy, except that their
    data resides in a file. Data is read by indexing and written by assigning
    to an indexed subset; the entire array can be accessed by the index ``[:]``
    or (for scalars) by using the methods `getValue` and `assignValue`.
    `netcdf_variable` objects also have attribute `shape` with the same meaning
    as for arrays, but the shape cannot be modified. There is another read-only
    attribute `dimensions`, whose value is the tuple of dimension names.

    All other attributes correspond to variable attributes defined in
    the NetCDF file. Variable attributes are created by assigning to an
    attribute of the `netcdf_variable` object.

    Parameters
    ----------
    data : array_like
        The data array that holds the values for the variable.
        Typically, this is initialized as empty, but with the proper shape.
    type: numpy dtype
        Desired data-type for the data array.
    shape : sequence of ints
        The shape of the array. This should match the lengths of the
        variable's dimensions.
    dimensions : sequence of strings
        The names of the dimensions used by the variable. Must be in the
        same order of the dimension lengths given by `shape`.
    attributes : dict, optional
        Attribute values (any type) keyed by string names. These attributes
        become attributes for the netcdf_variable object.


    Attributes
    ----------
    dimensions : list of str
        List of names of dimensions used by the variable object.
    isrec, shape
        Properties

    See also
    --------
    isrec, shape

    """
    def __init__(self, data, type, shape, dimensions, attributes=None):
        # These assignments go through __setattr__ below; until
        # self._attributes exists, __setattr__'s try/except silently
        # falls back to writing only into __dict__.
        self.data = data
        self.dtype = type
        # _shape keeps the declared shape; for record variables the leading
        # entry is the (unset) record dimension, unlike data.shape.
        self._shape = shape
        self.dimensions = dimensions

        self._attributes = attributes or OrderedDict()
        # Mirror NetCDF attributes as Python attributes for convenience.
        for k, v in self._attributes.items():
            self.__dict__[k] = v

    def __setattr__(self, attr, value):
        # Store user defined attributes in a separate dict,
        # so we can save them to file later.
        try:
            self._attributes[attr] = value
        except AttributeError:
            # _attributes not created yet (during __init__); see above.
            pass
        self.__dict__[attr] = value

    @property
    def attributes(self):
        # Read-only view of the NetCDF attribute dict.
        return self._attributes

    def isrec(self):
        """Returns whether the variable has a record dimension or not.

        A record dimension is a dimension along which additional data could be
        easily appended in the netcdf data structure without much rewriting of
        the data file. This attribute is a read-only property of the
        `netcdf_variable`.

        """
        # Record variables store the record dimension as a falsy (None/0)
        # first entry in the declared shape.
        return self.data.shape and not self._shape[0]
    isrec = property(isrec)

    def shape(self):
        """Returns the shape tuple of the data variable.

        This is a read-only attribute and can not be modified in the
        same manner of other numpy arrays.
        """
        return self.data.shape
    shape = property(shape)

    def getValue(self):
        """
        Retrieve a scalar value from a `netcdf_variable` of length one.

        Raises
        ------
        ValueError
            If the netcdf variable is an array of length greater than one,
            this exception will be raised.

        """
        return self.data.item()

    def assignValue(self, value):
        """
        Assign a scalar value to a `netcdf_variable` of length one.

        Parameters
        ----------
        value : scalar
            Scalar value (of compatible type) to assign to a length-one netcdf
            variable. This value will be written to file.

        Raises
        ------
        ValueError
            If the input is not a scalar, or if the destination is not a length-one
            netcdf variable.

        """
        if not self.data.flags.writeable:
            # Work-around for a bug in NumPy. Calling itemset() on a read-only
            # memory-mapped array causes a seg. fault.
            # See NumPy ticket #1622, and SciPy ticket #1202.
            # This check for `writeable` can be removed when the oldest version
            # of numpy still supported by scipy contains the fix for #1622.
            raise RuntimeError("variable is not writeable")

        # NOTE(review): ndarray.itemset was removed in NumPy 2.0; this call
        # will fail there — consider `self.data[...] = value`. Verify against
        # the numpy versions this script must support.
        self.data.itemset(value)

    def typecode(self):
        """
        Return the typecode of the variable.

        Returns
        -------
        typecode : char
            The character typecode of the variable (eg, 'i' for int).

        """
        return self.dtype.char

    def itemsize(self):
        """
        Return the itemsize of the variable.

        Returns
        -------
        itemsize : int
            The element size of the variable (eg, 8 for float64).

        """
        return self.dtype.itemsize

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, data):
        # Expand data for record vars?
        if self.isrec:
            # Work out how many records the assignment needs, from either
            # the record index or the record slice being written.
            if isinstance(index, tuple):
                rec_index = index[0]
            else:
                rec_index = index
            if isinstance(rec_index, slice):
                recs = (rec_index.start or 0) + len(data)
            else:
                recs = rec_index + 1
            # Grow the record dimension in place if the write goes past
            # the current number of records.
            if recs > len(self.data):
                shape = (recs,) + self._shape[1:]
                self.data.resize(shape)
        self.data[index] = data
1197
1198
# Aliases kept for backward compatibility with code using the older
# capitalized class names.
NetCDFFile = netcdf_file
NetCDFVariable = netcdf_variable

# End of NetCDF reader/writer module
1203
1204
if __name__ == '__main__':
    import sys
    import os
    import errno
    import re
    import glob
    import fnmatch
    from getopt import gnu_getopt as getopt
    from getopt import GetoptError
    try:
        from collections import OrderedDict
    except ImportError:
        OrderedDict = dict
    import numpy as np

    # File names look like <base>[.<10-digit iteration>].t<tile>.nc
    tilepatt = re.compile(r'\.t([0-9]{3}[0-9]*)\.nc$')
    iterpatt = re.compile(r'(\.[0-9]{10})$')

    # parse command-line arguments
    try:
        optlist,fnames = getopt(sys.argv[1:], '2qho:v:', ['many', 'verbose', 'help'])
    except GetoptError as e:
        sys.exit('Error: ' + str(e) + '\n\n' + __doc__)

    opts = dict(optlist)

    if '--help' in opts or '-h' in opts:
        print(__doc__)
        sys.exit()

    if not fnames:
        sys.exit('You need to specify at least one input file.')

    outname = opts.get('-o')
    # NetCDF format version: 2 = 64-bit offset, 1 = classic
    version = 2 if '-2' in opts else 1
    progress = '-q' not in opts
    verbose = '--verbose' in opts
    manytiles = '--many' in opts
    # name of the record (time) dimension
    tname = 'T'

    if outname is None:
        sys.exit('You need to specify an output file using the -o option.')

    # turn into list of compiled regular expressions
    varpatt = opts.get('-v', '').split(',')
    varpatt = [ re.compile(fnmatch.translate(patt.strip())) for patt in varpatt ]

    readopts = dict(delay=True)
    writeopts = dict(delay=True, version=version)

    # A single argument containing glob metacharacters is expanded here
    # (for shells that do not expand it themselves).
    if len(fnames) == 1 and any(s in fnames[0] for s in '*?[]'):
        globpatt = fnames[0]
        fnames = glob.glob(globpatt)
        if len(fnames) == 0:
            # Bug fix: the pattern was previously passed as a second
            # argument to IOError instead of being formatted into the message.
            raise IOError('No files matching {0}'.format(globpatt))

    fnames.sort()
1262
    # Get list of iterations
    # itfiles maps iteration suffix -> {tile number string -> file name}
    itfiles = {}
    # files without iteration number
    files0 = {}
    for fname in fnames:
        # NOTE(review): assumes every input name matches tilepatt; a
        # non-matching name raises AttributeError here (the --many branch
        # checks for this explicitly) — confirm that is acceptable.
        tn = tilepatt.search(fname).group(1)
        base = tilepatt.sub('', fname)
        m = iterpatt.search(base)
        if m:
            it = m.group(1)
            itfiles.setdefault(it, {})[tn] = fname
        else:
            files0[tn] = fname

    # Flatten to lists of file names ordered by tile number.
    files0 = [v for k,v in sorted(files0.items())]

    its = sorted(itfiles.keys())
    for it in its:
        itfiles[it] = [v for k,v in sorted(itfiles[it].items())]

    # filess: one list of per-tile files per iteration.
    filess = [ itfiles[it] for it in its ]
    if len(files0) and len(filess):
        sys.exit('Both files with and without iteration number present. Aborting.')
    elif len(filess):
        files0 = filess[0]
    else:
        filess = [files0]

    if verbose:
        print('Files to be read:')
        for files in filess:
            print(files[0], '... (%d)' % (len(files)))

    # Read global attributes and tiling geometry from the first tile.
    nc = netcdf_file(files0[0], 'r', **readopts)
    gatt = OrderedDict(nc.attributes)
    #gatt['tile_number'] = 1
    #gatt['bi'] = 1
    #gatt['bj'] = 1
    # Drop per-tile attributes; they make no sense in the glued file.
    del gatt['tile_number']
    del gatt['bi']
    del gatt['bj']
    for k in list(gatt):
        if k.startswith('exch2_'):
            del gatt[k]

    # Tile size (sNx,sNy), global size (Nx,Ny) and tile counts per direction.
    sNx = gatt['sNx']
    sNy = gatt['sNy']
    Nx = gatt['Nx']
    Ny = gatt['Ny']
    ntx = gatt['nSx']*gatt['nPx']
    nty = gatt['nSy']*gatt['nPy']
    ntiles = ntx*nty
1315
    # Every iteration must provide a complete set of tiles.
    for it in its:
        if len(itfiles[it]) != ntiles:
            raise ValueError('Error: found %d tiles for iteration %s, need %d' % (
                             len(itfiles[it]), it[1:], ntiles))

    # Per-tile destination slices into the glued arrays.  The end index is
    # left open (None) for the last tile row/column so that variables with
    # an extra point (e.g. Ny+1 for V points) are covered too.
    Xslice = []
    Yslice = []
    for tn in range(ntx*nty):
        bj,bi = divmod(tn, ntx)
        ie = sNx*(bi+1-ntx) or None
        je = sNy*(bj+1-nty) or None
        Xslice.append(slice(sNx*bi, ie))
        Yslice.append(slice(sNy*bj, je))

    # Output dimensions: grow tiled (X*/Y*) dimensions from tile to
    # global size, keep all others unchanged.
    dims = OrderedDict()
    for k,n in nc.dimensions.items():
        if k[0] == 'X':
            n += Nx - sNx
        elif k[0] == 'Y':
            n += Ny - sNy
        dims[k] = n

    print('Tiled dimensions:',
          ' '.join([k for k in nc.dimensions if k[0] in 'XY']))

    # The record (time) dimension has length None in the header.
    havetime = tname in dims
    if havetime:
        assert dims[tname] is None
        nrec = nc.numrecs
        print('Record dimension:', tname)
    else:
        assert len(its) <= 1
        nrec = None

    if verbose:
        print('Variables:')

    # Select variables (coordinate variables always; others if they match
    # one of the -v patterns) and record dtype, dimensions, attributes and
    # the positions of the tiled X/Y axes for each.
    varprops = OrderedDict()
    for name,var in nc.variables.items():
        if name in dims or sum(patt.search(name) is not None for patt in varpatt) or len(varpatt) == 0:
            varprops[name] = {}
            varprops[name]['dtype'] = var.data.dtype
            varprops[name]['dimensions'] = var.dimensions
            varprops[name]['ncattrs'] = OrderedDict(var.attributes)
            iX = None
            iY = None
            for i,dim in enumerate(var.dimensions):
                if dim[0] == 'X':
                    iX = i
                elif dim[0] == 'Y':
                    iY = i
            varprops[name]['iX'] = iX
            varprops[name]['iY'] = iY
            dimstrs = len(var.dimensions)*[':']
            if tname in var.dimensions: dimstrs[list(var.dimensions).index(tname)] = tname
            if iX is not None: dimstrs[iX] = var.dimensions[iX]
            if iY is not None: dimstrs[iY] = var.dimensions[iY]
            if verbose:
                print( '%s %s(%s)' % (var.typecode(), name, ','.join(dimstrs)))

    ######################################################################
    # create global netcdf file
    ncout = netcdf_file(outname, 'w', **writeopts)
1379
    # The finally clause guarantees the output file is closed (and its
    # header flushed) even if assembly fails part way through.
    try:
        # global attributes
        for name,att in gatt.items():
            setattr(ncout, name, att)

        # create dimensions
        for name,n in dims.items():
            ncout.createDimension(name, n)

        # create variables with attributes
        vars = {}
        for name,var in varprops.items():
            # write big-endian data, as required by the NetCDF format
            dtype_ = np.dtype(var['dtype']).newbyteorder('>')
            # if verbose: print('Creating variable', name, dtype_, var['dimensions'])
            vars[name] = ncout.createVariable(name, dtype_, var['dimensions'])
            for attname,att in var['ncattrs'].items():
                setattr(vars[name], attname, att)

        ncout.write_metadata()

        # Classic (version 1) files cannot address data beyond 2 GiB.
        if version == 1:
            for sz in ncout._begins.values():
                if sz >= 1<<31:
                    sys.exit('ERROR: Variables too big for NetCDF version 1, try "-2" option.')

        if manytiles:
            # open only files along x-slice of tiles simultaneously
            # will have to jump around output file a bit...

            # easier this way; have to reopen many files anyway
            nc.close()

            nyslice = sNy

            # sort tiles into x-slices
            iterslices = []
            for fnames in filess:
                myslicefiles = [[] for _ in range(nty)]
                for fname in fnames:
                    m = tilepatt.search(fname)
                    if m is None:
                        sys.exit('Could not extract tile number from file name: {0}'.format(fname))
                    tn = int(m.group(1)) - 1
                    bj,bi = divmod(tn, ntx)
                    myslicefiles[bj].append(fname)
                iterslices.append(myslicefiles)

            irec = 0
            for tileslices in iterslices:
                for bj in range(nty):
                    if len(tileslices[bj]) != ntx:
                        raise ValueError('found %d tiles for bj = %d, need %d' % (
                                         len(tileslices[bj]), bj+1, ntx))
                    # open files
                    ncs = []
                    for fname in tileslices[bj]:
                        try:
                            nc = netcdf_file(fname, 'r', **readopts)
                        except IOError as e:
                            if e.errno == errno.EMFILE:
                                sys.exit('ERROR: Too many open files. Ask developer to improve script ;)')
                            raise
                        ncs.append(nc)

                    # Non-record variables exist only once, so write them
                    # only while processing the first iteration (irec == 0).
                    if irec == 0:
                        # assemble non-record variable data
                        if bj == 0 and progress and not verbose:
                            sys.stderr.write('Writing non-record variables\n')
                        # NOTE(review): indstrings appears to be unused below.
                        indstrings = {}
                        for name,pos,isrec in ncout.begins:
                            if not isrec:
                                if verbose: print(name)
                                prop = varprops[name]
                                vardims = prop['dimensions']
                                var = vars[name]
                                indx = len(vardims)*[slice(None)]
                                iX = prop['iX']
                                iY = prop['iY']
                                # buffer holds one y-slice of the global array
                                shape = list(var.shape)
                                if iY is not None:
                                    # this is needed for V and vorticity point fields that have Ny+1
                                    shape[iY] = shape[iY] - Ny + nyslice
                                    j0 = bj*nyslice
                                else:
                                    j0 = None
                                data = np.empty(shape, var.data.dtype.newbyteorder('>'))
                                for nc in ncs:
                                    tn = nc.tile_number - 1
                                    if iX is not None: indx[iX] = Xslice[tn]
                                    data[tuple(indx)] = nc.read_var(name)

                                # write this y-slice at row offset j0
                                ncout.write_var(name, data, j0, iY)
                                del data
                            else: # isrec
                                if not havetime:
                                    raise ValueError('record variables, but no time')

                    # assemble record variable data
                    if havetime:
                        # any of the ncs
                        nrec = nc.numrecs
                        if bj == 0 and progress and not verbose:
                            sys.stderr.write('Writing {0} records: '.format(nrec))
                        for irecin in range(nrec):
                            if bj == 0 and progress and not verbose:
                                sys.stderr.write('.')
                            for name,pos,isrec in ncout.begins:
                                if isrec:
                                    if verbose: print(irec+irecin, name)
                                    prop = varprops[name]
                                    # drop the leading record dimension
                                    vardims = prop['dimensions'][1:]
                                    var = vars[name]
                                    indx = len(vardims)*[slice(None)]
                                    iX = prop['iX']
                                    iY = prop['iY']
                                    shape = list(var.data.shape)
                                    if iY is not None:
                                        shape[iY] = shape[iY] - Ny + nyslice
                                        j0 = bj*nyslice
                                    else:
                                        j0 = None
                                    data = np.empty(shape[1:], var.data.dtype.newbyteorder('>'))
                                    for nc in ncs:
                                        tn = nc.tile_number - 1
                                        # iX counts the record dimension, indx does not
                                        if iX is not None: indx[iX-1] = Xslice[tn]
                                        data[tuple(indx)] = nc.read_recvar(name, irecin)

                                    ncout.write_recvar(name, irec+irecin, data, j0, iY)
                                    del data

                        if bj == 0 and progress and not verbose:
                            sys.stderr.write('\n')

                    # close this x-slice's files before opening the next one
                    for nc in ncs:
                        nc.close()

                    if progress and not verbose:
                        if bj == 0 and nty > 1:
                            sys.stderr.write('Writing {0} more slices: '.format(nty-1))
                        else:
                            sys.stderr.write('.')

                if nty > 1 and progress and not verbose:
                    sys.stderr.write('\n')

                if havetime:
                    irec += nrec

        else:
            # default mode: open all tiles of one iteration at once and
            # assemble full global arrays in memory
            ncs = [nc]
            for fname in files0[1:]:
                try:
                    nc = netcdf_file(fname, 'r', **readopts)
                except IOError as e:
                    if e.errno == errno.EMFILE:
                        sys.exit('ERROR: Too many open files. Try again with --many or increase the limit on open files.')
                    raise
                ncs.append(nc)

            # assemble non-record variable data
            if progress and not verbose: sys.stderr.write('Writing non-record variables\n')
            # NOTE(review): indstrings appears to be unused below.
            indstrings = {}
            for name,pos,isrec in ncout.begins:
                if not isrec:
                    if verbose: print(name)
                    prop = varprops[name]
                    vardims = prop['dimensions']
                    var = vars[name]
                    indx = len(vardims)*[slice(None)]
                    iX = prop['iX']
                    iY = prop['iY']
                    data = np.empty(var.shape, var.data.dtype.newbyteorder('>'))
                    for nc in ncs:
                        tn = nc.tile_number - 1
                        if iX is not None: indx[iX] = Xslice[tn]
                        if iY is not None: indx[iY] = Yslice[tn]
                        data[tuple(indx)] = nc.read_var(name)

                    ncout.write_var(name, data)
                    del data
                else: # isrec
                    if not havetime:
                        raise ValueError('record variables, but no time')

            # assemble record variable data
            if havetime:
                irec = 0
                for fnames in filess:
                    # the first iteration's files are already open from
                    # the non-record pass; reopen for later iterations
                    if irec:
                        ncs = []
                        for fname in fnames:
                            nc = netcdf_file(fname, 'r', **readopts)
                            ncs.append(nc)

                    nrec = nc.numrecs
                    if progress and not verbose: sys.stderr.write('Writing {0} records: '.format(nrec))
                    for irecin in range(nrec):
                        if progress and not verbose: sys.stderr.write('.')
                        for name,pos,isrec in ncout.begins:
                            if isrec:
                                if verbose: print(irec+irecin, name)
                                prop = varprops[name]
                                vardims = prop['dimensions'][1:]
                                var = vars[name]
                                indx = len(vardims)*[slice(None)]
                                iX = prop['iX']
                                iY = prop['iY']
                                data = np.empty(var.data.shape[1:], var.data.dtype.newbyteorder('>'))
                                for nc in ncs:
                                    tn = nc.tile_number - 1
                                    # iX/iY count the record dimension, indx does not
                                    if iX is not None: indx[iX-1] = Xslice[tn]
                                    if iY is not None: indx[iY-1] = Yslice[tn]
                                    data[tuple(indx)] = nc.read_recvar(name, irecin)

                                ncout.write_recvar(name, irec+irecin, data)
                                del data

                    irec += nrec

                    for nc in ncs:
                        nc.close()

                if progress and not verbose: sys.stderr.write('\n')

    finally:
        ncout.close()
1606