Back to home page

MITgcm

 
 

    


File indexing completed on 2024-06-19 05:11:17 UTC

view on githubraw file Latest commit 95edcfd9 on 2024-06-18 17:05:14 UTC
70dbdfecef Oliv*0001 """
                0002 NetCDF reader/writer module.
                0003 
                0004 This module is used to read and create NetCDF files. NetCDF files are
                0005 accessed through the `netcdf_file` object. Data written to and from NetCDF
                0006 files are contained in `netcdf_variable` objects. Attributes are given
                0007 as member variables of the `netcdf_file` and `netcdf_variable` objects.
                0008 
                0009 This module implements the Scientific.IO.NetCDF API to read and create
                0010 NetCDF files. The same API is also used in the PyNIO and pynetcdf
                0011 modules, allowing these modules to be used interchangeably when working
                0012 with NetCDF files.
                0013 """
                0014 
                0015 from __future__ import division, print_function, absolute_import
                0016 
                0017 # TODO:
                0018 # * properly implement ``_FillValue``.
                0019 # * implement Jeff Whitaker's patch for masked variables.
                0020 # * fix character variables.
                0021 # * implement PAGESIZE for Python 2.6?
                0022 
                0023 # The Scientific.IO.NetCDF API allows attributes to be added directly to
                0024 # instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
                0025 # between user-set attributes and instance attributes, user-set attributes
                0026 # are automatically stored in the ``_attributes`` attribute by overloading
                0027 #``__setattr__``. This is the reason why the code sometimes uses
                0028 #``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
                0029 # otherwise the key would be inserted into userspace attributes.
                0030 
                0031 
                0032 __all__ = ['netcdf_file']
                0033 
                0034 
                0035 from operator import mul
                0036 from mmap import mmap, ACCESS_READ
                0037 
                0038 import numpy as np
7621b5d564 Oliv*0039 from numpy import frombuffer, ndarray, dtype, empty, array, asarray
70dbdfecef Oliv*0040 from numpy import little_endian as LITTLE_ENDIAN
                0041 from functools import reduce
                0042 
                0043 import sys
                0044 
                0045 PY3 = sys.version_info[0] == 3
                0046 if PY3:
                0047     integer_types = int,
                0048 else:
                0049     integer_types = (int, long)
                0050 
                0051 ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00'
                0052 ZERO = b'\x00\x00\x00\x00'
                0053 NC_BYTE = b'\x00\x00\x00\x01'
                0054 NC_CHAR = b'\x00\x00\x00\x02'
                0055 NC_SHORT = b'\x00\x00\x00\x03'
                0056 NC_INT = b'\x00\x00\x00\x04'
                0057 NC_FLOAT = b'\x00\x00\x00\x05'
                0058 NC_DOUBLE = b'\x00\x00\x00\x06'
                0059 NC_DIMENSION = b'\x00\x00\x00\n'
                0060 NC_VARIABLE = b'\x00\x00\x00\x0b'
                0061 NC_ATTRIBUTE = b'\x00\x00\x00\x0c'
                0062 
                0063 
                0064 TYPEMAP = {NC_BYTE: ('b', 1),
                0065             NC_CHAR: ('c', 1),
                0066             NC_SHORT: ('h', 2),
                0067             NC_INT: ('i', 4),
                0068             NC_FLOAT: ('f', 4),
                0069             NC_DOUBLE: ('d', 8)}
                0070 
                0071 REVERSE = {('b', 1): NC_BYTE,
                0072             ('B', 1): NC_CHAR,
                0073             ('c', 1): NC_CHAR,
                0074             ('h', 2): NC_SHORT,
                0075             ('i', 4): NC_INT,
                0076             ('f', 4): NC_FLOAT,
                0077             ('d', 8): NC_DOUBLE,
                0078 
                0079             # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
                0080             # used when getting the types from generic attributes.
                0081             ('l', 4): NC_INT,
                0082             ('S', 1): NC_CHAR}
                0083 
                0084 
                0085 class netcdf_file(object):
                0086     """
                0087     A file object for NetCDF data.
                0088 
                0089     A `netcdf_file` object has two standard attributes: `dimensions` and
                0090     `variables`. The values of both are dictionaries, mapping dimension
                0091     names to their associated lengths and variable names to variables,
                0092     respectively. Application programs should never modify these
                0093     dictionaries.
                0094 
                0095     All other attributes correspond to global attributes defined in the
                0096     NetCDF file. Global file attributes are created by assigning to an
                0097     attribute of the `netcdf_file` object.
                0098 
                0099     Parameters
                0100     ----------
                0101     filename : string or file-like
                0102         string -> filename
                0103     mode : {'r', 'w'}, optional
                0104         read-write mode, default is 'r'
                0105     mmap : None or bool, optional
                0106         Whether to mmap `filename` when reading.  Default is True
                0107         when `filename` is a file name, False when `filename` is a
                0108         file-like object
                0109     version : {1, 2}, optional
                0110         version of netcdf to read / write, where 1 means *Classic
                0111         format* and 2 means *64-bit offset format*.  Default is 1.  See
                0112         `here <http://www.unidata.ucar.edu/software/netcdf/docs/netcdf/Which-Format.html>`_
                0113         for more info.
                0114 
                0115     Notes
                0116     -----
                0117     The major advantage of this module over other modules is that it doesn't
                0118     require the code to be linked to the NetCDF libraries. This module is
                0119     derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_.
                0120 
                0121     NetCDF files are a self-describing binary data format. The file contains
                0122     metadata that describes the dimensions and variables in the file. More
                0123     details about NetCDF files can be found `here
                0124     <http://www.unidata.ucar.edu/software/netcdf/docs/netcdf.html>`_. There
                0125     are three main sections to a NetCDF data structure:
                0126 
                0127     1. Dimensions
                0128     2. Variables
                0129     3. Attributes
                0130 
                0131     The dimensions section records the name and length of each dimension used
                0132     by the variables. The variables would then indicate which dimensions it
                0133     uses and any attributes such as data units, along with containing the data
                0134     values for the variable. It is good practice to include a
                0135     variable that has the same name as a dimension to provide the values for
                0136     that axis. Lastly, the attributes section would contain additional
                0137     information such as the name of the file creator or the instrument used to
                0138     collect the data.
                0139 
                0140     When writing data to a NetCDF file, there is often the need to indicate the
                0141     'record dimension'. A record dimension is the unbounded dimension for a
                0142     variable. For example, a temperature variable may have dimensions of
                0143     latitude, longitude and time. If one wants to add more temperature data to
                0144     the NetCDF file as time progresses, then the temperature variable should
                0145     have the time dimension flagged as the record dimension.
                0146 
                0147     In addition, the NetCDF file header contains the position of the data in
                0148     the file, so access can be done in an efficient manner without loading
                0149     unnecessary data into memory. It uses the ``mmap`` module to create
                0150     Numpy arrays mapped to the data on disk, for the same purpose.
                0151 
                0152     Examples
                0153     --------
                0154     To create a NetCDF file:
                0155 
                0156         >>> from scipy.io import netcdf
                0157         >>> f = netcdf.netcdf_file('simple.nc', 'w')
                0158         >>> f.history = 'Created for a test'
                0159         >>> f.createDimension('time', 10)
                0160         >>> time = f.createVariable('time', 'i', ('time',))
                0161         >>> time[:] = np.arange(10)
                0162         >>> time.units = 'days since 2008-01-01'
                0163         >>> f.close()
                0164 
                0165     Note the assignment of ``np.arange(10)`` to ``time[:]``.  Exposing the slice
                0166     of the time variable allows for the data to be set in the object, rather
                0167     than letting ``np.arange(10)`` overwrite the ``time`` variable.
                0168 
                0169     To read the NetCDF file we just created:
                0170 
                0171         >>> from scipy.io import netcdf
                0172         >>> f = netcdf.netcdf_file('simple.nc', 'r')
                0173         >>> print(f.history)
                0174         Created for a test
                0175         >>> time = f.variables['time']
                0176         >>> print(time.units)
                0177         days since 2008-01-01
                0178         >>> print(time.shape)
                0179         (10,)
                0180         >>> print(time[-1])
                0181         9
                0182         >>> f.close()
                0183 
                0184     A NetCDF file can also be used as context manager:
                0185 
                0186         >>> from scipy.io import netcdf
                0187         >>> with netcdf.netcdf_file('simple.nc', 'r') as f:
                0188         ...     print(f.history)
                0189         Created for a test
                0190     """
                0191     def __init__(self, filename, mode='r', mmap=None, version=1):
                0192         """Initialize netcdf_file from fileobj (str or file-like)."""
                0193         if hasattr(filename, 'seek'):  # file-like
                0194             self.fp = filename
                0195             self.filename = 'None'
                0196             if mmap is None:
                0197                 mmap = False
                0198             elif mmap and not hasattr(filename, 'fileno'):
                0199                 raise ValueError('Cannot use file object for mmap')
                0200         else:  # maybe it's a string
                0201             self.filename = filename
                0202             self.fp = open(self.filename, '%sb' % mode)
                0203             if mmap is None:
                0204                 mmap = True
                0205         self.use_mmap = mmap
                0206         self._fds = []
                0207         self.version_byte = version
                0208 
                0209         if not mode in 'rw':
                0210             raise ValueError("Mode must be either 'r' or 'w'.")
                0211         self.mode = mode
                0212 
                0213         self.dimensions = {}
                0214         self.variables = {}
                0215 
                0216         self._dims = []
                0217         self._recs = 0
                0218         self._recsize = 0
                0219 
                0220         self._attributes = {}
                0221 
                0222         if mode == 'r':
                0223             self._read()
                0224 
                0225     def __setattr__(self, attr, value):
                0226         # Store user defined attributes in a separate dict,
                0227         # so we can save them to file later.
                0228         try:
                0229             self._attributes[attr] = value
                0230         except AttributeError:
                0231             pass
                0232         self.__dict__[attr] = value
                0233 
                0234     def close(self):
                0235         """Closes the NetCDF file."""
                0236         try:
                0237             # mmaps are only for reading (for now)
                0238             for mmap_fd in self._fds:
                0239                 mmap_fd.close()
                0240         finally:
                0241             if not self.fp.closed:
                0242                 try:
                0243                     self.flush()
                0244                 finally:
                0245                     self.fp.close()
                0246     __del__ = close
                0247 
                0248     def __enter__(self):
                0249         return self
                0250 
                0251     def __exit__(self, type, value, traceback):
                0252         self.close()
                0253 
                0254     def createDimension(self, name, length):
                0255         """
                0256         Adds a dimension to the Dimension section of the NetCDF data structure.
                0257 
                0258         Note that this function merely adds a new dimension that the variables can
                0259         reference.  The values for the dimension, if desired, should be added as
                0260         a variable using `createVariable`, referring to this dimension.
                0261 
                0262         Parameters
                0263         ----------
                0264         name : str
                0265             Name of the dimension (Eg, 'lat' or 'time').
                0266         length : int
                0267             Length of the dimension.
                0268 
                0269         See Also
                0270         --------
                0271         createVariable
                0272 
                0273         """
                0274         self.dimensions[name] = length
                0275         self._dims.append(name)
                0276 
                0277     def createVariable(self, name, type, dimensions):
                0278         """
                0279         Create an empty variable for the `netcdf_file` object, specifying its data
                0280         type and the dimensions it uses.
                0281 
                0282         Parameters
                0283         ----------
                0284         name : str
                0285             Name of the new variable.
                0286         type : dtype or str
                0287             Data type of the variable.
                0288         dimensions : sequence of str
                0289             List of the dimension names used by the variable, in the desired order.
                0290 
                0291         Returns
                0292         -------
                0293         variable : netcdf_variable
                0294             The newly created ``netcdf_variable`` object.
                0295             This object has also been added to the `netcdf_file` object as well.
                0296 
                0297         See Also
                0298         --------
                0299         createDimension
                0300 
                0301         Notes
                0302         -----
                0303         Any dimensions to be used by the variable should already exist in the
                0304         NetCDF data structure or should be created by `createDimension` prior to
                0305         creating the NetCDF variable.
                0306 
                0307         """
                0308         shape = tuple([self.dimensions[dim] for dim in dimensions])
                0309         shape_ = tuple([dim or 0 for dim in shape])  # replace None with 0 for numpy
                0310 
                0311         type = dtype(type)
                0312         typecode, size = type.char, type.itemsize
                0313         if (typecode, size) not in REVERSE:
                0314             raise ValueError("NetCDF 3 does not support type %s" % type)
                0315 
                0316         data = empty(shape_, dtype=type.newbyteorder("B"))  # convert to big endian always for NetCDF 3
                0317         self.variables[name] = netcdf_variable(data, typecode, size, shape, dimensions)
                0318         return self.variables[name]
                0319 
                0320     def flush(self):
                0321         """
                0322         Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.
                0323 
                0324         See Also
                0325         --------
                0326         sync : Identical function
                0327 
                0328         """
9a1526c006 Oliv*0329         if hasattr(self, 'mode') and self.mode == 'w':
70dbdfecef Oliv*0330             self._write()
                0331     sync = flush
                0332 
                0333     def _write(self):
                0334         self.fp.seek(0)
                0335         self.fp.write(b'CDF')
                0336         self.fp.write(array(self.version_byte, '>b').tostring())
                0337 
                0338         # Write headers and data.
                0339         self._write_numrecs()
                0340         self._write_dim_array()
                0341         self._write_gatt_array()
                0342         self._write_var_array()
                0343 
                0344     def _write_numrecs(self):
                0345         # Get highest record count from all record variables.
                0346         for var in self.variables.values():
                0347             if var.isrec and len(var.data) > self._recs:
                0348                 self.__dict__['_recs'] = len(var.data)
                0349         self._pack_int(self._recs)
                0350 
                0351     def _write_dim_array(self):
                0352         if self.dimensions:
                0353             self.fp.write(NC_DIMENSION)
                0354             self._pack_int(len(self.dimensions))
                0355             for name in self._dims:
                0356                 self._pack_string(name)
                0357                 length = self.dimensions[name]
                0358                 self._pack_int(length or 0)  # replace None with 0 for record dimension
                0359         else:
                0360             self.fp.write(ABSENT)
                0361 
                0362     def _write_gatt_array(self):
                0363         self._write_att_array(self._attributes)
                0364 
                0365     def _write_att_array(self, attributes):
                0366         if attributes:
                0367             self.fp.write(NC_ATTRIBUTE)
                0368             self._pack_int(len(attributes))
                0369             for name, values in attributes.items():
                0370                 self._pack_string(name)
                0371                 self._write_values(values)
                0372         else:
                0373             self.fp.write(ABSENT)
                0374 
                0375     def _write_var_array(self):
                0376         if self.variables:
                0377             self.fp.write(NC_VARIABLE)
                0378             self._pack_int(len(self.variables))
                0379 
                0380             # Sort variables non-recs first, then recs. We use a DSU
                0381             # since some people use pupynere with Python 2.3.x.
27d195aed6 Oliv*0382             deco = [(bool(v._shape) and not v.isrec, k) for (k, v) in self.variables.items()]
70dbdfecef Oliv*0383             deco.sort()
                0384             variables = [k for (unused, k) in deco][::-1]
                0385 
                0386             # Set the metadata for all variables.
                0387             for name in variables:
                0388                 self._write_var_metadata(name)
                0389             # Now that we have the metadata, we know the vsize of
                0390             # each record variable, so we can calculate recsize.
                0391             self.__dict__['_recsize'] = sum([
                0392                     var._vsize for var in self.variables.values()
                0393                     if var.isrec])
                0394             # Set the data for all variables.
                0395             for name in variables:
                0396                 self._write_var_data(name)
                0397         else:
                0398             self.fp.write(ABSENT)
                0399 
                0400     def _write_var_metadata(self, name):
                0401         var = self.variables[name]
                0402 
                0403         self._pack_string(name)
                0404         self._pack_int(len(var.dimensions))
                0405         for dimname in var.dimensions:
                0406             dimid = self._dims.index(dimname)
                0407             self._pack_int(dimid)
                0408 
                0409         self._write_att_array(var._attributes)
                0410 
                0411         nc_type = REVERSE[var.typecode(), var.itemsize()]
95edcfd937 Oliv*0412         self.fp.write(nc_type)
70dbdfecef Oliv*0413 
                0414         if not var.isrec:
                0415             vsize = var.data.size * var.data.itemsize
                0416             vsize += -vsize % 4
                0417         else:  # record variable
                0418             try:
                0419                 vsize = var.data[0].size * var.data.itemsize
                0420             except IndexError:
                0421                 vsize = 0
                0422             rec_vars = len([v for v in self.variables.values()
                0423                             if v.isrec])
                0424             if rec_vars > 1:
                0425                 vsize += -vsize % 4
                0426         self.variables[name].__dict__['_vsize'] = vsize
                0427         self._pack_int(vsize)
                0428 
                0429         # Pack a bogus begin, and set the real value later.
                0430         self.variables[name].__dict__['_begin'] = self.fp.tell()
                0431         self._pack_begin(0)
                0432 
                0433     def _write_var_data(self, name):
                0434         var = self.variables[name]
                0435 
                0436         # Set begin in file header.
                0437         the_beguine = self.fp.tell()
                0438         self.fp.seek(var._begin)
                0439         self._pack_begin(the_beguine)
                0440         self.fp.seek(the_beguine)
                0441 
                0442         # Write data.
                0443         if not var.isrec:
                0444             self.fp.write(var.data.tostring())
                0445             count = var.data.size * var.data.itemsize
                0446             self.fp.write(b'0' * (var._vsize - count))
                0447         else:  # record variable
                0448             # Handle rec vars with shape[0] < nrecs.
                0449             if self._recs > len(var.data):
                0450                 shape = (self._recs,) + var.data.shape[1:]
                0451                 var.data.resize(shape)
                0452 
                0453             pos0 = pos = self.fp.tell()
                0454             for rec in var.data:
                0455                 # Apparently scalars cannot be converted to big endian. If we
                0456                 # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
                0457                 # will remain as ``=i4``.
                0458                 if not rec.shape and (rec.dtype.byteorder == '<' or
                0459                         (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                0460                     rec = rec.byteswap()
                0461                 self.fp.write(rec.tostring())
                0462                 # Padding
                0463                 count = rec.size * rec.itemsize
                0464                 self.fp.write(b'0' * (var._vsize - count))
                0465                 pos += self._recsize
                0466                 self.fp.seek(pos)
                0467             self.fp.seek(pos0 + var._vsize)
                0468 
                0469     def _write_values(self, values):
                0470         if hasattr(values, 'dtype'):
                0471             nc_type = REVERSE[values.dtype.char, values.dtype.itemsize]
                0472         else:
                0473             types = [(t, NC_INT) for t in integer_types]
                0474             types += [
                0475                     (float, NC_FLOAT),
                0476                     (str, NC_CHAR),
                0477                     ]
                0478             try:
                0479                 sample = values[0]
                0480             except TypeError:
                0481                 sample = values
                0482             except IndexError:
                0483                 if isinstance(values, basestring):
                0484                     sample = values
                0485                 else:
                0486                     raise
                0487             for class_, nc_type in types:
                0488                 if isinstance(sample, class_):
                0489                     break
                0490 
                0491         typecode, size = TYPEMAP[nc_type]
                0492         dtype_ = '>%s' % typecode
                0493 
                0494         values = asarray(values, dtype=dtype_)
                0495 
95edcfd937 Oliv*0496         self.fp.write(nc_type)
70dbdfecef Oliv*0497 
                0498         if values.dtype.char == 'S':
                0499             nelems = values.itemsize
                0500         else:
                0501             nelems = values.size
                0502         self._pack_int(nelems)
                0503 
                0504         if not values.shape and (values.dtype.byteorder == '<' or
                0505                 (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                0506             values = values.byteswap()
                0507         self.fp.write(values.tostring())
                0508         count = values.size * values.itemsize
                0509         self.fp.write(b'0' * (-count % 4))  # pad
                0510 
                0511     def _read(self):
                0512         # Check magic bytes and version
                0513         magic = self.fp.read(3)
                0514         if not magic == b'CDF':
                0515             raise TypeError("Error: %s is not a valid NetCDF 3 file" %
                0516                             self.filename)
7621b5d564 Oliv*0517         self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]
70dbdfecef Oliv*0518 
                0519         # Read file headers and set data.
                0520         self._read_numrecs()
                0521         self._read_dim_array()
                0522         self._read_gatt_array()
                0523         self._read_var_array()
                0524 
                0525     def _read_numrecs(self):
                0526         self.__dict__['_recs'] = self._unpack_int()
                0527 
                0528     def _read_dim_array(self):
                0529         header = self.fp.read(4)
                0530         if not header in [ZERO, NC_DIMENSION]:
                0531             raise ValueError("Unexpected header.")
                0532         count = self._unpack_int()
                0533 
                0534         for dim in range(count):
95edcfd937 Oliv*0535             name = self._unpack_string()
70dbdfecef Oliv*0536             length = self._unpack_int() or None  # None for record dimension
                0537             self.dimensions[name] = length
                0538             self._dims.append(name)  # preserve order
                0539 
                0540     def _read_gatt_array(self):
                0541         for k, v in self._read_att_array().items():
                0542             self.__setattr__(k, v)
                0543 
                0544     def _read_att_array(self):
                0545         header = self.fp.read(4)
                0546         if not header in [ZERO, NC_ATTRIBUTE]:
                0547             raise ValueError("Unexpected header.")
                0548         count = self._unpack_int()
                0549 
                0550         attributes = {}
                0551         for attr in range(count):
95edcfd937 Oliv*0552             name = self._unpack_string()
70dbdfecef Oliv*0553             attributes[name] = self._read_values()
                0554         return attributes
                0555 
                0556     def _read_var_array(self):
                0557         header = self.fp.read(4)
                0558         if not header in [ZERO, NC_VARIABLE]:
                0559             raise ValueError("Unexpected header.")
                0560 
                0561         nrsize = 0
                0562         nrbegin = 0
                0563         recbegin = 0
                0564         nrdtype = {'names': [], 'formats': []}
                0565         recdtype = {'names': [], 'formats': []}
                0566         nr_vars = []
                0567         rec_vars = []
                0568         count = self._unpack_int()
                0569         for var in range(count):
                0570             (name, dimensions, shape, attributes,
                0571              typecode, size, dtype_, begin_, vsize) = self._read_var()
                0572             # http://www.unidata.ucar.edu/software/netcdf/docs/netcdf.html
                0573             # Note that vsize is the product of the dimension lengths
                0574             # (omitting the record dimension) and the number of bytes
                0575             # per value (determined from the type), increased to the
                0576             # next multiple of 4, for each variable. If a record
                0577             # variable, this is the amount of space per record. The
                0578             # netCDF "record size" is calculated as the sum of the
                0579             # vsize's of all the record variables.
                0580             #
                0581             # The vsize field is actually redundant, because its value
                0582             # may be computed from other information in the header. The
                0583             # 32-bit vsize field is not large enough to contain the size
                0584             # of variables that require more than 2^32 - 4 bytes, so
                0585             # 2^32 - 1 is used in the vsize field for such variables.
                0586             isrec = shape and shape[0] is None  # record variable
                0587             recshape = shape[isrec:]  # shape without record dimension
                0588 
                0589             # construct dtype
                0590             names = [name]
                0591             formats = [str(recshape) + dtype_]
                0592             # Handle padding with a virtual variable.
                0593             if typecode in 'bch':
                0594                 actual_size = reduce(mul, recshape, 1) * size
                0595                 padding = -actual_size % 4
                0596                 if padding:
                0597                     names.append('_padding_%d' % var)
                0598                     formats.append('(%d,)>b' % padding)
                0599 
                0600             if isrec:
                0601                 rec_vars.append(name)
                0602                 # The netCDF "record size" is calculated as the sum of
                0603                 # the vsize's of all the record variables.
                0604                 self.__dict__['_recsize'] += vsize
                0605                 if recbegin == 0:
                0606                     recbegin = begin_
                0607                 recdtype['names'].extend(names)
                0608                 recdtype['formats'].extend(formats)
                0609 
                0610                 # Data will be set later.
                0611                 data = None
                0612             else:  # not a record variable
                0613                 nr_vars.append(name)
                0614                 nrsize += vsize
                0615                 if nrbegin == 0:
                0616                     nrbegin = begin_
                0617                 nrdtype['names'].extend(names)
                0618                 nrdtype['formats'].extend(formats)
                0619 
                0620                 # Calculate size to avoid problems with vsize (above)
                0621                 a_size = reduce(mul, shape, 1) * size
                0622                 if self.use_mmap:
                0623                     data = None
                0624                 else:
                0625                     pos = self.fp.tell()
                0626                     self.fp.seek(begin_)
7621b5d564 Oliv*0627                     data = frombuffer(self.fp.read(a_size), dtype=dtype_)
70dbdfecef Oliv*0628                     data.shape = shape
                0629                     self.fp.seek(pos)
                0630 
                0631             # Add variable.
                0632             self.variables[name] = netcdf_variable(
                0633                     data, typecode, size, shape, dimensions, attributes)
                0634 
                0635         if self.use_mmap:
                0636             # Build nonrec array.
                0637             mm = mmap(self.fp.fileno(), nrbegin+nrsize, access=ACCESS_READ)
                0638             nr_array = ndarray.__new__(ndarray, (), dtype=nrdtype, buffer=mm,
7621b5d564 Oliv*0639                     offset=nrbegin, order='C')
70dbdfecef Oliv*0640             self._fds.append(mm)
                0641             for var in nr_vars:
                0642                 self.variables[var].__dict__['data'] = nr_array[var]
                0643 
                0644         if rec_vars:
                0645             # Remove padding when only one record variable.
                0646             if len(rec_vars) == 1:
                0647                 recdtype['names'] = recdtype['names'][:1]
                0648                 recdtype['formats'] = recdtype['formats'][:1]
                0649 
                0650             # Build rec array.
                0651             if self.use_mmap:
                0652                 mm = mmap(self.fp.fileno(), recbegin+self._recs*self._recsize, access=ACCESS_READ)
                0653                 rec_array = ndarray.__new__(ndarray, (self._recs,), dtype=recdtype,
7621b5d564 Oliv*0654                         buffer=mm, offset=recbegin, order='C')
70dbdfecef Oliv*0655                 self._fds.append(mm)
                0656             else:
                0657                 pos = self.fp.tell()
                0658                 self.fp.seek(recbegin)
7621b5d564 Oliv*0659                 rec_array = frombuffer(self.fp.read(self._recs*self._recsize), dtype=recdtype)
70dbdfecef Oliv*0660                 rec_array.shape = (self._recs,)
                0661                 self.fp.seek(pos)
                0662 
                0663             for var in rec_vars:
                0664                 self.variables[var].__dict__['data'] = rec_array[var]
                0665 
                0666         # further reading will be done through the mmaps
                0667         self.fp.close()
                0668 
                0669     def _read_var(self):
95edcfd937 Oliv*0670         name = self._unpack_string()
70dbdfecef Oliv*0671         dimensions = []
                0672         shape = []
                0673         dims = self._unpack_int()
                0674 
                0675         for i in range(dims):
                0676             dimid = self._unpack_int()
                0677             dimname = self._dims[dimid]
                0678             dimensions.append(dimname)
                0679             dim = self.dimensions[dimname]
                0680             shape.append(dim)
                0681         dimensions = tuple(dimensions)
                0682         shape = tuple(shape)
                0683 
                0684         attributes = self._read_att_array()
                0685         nc_type = self.fp.read(4)
                0686         vsize = self._unpack_int()
                0687         begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
                0688 
                0689         typecode, size = TYPEMAP[nc_type]
                0690         dtype_ = '>%s' % typecode
                0691 
                0692         return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
                0693 
                0694     def _read_values(self):
                0695         nc_type = self.fp.read(4)
                0696         n = self._unpack_int()
                0697 
                0698         typecode, size = TYPEMAP[nc_type]
                0699 
                0700         count = n*size
                0701         values = self.fp.read(int(count))
                0702         self.fp.read(-count % 4)  # read padding
                0703 
9a1526c006 Oliv*0704         if typecode != 'c':
7621b5d564 Oliv*0705             values = frombuffer(values, dtype='>%s' % typecode)
70dbdfecef Oliv*0706             if values.shape == (1,):
                0707                 values = values[0]
                0708         else:
                0709             values = values.rstrip(b'\x00')
                0710         return values
                0711 
                0712     def _pack_begin(self, begin):
                0713         if self.version_byte == 1:
                0714             self._pack_int(begin)
                0715         elif self.version_byte == 2:
                0716             self._pack_int64(begin)
                0717 
                0718     def _pack_int(self, value):
                0719         self.fp.write(array(value, '>i').tostring())
                0720     _pack_int32 = _pack_int
                0721 
                0722     def _unpack_int(self):
7621b5d564 Oliv*0723         return int(frombuffer(self.fp.read(4), '>i')[0])
70dbdfecef Oliv*0724     _unpack_int32 = _unpack_int
                0725 
                0726     def _pack_int64(self, value):
                0727         self.fp.write(array(value, '>q').tostring())
                0728 
                0729     def _unpack_int64(self):
7621b5d564 Oliv*0730         return frombuffer(self.fp.read(8), '>q')[0]
70dbdfecef Oliv*0731 
                0732     def _pack_string(self, s):
                0733         count = len(s)
                0734         self._pack_int(count)
95edcfd937 Oliv*0735         self.fp.write(s.encode('latin1'))
70dbdfecef Oliv*0736         self.fp.write(b'0' * (-count % 4))  # pad
                0737 
                0738     def _unpack_string(self):
                0739         count = self._unpack_int()
                0740         s = self.fp.read(count).rstrip(b'\x00')
                0741         self.fp.read(-count % 4)  # read padding
95edcfd937 Oliv*0742         return s.decode('latin1')
70dbdfecef Oliv*0743 
                0744 
                0745 class netcdf_variable(object):
                0746     """
                0747     A data object for the `netcdf` module.
                0748 
                0749     `netcdf_variable` objects are constructed by calling the method
                0750     `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`
                0751     objects behave much like array objects defined in numpy, except that their
                0752     data resides in a file. Data is read by indexing and written by assigning
                0753     to an indexed subset; the entire array can be accessed by the index ``[:]``
                0754     or (for scalars) by using the methods `getValue` and `assignValue`.
                0755     `netcdf_variable` objects also have attribute `shape` with the same meaning
                0756     as for arrays, but the shape cannot be modified. There is another read-only
                0757     attribute `dimensions`, whose value is the tuple of dimension names.
                0758 
                0759     All other attributes correspond to variable attributes defined in
                0760     the NetCDF file. Variable attributes are created by assigning to an
                0761     attribute of the `netcdf_variable` object.
                0762 
                0763     Parameters
                0764     ----------
                0765     data : array_like
                0766         The data array that holds the values for the variable.
                0767         Typically, this is initialized as empty, but with the proper shape.
                0768     typecode : dtype character code
                0769         Desired data-type for the data array.
                0770     size : int
                0771         Desired element size for the data array.
                0772     shape : sequence of ints
                0773         The shape of the array.  This should match the lengths of the
                0774         variable's dimensions.
                0775     dimensions : sequence of strings
                0776         The names of the dimensions used by the variable.  Must be in the
                0777         same order of the dimension lengths given by `shape`.
                0778     attributes : dict, optional
                0779         Attribute values (any type) keyed by string names.  These attributes
                0780         become attributes for the netcdf_variable object.
                0781 
                0782 
                0783     Attributes
                0784     ----------
                0785     dimensions : list of str
                0786         List of names of dimensions used by the variable object.
                0787     isrec, shape
                0788         Properties
                0789 
                0790     See also
                0791     --------
                0792     isrec, shape
                0793 
                0794     """
                0795     def __init__(self, data, typecode, size, shape, dimensions, attributes=None):
                0796         self.data = data
                0797         self._typecode = typecode
                0798         self._size = size
                0799         self._shape = shape
                0800         self.dimensions = dimensions
                0801 
                0802         self._attributes = attributes or {}
                0803         for k, v in self._attributes.items():
                0804             self.__dict__[k] = v
                0805 
                0806     def __setattr__(self, attr, value):
                0807         # Store user defined attributes in a separate dict,
                0808         # so we can save them to file later.
                0809         try:
                0810             self._attributes[attr] = value
                0811         except AttributeError:
                0812             pass
                0813         self.__dict__[attr] = value
                0814 
                0815     def isrec(self):
                0816         """Returns whether the variable has a record dimension or not.
                0817 
                0818         A record dimension is a dimension along which additional data could be
                0819         easily appended in the netcdf data structure without much rewriting of
                0820         the data file. This attribute is a read-only property of the
                0821         `netcdf_variable`.
                0822 
                0823         """
baadc14463 Oliv*0824         return bool(self.data.shape) and not self._shape[0]
70dbdfecef Oliv*0825     isrec = property(isrec)
                0826 
                0827     def shape(self):
                0828         """Returns the shape tuple of the data variable.
                0829 
                0830         This is a read-only attribute and can not be modified in the
                0831         same manner of other numpy arrays.
                0832         """
                0833         return self.data.shape
                0834     shape = property(shape)
                0835 
                0836     def getValue(self):
                0837         """
                0838         Retrieve a scalar value from a `netcdf_variable` of length one.
                0839 
                0840         Raises
                0841         ------
                0842         ValueError
                0843             If the netcdf variable is an array of length greater than one,
                0844             this exception will be raised.
                0845 
                0846         """
                0847         return self.data.item()
                0848 
                0849     def assignValue(self, value):
                0850         """
                0851         Assign a scalar value to a `netcdf_variable` of length one.
                0852 
                0853         Parameters
                0854         ----------
                0855         value : scalar
                0856             Scalar value (of compatible type) to assign to a length-one netcdf
                0857             variable. This value will be written to file.
                0858 
                0859         Raises
                0860         ------
                0861         ValueError
                0862             If the input is not a scalar, or if the destination is not a length-one
                0863             netcdf variable.
                0864 
                0865         """
                0866         if not self.data.flags.writeable:
                0867             # Work-around for a bug in NumPy.  Calling itemset() on a read-only
                0868             # memory-mapped array causes a seg. fault.
                0869             # See NumPy ticket #1622, and SciPy ticket #1202.
                0870             # This check for `writeable` can be removed when the oldest version
                0871             # of numpy still supported by scipy contains the fix for #1622.
                0872             raise RuntimeError("variable is not writeable")
                0873 
                0874         self.data.itemset(value)
                0875 
                0876     def typecode(self):
                0877         """
                0878         Return the typecode of the variable.
                0879 
                0880         Returns
                0881         -------
                0882         typecode : char
                0883             The character typecode of the variable (eg, 'i' for int).
                0884 
                0885         """
                0886         return self._typecode
                0887 
                0888     def itemsize(self):
                0889         """
                0890         Return the itemsize of the variable.
                0891 
                0892         Returns
                0893         -------
                0894         itemsize : int
                0895             The element size of the variable (eg, 8 for float64).
                0896 
                0897         """
                0898         return self._size
                0899 
                0900     def __getitem__(self, index):
                0901         return self.data[index]
                0902 
                0903     def __setitem__(self, index, data):
                0904         # Expand data for record vars?
                0905         if self.isrec:
                0906             if isinstance(index, tuple):
                0907                 rec_index = index[0]
                0908             else:
                0909                 rec_index = index
                0910             if isinstance(rec_index, slice):
                0911                 recs = (rec_index.start or 0) + len(data)
                0912             else:
                0913                 recs = rec_index + 1
                0914             if recs > len(self.data):
                0915                 shape = (recs,) + self._shape[1:]
                0916                 self.data.resize(shape)
                0917         self.data[index] = data
                0918 
                0919 
                # Alternative names for the two classes defined in this module;
                # presumably kept for code written against the Scientific.IO.NetCDF
                # class names -- TODO confirm.
                0920 NetCDFFile = netcdf_file
                0921 NetCDFVariable = netcdf_variable