Source code for reV.handlers.outputs

# -*- coding: utf-8 -*-
"""
Classes to handle reV h5 output files.
"""
import logging
import NRWAL
import PySAM
import rex
import sys
import json

from reV.version import __version__
from rex.outputs import Outputs as rexOutputs

logger = logging.getLogger(__name__)


[docs]class Outputs(rexOutputs):
    """
    Base class to handle reV output data in .h5 format

    Examples
    --------
    The reV Outputs handler can be used to initialize h5 files in the standard
    reV/rex resource data format.

    >>> from reV import Outputs
    >>> import pandas as pd
    >>> import numpy as np
    >>>
    >>> meta = pd.DataFrame({'latitude': np.ones(100),
    >>>                      'longitude': np.ones(100)})
    >>>
    >>> time_index = pd.date_range('20210101', '20220101', freq='1h',
    >>>                            closed='right')
    >>>
    >>> with Outputs('test.h5', 'w') as f:
    >>>     f.meta = meta
    >>>     f.time_index = time_index

    You can also use the Outputs handler to read output h5 files from disk.
    The Outputs handler will automatically parse the meta data and time index
    into the expected pandas objects (DataFrame and DatetimeIndex,
    respectively).

    >>> with Outputs('test.h5') as f:
    >>>     print(f.meta.head())
    >>>
         latitude  longitude
    gid
    0         1.0        1.0
    1         1.0        1.0
    2         1.0        1.0
    3         1.0        1.0
    4         1.0        1.0

    >>> with Outputs('test.h5') as f:
    >>>     print(f.time_index)
    DatetimeIndex(['2021-01-01 01:00:00+00:00', '2021-01-01 02:00:00+00:00',
                   '2021-01-01 03:00:00+00:00', '2021-01-01 04:00:00+00:00',
                   '2021-01-01 05:00:00+00:00', '2021-01-01 06:00:00+00:00',
                   '2021-01-01 07:00:00+00:00', '2021-01-01 08:00:00+00:00',
                   '2021-01-01 09:00:00+00:00', '2021-01-01 10:00:00+00:00',
                   ...
                   '2021-12-31 15:00:00+00:00', '2021-12-31 16:00:00+00:00',
                   '2021-12-31 17:00:00+00:00', '2021-12-31 18:00:00+00:00',
                   '2021-12-31 19:00:00+00:00', '2021-12-31 20:00:00+00:00',
                   '2021-12-31 21:00:00+00:00', '2021-12-31 22:00:00+00:00',
                   '2021-12-31 23:00:00+00:00', '2022-01-01 00:00:00+00:00'],
                  dtype='datetime64[ns, UTC]', length=8760, freq=None)

    There are a few ways to use the Outputs handler to write data to a file.
    Here is one example using the pre-initialized file we created earlier.
    Note that the Outputs handler will automatically scale float data using
    the "scale_factor" attribute. The Outputs handler will unscale the data
    while being read unless the unscale kwarg is explicityly set to False.
    This behavior is intended to reduce disk storage requirements for big
    data and can be disabled by setting dtype=np.float32 or dtype=np.float64
    when writing data.

    >>> Outputs.add_dataset(h5_file='test.h5', dset_name='dset1',
    >>>                     dset_data=np.ones((8760, 100)) * 42.42,
    >>>                     attrs={'scale_factor': 100}, dtype=np.int32)


    >>> with Outputs('test.h5') as f:
    >>>     print(f['dset1'])
    >>>     print(f['dset1'].dtype)
    [[42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     ...
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]]
    float32

    >>> with Outputs('test.h5', unscale=False) as f:
    >>>     print(f['dset1'])
    >>>     print(f['dset1'].dtype)
    [[4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]
     ...
     [4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]]
    int32

    Note that the reV Outputs handler is specifically designed to read and
    write spatiotemporal data. It is therefore important to intialize the meta
    data and time index objects even if your data is only spatial or only
    temporal. Furthermore, the Outputs handler will always assume that 1D
    datasets represent scalar data (non-timeseries) that corresponds to the
    meta data shape, and that 2D datasets represent spatiotemporal data whose
    shape corresponds to (len(time_index), len(meta)). You can see these
    constraints here:

    >>> Outputs.add_dataset(h5_file='test.h5', dset_name='bad_shape',
                            dset_data=np.ones((1, 100)) * 42.42,
                            attrs={'scale_factor': 100}, dtype=np.int32)
    HandlerValueError: 2D data with shape (1, 100) is not of the proper
    spatiotemporal shape: (8760, 100)

    >>> Outputs.add_dataset(h5_file='test.h5', dset_name='bad_shape',
                            dset_data=np.ones((8760,)) * 42.42,
                            attrs={'scale_factor': 100}, dtype=np.int32)
    HandlerValueError: 1D data with shape (8760,) is not of the proper
    spatial shape: (100,)
    """

    @property
    def full_version_record(self):
        """Get record of versions for dependencies

        Returns
        -------
        dict
            Dictionary of package versions for dependencies
        """
        rev_versions = {'reV': __version__,
                        'rex': rex.__version__,
                        'pysam': PySAM.__version__,
                        'python': sys.version,
                        'nrwal': NRWAL.__version__,
                        }
        versions = super().full_version_record
        versions.update(rev_versions)
        return versions

[docs]    def set_version_attr(self):
        """Set the version attribute to the h5 file."""
        self.h5.attrs['version'] = __version__
        self.h5.attrs['full_version_record'] = json.dumps(
            self.full_version_record)
        self.h5.attrs['package'] = 'reV'