Source code for rex.outputs

# -*- coding: utf-8 -*-
"""
Classes to handle h5 output files.
"""
import json
import logging
import numpy as np
import pandas as pd
import time
import sys
import click
import h5py
import h5pyd
import scipy

from rex.version import __version__
from rex.utilities.exceptions import (HandlerRuntimeError, HandlerValueError,
                                      ResourceKeyError)
from rex.resource import BaseResource
from rex.utilities.parse_keys import parse_keys, parse_slice
from rex.utilities.utilities import to_records_array

logger = logging.getLogger(__name__)


class Outputs(BaseResource):
    """
    Base class to handle output data in .h5 format

    Examples
    --------
    The Outputs handler can be used to initialize h5 files in the standard
    reV/rex resource data format.

    >>> from rex import Outputs
    >>> import pandas as pd
    >>> import numpy as np
    >>>
    >>> meta = pd.DataFrame({'latitude': np.ones(100),
    >>>                      'longitude': np.ones(100)})
    >>>
    >>> time_index = pd.date_range('20210101', '20220101', freq='1h',
    >>>                            closed='right')
    >>>
    >>> with Outputs('test.h5', 'w') as f:
    >>>     f.meta = meta
    >>>     f.time_index = time_index

    You can also use the Outputs handler to read output h5 files from disk.
    The Outputs handler will automatically parse the meta data and time index
    into the expected pandas objects (DataFrame and DatetimeIndex,
    respectively).

    >>> with Outputs('test.h5') as f:
    >>>     print(f.meta.head())
    >>>
         latitude  longitude
    gid
    0         1.0        1.0
    1         1.0        1.0
    2         1.0        1.0
    3         1.0        1.0
    4         1.0        1.0

    >>> with Outputs('test.h5') as f:
    >>>     print(f.time_index)
    DatetimeIndex(['2021-01-01 01:00:00+00:00', '2021-01-01 02:00:00+00:00',
                   '2021-01-01 03:00:00+00:00', '2021-01-01 04:00:00+00:00',
                   '2021-01-01 05:00:00+00:00', '2021-01-01 06:00:00+00:00',
                   '2021-01-01 07:00:00+00:00', '2021-01-01 08:00:00+00:00',
                   '2021-01-01 09:00:00+00:00', '2021-01-01 10:00:00+00:00',
                   ...
                   '2021-12-31 15:00:00+00:00', '2021-12-31 16:00:00+00:00',
                   '2021-12-31 17:00:00+00:00', '2021-12-31 18:00:00+00:00',
                   '2021-12-31 19:00:00+00:00', '2021-12-31 20:00:00+00:00',
                   '2021-12-31 21:00:00+00:00', '2021-12-31 22:00:00+00:00',
                   '2021-12-31 23:00:00+00:00', '2022-01-01 00:00:00+00:00'],
                  dtype='datetime64[ns, UTC]', length=8760, freq=None)

    There are a few ways to use the Outputs handler to write data to a file.
    Here is one example using the pre-initialized file we created earlier.
    Note that the Outputs handler will automatically scale float data using
    the "scale_factor" attribute. The Outputs handler will unscale the data
    while being read unless the unscale kwarg is explicitly set to False.
    This behavior is intended to reduce disk storage requirements for big
    data and can be disabled by setting dtype=np.float32 or dtype=np.float64
    when writing data.

    >>> Outputs.add_dataset(h5_file='test.h5', dset_name='dset1',
    >>>                     dset_data=np.ones((8760, 100)) * 42.42,
    >>>                     attrs={'scale_factor': 100}, dtype=np.int32)

    >>> with Outputs('test.h5') as f:
    >>>     print(f['dset1'])
    >>>     print(f['dset1'].dtype)
    [[42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     ...
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]
     [42.42 42.42 42.42 ... 42.42 42.42 42.42]]
    float32

    >>> with Outputs('test.h5', unscale=False) as f:
    >>>     print(f['dset1'])
    >>>     print(f['dset1'].dtype)
    [[4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]
     ...
     [4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]
     [4242 4242 4242 ... 4242 4242 4242]]
    int32

    Note that the Outputs handler is specifically designed to read and write
    spatiotemporal data. It is therefore important to initialize the meta
    data and time index objects even if your data is only spatial or only
    temporal. Furthermore, the Outputs handler will always assume that 1D
    datasets represent scalar data (non-timeseries) that corresponds to the
    meta data shape, and that 2D datasets represent spatiotemporal data
    whose shape corresponds to (len(time_index), len(meta)). You can see
    these constraints here:

    >>> Outputs.add_dataset(h5_file='test.h5', dset_name='bad_shape',
                            dset_data=np.ones((1, 100)) * 42.42,
                            attrs={'scale_factor': 100}, dtype=np.int32)

    HandlerValueError: 2D data with shape (1, 100) is not of the proper
    spatiotemporal shape: (8760, 100)

    >>> Outputs.add_dataset(h5_file='test.h5', dset_name='bad_shape',
                            dset_data=np.ones((8760,)) * 42.42,
                            attrs={'scale_factor': 100}, dtype=np.int32)

    HandlerValueError: 1D data with shape (8760,) is not of the proper
    spatial shape: (100,)
    """

    def __init__(self, h5_file, mode='r', unscale=True, str_decode=True,
                 group=None):
        """
        Parameters
        ----------
        h5_file : str
            Path to .h5 resource file
        mode : str, optional
            Mode to instantiate h5py.File instance, by default 'r'
        unscale : bool, optional
            Boolean flag to automatically unscale variables on extraction,
            by default True
        str_decode : bool, optional
            Boolean flag to decode the bytestring meta data into normal
            strings. Setting this to False will speed up the meta data read,
            by default True
        group : str, optional
            Group within .h5 resource file to open, by default None
        """
        super().__init__(h5_file, unscale=unscale, hsds=False,
                         str_decode=str_decode, group=group, mode=mode)
        self._mode = mode
        self._group = self._check_group(group)
        self._shape = None

        if self.writable:
            self.set_version_attr()

    def __len__(self):
        _len = 0
        if 'meta' in self.datasets:
            _len = self.h5['meta'].shape[0]

        return _len

    def __setitem__(self, keys, arr):
        if self.writable:
            ds, ds_slice = parse_keys(keys)

            slice_test = False
            if isinstance(ds_slice, tuple):
                slice_test = ds_slice[0] == slice(None, None, None)

            if ds.endswith('meta') and slice_test:
                self._set_meta(ds, arr)
            elif ds.endswith('time_index') and slice_test:
                self._set_time_index(ds, arr)
            else:
                self._set_ds_array(ds, arr, ds_slice)

    @property
    def full_version_record(self):
        """Get record of versions for dependencies

        Returns
        -------
        dict
            Dictionary of package versions for dependencies
        """
        versions = {'rex': __version__,
                    'pandas': pd.__version__,
                    'numpy': np.__version__,
                    'python': sys.version,
                    'click': click.__version__,
                    'h5py': h5py.__version__,
                    'h5pyd': h5pyd.__version__,
                    'scipy': scipy.__version__,
                    }

        return versions
    def set_version_attr(self):
        """Set the version attribute to the h5 file."""
        self.h5.attrs['version'] = __version__
        self.h5.attrs['full_version_record'] = json.dumps(
            self.full_version_record)
        self.h5.attrs['package'] = 'rex'
    @property
    def version(self):
        """
        Version of package used to create file

        Returns
        -------
        str
        """
        return self.h5.attrs['version']

    @property
    def package(self):
        """
        Package used to create file

        Returns
        -------
        str
        """
        return self.h5.attrs['package']

    @property
    def source(self):
        """
        Package and version used to create file

        Returns
        -------
        str
        """
        out = ("{}_{}".format(self.h5.attrs['package'],
                              self.h5.attrs['version']))

        return out

    @property
    def shape(self):
        """
        Variable array shape from time_index and meta

        Returns
        -------
        tuple
            shape of variables arrays == (time, locations)
        """
        if self._shape is None:
            dsets = self.datasets
            if 'meta' in dsets:
                self._shape = self.h5['meta'].shape
                if 'time_index' in dsets:
                    self._shape = self.h5['time_index'].shape + self._shape

        return self._shape

    @property
    def writable(self):
        """
        Check to see if h5py.File instance is writable

        Returns
        -------
        is_writable : bool
            Flag if mode is writable
        """
        is_writable = True
        mode = ['a', 'w', 'w-', 'x']
        if self._mode not in mode:
            is_writable = False

        return is_writable

    @BaseResource.meta.setter  # pylint: disable-msg=E1101
    def meta(self, meta):
        """
        Write meta data to disk, convert type if necessary

        Parameters
        ----------
        meta : pandas.DataFrame | numpy.recarray
            Locational meta data
        """
        self._set_meta('meta', meta)

    @BaseResource.time_index.setter  # pylint: disable-msg=E1101
    def time_index(self, time_index):
        """
        Write time_index to disk, convert type if necessary

        Parameters
        ----------
        time_index : pandas.DatetimeIndex | ndarray
            Temporal index of timesteps
        """
        self._set_time_index('time_index', time_index)

    @property
    def SAM_configs(self):
        """
        SAM configuration JSONs used to create CF profiles

        Returns
        -------
        configs : dict
            Dictionary of SAM configuration JSONs
        """
        if 'meta' in self.datasets:
            configs = {k: json.loads(v)
                       for k, v in self.h5['meta'].attrs.items()}
        else:
            configs = {}

        return configs

    @property
    def run_attrs(self):
        """
        Runtime attributes stored at the global (file) level

        Returns
        -------
        global_attrs : dict
        """
        return self.global_attrs

    @run_attrs.setter
    def run_attrs(self, run_attrs):
        """
        Set runtime attributes as global (file) attributes

        Parameters
        ----------
        run_attrs : dict
            Dictionary of runtime attributes (args, kwargs)
        """
        if self.writable:
            for k, v in run_attrs.items():
                self.h5.attrs[k] = v

    @staticmethod
    def _check_data_dtype(dset_name, data, dtype, attrs=None):
        """
        Check data dtype and scale if needed

        Parameters
        ----------
        dset_name : str
            Name of dataset being written to disk
        data : ndarray
            Data to be written to disk
        dtype : str
            dtype of data on disk
        attrs : dict, optional
            Attributes to be set. May include 'scale_factor', by default None

        Returns
        -------
        data : ndarray
            Data ready for writing to disk:
            - Scaled and converted to dtype
        """
        if attrs is None:
            attrs = {}

        scale_factor = attrs.get('scale_factor', None)
        scale = (scale_factor is not None
                 and not np.issubdtype(data.dtype, np.integer))

        if scale:
            if scale_factor != 1 and not np.issubdtype(dtype, np.integer):
                msg = ('Output dtype for "{}" must be an integer in '
                       'order to apply scale factor {}.'
                       .format(dset_name, scale_factor))
                logger.error(msg)
                raise HandlerRuntimeError(msg)

            data_type_differs = not np.issubdtype(data.dtype, np.dtype(dtype))
            is_integer = np.issubdtype(dtype, np.integer)
            if data_type_differs and is_integer:
                # apply scale factor and dtype
                data = np.round(data * scale_factor).astype(dtype)

        elif (not np.issubdtype(data.dtype, np.dtype(dtype))
                and not np.issubdtype(np.dtype(dtype), np.floating)):
            msg = ('A scale_factor is needed to scale '
                   '"{}" of type "{}" to "{}".'
                   .format(dset_name, data.dtype, dtype))
            raise HandlerRuntimeError(msg)

        return data

    def _check_group(self, group):
        """
        Ensure group is in .h5 file

        Parameters
        ----------
        group : str
            Group of interest
        """
        if group is not None:
            if group not in self._h5:
                try:
                    if self.writable:
                        self._h5.create_group(group)
                except Exception as ex:
                    msg = ('Cannot create group {}: {}'.format(group, ex))
                    raise HandlerRuntimeError(msg) from ex

        return group

    def _set_meta(self, ds, meta, attrs=None):
        """
        Write meta data to disk

        Parameters
        ----------
        ds : str
            meta dataset name
        meta : pandas.DataFrame | numpy.recarray
            Locational meta data
        attrs : dict
            Attributes to add to the meta data dataset
        """
        # pylint: disable=attribute-defined-outside-init
        self._meta = meta
        if isinstance(meta, pd.DataFrame):
            meta = to_records_array(meta)

        if ds in self.datasets:
            self.update_dset(ds, meta)
        else:
            self._create_dset(ds, meta.shape, meta.dtype, data=meta,
                              attrs=attrs)

    def _set_time_index(self, ds, time_index, attrs=None):
        """
        Write time index to disk

        Parameters
        ----------
        ds : str
            time index dataset name
        time_index : pandas.DatetimeIndex | ndarray
            Temporal index of timesteps
        attrs : dict
            Attributes to add to the time index dataset
        """
        # pylint: disable=attribute-defined-outside-init
        self._time_index = time_index
        if isinstance(time_index, pd.DatetimeIndex):
            time_index = time_index.astype(str)
            dtype = "S{}".format(len(time_index[0]))
            time_index = np.array(time_index, dtype=dtype)

        if ds in self.datasets:
            self.update_dset(ds, time_index)
        else:
            self._create_dset(ds, time_index.shape, time_index.dtype,
                              data=time_index, attrs=attrs)
    def get_config(self, config_name):
        """
        Get SAM config

        Parameters
        ----------
        config_name : str
            Name of config

        Returns
        -------
        config : dict
            SAM config JSON as a dictionary
        """
        if 'meta' in self.datasets:
            config = json.loads(self.h5['meta'].attrs[config_name])
        else:
            config = None

        return config
    def set_configs(self, SAM_configs):
        """
        Set SAM configuration JSONs as attributes of 'meta'

        Parameters
        ----------
        SAM_configs : dict
            Dictionary of SAM configuration JSONs
        """
        if self.writable:
            for key, config in SAM_configs.items():
                if isinstance(config, dict):
                    config = json.dumps(config)

                if not isinstance(key, str):
                    key = str(key)

                self.h5['meta'].attrs[key] = config
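    # Illustrative usage sketch (file name and config contents are assumed
    # for the example only): SAM configs are stored as JSON strings in the
    # attributes of the 'meta' dataset, so the file must already have meta
    # loaded and be open in a writable mode.
    #
    # >>> with Outputs('test.h5', mode='a') as f:
    # ...     f.set_configs({'default': {'system_capacity': 20000}})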
    def _set_ds_array(self, ds_name, arr, ds_slice):
        """
        Write ds to disk

        Parameters
        ----------
        ds_name : str
            Dataset name
        arr : ndarray
            Dataset data array
        ds_slice : tuple
            Dataset slicing that corresponds to arr
        """
        if ds_name not in self.datasets:
            msg = '{} must be initialized!'.format(ds_name)
            raise HandlerRuntimeError(msg)

        dtype = self.h5[ds_name].dtype
        attrs = self.get_attrs(ds_name)
        ds_slice = parse_slice(ds_slice)
        self.h5[ds_name][ds_slice] = self._check_data_dtype(
            ds_name, arr, dtype, attrs=attrs)

    def _check_chunks(self, chunks, data=None):
        """
        Convert dataset chunk size into valid tuple based on variable array
        shape

        Parameters
        ----------
        chunks : tuple
            Desired dataset chunk size
        data : ndarray
            Dataset array being chunked

        Returns
        -------
        ds_chunks : tuple | None
            dataset chunk size
        """
        if chunks is None:
            return None

        if data is not None:
            shape = data.shape
        else:
            shape = self.shape

        if len(shape) != len(chunks):
            msg = ('Shape dimensions ({}) are not the same length as chunks '
                   '({}). Please provide a single chunk value for each '
                   'dimension!'.format(shape, chunks))
            logger.error(msg)
            raise HandlerRuntimeError(msg)

        return tuple(np.min((s, s if c is None else c))
                     for s, c in zip(shape, chunks))

    def _create_dset(self, ds_name, shape, dtype, chunks=None, attrs=None,
                     data=None, replace=True):
        """
        Initialize dataset

        Parameters
        ----------
        ds_name : str
            Dataset name
        shape : tuple
            Dataset shape
        dtype : str
            Dataset numpy dtype
        chunks : tuple
            Dataset chunk size
        attrs : dict
            Dataset attributes
        data : ndarray
            Dataset data array
        replace : bool
            If previous dataset exists with the same name, it will be
            replaced.
        """
        ds = None
        if self.writable:
            if ds_name in self.datasets and replace:
                del self.h5[ds_name]

            elif ds_name in self.datasets:
                old_shape, old_dtype, _ = self.get_dset_properties(ds_name)
                if old_shape != shape or old_dtype != dtype:
                    e = ('Trying to create dataset "{}", but already exists '
                         'with mismatched shape and dtype. New shape/dtype '
                         'is {}/{}, previous shape/dtype is {}/{}'
                         .format(ds_name, shape, dtype, old_shape, old_dtype))
                    logger.error(e)
                    raise HandlerRuntimeError(e)

            if ds_name not in self.datasets:
                chunks = self._check_chunks(chunks, data=data)
                try:
                    ds = self.h5.create_dataset(ds_name, shape=shape,
                                                dtype=dtype, chunks=chunks)
                except Exception as e:
                    msg = ('Could not create dataset "{}" in file!'
                           .format(ds_name))
                    logger.error(msg)
                    raise IOError(msg) from e

            if attrs is not None:
                self._create_ds_attrs(ds, ds_name, attrs)

            if data is not None:
                ds[...] = data

    @staticmethod
    def _create_ds_attrs(ds, ds_name, attrs):
        """Create dataset attributes.

        Parameters
        ----------
        ds : h5py.Dataset
            Dataset object to write attributes to.
        ds_name : str
            Dataset name for logging / debugging
        attrs : dict | None
            Dataset attributes to write (None if no attributes to write).
        """
        if attrs is not None:
            for key, value in attrs.items():
                try:
                    ds.attrs[key] = value
                except Exception as e:
                    msg = ('Could not save dataset "{}" attribute "{}" '
                           'to value: {}'.format(ds_name, key, value))
                    logger.error(msg)
                    raise IOError(msg) from e

    def _check_dset_shape(self, dset_name, dset_data):
        """
        Check to ensure that dataset array is of the proper shape

        Parameters
        ----------
        dset_name : str
            Dataset name being written to disk.
        dset_data : ndarray
            Dataset data array
        """
        dset_shape = dset_data.shape
        if len(dset_shape) == 1:
            possible_shapes = {}
            try:
                possible_shapes["spatial"] = (len(self.meta),)
            except ResourceKeyError:
                pass
            try:
                possible_shapes["temporal"] = (len(self.time_index),)
            except ResourceKeyError:
                pass

            if not possible_shapes:
                msg = ("Please load either 'meta' or 'time_index' before "
                       "loading a 1D dataset.")
                logger.error(msg)
                raise HandlerRuntimeError(msg)

            if dset_shape not in possible_shapes.values():
                possible_shapes_str = " or ".join(
                    ["{} {}".format(k, v)
                     for k, v in possible_shapes.items()])
                msg = ('1D dataset "{}" with shape {} is not of '
                       'the proper {} shape!'
                       .format(dset_name, dset_shape, possible_shapes_str))
                logger.error(msg)
                raise HandlerValueError(msg)
        else:
            shape = self.shape
            if shape:
                if dset_shape != shape:
                    msg = ('2D dataset "{}" with shape {} is not of the '
                           'proper spatiotemporal shape: {}'
                           .format(dset_name, dset_shape, shape))
                    logger.error(msg)
                    raise HandlerValueError(msg)
            else:
                msg = ("'meta' and 'time_index' have not been loaded")
                logger.error(msg)
                raise HandlerRuntimeError(msg)

    def _add_dset(self, dset_name, data, dtype, chunks=None, attrs=None):
        """
        Write dataset to disk. Dataset is created in .h5 file and data is
        scaled if needed.

        Parameters
        ----------
        dset_name : str
            Name of dataset to be added to h5 file.
        data : ndarray
            Data to be added to h5 file.
        dtype : str
            Intended dataset datatype after scaling.
        chunks : tuple
            Chunk size for capacity factor means dataset.
        attrs : dict
            Attributes to be set. May include 'scale_factor'.
        """
        self._check_dset_shape(dset_name, data)

        data = self._check_data_dtype(dset_name, data, dtype, attrs=attrs)

        self._create_dset(dset_name, data.shape, dtype,
                          chunks=chunks, attrs=attrs, data=data)
    def update_dset(self, dset, dset_array, dset_slice=None):
        """
        Check to see if dset needs to be updated on disk
        If so write dset_array to disk

        Parameters
        ----------
        dset : str
            dataset to update
        dset_array : ndarray
            dataset array
        dset_slice : tuple
            slice of dataset to update, if None update all
        """
        if dset_slice is None:
            dset_slice = (slice(None, None, None), )

        keys = (dset, ) + dset_slice

        arr = self.__getitem__(keys)
        if not np.array_equal(arr, dset_array):
            self._set_ds_array(dset, dset_array, dset_slice)
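    # Illustrative usage sketch (file and dataset names are assumed for the
    # example only): overwrite just the first 24 timesteps of an existing
    # spatiotemporal dataset; data outside the slice is left unchanged.
    #
    # >>> with Outputs('test.h5', mode='a') as f:
    # ...     f.update_dset('dset1', np.zeros((24, 100)),
    # ...                   dset_slice=(slice(0, 24),))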
    def write_dataset(self, dset_name, data, dtype, chunks=None, attrs=None):
        """
        Write dataset to disk. Dataset is created in .h5 file and data is
        scaled if needed.

        Parameters
        ----------
        dset_name : str
            Name of dataset to be added to h5 file.
        data : ndarray
            Data to be added to h5 file.
        dtype : str
            Intended dataset datatype after scaling.
        chunks : tuple
            Chunk size for capacity factor means dataset.
        attrs : dict
            Attributes to be set. May include 'scale_factor'.
        """
        self._add_dset(dset_name, data, dtype, chunks=chunks, attrs=attrs)
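    # Illustrative usage sketch (file and dataset names are assumed for the
    # example only): the handler must be open in a writable mode and have
    # meta/time_index loaded so the shape check can pass. Here a 1D means
    # array is scaled to int32 on disk via the 'scale_factor' attribute.
    #
    # >>> with Outputs('test.h5', mode='a') as f:
    # ...     f.write_dataset('cf_mean', np.ones(len(f.meta)) * 0.5,
    # ...                     dtype=np.int32, attrs={'scale_factor': 1000})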
    @classmethod
    def write_profiles(cls, h5_file, meta, time_index, dset_name, profiles,
                       dtype, attrs=None, SAM_configs=None,
                       chunks=(None, 100), unscale=True, mode='w-',
                       str_decode=True, group=None):
        """
        Write profiles to disk

        Parameters
        ----------
        h5_file : str
            Path to .h5 resource file
        meta : pandas.Dataframe
            Locational meta data
        time_index : pandas.DatetimeIndex
            Temporal timesteps
        dset_name : str
            Name of the target dataset (should identify the profiles).
        profiles : ndarray
            output result timeseries profiles
        dtype : str
            Intended dataset datatype after scaling.
        attrs : dict, optional
            Attributes to be set. May include 'scale_factor', by default None
        SAM_configs : dict, optional
            Dictionary of SAM configuration JSONs used to compute cf means,
            by default None
        chunks : tuple, optional
            Chunk size for capacity factor means dataset,
            by default (None, 100)
        unscale : bool, optional
            Boolean flag to automatically unscale variables on extraction,
            by default True
        mode : str, optional
            Mode to instantiate h5py.File instance, by default 'w-'
        str_decode : bool, optional
            Boolean flag to decode the bytestring meta data into normal
            strings. Setting this to False will speed up the meta data read,
            by default True
        group : str, optional
            Group within .h5 resource file to open, by default None
        """
        logger.info("Saving profiles ({}) to {}".format(dset_name, h5_file))
        if profiles.shape != (len(time_index), len(meta)):
            raise HandlerValueError("Profile dimensions do not match "
                                    "'time_index' and 'meta'")
        ts = time.time()
        kwargs = {"unscale": unscale, "mode": mode, "str_decode": str_decode,
                  "group": group}
        with cls(h5_file, **kwargs) as f:
            # Save time index
            f['time_index'] = time_index
            logger.debug("\t- 'time_index' saved to disc")
            # Save meta
            f['meta'] = meta
            logger.debug("\t- 'meta' saved to disc")
            # Add SAM configurations as attributes to meta
            if SAM_configs is not None:
                f.set_configs(SAM_configs)
                logger.debug("\t- SAM configurations saved as attributes "
                             "on 'meta'")

            # Write dset to disk
            f._add_dset(dset_name, profiles, dtype,
                        chunks=chunks, attrs=attrs)
            logger.debug("\t- '{}' saved to disc".format(dset_name))

        tt = (time.time() - ts) / 60
        logger.info('{} is complete'.format(h5_file))
        logger.debug('\t- Saving to disc took {:.4f} minutes'
                     .format(tt))
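    # Illustrative usage sketch (file and dataset names are assumed for the
    # example only): write an hourly profile array whose shape matches
    # (len(time_index), len(meta)), using the meta and time_index objects
    # built as in the class docstring example.
    #
    # >>> profiles = np.ones((len(time_index), len(meta))) * 42.42
    # >>> Outputs.write_profiles('profiles.h5', meta, time_index,
    # ...                        'cf_profile', profiles, np.int32,
    # ...                        attrs={'scale_factor': 100})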
    @classmethod
    def write_means(cls, h5_file, meta, dset_name, means, dtype, attrs=None,
                    SAM_configs=None, chunks=None, unscale=True, mode='w-',
                    str_decode=True, group=None):
        """
        Write means array to disk

        Parameters
        ----------
        h5_file : str
            Path to .h5 resource file
        meta : pandas.Dataframe
            Locational meta data
        dset_name : str
            Name of the target dataset (should identify the means).
        means : ndarray
            output means array.
        dtype : str
            Intended dataset datatype after scaling.
        attrs : dict, optional
            Attributes to be set. May include 'scale_factor', by default None
        SAM_configs : dict, optional
            Dictionary of SAM configuration JSONs used to compute cf means,
            by default None
        chunks : tuple, optional
            Chunk size for capacity factor means dataset, by default None
        unscale : bool, optional
            Boolean flag to automatically unscale variables on extraction,
            by default True
        mode : str, optional
            Mode to instantiate h5py.File instance, by default 'w-'
        str_decode : bool, optional
            Boolean flag to decode the bytestring meta data into normal
            strings. Setting this to False will speed up the meta data read,
            by default True
        group : str, optional
            Group within .h5 resource file to open, by default None
        """
        logger.info("Saving means ({}) to {}".format(dset_name, h5_file))
        if len(means) != len(meta):
            msg = 'Number of means does not match meta'
            raise HandlerValueError(msg)

        ts = time.time()
        kwargs = {"unscale": unscale, "mode": mode, "str_decode": str_decode,
                  "group": group}
        with cls(h5_file, **kwargs) as f:
            # Save meta
            f['meta'] = meta
            logger.debug("\t- 'meta' saved to disc")
            # Add SAM configurations as attributes to meta
            if SAM_configs is not None:
                f.set_configs(SAM_configs)
                logger.debug("\t- SAM configurations saved as attributes "
                             "on 'meta'")

            # Write dset to disk
            f._add_dset(dset_name, means, dtype,
                        chunks=chunks, attrs=attrs)
            logger.debug("\t- '{}' saved to disc".format(dset_name))

        tt = (time.time() - ts) / 60
        logger.info('{} is complete'.format(h5_file))
        logger.debug('\t- Saving to disc took {:.4f} minutes'
                     .format(tt))
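    # Illustrative usage sketch (file and dataset names are assumed for the
    # example only): means must be a 1D array with one value per site in
    # meta; time_index is not required for means-only files.
    #
    # >>> means = np.ones(len(meta)) * 0.42
    # >>> Outputs.write_means('means.h5', meta, 'cf_mean', means, np.int32,
    # ...                     attrs={'scale_factor': 1000})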
    @classmethod
    def add_dataset(cls, h5_file, dset_name, dset_data, dtype, attrs=None,
                    chunks=None, unscale=True, mode='a', str_decode=True,
                    group=None):
        """
        Add dataset to h5_file

        Parameters
        ----------
        h5_file : str
            Path to .h5 resource file
        dset_name : str
            Name of dataset to be added to h5 file
        dset_data : ndarray
            Data to be added to h5 file
        dtype : str
            Intended dataset datatype after scaling.
        attrs : dict, optional
            Attributes to be set. May include 'scale_factor', by default None
        chunks : tuple, optional
            Chunk size for capacity factor means dataset, by default None
        unscale : bool, optional
            Boolean flag to automatically unscale variables on extraction,
            by default True
        mode : str, optional
            Mode to instantiate h5py.File instance, by default 'a'
        str_decode : bool, optional
            Boolean flag to decode the bytestring meta data into normal
            strings. Setting this to False will speed up the meta data read,
            by default True
        group : str, optional
            Group within .h5 resource file to open, by default None
        """
        logger.info("Adding {} to {}".format(dset_name, h5_file))
        ts = time.time()
        kwargs = {"unscale": unscale, "mode": mode, "str_decode": str_decode,
                  "group": group}
        with cls(h5_file, **kwargs) as f:
            f._add_dset(dset_name, dset_data, dtype,
                        chunks=chunks, attrs=attrs)

        tt = (time.time() - ts) / 60
        logger.info('{} added'.format(dset_name))
        logger.debug('\t- Saving to disc took {:.4f} minutes'
                     .format(tt))
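    # Illustrative usage sketch (file and dataset names are assumed for the
    # example only): add a chunked spatiotemporal dataset to an existing,
    # pre-initialized file (see the class docstring for the 'test.h5' setup).
    #
    # >>> Outputs.add_dataset('test.h5', 'dset2',
    # ...                     np.ones((8760, 100)) * 0.42, np.float32,
    # ...                     chunks=(None, 10))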
    @classmethod
    def init_h5(cls, h5_file, dsets, shapes, attrs, chunks, dtypes, meta,
                time_index=None, configs=None, unscale=True, mode='w',
                str_decode=True, group=None, run_attrs=None):
        """Init a full output file with the final intended shape without
        data.

        Parameters
        ----------
        h5_file : str
            Full h5 output filepath.
        dsets : list
            List of strings of dataset names to initialize (does not include
            meta or time_index).
        shapes : dict
            Dictionary of dataset shapes (keys correspond to dsets).
        attrs : dict
            Dictionary of dataset attributes (keys correspond to dsets).
        chunks : dict
            Dictionary of chunk tuples (keys correspond to dsets).
        dtypes : dict
            dictionary of numpy datatypes (keys correspond to dsets).
        meta : pd.DataFrame
            Full meta data.
        time_index : pd.DatetimeIndex | None
            Full pandas datetime index. None implies that only 1D results
            (no site profiles) are being written.
        configs : dict | None
            Optional input configs to set as attr on meta.
        unscale : bool
            Boolean flag to automatically unscale variables on extraction
        mode : str
            Mode to instantiate h5py.File instance
        str_decode : bool
            Boolean flag to decode the bytestring meta data into normal
            strings. Setting this to False will speed up the meta data read.
        group : str
            Group within .h5 resource file to open
        run_attrs : dict | NoneType
            Runtime attributes (args, kwargs) to add as global (file)
            attributes
        """
        logger.debug("Initializing output file: {}".format(h5_file))
        kwargs = {"unscale": unscale, "mode": mode, "str_decode": str_decode,
                  "group": group}
        with cls(h5_file, **kwargs) as f:
            if run_attrs is not None:
                f.run_attrs = run_attrs

            f['meta'] = meta

            if time_index is not None:
                f['time_index'] = time_index

            for dset in dsets:
                if dset not in ('meta', 'time_index'):
                    # initialize each dset to disk
                    f._create_dset(dset, shapes[dset], dtypes[dset],
                                   chunks=chunks[dset], attrs=attrs[dset])

            if configs is not None:
                f.set_configs(configs)
                logger.debug("\t- Configurations saved as attributes "
                             "on 'meta'")

        logger.debug('Output file has been initialized.')
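    # Illustrative usage sketch (file and dataset names are assumed for the
    # example only): pre-allocate an empty annual output file for two
    # datasets, then fill it later via __setitem__ slices.
    #
    # >>> dsets = ['cf_profile', 'cf_mean']
    # >>> shapes = {'cf_profile': (8760, 100), 'cf_mean': (100,)}
    # >>> attrs = {'cf_profile': {'scale_factor': 1000},
    # ...          'cf_mean': {'scale_factor': 1000}}
    # >>> chunks = {'cf_profile': (None, 100), 'cf_mean': None}
    # >>> dtypes = {'cf_profile': 'uint16', 'cf_mean': 'uint16'}
    # >>> Outputs.init_h5('outputs.h5', dsets, shapes, attrs, chunks, dtypes,
    # ...                 meta, time_index=time_index)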