# -*- coding: utf-8 -*-
"""
Classes to handle multi-file resource data
"""
import os
from glob import glob
import h5py
import numpy as np
from rex.renewable_resource import (NSRDB, SolarResource, GeothermalResource,
WindResource, WaveResource,
AbstractInterpolatedResource)
from rex.resource import Resource, BaseDatasetIterable
from rex.utilities.exceptions import FileInputError, ResourceRuntimeError
from rex.utilities.utilities import unstupify_path


class MultiH5(BaseDatasetIterable):
"""
Class to handle multiple h5 file Resources
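
    Examples
    --------
    A minimal usage sketch; the file paths and dataset name below are
    hypothetical, not shipped test data:

    >>> files = ['/data/wtk_2010_100m.h5', '/data/wtk_2010_200m.h5']
    >>> with MultiH5(files, check_files=True) as mh5:
    ...     'windspeed_100m' in mh5
    True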
"""
def __init__(self, h5_files, check_files=False):
"""
Parameters
----------
h5_files : list
List of .h5 files to source data from
check_files : bool
Check to ensure files have the same coordinates and time_index
"""
self._dset_map = self._map_file_dsets(h5_files)
self._h5_map = self._map_file_instances(set(self._dset_map.values()))
if check_files:
self._preflight_check()
def __repr__(self):
msg = ("{} contains {} files and {} datasets"
.format(self.__class__.__name__, len(self),
len(self._dset_map)))
return msg
def __enter__(self):
return self
    def __exit__(self, exc_type, exc_value, exc_traceback):
        # close all open file handles; returning None lets any in-flight
        # exception propagate naturally instead of re-raising by hand
        self.close()
def __len__(self):
return len(self._h5_map)
def __getitem__(self, dset):
if dset in self:
path = self._dset_map[dset]
h5 = self._h5_map[path]
ds = h5[dset]
else:
            raise ValueError('{} is invalid, must be one of: {}'
.format(dset, self.datasets))
return ds
def __contains__(self, dset):
return dset in self.datasets
@property
def attrs(self):
"""
        Global .h5 file attributes sourced from the first .h5 file

        Returns
-------
attrs : dict
.h5 file attributes sourced from first .h5 file
"""
path = self.h5_files[0]
attrs = dict(self._h5_map[path].attrs)
return attrs
@property
def datasets(self):
"""
        Available datasets

        Returns
-------
list
            List of datasets present in the .h5 files
"""
return sorted(self._dset_map)
@property
def h5_files(self):
"""
        .h5 files data is being sourced from

        Returns
-------
list
List of .h5 files data is being sourced from
"""
return sorted(self._h5_map)
@staticmethod
def _get_dsets(h5_path):
"""
        Get datasets in given .h5 file

        Parameters
----------
h5_path : str
            Path to .h5 file to get variables for

        Returns
-------
unique_dsets : list
List of unique datasets in .h5 file
shared_dsets : list
List of shared datasets in .h5 file
"""
unique_dsets = []
shared_dsets = []
try:
with h5py.File(h5_path, mode='r') as f:
for dset in Resource._get_datasets(f):
if dset not in ['meta', 'time_index', 'coordinates']:
unique_dsets.append(dset)
else:
shared_dsets.append(dset)
except Exception as e:
msg = ('Could not read file: "{}"'.format(h5_path))
raise IOError(msg) from e
return unique_dsets, shared_dsets
@classmethod
def _map_file_dsets(cls, h5_files):
"""
        Map datasets to the .h5 files that contain them

        Parameters
----------
h5_files : list
            List of h5_files to source data from

        Returns
-------
dset_map : dict
Dictionary mapping datasets to file paths
"""
dset_map = {}
for file in h5_files:
unique_dsets, shared_dsets = cls._get_dsets(file)
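            # shared datasets (meta, time_index, coordinates) map to the
            # first file they appear in; unique datasets map to whichever
            # file contains them (a later file wins on a name collision)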
for dset in shared_dsets:
if dset not in dset_map:
dset_map[dset] = file
for dset in unique_dsets:
dset_map[dset] = file
return dset_map
@staticmethod
def _map_file_instances(h5_files):
"""
Open all .h5 files and map the open h5py instances to the
        associated file paths

        Parameters
----------
h5_files : list
            List of .h5 files to open

        Returns
-------
h5_map : dict
Dictionary mapping file paths to open resource instances
"""
h5_map = {}
for f_path in h5_files:
h5_map[f_path] = h5py.File(f_path, mode='r')
return h5_map
def _preflight_check(self):
"""
        Check that time_index and coordinates match across files
"""
time_index = None
lat_lon = None
bad_files = []
for file in self.h5_files:
with Resource(file) as f:
if 'time_index' in f:
ti = f.time_index
if time_index is None:
time_index = ti.copy()
else:
check = time_index.equals(ti)
if not check:
bad_files.append(file)
ll = f.lat_lon
if lat_lon is None:
lat_lon = ll.copy()
else:
check = np.allclose(lat_lon, ll)
if not check:
bad_files.append(file)
bad_files = list(set(bad_files))
if bad_files:
msg = ("The following files' coordinates and time-index do not "
"match:\n{}".format(bad_files))
            raise ResourceRuntimeError(msg)

    def close(self):
"""
Close all h5py.File instances
"""
for f in self._h5_map.values():
            f.close()


class MultiH5Path(MultiH5):
"""
Class to handle multiple h5 file Resources derived from a path
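
    Examples
    --------
    A minimal sketch with a hypothetical wildcard pattern (note that a
    bare directory path is rejected; the pattern must contain a *):

    >>> with MultiH5Path('/data/wtk_2010_*m.h5', check_files=True) as mh5:
    ...     len(mh5)
    2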
"""
def __init__(self, h5_path, check_files=False):
"""
Parameters
----------
h5_path : str
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same time index and
coordinates but can have different datasets.
check_files : bool
Check to ensure files have the same coordinates and time_index
"""
self.h5_path, h5_files = self._get_h5_files(h5_path)
super().__init__(h5_files, check_files=check_files)
def __repr__(self):
msg = ("{} for {}:\n Contains {} files and {} datasets"
.format(self.__class__.__name__, self.h5_path,
len(self), len(self._dset_map)))
return msg
@staticmethod
def _get_h5_files(h5_path):
"""
Parameters
----------
h5_path : str
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same time index and
            coordinates but can have different datasets.

        Returns
        -------
        h5_path : str
            The input path sanitized by unstupify_path
file_paths : list
List of full file paths found by matching the h5_path input.
"""
h5_path = unstupify_path(h5_path)
if os.path.isdir(h5_path):
msg = ('h5_path must be a unix shell style pattern with '
'wildcard * in order to find files, but received '
'directory specification: {}'.format(h5_path))
raise FileInputError(msg)
file_paths = glob(h5_path)
        if not file_paths:
msg = ('Could not find any file paths with pattern: {}'
.format(h5_path))
raise FileInputError(msg)
        return h5_path, file_paths


class MultiFileResource(AbstractInterpolatedResource):
"""
Class to handle fine spatial resolution resource data stored in
    multiple .h5 files

    See Also
    --------
    resource.Resource : Parent class

    Examples
    --------
    Due to the size of the 2018 NSRDB and 5min WTK, datasets are stored
    in multiple files. MultiFileResource and its sub-classes allow
    interaction with all datasets as if they were in a single file.
    MultiFileResource can take a unix shell style pattern path with *
    wildcards to the multi-file resource file set, or an explicit list
    of file paths.
>>> file = '$TESTDATADIR/wtk/wtk_2010_*m.h5'
    >>> with MultiFileResource(file) as res:
    ...     print(res._h5_files)
['$TESTDATADIR/wtk_2010_200m.h5',
'$TESTDATADIR/wtk_2010_100m.h5']
>>> file_100m = '$TESTDATADIR/wtk_2010_100m.h5'
    >>> with Resource(file_100m) as res:
    ...     print(res.datasets)
['coordinates', 'meta', 'pressure_100m', 'temperature_100m', 'time_index',
'winddirection_100m', 'windspeed_100m']
>>> file_200m = '$TESTDATADIR/wtk_2010_200m.h5'
    >>> with Resource(file_200m) as res:
    ...     print(res.datasets)
['coordinates', 'meta', 'pressure_200m', 'temperature_200m', 'time_index',
'winddirection_200m', 'windspeed_200m']
    >>> with MultiFileResource(file) as res:
    ...     print(res.datasets)
['coordinates', 'meta', 'pressure_100m', 'pressure_200m',
'temperature_100m', 'temperature_200m', 'time_index',
'winddirection_100m', 'winddirection_200m', 'windspeed_100m',
'windspeed_200m']
    >>> with MultiFileResource(file) as res:
    ...     wspd = res['windspeed_100m']
    >>> wspd
[[15.13 15.17 15.21 ... 15.3 15.32 15.31]
[15.09 15.13 15.16 ... 15.26 15.29 15.31]
[15.09 15.12 15.15 ... 15.24 15.23 15.26]
...
[10.29 11.08 11.51 ... 14.43 14.41 14.19]
[11. 11.19 11.79 ... 13.27 11.93 11.8 ]
[12.16 12.44 13.09 ... 11.94 10.88 11.12]]
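
    For the datasets named in INTERPOLABLE_DSETS, heights not present in
    the files can also be requested and are interpolated from the
    available hub-heights (a sketch; see MultiFileWTK for concrete
    output):

    >>> with MultiFileResource(file) as res:
    ...     wspd_150 = res['windspeed_150m']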
"""
INTERPOLABLE_DSETS = ["temperature", "pressure", "windspeed",
"winddirection"]
VARIABLE_NAME = "height"
VARIABLE_UNIT = "m"
def __init__(self, h5_source, unscale=True, str_decode=True,
check_files=False, use_lapse_rate=True):
"""
Parameters
----------
h5_source : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same time index and
coordinates but can have different datasets. Can also be an
explicit list of complete filepaths.
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Boolean flag to decode the bytestring meta data into normal
strings. Setting this to False will speed up the meta data read.
check_files : bool
Check to ensure files have the same coordinates and time_index
use_lapse_rate : bool
If a dataset is only available at a single hub-height and this flag
value is set to `True`, pressure / temperature values will be
calculated using linear lapse rate adjustment from the available
hub height to the requested one. If the flag value is set to
`False`, the value of these variables at the single available
hub-height will be returned for *all* requested heights. This
option has no effect if data is available at multiple hub-heights.
"""
self._unscale = unscale
self._meta = None
self._time_index = None
self._lat_lon = None
self._str_decode = str_decode
self._group = None
# Map variables to their .h5 files
self._h5 = self._init_multi_h5(h5_source, check_files=check_files)
self._h5_files = self._h5.h5_files
self.h5_file = self._h5_files[0]
self._attrs = None
self._shapes = None
self._chunks = None
self._dtypes = None
self._interp_var = None
self._use_lapse = use_lapse_rate
# this is where self.heights or self.depths gets set
self._interpolation_variable = self._parse_interp_var(self.datasets)
prop_name = "{}s".format(self.VARIABLE_NAME)
setattr(self, prop_name, self._interpolation_variable)
def __repr__(self):
msg = "{}".format(self.__class__.__name__)
return msg
@staticmethod
def _init_multi_h5(h5_source, check_files=False):
"""
Initialize MultiH5 handler class based on input type
Parameters
----------
h5_source : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same time index and
coordinates but can have different datasets. Can also be an
explicit list of complete filepaths.
check_files : bool
            Check to ensure files have the same coordinates and time_index

        Returns
-------
multi_h5 : MultiH5 | MultiH5Path
Initialized multi h5 handler
"""
if isinstance(h5_source, str):
multi_h5 = MultiH5Path(h5_source, check_files=check_files)
elif isinstance(h5_source, (list, tuple)):
multi_h5 = MultiH5(h5_source, check_files=check_files)
else:
msg = ('Cannot initialize MultiH5 from {}, expecting a path or a '
'list of .h5 file paths'.format(type(h5_source)))
raise ResourceRuntimeError(msg)
        return multi_h5


class MultiFileNSRDB(MultiFileResource, NSRDB):
"""
    Class to handle NSRDB data from 2018 and beyond, which is at 2km
    spatial and sub-30-minute temporal resolution

    See Also
--------
resource.MultiFileResource : Parent class
resource.NSRDB : Parent class
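
    Examples
    --------
    A minimal, hypothetical sketch of pre-loading solar resource for SAM
    (the file pattern and site list are illustrative):

    >>> file = '$TESTDATADIR/nsrdb/nsrdb_2018_*.h5'
    >>> SAM_res = MultiFileNSRDB.preload_SAM(file, sites=[0, 1, 2])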
"""
[docs]
@classmethod
def preload_SAM(cls, h5_source, sites, unscale=True, str_decode=True,
tech='pvwattsv7', time_index_step=None, means=False,
clearsky=False, bifacial=False, downscale=None,
check_files=False):
"""
        Pre-load project_points for SAM

        Parameters
----------
h5_source : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same time index and
coordinates but can have different datasets. Can also be an
explicit list of complete filepaths.
sites : list
List of sites to be provided to SAM
(sites is synonymous with gids aka spatial indices)
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Boolean flag to decode the bytestring meta data into normal
strings. Setting this to False will speed up the meta data read.
tech : str, optional
SAM technology string, by default 'pvwattsv7'
        time_index_step : int, optional
Step size for time_index, used to reduce temporal resolution,
by default None
means : bool, optional
Boolean flag to compute mean resource when res_array is set,
by default False
clearsky : bool
Boolean flag to pull clearsky instead of real irradiance
bifacial : bool
Boolean flag to pull surface albedo for bifacial modeling.
downscale : NoneType | str
Option for NSRDB resource downscaling to higher temporal
resolution. Expects a string in the Pandas frequency format,
e.g. '5min'.
check_files : bool
            Check to ensure files have the same coordinates and time_index

        Returns
-------
SAM_res : SAMResource
Instance of SAMResource pre-loaded with Solar resource for sites
in project_points
"""
with cls(h5_source, unscale=unscale, str_decode=str_decode,
check_files=check_files) as res:
# pylint: disable=assignment-from-no-return
SAM_res = res._preload_SAM(res, sites, tech=tech,
time_index_step=time_index_step,
means=means, clearsky=clearsky,
bifacial=bifacial, downscale=downscale)
        return SAM_res


class MultiFileWTK(MultiFileResource, WindResource):
"""
    Class to handle 5min WIND Toolkit data

    See Also
--------
resource.MultiFileResource : Parent class
    resource.WindResource : Parent class

    Examples
    --------
    MultiFileWTK can, like MultiFileResource, take a unix shell style
    pattern path with * wildcards to the multi-file resource file set

    >>> file = '$TESTDATADIR/wtk/wtk_2010_*m.h5'
    >>> with MultiFileWTK(file) as res:
    ...     print(res._h5_files)
    ...     print(res.datasets)
['$TESTDATADIR/wtk_2010_200m.h5',
'$TESTDATADIR/wtk_2010_100m.h5']
['coordinates', 'meta', 'pressure_100m', 'pressure_200m',
'temperature_100m', 'temperature_200m', 'time_index',
'winddirection_100m', 'winddirection_200m', 'windspeed_100m',
'windspeed_200m']
    MultiFileWTK, like WindResource, can interpolate / extrapolate to
    requested hub-heights:
    >>> with MultiFileWTK(file) as res:
    ...     wspd = res['windspeed_150m']
    >>> wspd
[[16.19 16.25 16.305 ... 16.375 16.39 16.39 ]
[16.15 16.205 16.255001 ... 16.35 16.365 16.39 ]
[16.154999 16.195 16.23 ... 16.335 16.32 16.34 ]
...
[10.965 11.675 12.08 ... 15.18 14.805 14.42 ]
[11.66 11.91 12.535 ... 13.31 12.23 12.335 ]
[12.785 13.295 14.014999 ... 12.205 11.360001 11.64 ]]
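
    A hedged sketch of pre-loading sites for SAM; the site indices and
    hub-height below are illustrative:

    >>> SAM_res = MultiFileWTK.preload_SAM(file, [0, 1, 2], hub_heights=100)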
"""
[docs]
@classmethod
def preload_SAM(cls, h5_source, sites, hub_heights, unscale=True,
str_decode=True, time_index_step=None, means=False,
require_wind_dir=False, precip_rate=False, icing=False,
check_files=False):
"""
        Pre-load project_points for SAM

        Parameters
----------
h5_source : str | list
Unix shell style pattern path with * wildcards to multi-file
resource file sets. Files must have the same time index and
coordinates but can have different datasets. Can also be an
explicit list of complete filepaths.
sites : list
List of sites to be provided to SAM
(sites is synonymous with gids aka spatial indices)
hub_heights : int | float | list
Hub heights to extract for SAM
unscale : bool
Boolean flag to automatically unscale variables on extraction
str_decode : bool
Boolean flag to decode the bytestring meta data into normal
strings. Setting this to False will speed up the meta data read.
        time_index_step : int, optional
Step size for time_index, used to reduce temporal resolution,
by default None
means : bool, optional
Boolean flag to compute mean resource when res_array is set,
by default False
require_wind_dir : bool
Boolean flag as to whether wind direction will be loaded.
precip_rate : bool
Boolean flag as to whether precipitationrate_0m will be preloaded
icing : bool
Boolean flag as to whether icing is analyzed.
This will preload relative humidity.
check_files : bool
            Check to ensure files have the same coordinates and time_index

        Returns
-------
SAM_res : SAMResource
            Instance of SAMResource pre-loaded with wind resource for
            sites in project_points
"""
with cls(h5_source, unscale=unscale, str_decode=str_decode,
check_files=check_files) as res:
# pylint: disable=assignment-from-no-return
SAM_res = res._preload_SAM(res, sites, hub_heights,
time_index_step=time_index_step,
means=means,
require_wind_dir=require_wind_dir,
precip_rate=precip_rate, icing=icing)
return SAM_res
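

# Mapping of single-file resource handler classes to their multi-file
# counterparts, e.g. to look up the multi-file analogue of a handler.
# A minimal lookup sketch (the calling code here is an assumption, not
# part of this module):
#     handler = MULTI_FILE_CLASS_MAP.get(WindResource, MultiFileResource)
#     with handler('/path/to/wtk_2010_*m.h5') as res:  # hypothetical path
#         ...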
MULTI_FILE_CLASS_MAP = {Resource: MultiFileResource,
NSRDB: MultiFileNSRDB,
SolarResource: MultiFileResource,
WindResource: MultiFileWTK,
WaveResource: MultiFileResource,
GeothermalResource: MultiFileResource,
}