# Source code for reVX.offshore.offshore_inputs

# -*- coding: utf-8 -*-
"""
Extract offshore inputs from exclusion layers
"""
import logging
import numpy as np
import pandas as pd
from scipy.ndimage import center_of_mass
from scipy.spatial import cKDTree
from warnings import warn

from reV.handlers.exclusions import ExclusionLayers
from reV.utilities.exceptions import MultiFileExclusionError
from reVX.utilities.utilities import log_versions, coordinate_distance
from rex.resource import Resource
from rex.utilities.utilities import parse_table, get_lat_lon_cols

logger = logging.getLogger(__name__)


class OffshoreInputs(ExclusionLayers):
    """
    Class to extract offshore inputs from offshore inputs .h5 at desired
    offshore site gids. Mapping is based on the techmapping dataset
    (tm_dset). Offshore input values are taken from the array pixel closest
    to the center of mass of each offshore site gid.
    """
    # Default mapping of input layer / dataset name -> output column name
    DEFAULT_INPUT_LAYERS = {
        'array_efficiency': 'aeff',
        'bathymetry': 'depth',
        'dist_to_coast': 'dist_s_to_l',
        'assembly_areas': 'dist_a_to_s',
        'ports_operations': 'dist_op_to_s',
        'ports_construction': 'dist_p_to_s',
        'ports_construction_nolimits': 'dist_p_to_s_nolimit',
        'weather_downtime_fixed_bottom': 'fixed_downtime',
        'weather_downtime_floating': 'floating_downtime',
        'weather_downtime_mean_wave_height_bouy': 'hs_average'
    }

    def __init__(self, inputs_fpath, offshore_sites, tm_dset='techmap_wtk'):
        """
        Parameters
        ----------
        inputs_fpath : str
            Path to offshore inputs .h5 file
        offshore_sites : str | list | tuple | ndarray | pandas.DataFrame
            - Path to .csv|.json file with offshore sites meta data
            - Path to a WIND Toolkit .h5 file to extract site meta from
            - List, tuple, or vector of offshore gids
            - Pre-extracted site meta DataFrame
        tm_dset : str, optional
            Dataset / layer name for wind toolkit techmap,
            by default 'techmap_wtk'
        """
        log_versions(logger)
        super().__init__(inputs_fpath)
        self._offshore_meta = self._create_offshore_meta(offshore_sites,
                                                         tm_dset)

    def __repr__(self):
        msg = "{} from {}".format(self.__class__.__name__, self.inputs_fpath)

        return msg

    @property
    def inputs_fpath(self):
        """
        .h5 file containing offshore input layers

        Returns
        -------
        str
        """
        return self.h5_file

    @property
    def meta(self):
        """
        Offshore site meta data including mapping to input layer row and
        column index

        Returns
        -------
        pandas.DataFrame
        """
        return self._offshore_meta

    @property
    def lat_lons(self):
        """
        Offshore sites coordinates (lat, lons)

        Returns
        -------
        ndarray
        """
        lat_lon_cols = get_lat_lon_cols(self.meta)

        return self.meta[lat_lon_cols].values

    @property
    def row_ids(self):
        """
        Input layer array row ids that correspond to desired offshore sites

        Returns
        -------
        ndarray
        """
        return self.meta['row_idx'].values

    @property
    def column_ids(self):
        """
        Input layer array column ids that correspond to desired offshore
        sites

        Returns
        -------
        ndarray
        """
        return self.meta['col_idx'].values

    @staticmethod
    def _parse_offshore_sites(offshore_sites):
        """
        Load offshore sites from disc if needed

        Parameters
        ----------
        offshore_sites : str | list | tuple | ndarray | pandas.DataFrame
            - Path to .csv|.json file with offshore sites meta data
            - Path to a WIND Toolkit .h5 file to extract site meta from
            - List, tuple, or vector of offshore gids
            - Pre-extracted site meta DataFrame

        Returns
        -------
        offshore_sites : pandas.DataFrame
            Offshore sites meta data

        Raises
        ------
        ValueError
            If input cannot be resolved to a pandas DataFrame
        """
        if isinstance(offshore_sites, str):
            if offshore_sites.endswith('.h5'):
                with Resource(offshore_sites) as f:
                    offshore_sites = f.meta

                if offshore_sites.index.name == 'gid':
                    # promote the gid index to a regular column
                    offshore_sites = offshore_sites.reset_index()
            else:
                offshore_sites = parse_table(offshore_sites)
        elif isinstance(offshore_sites, (tuple, list, np.ndarray)):
            offshore_sites = pd.DataFrame({'gid': offshore_sites})

        if not isinstance(offshore_sites, pd.DataFrame):
            msg = ("offshore sites must be a .csv, .json, or .h5 file path, "
                   "or a pre-extracted pandas DataFrame, but {} was provided"
                   .format(offshore_sites))
            logger.error(msg)
            raise ValueError(msg)

        if 'offshore' in offshore_sites:
            # drop onshore sites if an offshore flag column is present
            mask = offshore_sites['offshore'] == 1
            offshore_sites = offshore_sites.loc[mask]

        return offshore_sites

    @classmethod
    def _parse_input_layers(cls, input_layers=None):
        """
        Parse offshore inputs to extract from .h5 exclusion layers.
        "input_layers" can be:
        - A single layer to extract
        - A list of layers to extract
        - A dictionary with the output column name mapped to the layer
          to extract

        Parameters
        ----------
        input_layers : str | list | dict
            Input layer, list of input layers, to extract, or dictionary
            mapping the input layers to extract to the column names to save
            them under

        Returns
        -------
        dict
            Dictionary mapping the layer to extract to the output column
            name

        Raises
        ------
        TypeError
            If input cannot be resolved to a dict
        """
        msg = ''
        if input_layers is None:
            input_layers = cls.DEFAULT_INPUT_LAYERS
            msg += '"input_layers" not provided, using defaults. '
        else:
            if isinstance(input_layers, str):
                input_layers = [input_layers]

            if isinstance(input_layers, (tuple, list, np.ndarray)):
                # layer name doubles as the output column name
                input_layers = {layer: layer for layer in input_layers}

            if not isinstance(input_layers, dict):
                msg = ('Expecting "layers" to be a the name of a single input '
                       'layer, a list of input layers, or a dictionary '
                       'mapping desired input layers to desired output '
                       'column names, but recieved: {}'
                       .format(type(input_layers)))
                logger.error(msg)
                raise TypeError(msg)

        msg += 'Extracting {}'.format(input_layers)
        logger.info(msg)

        return input_layers

    def _preflight_multi_file(self):
        """Run simple multi-file exclusion checks."""
        lat_shape = self.h5.shapes['latitude']
        lon_shape = self.h5.shapes['longitude']
        for layer in self.layers:
            # tabular datasets do not share the 2D exclusion grid shape
            if layer not in ['assembly_areas', 'array_efficiency']:
                lshape = self.h5.shapes[layer]
                # drop a leading (e.g. time/band) axis for 3D layers
                lshape = lshape[1:] if len(lshape) > 2 else lshape
                if lshape != lon_shape or lshape != lat_shape:
                    msg = ('Shape of layer "{}" is {} which does not match '
                           'latitude and longitude shapes of {} and {}. '
                           'Check your exclusion file inputs: {}'
                           .format(layer, self.h5.shapes[layer],
                                   lat_shape, lon_shape, self.h5._h5_files))
                    logger.error(msg)
                    raise MultiFileExclusionError(msg)

        check_attrs = ('height', 'width', 'crs', 'transform')
        base_profile = {}
        for fp in self.h5_file:
            with ExclusionLayers(fp) as f:
                if not base_profile:
                    base_profile = f.profile
                else:
                    for attr in check_attrs:
                        if attr not in base_profile or attr not in f.profile:
                            msg = ('Multi-file exclusion inputs from {} '
                                   'dont have profiles with height, width, '
                                   'crs, and transform: {} and {}'
                                   .format(self.h5_file, base_profile,
                                           f.profile))
                            logger.error(msg)
                            raise MultiFileExclusionError(msg)

                        if base_profile[attr] != f.profile[attr]:
                            msg = ('Multi-file exclusion inputs from {} '
                                   'dont have matching "{}": {} and {}'
                                   .format(self.h5_file, attr,
                                           base_profile[attr],
                                           f.profile[attr]))
                            logger.error(msg)
                            raise MultiFileExclusionError(msg)

    def _reduce_tech_map(self, tm_dset='techmap_wtk', offshore_gids=None,
                         offshore_dset='dist_to_coast'):
        """
        Find the row and column indices that correspond to the centriod of
        each offshore gid in exclusions layers. If offshore gids are not
        provided the centroid of every gid is in techmap.

        Parameters
        ----------
        tm_dset : str, optional
            Dataset / layer name for wind toolkit techmap,
            by default 'techmap_wtk'
        offshore_gids : ndarray | list, optional
            Vector or list of offshore gids, by default None
        offshore_dset : str, optional
            Exclusions layer to differentiate between onshore and offshore
            pixels, by default 'dist_to_coast'

        Returns
        -------
        tech_map : pandas.DataFrame
            DataFrame mapping resource gid to exclusions latitude,
            longitude, row index, column index
        """
        tech_map = self[tm_dset]
        # exclude onshore pixels
        tech_map[self[offshore_dset] <= 0] = -1

        gids = np.unique(tech_map)
        if offshore_gids is None:
            offshore_gids = gids[gids != -1]
        else:
            # coerce to ndarray so boolean masking below works when a
            # plain list is passed (as the docstring allows)
            offshore_gids = np.asarray(offshore_gids)
            missing = ~np.isin(offshore_gids, gids)
            if np.any(missing):
                msg = ('The following offshore gids were requested but are '
                       'not availabe in {} and will not be extracted:\n{}'
                       .format(tm_dset, offshore_gids[missing]))
                logger.warning(msg)
                warn(msg)
                offshore_gids = offshore_gids[~missing]

        # Increment techmap and gids by 1 as center of mass cannot use an
        # index of 0
        tech_map += 1
        # non-mutating add so the caller's gid array is never modified
        offshore_gids = offshore_gids + 1
        tech_map = np.array(center_of_mass(tech_map, labels=tech_map,
                                           index=offshore_gids),
                            dtype=np.uint32)
        tech_map = pd.DataFrame(tech_map, columns=['row_idx', 'col_idx'])
        tech_map['gid'] = offshore_gids - 1

        return tech_map

    def _create_offshore_meta(self, offshore_sites, tm_dset='techmap_wtk'):
        """
        Create offshore meta from offshore sites and techmap

        Parameters
        ----------
        offshore_sites : str | pandas.DataFrame
            Path to .csv file with offshore sites or offshore meta, or path
            to a .h5 file to extract site meta from, or pre-extracted site
            meta DataFrame
        tm_dset : str, optional
            Dataset / layer name for wind toolkit techmap,
            by default 'techmap_wtk'

        Returns
        -------
        offshore_meta : pandas.DataFrame
            Offshore sites meta data including mapping to input layers

        Raises
        ------
        RuntimeError
            If the offshore sites input has no "gid" column
        """
        offshore_sites = self._parse_offshore_sites(offshore_sites)
        if 'gid' not in offshore_sites:
            msg = ('Cannot find offshore WIND Toolkit "gid"s of interest! '
                   'Offshore sites input must have a "gid" column: {}'
                   .format(list(offshore_sites.columns)))
            logger.error(msg)
            raise RuntimeError(msg)

        offshore_gids = offshore_sites['gid'].values.astype(np.int32)
        tech_map = self._reduce_tech_map(tm_dset=tm_dset,
                                         offshore_gids=offshore_gids)
        offshore_meta = pd.merge(offshore_sites, tech_map, on='gid',
                                 how='inner')

        return offshore_meta
[docs] def compute_assembly_dist(self, layer): """ Extract the distance from ports to assembly area and then compute the distance from nearest assembly area to sites Parameters ---------- layer : str Name of assembly area table/dataset Returns ------- out : dict Dictionary containing the distance from ports to assembly areas ('dist_p_to_a') and distance from nearest assembly area to sites ('dist_a_to_s') """ assembly_areas = pd.DataFrame(self.h5[layer]) assembly_areas = self.h5.df_str_decode(assembly_areas) lat_lon_cols = get_lat_lon_cols(assembly_areas) area_coords = assembly_areas[lat_lon_cols].values.astype(np.float32) # pylint: disable = not-callable tree = cKDTree(area_coords) site_lat_lons = self.lat_lons _, pos = tree.query(site_lat_lons) assert len(pos) == len(site_lat_lons) out = {'dist_p_to_a': np.zeros(len(site_lat_lons), dtype=np.float32), 'dist_a_to_s': np.zeros(len(site_lat_lons), dtype=np.float32)} for i, area in assembly_areas.iterrows(): logger.debug("Computing distance from assembly area {} to " "all offshore sites".format(area)) a_pos = np.where(pos == i)[0] # extract distance from ports to assembly areas out['dist_p_to_a'][a_pos] = area['dist_p_to_a'] # compute distance from assembly areas to sites area_coords = area[lat_lon_cols].values.astype(np.float32) area_coords = np.expand_dims(area_coords, 0) out['dist_a_to_s'][a_pos] = coordinate_distance( area_coords, site_lat_lons[a_pos]) return out
[docs] def extract_array_efficiency(self, layer): """ Extract array efficiency Parameters ---------- layer : str Name of array efficiency table/dataset Returns ------- aeff : ndarray Vector of array efficiency 'aeff' values for desired offshore sites """ aeff = pd.DataFrame(self.h5[layer]) aeff = self.h5.df_str_decode(aeff) lat_lon_cols = get_lat_lon_cols(aeff) aeff_coords = aeff[lat_lon_cols].values.astype(np.float32) # pylint: disable = not-callable tree = cKDTree(aeff_coords) site_lat_lons = self.lat_lons _, pos = tree.query(site_lat_lons) assert len(pos) == len(site_lat_lons) aeff = aeff['aeff'].values.astype(np.float32) return aeff[pos]
[docs] def extract_input_layer(self, layer): """ Extract input data for desired layer Parameters ---------- layer : str Desired input layer Returns ------- data : ndarray Input layer data for desired offshore sites """ data = self[layer, self.row_ids, self.column_ids] return data
[docs] def get_offshore_inputs(self, input_layers=None, downtime_tolerance=0.01): """ Extract data for the desired layers Parameters ---------- input_layers : str | list | dict Input layer, list of input layers, to extract, or dictionary mapping the input layers to extract to the column names to save them under, by default None downtime_tolerance : float Amount to shift weather downtime layer by so they are > 0 and/or < 1, by default 0.01 Returns ------- out : pandas.DataFrame Updated meta data table with desired layers """ input_layers = self._parse_input_layers(input_layers=input_layers) out = self.meta.copy() for layer, col in input_layers.items(): if layer not in self: msg = ("{} is not a valid offshore input layers, please " "choice one of: {}".format(layer, self.layers)) logger.error(msg) raise KeyError(msg) if layer.startswith('assembly'): for col, data in self.compute_assembly_dist(layer).items(): out[col] = data elif layer.startswith('array'): out[col] = self.extract_array_efficiency(layer) else: layer_out = self.extract_input_layer(layer) if layer == 'bathymetry': layer_out = np.abs(layer_out) elif layer == 'dist_to_coast': layer_out /= 1000 elif layer.startswith('weather_downtime'): layer_out[layer_out <= 0] = downtime_tolerance if 'wave_height' not in layer: layer_out[layer_out >= 1] = 1 - downtime_tolerance out[col] = layer_out return out
[docs] @classmethod def extract(cls, inputs_fpath, offshore_sites, tm_dset='techmap_wtk', input_layers=None, downtime_tolerance=0.01, out_fpath=None): """ Extract data from desired input layers for desired offshore sites Parameters ---------- inputs_fpath : str Path to offshore inputs .h5 file offshore_sites : str | list | tuple | ndarray |pandas.DataFrame - Path to .csv|.json file with offshore sites meta data - Path to a WIND Toolkit .h5 file to extact site meta from - List, tuple, or vector of offshore gids - Pre-extracted site meta DataFrame tm_dset : str, optional Dataset / layer name for wind toolkit techmap, by default 'techmap_wtk' input_layers : str | list | dict Input layer, list of input layers, to extract, or dictionary mapping the input layers to extract to the column names to save them under, by default None downtime_tolerance : float Amount to shift weather downtime layer by so they are > 0 and/or < 1, by default 0.01 out_fpath : str, optional Output .csv path to save offshore inputs too, by default None Returns ------- out : pandas.DataFrame Updated meta data table with desired layers """ with cls(inputs_fpath, offshore_sites, tm_dset=tm_dset) as off_ipt: out = off_ipt.get_offshore_inputs( input_layers=input_layers, downtime_tolerance=downtime_tolerance) if out_fpath: out.to_csv(out_fpath, index=False) return out