Source code for reV.hybrids.hybrids

# -*- coding: utf-8 -*-
"""reV Hybridization module.

@author: ppinchuk
"""

import logging
import re
from collections import namedtuple
from string import ascii_letters
from warnings import warn

import numpy as np
import pandas as pd
from rex.resource import Resource
from rex.utilities.utilities import to_records_array

from reV.handlers.outputs import Outputs
from reV.hybrids.hybrid_methods import HYBRID_METHODS
from reV.utilities import SupplyCurveField
from reV.utilities.exceptions import (
    FileInputError,
    InputError,
    InputWarning,
    OutputWarning,
)

logger = logging.getLogger(__name__)

MERGE_COLUMN = SupplyCurveField.SC_POINT_GID
PROFILE_DSET_REGEX = 'rep_profiles_[0-9]+$'
SOLAR_PREFIX = 'solar_'
WIND_PREFIX = 'wind_'
NON_DUPLICATE_COLS = {
    SupplyCurveField.LATITUDE, SupplyCurveField.LONGITUDE,
    SupplyCurveField.COUNTRY, SupplyCurveField.STATE, SupplyCurveField.COUNTY,
    SupplyCurveField.ELEVATION, SupplyCurveField.TIMEZONE,
    SupplyCurveField.SC_POINT_GID, SupplyCurveField.SC_ROW_IND,
    SupplyCurveField.SC_COL_IND
}
HYBRIDS_GID_COL = "gid"
DEFAULT_FILL_VALUES = {f'solar_{SupplyCurveField.CAPACITY_AC_MW}': 0,
                       f'wind_{SupplyCurveField.CAPACITY_AC_MW}': 0,
                       f'solar_{SupplyCurveField.MEAN_CF_AC}': 0,
                       f'wind_{SupplyCurveField.MEAN_CF_AC}': 0}
OUTPUT_PROFILE_NAMES = ['hybrid_profile',
                        'hybrid_solar_profile',
                        'hybrid_wind_profile']
RatioColumns = namedtuple('RatioColumns', ['num', 'denom', 'fixed'],
                          defaults=(None, None, None))


[docs]class ColNameFormatter: """Column name formatting helper class.""" ALLOWED = set(ascii_letters)
[docs] @classmethod def fmt(cls, n): """Format an input column name to remove excess chars and whitespace. This method should help facilitate the merging of column names between two DataFrames. Parameters ---------- n : str Input column name. Returns ------- str The column name with all characters except ascii stripped and all lowercase. """ return "".join(c for c in n if c in cls.ALLOWED).lower()
[docs]class HybridsData: """Hybrids input data container.""" def __init__(self, solar_fpath, wind_fpath): """ Parameters ---------- solar_fpath : str Filepath to rep profile output file to extract solar profiles and summaries from. wind_fpath : str Filepath to rep profile output file to extract wind profiles and summaries from. """ self.solar_fpath = solar_fpath self.wind_fpath = wind_fpath self.profile_dset_names = [] self.merge_col_overlap_values = set() self._solar_meta = None self._wind_meta = None self._solar_time_index = None self._wind_time_index = None self._hybrid_time_index = None self.__profile_reg_check = re.compile(PROFILE_DSET_REGEX) self.__solar_cols = self.solar_meta.columns.map(ColNameFormatter.fmt) self.__wind_cols = self.wind_meta.columns.map(ColNameFormatter.fmt) @property def solar_meta(self): """Summary for the solar representative profiles. Returns ------- solar_meta : pd.DataFrame Summary for the solar representative profiles. """ if self._solar_meta is None: with Resource(self.solar_fpath) as res: self._solar_meta = res.meta return self._solar_meta @property def wind_meta(self): """Summary for the wind representative profiles. Returns ------- wind_meta : pd.DataFrame Summary for the wind representative profiles. """ if self._wind_meta is None: with Resource(self.wind_fpath) as res: self._wind_meta = res.meta return self._wind_meta @property def solar_time_index(self): """Get the time index for the solar rep profiles. Returns ------- solar_time_index : pd.datetimeindex Time index sourced from the solar reV gen file. """ if self._solar_time_index is None: with Resource(self.solar_fpath) as res: self._solar_time_index = res.time_index return self._solar_time_index @property def wind_time_index(self): """Get the time index for the wind rep profiles. Returns ------- wind_time_index : pd.datetimeindex Time index sourced from the wind reV gen file. """ if self._wind_time_index is None: with Resource(self.wind_fpath) as res: self._wind_time_index = res.time_index return self._wind_time_index @property def hybrid_time_index(self): """Get the time index for the hybrid rep profiles. Returns ------- hybrid_time_index : pd.datetimeindex Time index for the hybrid rep profiles. """ if self._hybrid_time_index is None: self._hybrid_time_index = self.solar_time_index.join( self.wind_time_index, how="inner" ) return self._hybrid_time_index
[docs] def contains_col(self, col_name): """Check if input column name exists in either meta data set. Parameters ---------- col_name : str Name of column to check for. Returns ------- bool Whether or not the column is found in either meta data set. """ fmt_name = ColNameFormatter.fmt(col_name) col_in_solar = fmt_name in self.__solar_cols col_in_wind = fmt_name in self.__wind_cols return col_in_solar or col_in_wind
[docs] def validate(self): """Validate the input data. This method checks for a minimum time index length, a unique profile, and unique merge column that overlaps between both data sets. """ self._validate_time_index() self._validate_num_profiles() self._validate_merge_col_exists() self._validate_unique_merge_col() self._validate_merge_col_overlaps()
def _validate_time_index(self): """Validate the hybrid time index to be of len >= 8760. Raises ------ FileInputError If len(time_index) < 8760 for the hybrid profile. """ if len(self.hybrid_time_index) < 8760: msg = ( "The length of the merged time index ({}) is less than " "8760. Please ensure that the input profiles have a " "time index that overlaps >= 8760 times." ) e = msg.format(len(self.hybrid_time_index)) logger.error(e) raise FileInputError(e) def _validate_num_profiles(self): """Validate the number of input profiles. Raises ------ FileInputError If # of rep_profiles > 1. """ for fp in [self.solar_fpath, self.wind_fpath]: with Resource(fp) as res: profile_dset_names = [ n for n in res.dsets if self.__profile_reg_check.match(n) ] if not profile_dset_names: msg = ( "Did not find any data sets matching the regex: " "{!r} in {!r}. Please ensure that the profile data " "exists and that the data set is named correctly." ) e = msg.format(PROFILE_DSET_REGEX, fp) logger.error(e) raise FileInputError(e) if len(profile_dset_names) > 1: msg = ("Found more than one profile in {!r}: {}. " "This module is not intended for hybridization of " "multiple representative profiles. Please re-run " "on a single aggregated profile.") e = msg.format(fp, profile_dset_names) logger.error(e) raise FileInputError(e) self.profile_dset_names += profile_dset_names def _validate_merge_col_exists(self): """Validate the existence of the merge column. Raises ------ FileInputError If merge column is missing from either the solar or the wind meta data. """ msg = ( "Cannot hybridize: merge column {!r} missing from the " "{} meta data! ({!r})" ) mc = ColNameFormatter.fmt(MERGE_COLUMN) for cols, fp, res in zip( [self.__solar_cols, self.__wind_cols], [self.solar_fpath, self.wind_fpath], ["solar", "wind"], ): if mc not in cols: e = msg.format(MERGE_COLUMN, res, fp) logger.error(e) raise FileInputError(e) def _validate_unique_merge_col(self): """Validate the existence of unique values in the merge column. Raises ------ FileInputError If merge column contains duplicate values in either the solar or the wind meta data. """ msg = ( "Duplicate {}s were found. This is likely due to resource " "class binning, which is not supported at this time. " "Please re-run supply curve aggregation without " "resource class binning and ensure there are no duplicate " "values in {!r}. File: {!r}" ) mc = ColNameFormatter.fmt(MERGE_COLUMN) for ds, cols, fp in zip( [self.solar_meta, self.wind_meta], [self.__solar_cols, self.__wind_cols], [self.solar_fpath, self.wind_fpath], ): merge_col = ds.columns[cols == mc].item() if not ds[merge_col].is_unique: e = msg.format(merge_col, merge_col, fp) logger.error(e) raise FileInputError(e) def _validate_merge_col_overlaps(self): """Validate the existence of overlap in the merge column values. Raises ------ FileInputError If merge column values do not overlap between the tow input files. """ mc = ColNameFormatter.fmt(MERGE_COLUMN) merge_col = self.solar_meta.columns[self.__solar_cols == mc].item() solar_vals = set(self.solar_meta[merge_col].values) merge_col = self.wind_meta.columns[self.__wind_cols == mc].item() wind_vals = set(self.wind_meta[merge_col].values) self.merge_col_overlap_values = solar_vals & wind_vals if not self.merge_col_overlap_values: msg = ( "No overlap detected in the values of {!r} across the " "input files. Please ensure that at least one of the " "{!r} values is the same for input files {!r} and {!r}" ) e = msg.format( merge_col, merge_col, self.solar_fpath, self.wind_fpath ) logger.error(e) raise FileInputError(e)
[docs]class MetaHybridizer: """Framework to handle hybridization of meta data.""" _INTERNAL_COL_PREFIX = "_h_internal" def __init__( self, data, allow_solar_only=False, allow_wind_only=False, fillna=None, limits=None, ratio_bounds=None, ratio="solar_capacity/wind_capacity", ): """ Parameters ---------- data : `HybridsData` Instance of `HybridsData` containing input data to hybridize. allow_solar_only : bool, optional Option to allow SC points with only solar capacity (no wind). By default, ``False``. allow_wind_only : bool, optional Option to allow SC points with only wind capacity (no solar), By default, ``False``. fillna : dict, optional Dictionary containing column_name, fill_value pairs representing any fill values that should be applied after merging the wind and solar meta. Note that column names will likely have to be prefixed with ``solar`` or ``wind``. By default, ``None``. limits : dict, optional Option to specify mapping (in the form of a dictionary) of {colum_name: max_value} representing the upper limit (maximum value) for the values of a column in the merged meta. For example, `limits={'solar_capacity': 100}` would limit all the values of the solar capacity in the merged meta to a maximum value of 100. This limit is applied *BEFORE* ratio calculations. The names of the columns should match the column names in the merged meta, so they are likely prefixed with ``solar`` or ``wind`. By default, ``None`` (no limits applied). ratio_bounds : tuple, optional Option to set ratio bounds (in two-tuple form) on the columns of the `ratio` input. For example, `ratio_bounds=(0.5, 1.5)` would adjust the values of both of the `ratio` columns such that their ratio is always between half and double (e.g., no value would be more than double the other). To specify a single ratio value, use the same value as the upper and lower bound. For example, `ratio_bounds=(1, 1)` would adjust the values of both of the `ratio` columns such that their ratio is always equal. By default, ``None`` (no limit on the ratio). ratio : str, optional Option to specify the columns used to calculate the ratio that is limited by the `ratio_bounds` input. This input is a string in the form "numerator_column_name/denominator_column_name". For example, `ratio='solar_capacity/wind_capacity'` would limit the ratio of the solar to wind capacities as specified by the `ratio_bounds` input. If `ratio_bounds` is ``None``, this input does nothing. The names of the columns should be prefixed with one of the prefixes defined as class variables. By default ``'solar_capacity/wind_capacity'``. """ self.data = data self._allow_solar_only = allow_solar_only self._allow_wind_only = allow_wind_only self._fillna = {**DEFAULT_FILL_VALUES, **(fillna or {})} self._limits = limits or {} self._ratio_bounds = ratio_bounds self._ratio = ratio self._hybrid_meta = None self.__hybrid_meta_cols = None self.__col_name_map = None self.__solar_rpi_n = "{}_solar_rpidx".format(self._INTERNAL_COL_PREFIX) self.__wind_rpi_n = "{}_wind_rpidx".format(self._INTERNAL_COL_PREFIX) @property def hybrid_meta(self): """Hybridized summary for the representative profiles. Returns ------- hybrid_meta : pd.DataFrame Summary for the hybridized representative profiles. At the very least, this has a column that the data was merged on. """ if self._hybrid_meta is None or self.__hybrid_meta_cols is None: return self._hybrid_meta return self._hybrid_meta[self.__hybrid_meta_cols]
[docs] def validate_input(self): """Validate the input parameters. This method validates that the input limit, fill, and ratio columns are formatted correctly. """ self._validate_limits_cols_prefixed() self._validate_fillna_cols_prefixed() self._validate_ratio_input()
def _validate_limits_cols_prefixed(self): """Ensure the limits columns are formatted correctly. This check is important because the limiting happens after the meta has been merged (so columns are already prefixed), but before the hybrid columns are computed. As a result, the limits columns _must_ have a valid prefix. Raises ------ InputError If limits columns are not prefixed correctly. """ for col in self._limits: self.__validate_col_prefix( col, (SOLAR_PREFIX, WIND_PREFIX), input_name="limits" ) @staticmethod def __validate_col_prefix(col, prefixes, input_name): """Validate the the col starts with the correct prefix.""" missing = [not col.startswith(p) for p in prefixes] if all(missing): msg = ( "Input {0} column {1!r} does not start with a valid " "prefix: {2!r}. Please ensure that the {0} column " "names specify the correct resource prefix." ) e = msg.format(input_name, col, prefixes) logger.error(e) raise InputError(e) def _validate_fillna_cols_prefixed(self): """Ensure the fillna columns are formatted correctly. This check is important because the fillna step happens after the meta has been merged (so columns are already prefixed), but before the hybrid columns are computed. As a result, the fillna columns _must_ have a valid prefix. Raises ------ InputError If fillna columns are not prefixed correctly. """ for col in self._fillna: self.__validate_col_prefix( col, (SOLAR_PREFIX, WIND_PREFIX), input_name="fillna" ) def _validate_ratio_input(self): """Validate the ratio input parameters. This method validates that the input ratio columns are formatted correctly and exist in the input data. It also verifies that the `ratio_bounds` is correctly formatted. """ if self._ratio_bounds is None: return self._validate_ratio_bounds() self._validate_ratio_type() self._validate_ratio_format() self._validate_ratio_cols_prefixed() self._validate_ratio_cols_exist() def _validate_ratio_bounds(self): """Ensure the ratio value is input correctly. Raises ------ InputError If ratio is not a len 2 container of floats. """ try: if len(self._ratio_bounds) != 2: msg = ( "Length of input for ratio_bounds is {} - but is " "required to be of length 2. Please make sure this " "input is a len 2 container of floats. If you would " "like to specify a single ratio value, use the same " "float for both limits (i.e. ratio_bounds=(1, 1))." ) e = msg.format(len(self._ratio_bounds)) logger.error(e) raise InputError(e) except TypeError: msg = ( "Input for ratio_bounds not understood: {!r}. " "Please make sure this value is a len 2 container " "of floats." ) e = msg.format(self._ratio_bounds) logger.error(e) raise InputError(e) from None def _validate_ratio_type(self): """Ensure that the ratio input is a string. Raises ------ InputError If `ratio` is not a string. """ if not isinstance(self._ratio, str): msg = ( "Ratio input type {} not understood. Please make sure " "the ratio input is a string in the form " "'numerator_column_name/denominator_column_name'. Ratio " "input: {!r}" ) e = msg.format(type(self._ratio), self._ratio) logger.error(e) raise InputError(e) def _validate_ratio_format(self): """Validate that the ratio input format is correct and can be parsed. Raises ------ InputError If the '/' character is missing or of there are too many '/' characters. """ if "/" not in self._ratio: msg = ( "Ratio input {} does not contain the '/' character. " "Please make sure the ratio input is a string in the form " "'numerator_column_name/denominator_column_name'" ) e = msg.format(self._ratio) logger.error(e) raise InputError(e) if len(self._ratio_cols) != 2: msg = ( "Ratio input {} contains too many '/' characters. Please " "make sure the ratio input is a string in the form " "'numerator_column_name/denominator_column_name'." ) e = msg.format(self._ratio) logger.error(e) raise InputError(e) def _validate_ratio_cols_prefixed(self): """Ensure the ratio columns are formatted correctly. This check is important because the ratio limit step happens after the meta has been merged (so columns are already prefixed), but before the hybrid columns are computed. As a result, the ratio columns _must_ have a valid prefix. Raises ------ InputError If ratio columns are not prefixed correctly. """ for col in self._ratio_cols: self.__validate_col_prefix( col, (SOLAR_PREFIX, WIND_PREFIX), input_name="ratios" ) def _validate_ratio_cols_exist(self): """Ensure the ratio columns exist if a ratio is specified. Raises ------ FileInputError If ratio columns are not found in the meta data. """ for col in self._ratio_cols: no_prefix_name = "_".join(col.split("_")[1:]) if not self.data.contains_col(no_prefix_name): msg = ( "Input ratios column {!r} not found in either meta " "data! Please check the input files {!r} and {!r}" ) e = msg.format( no_prefix_name, self.data.solar_fpath, self.data.wind_fpath ) logger.error(e) raise FileInputError(e) @property def _ratio_cols(self): """Get the ratio columns from the ratio input.""" if self._ratio is None: return [] return self._ratio.strip().split("/")
[docs] def hybridize(self): """Combine the solar and wind metas and run hybridize methods.""" self._format_meta_pre_merge() self._merge_solar_wind_meta() self._verify_lat_lon_match_post_merge() self._format_meta_post_merge() self._fillna_meta_cols() self._apply_limits() self._limit_by_ratio() self._add_hybrid_cols() self._sort_hybrid_meta_cols()
def _format_meta_pre_merge(self): """Prepare solar and wind meta for merging.""" self.__col_name_map = { ColNameFormatter.fmt(c): c for c in self.data.solar_meta.columns.values } self._rename_cols(self.data.solar_meta, prefix=SOLAR_PREFIX) self._rename_cols(self.data.wind_meta, prefix=WIND_PREFIX) self._save_rep_prof_index_internally() @staticmethod def _rename_cols(df, prefix): """Replace column names with the ColNameFormatter.fmt is needed.""" df.columns = [ ColNameFormatter.fmt(col_name) if col_name in NON_DUPLICATE_COLS else "{}{}".format(prefix, col_name) for col_name in df.columns.values ] def _save_rep_prof_index_internally(self): """Save rep profiles index in hybrid meta for access later.""" self.data.solar_meta[self.__solar_rpi_n] = self.data.solar_meta.index self.data.wind_meta[self.__wind_rpi_n] = self.data.wind_meta.index def _merge_solar_wind_meta(self): """Merge the wind and solar meta DataFrames.""" self._hybrid_meta = self.data.solar_meta.merge( self.data.wind_meta, on=ColNameFormatter.fmt(MERGE_COLUMN), suffixes=[None, "_x"], how=self._merge_type(), ) def _merge_type(self): """Determine the type of merge to use for meta based on user input.""" if self._allow_solar_only and self._allow_wind_only: return 'outer' if self._allow_solar_only and not self._allow_wind_only: return 'left' if not self._allow_solar_only and self._allow_wind_only: return 'right' return 'inner' def _format_meta_post_merge(self): """Format hybrid meta after merging.""" duplicate_cols = [n for n in self._hybrid_meta.columns if "_x" in n] self._propagate_duplicate_cols(duplicate_cols) self._drop_cols(duplicate_cols) self._hybrid_meta.rename(self.__col_name_map, inplace=True, axis=1) self._hybrid_meta.index.name = HYBRIDS_GID_COL def _propagate_duplicate_cols(self, duplicate_cols): """Fill missing column values from outer merge.""" for duplicate in duplicate_cols: no_suffix = "_".join(duplicate.split("_")[:-1]) null_idx = self._hybrid_meta[no_suffix].isnull() non_null_vals = self._hybrid_meta.loc[null_idx, duplicate].values self._hybrid_meta.loc[null_idx, no_suffix] = non_null_vals def _drop_cols(self, duplicate_cols): """Drop any remaining duplicate and 'HYBRIDS_GID_COL' columns.""" self._hybrid_meta.drop( duplicate_cols + [HYBRIDS_GID_COL], axis=1, inplace=True, errors="ignore", ) def _sort_hybrid_meta_cols(self): """Sort the columns of the hybrid meta.""" self.__hybrid_meta_cols = sorted( [ c for c in self._hybrid_meta.columns if not c.startswith(self._INTERNAL_COL_PREFIX) ], key=self._column_sorting_key, ) def _column_sorting_key(self, c): """Helper function to sort hybrid meta columns.""" first_index = 0 if c.startswith("hybrid"): first_index = 1 elif c.startswith("solar"): first_index = 2 elif c.startswith("wind"): first_index = 3 elif c == MERGE_COLUMN: first_index = -1 return first_index, self._hybrid_meta.columns.get_loc(c) def _verify_lat_lon_match_post_merge(self): """Verify that all the lat/lon values match post merge.""" lat = self._verify_col_match_post_merge( col_name=ColNameFormatter.fmt(SupplyCurveField.LATITUDE) ) lon = self._verify_col_match_post_merge( col_name=ColNameFormatter.fmt(SupplyCurveField.LONGITUDE) ) if not lat or not lon: msg = ( "Detected mismatched coordinate values (latitude or " "longitude) post merge. Please ensure that all matching " "values of {!r} correspond to the same values of latitude " "and longitude across the input files {!r} and {!r}" ) e = msg.format( MERGE_COLUMN, self.data.solar_fpath, self.data.wind_fpath ) logger.error(e) raise FileInputError(e) def _verify_col_match_post_merge(self, col_name): """Verify that all (non-null) values in a column match post merge.""" c1, c2 = col_name, '{}_x'.format(col_name) if c1 in self._hybrid_meta.columns and c2 in self._hybrid_meta.columns: compare_df = self._hybrid_meta[ (self._hybrid_meta[c1].notnull()) & (self._hybrid_meta[c2].notnull()) ] return np.allclose(compare_df[c1], compare_df[c2]) return True def _fillna_meta_cols(self): """Fill N/A values as specified by user (and internals).""" for col_name, fill_value in self._fillna.items(): if col_name in self._hybrid_meta.columns: self._hybrid_meta[col_name].fillna(fill_value, inplace=True) else: self.__warn_missing_col(col_name, action="fill") self._hybrid_meta[self.__solar_rpi_n].fillna(-1, inplace=True) self._hybrid_meta[self.__wind_rpi_n].fillna(-1, inplace=True) @staticmethod def __warn_missing_col(col_name, action): """Warn that a column the user request an action for is missing.""" msg = ("Skipping {} values for {!r}: Unable to find column " "in hybrid meta. Did you forget to prefix with " "{!r} or {!r}? ") w = msg.format(action, col_name, SOLAR_PREFIX, WIND_PREFIX) logger.warning(w) warn(w, InputWarning) def _apply_limits(self): """Clip column values as specified by user.""" for col_name, max_value in self._limits.items(): if col_name in self._hybrid_meta.columns: self._hybrid_meta[col_name].clip(upper=max_value, inplace=True) else: self.__warn_missing_col(col_name, action="limit") def _limit_by_ratio(self): """Limit the given pair of ratio columns based on input ratio.""" if self._ratio_bounds is None: return numerator_col, denominator_col = self._ratio_cols min_ratio, max_ratio = sorted(self._ratio_bounds) overlap_idx = self._hybrid_meta[MERGE_COLUMN].isin( self.data.merge_col_overlap_values ) numerator_vals = self._hybrid_meta[numerator_col].copy() denominator_vals = self._hybrid_meta[denominator_col].copy() ratios = ( numerator_vals.loc[overlap_idx] / denominator_vals.loc[overlap_idx] ) ratio_too_low = (ratios < min_ratio) & overlap_idx ratio_too_high = (ratios > max_ratio) & overlap_idx numerator_vals.loc[ratio_too_high] = ( denominator_vals.loc[ratio_too_high].values * max_ratio ) denominator_vals.loc[ratio_too_low] = ( numerator_vals.loc[ratio_too_low].values / min_ratio ) h_num_name = "hybrid_{}".format(numerator_col) h_denom_name = "hybrid_{}".format(denominator_col) self._hybrid_meta[h_num_name] = numerator_vals.values self._hybrid_meta[h_denom_name] = denominator_vals.values def _add_hybrid_cols(self): """Add new hybrid columns using registered hybrid methods.""" for new_col_name, method in HYBRID_METHODS.items(): out = method(self) if out is not None: try: self._hybrid_meta[new_col_name] = out except ValueError as e: msg = ( "Unable to add {!r} column to hybrid meta. The " "following exception was raised when adding " "the data output by '{}': {!r}." ) w = msg.format(new_col_name, method.__name__, e) logger.warning(w) warn(w, OutputWarning) @property def solar_profile_indices_map(self): """Map hybrid to solar rep indices. Returns ------- hybrid_indices : np.ndarray Index values corresponding to hybrid rep profiles. solar_indices : np.ndarray Index values of the solar rep profiles corresponding to the hybrid rep profile indices. """ if self._hybrid_meta is None: return np.array([]), np.array([]) idxs = self._hybrid_meta[self.__solar_rpi_n].astype(int) idxs = idxs[idxs >= 0] return idxs.index.values, idxs.values @property def wind_profile_indices_map(self): """Map hybrid to wind rep indices. Returns ------- hybrid_indices : np.ndarray Index values corresponding to hybrid rep profiles. wind_indices : np.ndarray Index values of the wind rep profiles corresponding to the hybrid rep profile indices. """ if self._hybrid_meta is None: return np.array([]), np.array([]) idxs = self._hybrid_meta[self.__wind_rpi_n].astype(int) idxs = idxs[idxs >= 0] return idxs.index.values, idxs.values
[docs]class Hybridization: """Hybridization""" def __init__( self, solar_fpath, wind_fpath, allow_solar_only=False, allow_wind_only=False, fillna=None, limits=None, ratio_bounds=None, ratio="solar_capacity/wind_capacity", ): """Framework to handle hybridization of SC and corresponding profiles. ``reV`` hybrids computes a "hybrid" wind and solar supply curve, where each supply curve point contains some wind and some solar capacity. Various ratio limits on wind-to-solar farm properties (e.g. wind-to-solar capacity) can be applied during the hybridization process. Hybrid generation profiles are also computed during this process. Parameters ---------- solar_fpath : str Filepath to rep profile output file to extract solar profiles and summaries from. wind_fpath : str Filepath to rep profile output file to extract wind profiles and summaries from. allow_solar_only : bool, optional Option to allow SC points with only solar capacity (no wind). By default, ``False``. allow_wind_only : bool, optional Option to allow SC points with only wind capacity (no solar). By default, ``False``. fillna : dict, optional Dictionary containing column_name, fill_value pairs representing any fill values that should be applied after merging the wind and solar meta. Note that column names will likely have to be prefixed with ``solar`` or ``wind``. By default ``None``. limits : dict, optional Option to specify mapping (in the form of a dictionary) of {colum_name: max_value} representing the upper limit (maximum value) for the values of a column in the merged meta. For example, ``limits={'solar_capacity': 100}`` would limit all the values of the solar capacity in the merged meta to a maximum value of 100. This limit is applied *BEFORE* ratio calculations. The names of the columns should match the column names in the merged meta, so they are likely prefixed with ``solar`` or ``wind``. By default, ``None`` (no limits applied). ratio_bounds : tuple, optional Option to set ratio bounds (in two-tuple form) on the columns of the ``ratio`` input. For example, ``ratio_bounds=(0.5, 1.5)`` would adjust the values of both of the ``ratio`` columns such that their ratio is always between half and double (e.g., no value would be more than double the other). To specify a single ratio value, use the same value as the upper and lower bound. For example, ``ratio_bounds=(1, 1)`` would adjust the values of both of the ``ratio`` columns such that their ratio is always equal. By default, ``None`` (no limit on the ratio). ratio : str, optional Option to specify the columns used to calculate the ratio that is limited by the `ratio_bounds` input. This input is a string in the form "{numerator_column}/{denominator_column}". For example, ``ratio='solar_capacity/wind_capacity'`` would limit the ratio of the solar to wind capacities as specified by the ``ratio_bounds`` input. If ``ratio_bounds`` is None, this input does nothing. The names of the columns should be prefixed with one of the prefixes defined as class variables. By default ``'solar_capacity/wind_capacity'``. """ logger.info( "Running hybridization of rep profiles with solar_fpath: " '"{}"'.format(solar_fpath) ) logger.info( "Running hybridization of rep profiles with solar_fpath: " '"{}"'.format(wind_fpath) ) logger.info( "Running hybridization of rep profiles with " 'allow_solar_only: "{}"'.format(allow_solar_only) ) logger.info( "Running hybridization of rep profiles with " 'allow_wind_only: "{}"'.format(allow_wind_only) ) logger.info( 'Running hybridization of rep profiles with fillna: "{}"'.format( fillna ) ) logger.info( 'Running hybridization of rep profiles with limits: "{}"'.format( limits ) ) logger.info( "Running hybridization of rep profiles with ratio_bounds: " '"{}"'.format(ratio_bounds) ) logger.info( 'Running hybridization of rep profiles with ratio: "{}"'.format( ratio ) ) self.data = HybridsData(solar_fpath, wind_fpath) self.meta_hybridizer = MetaHybridizer( data=self.data, allow_solar_only=allow_solar_only, allow_wind_only=allow_wind_only, fillna=fillna, limits=limits, ratio_bounds=ratio_bounds, ratio=ratio, ) self._profiles = None self._validate_input() def _validate_input(self): """Validate the user input and input files.""" self.data.validate() self.meta_hybridizer.validate_input() @property def solar_meta(self): """Summary for the solar representative profiles. Returns ------- solar_meta : pd.DataFrame Summary for the solar representative profiles. """ return self.data.solar_meta @property def wind_meta(self): """Summary for the wind representative profiles. Returns ------- wind_meta : pd.DataFrame Summary for the wind representative profiles. """ return self.data.wind_meta @property def hybrid_meta(self): """Hybridized summary for the representative profiles. Returns ------- hybrid_meta : pd.DataFrame Summary for the hybridized representative profiles. At the very least, this has a column that the data was merged on. """ return self.meta_hybridizer.hybrid_meta @property def solar_time_index(self): """Get the time index for the solar rep profiles. Returns ------- solar_time_index : pd.Datetimeindex Time index sourced from the solar rep profile file. """ return self.data.solar_time_index @property def wind_time_index(self): """Get the time index for the wind rep profiles. Returns ------- wind_time_index : pd.Datetimeindex Time index sourced from the wind rep profile file. """ return self.data.wind_time_index @property def hybrid_time_index(self): """Get the time index for the hybrid rep profiles. Returns ------- hybrid_time_index : pd.Datetimeindex Time index for the hybrid rep profiles. """ return self.data.hybrid_time_index @property def profiles(self): """Get the arrays of the hybridized representative profiles. Returns ------- profiles : dict Dict of hybridized representative profiles. """ return self._profiles
[docs] def run(self, fout=None, save_hybrid_meta=True): """Run hybridization of profiles and save to disc. Parameters ---------- fout : str, optional Filepath to output HDF5 file. If ``None``, output data are not written to a file. By default, ``None``. save_hybrid_meta : bool, optional Flag to save hybrid SC table to hybrid rep profile output. By default, ``True``. Returns ------- str Filepath to output h5 file. """ self.run_meta() self.run_profiles() if fout is not None: self.save_profiles(fout, save_hybrid_meta=save_hybrid_meta) logger.info("Hybridization of representative profiles complete!") return fout
[docs] def run_meta(self): """Compute the hybridized profiles. Returns ------- `Hybridization` Instance of Hybridization object (itself) containing the hybridized meta as an attribute. """ self.meta_hybridizer.hybridize() return self
[docs] def run_profiles(self): """Compute all hybridized profiles. Returns ------- `Hybridization` Instance of Hybridization object (itself) containing the hybridized profiles as attributes. """ logger.info("Running hybrid profile calculations.") self._init_profiles() self._compute_hybridized_profile_components() self._compute_hybridized_profiles_from_components() logger.info("Profile hybridization complete.") return self
def _init_profiles(self): """Initialize the output rep profiles attribute.""" self._profiles = { k: np.zeros( (len(self.hybrid_time_index), len(self.hybrid_meta)), dtype=np.float32, ) for k in OUTPUT_PROFILE_NAMES } def _compute_hybridized_profile_components(self): """Compute the resource components of the hybridized profiles.""" for params in self.__rep_profile_hybridization_params: col, (hybrid_idxs, solar_idxs), fpath, p_name, dset_name = params capacity = self.hybrid_meta.loc[hybrid_idxs, col].values with Resource(fpath) as res: data = res[ dset_name, res.time_index.isin(self.hybrid_time_index) ] self._profiles[p_name][:, hybrid_idxs] = ( data[:, solar_idxs] * capacity ) @property def __rep_profile_hybridization_params(self): """Zip the rep profile hybridization parameters.""" cap_col_names = [f"hybrid_solar_{SupplyCurveField.CAPACITY_AC_MW}", f"hybrid_wind_{SupplyCurveField.CAPACITY_AC_MW}"] idx_maps = [ self.meta_hybridizer.solar_profile_indices_map, self.meta_hybridizer.wind_profile_indices_map, ] fpaths = [self.data.solar_fpath, self.data.wind_fpath] zipped = zip( cap_col_names, idx_maps, fpaths, OUTPUT_PROFILE_NAMES[1:], self.data.profile_dset_names, ) return zipped def _compute_hybridized_profiles_from_components(self): """Compute the hybridized profiles from the resource components.""" hp_name, sp_name, wp_name = OUTPUT_PROFILE_NAMES self._profiles[hp_name] = ( self._profiles[sp_name] + self._profiles[wp_name] ) def _init_h5_out(self, fout, save_hybrid_meta=True): """Initialize an output h5 file for hybrid profiles. Parameters ---------- fout : str Filepath to output h5 file. save_hybrid_meta : bool Flag to save hybrid SC table to hybrid rep profile output. """ dsets = [] shapes = {} attrs = {} chunks = {} dtypes = {} for dset, data in self.profiles.items(): dsets.append(dset) shapes[dset] = data.shape chunks[dset] = None attrs[dset] = {Outputs.UNIT_ATTR: "MW"} dtypes[dset] = data.dtype meta = self.hybrid_meta.copy() for c in meta.columns: try: meta[c] = pd.to_numeric(meta[c]) except ValueError: pass Outputs.init_h5( fout, dsets, shapes, attrs, chunks, dtypes, meta, time_index=self.hybrid_time_index, ) if save_hybrid_meta: with Outputs(fout, mode="a") as out: hybrid_meta = to_records_array(self.hybrid_meta) out._create_dset( "meta", hybrid_meta.shape, hybrid_meta.dtype, data=hybrid_meta, ) def _write_h5_out(self, fout, save_hybrid_meta=True): """Write hybrid profiles and meta to an output file. Parameters ---------- fout : str Filepath to output h5 file. save_hybrid_meta : bool Flag to save hybrid SC table to hybrid rep profile output. """ with Outputs(fout, mode="a") as out: if "meta" in out.datasets and save_hybrid_meta: hybrid_meta = to_records_array(self.hybrid_meta) out["meta"] = hybrid_meta for dset, data in self.profiles.items(): out[dset] = data
[docs] def save_profiles(self, fout, save_hybrid_meta=True): """Initialize fout and save profiles. Parameters ---------- fout : str Filepath to output h5 file. save_hybrid_meta : bool Flag to save hybrid SC table to hybrid rep profile output. """ self._init_h5_out(fout, save_hybrid_meta=save_hybrid_meta) self._write_h5_out(fout, save_hybrid_meta=save_hybrid_meta)