"""Module for computing the total uplift in energy production."""
# This is a work in progress as we try to synthesize ideas from the
# table based methods and energy ratios back into one thing,
# some ideas we're incorporating:
# Conversion from polars to pandas
# Constructing tables (but now including tables of ratios)
# Keeping track of frequencies in matching-sized tables
import warnings
import numpy as np
import polars as pl
import flasc.utilities.energy_ratio_utilities as util
from flasc.analysis.analysis_input import AnalysisInput
from flasc.data_processing.dataframe_manipulations import df_reduce_precision
from flasc.logging_manager import LoggingManager
logger_manager = LoggingManager() # Module-level LoggingManager instance
logger = logger_manager.logger # Shared logger used for messages emitted by this module
# Internal version, returns a tuple of (results dict, polars dataframe of bin weights)
[docs]
def _total_uplift_power_ratio_single(
    df_,
    df_names,
    ref_cols,
    test_cols,
    wd_cols,
    ws_cols,
    wd_step=2.0,
    wd_min=0.0,
    wd_max=360.0,
    ws_step=1.0,
    ws_min=0.0,
    ws_max=50.0,
    bin_cols_in=None,
    weight_by="min",  # min, sum
    df_freq_pl=None,
    wd_bin_overlap_radius=0.0,
    uplift_pairs=None,
    uplift_names=None,
    remove_all_nulls=False,
):
    """Compute the total change in energy production between two sets of turbines.

    This is the single-pass computation (no bootstrapping), so the lower and
    upper bound entries of the result are always None.

    Args:
        df_ (pl.DataFrame): A dataframe containing the data to use in the calculation.
        df_names (list): A list of names to give to the dataframes.
        ref_cols (list[str]): A list of columns to use as the reference turbines
        test_cols (list[str]): A list of columns to use as the test turbines
        wd_cols (list[str]): A list of columns to derive the wind directions from
        ws_cols (list[str]): A list of columns to derive the wind speeds from
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str] or None): A list of column names
            to use for the wind speed and wind direction bins. Any "df_name"
            entry is ignored. If None, defaults to ["wd_bin", "ws_bin"].
        weight_by (str): How to weight the energy ratio, options are 'min', or 'sum'. 'min' means
            the minimum count across the dataframes
            is used to weight the energy ratio. 'sum' means the sum of the counts
            across the dataframes is used to weight the energy ratio. Defaults to 'min'.
        df_freq_pl (pl.Dataframe): Polars dataframe of pre-provided per bin weights
        wd_bin_overlap_radius (float): The distance
            in degrees one wd bin overlaps into the next, must be
            less or equal to half the value of wd_step
        uplift_pairs: (list[tuple] or None): List of pairs of df_names to compute uplifts for.
            Each element of the list should be a tuple (or list) of length 2, where the first
            element will be the base case in the uplift calculation and the second element
            will be the test case in the uplift calculation. If None or empty, no uplifts
            are computed.
        uplift_names: (list[str] or None): Names for the uplift results, following the order
            of the pairs specified in uplift_pairs. Pairs and names are matched up
            positionally, so a pair without a corresponding name is not computed. None is
            treated as an empty list.
        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be
            available. If False, a minimum one data point from
            ref_cols, test_cols, wd_cols, and ws_cols
            must be available to compute the bin. Defaults to False.

    Returns:
        A tuple (dict, pl.DataFrame): containing the results of the computation
            and the frequency table. The dictionary contains the uplift results indexed by the
            uplift_names; each entry is a dict with keys energy_uplift_ctr,
            energy_uplift_lb, energy_uplift_ub, energy_uplift_ctr_pc,
            energy_uplift_lb_pc and energy_uplift_ub_pc. The dataframe contains the
            weights for each wind direction and wind speed bin.

    Raises:
        RuntimeError: If no rows remain after the null filtering step.
    """
    # Resolve None here instead of using mutable lists as default arguments
    if bin_cols_in is None:
        bin_cols_in = ["wd_bin", "ws_bin"]
    if uplift_pairs is None:
        uplift_pairs = []
    if uplift_names is None:
        uplift_names = []

    # Get the number of dataframes
    num_df = len(df_names)

    # "df_name" is handled separately from the ws/wd bin columns below
    bin_cols_without_df_name = [c for c in bin_cols_in if c != "df_name"]

    # Filter df_ to remove null values
    null_filter = util.filter_all_nulls if remove_all_nulls else util.filter_any_nulls
    df_ = null_filter(df_, ref_cols, test_cols, ws_cols, wd_cols)
    if len(df_) == 0:
        raise RuntimeError("After removing nulls, no data remains for computation.")

    # Apply binning to dataframe and aggregate bins
    df_ = util.bin_and_group_dataframe(
        df_,
        ref_cols,
        test_cols,
        wd_cols,
        ws_cols,
        wd_step,
        wd_min,
        wd_max,
        ws_step,
        ws_min,
        ws_max,
        wd_bin_overlap_radius,
        remove_all_nulls,
        bin_cols_without_df_name,
        num_df,
    )

    # Determine the weighting of the ws/wd bins
    df_, df_freq_pl = util.add_bin_weights(df_, df_freq_pl, bin_cols_without_df_name, weight_by)

    # Compute the total uplift for each requested (base, test) pair
    total_uplift_result = {}
    for uplift_pair, uplift_name in zip(uplift_pairs, uplift_names):
        df_total = (
            df_.filter(pl.col("df_name").is_in(uplift_pair))
            # Per-row power ratio, and reference power scaled by the row count
            .with_columns(
                power_ratio=pl.col("pow_test") / pl.col("pow_ref"),
                weighted_pow_ref=pl.col("pow_ref") * pl.col("count"),
            )
            .with_columns(total_count_per_bin=pl.col("count").sum().over(bin_cols_without_df_name))
            .with_columns(
                weighted_pow_ref=pl.col("weighted_pow_ref") / pl.col("total_count_per_bin")
            )
            # After this step weighted_pow_ref is the count-weighted mean of
            # pow_ref over the rows of the pair that share a bin
            .with_columns(
                weighted_pow_ref=pl.col("weighted_pow_ref").sum().over(bin_cols_without_df_name)
            )
            # One row per bin, with one power_ratio column per df_name
            .pivot(
                values=["power_ratio"],
                columns="df_name",
                index=bin_cols_without_df_name + ["weight", "weighted_pow_ref"],
                aggregate_function="first",
            )
            # Renorm the weight
            .with_columns(pl.col("weight") / pl.col("weight").sum())
            .with_columns(delta_power_ratio=pl.col(uplift_pair[1]) - pl.col(uplift_pair[0]))
            .with_columns(
                delta_aep=pl.col("weight")
                * pl.col("delta_power_ratio")
                * pl.col("weighted_pow_ref"),
                base_aep=pl.col("weight") * pl.col(uplift_pair[0]) * pl.col("weighted_pow_ref"),
            )
            # Column-wise sum collapses the per-bin table to a single row
            .sum()
        )

        # Extract each aggregate once and reuse it for both outputs
        delta_aep_sum = df_total.select("delta_aep").item()
        base_aep_sum = df_total.select("base_aep").item()

        # 8760 is the number of hours in a 365-day year; presumably this scales
        # the weighted power delta to an annual energy figure -- TODO confirm units
        delta_aep = 8760 * delta_aep_sum
        percent_delta_aep = 100 * (delta_aep_sum / base_aep_sum)

        if np.isnan(delta_aep):
            if ws_min < 5.0:
                warnings.warn(
                    "NaNs detected in power ratios. This can result from "
                    + "the reference power being 0, which can occur when wind speed is "
                    "very low. Try setting ws_min keyword argument to remove 0 power "
                    "wind speeds."
                )
            else:
                warnings.warn("NaNs detected in power ratios.")

        # Bounds stay None in the single-pass computation
        total_uplift_result[uplift_name] = {
            "energy_uplift_ctr": delta_aep,
            "energy_uplift_lb": None,
            "energy_uplift_ub": None,
            "energy_uplift_ctr_pc": percent_delta_aep,
            "energy_uplift_lb_pc": None,
            "energy_uplift_ub_pc": None,
        }

    return total_uplift_result, df_freq_pl
# Bootstrap function wraps the _total_uplift_power_ratio_single function
[docs]
def _total_uplift_power_ratio_bootstrap(
    a_in,
    ref_cols,
    test_cols,
    wd_cols,
    ws_cols,
    wd_step=2.0,
    wd_min=0.0,
    wd_max=360.0,
    ws_step=1.0,
    ws_min=0.0,
    ws_max=50.0,
    bin_cols_in=None,
    weight_by="min",  # min, sum
    df_freq_pl=None,
    wd_bin_overlap_radius=0.0,
    uplift_pairs=None,
    uplift_names=None,
    N=1,
    percentiles=None,
    remove_all_nulls=False,
):
    """Compute the total change in energy between two sets of turbines with bootstrapping.

    Runs _total_uplift_power_ratio_single N times on tables obtained from
    a_in.resample_energy_table and summarizes the spread of the results.

    Args:
        a_in (AnalysisInput): An AnalysisInput object
            containing the data to use in the calculation.
        ref_cols (list[str]): A list of columns to use as the reference turbines
        test_cols (list[str]): A list of columns to use as the test turbines
        wd_cols (list[str]): A list of columns to derive the wind directions from
        ws_cols (list[str]): A list of columns to derive the wind speeds from
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str] or None): A list of column names to use for
            the wind speed and wind direction bins. If None, defaults to
            ["wd_bin", "ws_bin"].
        weight_by (str): How to weight the energy ratio, options are 'min', or 'sum'. 'min' means
            the minimum count across the dataframes
            is used to weight the energy ratio. 'sum' means the sum of the counts
            across the dataframes is used to weight the energy ratio.
        df_freq_pl (pl.Dataframe): Polars dataframe of pre-provided per bin weights
        wd_bin_overlap_radius (float): The distance
            in degrees one wd bin overlaps into the next, must be
            less or equal to half the value of wd_step
        uplift_pairs: (list[tuple] or None): List of pairs of df_names to compute uplifts for.
            Each element of the list should be a tuple (or list) of length 2, where the first
            element will be the base case in the uplift calculation and the second element
            will be the test case in the uplift calculation. If None or empty, no uplifts
            are computed.
        uplift_names: (list[str] or None): Names for the uplift results, following the order
            of the pairs specified in uplift_pairs. None is treated as an empty list.
        N (int): The number of bootstrap samples to use. Must be at least 1.
        percentiles: (list or None): Two-element list of the lower and upper percentiles
            (in percent) used for the result bounds. If None, defaults to [5, 95].
        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be
            available. If False, a minimum one data point
            from ref_cols, test_cols, wd_cols, and ws_cols
            must be available to compute the bin. Defaults to False.

    Returns:
        A tuple (dict, pl.DataFrame): The dictionary is indexed by the uplift_names;
            each entry holds the central value taken from the first run
            (energy_uplift_ctr, energy_uplift_ctr_pc) and the lower/upper quantiles over
            all N runs (energy_uplift_lb, energy_uplift_ub, energy_uplift_lb_pc,
            energy_uplift_ub_pc). The dataframe is the bin-weight table returned by
            the first run.

    Raises:
        ValueError: If N is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be at least 1.")

    # Resolve None here instead of using mutable lists as default arguments
    if bin_cols_in is None:
        bin_cols_in = ["wd_bin", "ws_bin"]
    if uplift_pairs is None:
        uplift_pairs = []
    if uplift_names is None:
        uplift_names = []
    if percentiles is None:
        percentiles = [5.0, 95.0]

    # Run the single-pass function N times. Run 0 is requested with
    # perform_resample=False (presumably the original, un-resampled table);
    # every later run asks a_in for a resampled table.
    uplift_single_outs = [
        _total_uplift_power_ratio_single(
            a_in.resample_energy_table(perform_resample=(i != 0)),
            a_in.df_names,
            ref_cols,
            test_cols,
            wd_cols,
            ws_cols,
            wd_step,
            wd_min,
            wd_max,
            ws_step,
            ws_min,
            ws_max,
            bin_cols_in,
            weight_by,
            df_freq_pl,
            wd_bin_overlap_radius,
            uplift_pairs,
            uplift_names,
            remove_all_nulls,
        )
        for i in range(N)
    ]

    # First output contains the original table; use that df_freq_pl
    df_freq_pl = uplift_single_outs[0][1]
    run_results = [single_out[0] for single_out in uplift_single_outs]

    # Convert the percentiles (in percent) to quantile fractions once
    q_lower = percentiles[0] / 100
    q_upper = percentiles[1] / 100

    # Add in the statistics
    total_uplift_result = {}
    for uplift_name in uplift_names:
        delta_aeps = np.array(
            [res[uplift_name]["energy_uplift_ctr"] for res in run_results], dtype=float
        )
        percent_delta_aeps = np.array(
            [res[uplift_name]["energy_uplift_ctr_pc"] for res in run_results], dtype=float
        )

        # Index 0 is the first run, which provides the central estimate
        total_uplift_result[uplift_name] = {
            "energy_uplift_ctr": delta_aeps[0],
            "energy_uplift_lb": np.quantile(delta_aeps, q_lower),
            "energy_uplift_ub": np.quantile(delta_aeps, q_upper),
            "energy_uplift_ctr_pc": percent_delta_aeps[0],
            "energy_uplift_lb_pc": np.quantile(percent_delta_aeps, q_lower),
            "energy_uplift_ub_pc": np.quantile(percent_delta_aeps, q_upper),
        }

    return total_uplift_result, df_freq_pl
[docs]
def total_uplift_power_ratio(
    a_in: AnalysisInput,
    ref_turbines=None,
    test_turbines=None,
    wd_turbines=None,
    ws_turbines=None,
    use_predefined_ref=False,
    use_predefined_wd=False,
    use_predefined_ws=False,
    wd_step=2.0,
    wd_min=0.0,
    wd_max=360.0,
    ws_step=1.0,
    ws_min=0.0,
    ws_max=50.0,
    bin_cols_in=None,
    weight_by="min",  # min or sum
    df_freq=None,
    wd_bin_overlap_radius=0.0,
    uplift_pairs=None,
    uplift_names=None,
    N=1,
    percentiles=None,
    remove_all_nulls=False,
) -> dict:  # dict output for now, may change later
    """Compute the total uplift in energy production between two sets of turbines.

    With N == 1 a single computation is performed; otherwise the computation is
    bootstrapped and percentile bounds are reported alongside the central value.

    Args:
        a_in (AnalysisInput): An AnalysisInput object
            containing the data to use in the calculation.
        ref_turbines (list[int]): A list of turbine numbers to use as the reference.
        test_turbines (list[int]): A list of turbine numbers to use as the test.
        wd_turbines (list[int]): A list of turbine numbers to use for the wind directions
        ws_turbines (list[int]): A list of turbine numbers to use for the wind speeds
        use_predefined_ref (bool): If True, use the pow_ref column of df_ as the reference power.
        use_predefined_wd (bool): If True, use the wd column of df_ as the wind direction.
        use_predefined_ws (bool): If True, use the ws column of df_ as the wind speed.
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str] or None): A list of column names to
            use for the wind speed and wind direction bins. If None, defaults to
            ["wd_bin", "ws_bin"].
        weight_by (str): How to weight the energy ratio, options are 'min', or 'sum'. 'min' means
            the minimum count across the dataframes is used to weight the energy ratio.
            'sum' means the sum of the counts
            across the dataframes is used to weight the energy ratio.
        df_freq (pd.Dataframe): A dataframe which specifies the
            frequency of the ws/wd bin combinations. Provides
            a method to use an explicit or long-term weighting of bins. Dataframe should include
            columns ws, wd and freq_val. ws and wd should
            correspond to the bin centers resulting from
            the choices of the ws/wd_min / _max / _step.
            In the case that df_freq has extra bins that aren't included
            in those given by ws/wd min, max, step, they will
            be ignored in the energy ratio calculation.
            Any bins given by ws/wd min, max, step not present in
            df_freq will be assigned a frequency of zero.
            Defaults to None.
        wd_bin_overlap_radius (float): The distance in degrees
            one wd bin overlaps into the next, must be
            less or equal to half the value of wd_step
        uplift_pairs: (list[tuple]): List of pairs of df_names to compute uplifts for. Each element
            of the list should be a tuple (or list) of length 2, where the first element will be the
            base case in the uplift calculation and the second element will be the test case in the
            uplift calculation. A single pair of two strings may be passed directly.
            If None, no uplifts are computed.
        uplift_names: (list[str]): Names for the uplift results, following the order of the
            pairs specified in uplift_pairs. If None, each name defaults to
            "uplift_<test df_name>/<base df_name>".
        N (int): The number of bootstrap samples to use.
        percentiles: (list or None): percentiles to use when returning energy ratio bounds.
            If specified as None with N > 1 (bootstrapping), defaults to [5, 95].
        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be
            available. If False, a minimum one data point from ref_cols, test_cols, wd_cols,
            and ws_cols
            must be available to compute the bin. Defaults to False.

    Returns:
        dict: The uplift results indexed by the uplift names. Each entry is a dict with the
            keys energy_uplift_ctr, energy_uplift_lb, energy_uplift_ub,
            energy_uplift_ctr_pc, energy_uplift_lb_pc and energy_uplift_ub_pc.

    Raises:
        ValueError: If an element of uplift_pairs does not have length 2, if uplift_names
            and uplift_pairs differ in length, or if percentiles is not a two element
            sequence when bootstrapping.
    """
    # Resolve None here instead of using a mutable list as default argument
    if bin_cols_in is None:
        bin_cols_in = ["wd_bin", "ws_bin"]

    # Get the polars dataframe from within the a_in
    df_ = a_in.get_df()

    # Check that inputs are valid
    util.check_compute_analysis_inputs(
        df_,
        ref_turbines,
        test_turbines,
        wd_turbines,
        ws_turbines,
        use_predefined_ref,
        use_predefined_wd,
        use_predefined_ws,
        wd_step,
        wd_min,
        wd_max,
        ws_step,
        ws_min,
        ws_max,
        bin_cols_in,
        weight_by,
        df_freq,
        wd_bin_overlap_radius,
        uplift_pairs,
        uplift_names,
        False,  # NOTE(review): fixed flag of the shared checker; meaning not visible here
        N,
        percentiles,
        remove_all_nulls,
    )

    # Set up the column names for the reference power, wind speed and wind direction
    if not use_predefined_ref:
        ref_cols = [f"pow_{i:03d}" for i in ref_turbines]
    else:
        ref_cols = ["pow_ref"]

    if not use_predefined_ws:
        ws_cols = [f"ws_{i:03d}" for i in ws_turbines]
    else:
        ws_cols = ["ws"]

    if not use_predefined_wd:
        wd_cols = [f"wd_{i:03d}" for i in wd_turbines]
    else:
        wd_cols = ["wd"]

    # Confirm uplift pairs provided correctly
    if uplift_pairs is None:
        uplift_pairs = []
    elif len(uplift_pairs) == 2 and isinstance(uplift_pairs[0], str):
        # Single pair provided, not in list of lists. The length is checked
        # first so that an empty list is never indexed.
        uplift_pairs = [uplift_pairs]
    else:
        for up in uplift_pairs:
            if len(up) != 2:
                raise ValueError("uplift_pairs should be a list of tuples of length 2.")

    if uplift_names is not None:
        if len(uplift_names) != len(uplift_pairs):
            raise ValueError("Length of uplift_names should match length of uplift_pairs")
    else:
        uplift_names = ["uplift_" + up[1] + "/" + up[0] for up in uplift_pairs]

    # Convert the numbered arrays to appropriate column names
    test_cols = [f"pow_{i:03d}" for i in test_turbines]

    # If df_freq is provided, prepare a polars table of weights
    # TODO: confirm df_freq is consistent with the ws/wd min, max and step
    if df_freq is not None:
        # Convert to polars dataframe
        df_freq_pl = pl.from_pandas(df_reduce_precision(df_freq, allow_convert_to_integer=False))

        # Rename the columns
        df_freq_pl = df_freq_pl.rename({"ws": "ws_bin", "wd": "wd_bin", "freq_val": "weight"})
    else:
        df_freq_pl = None

    # If N=1, don't use bootstrapping
    if N == 1:
        if percentiles is not None:
            # Logger.warn is a deprecated alias; warning() is the supported method
            logger.warning("percentiles can only be used with bootstrapping (N > 1).")

        # Compute the total uplift in a single pass
        total_uplift_result, _ = _total_uplift_power_ratio_single(
            df_,
            a_in.df_names,
            ref_cols,
            test_cols,
            wd_cols,
            ws_cols,
            wd_step,
            wd_min,
            wd_max,
            ws_step,
            ws_min,
            ws_max,
            bin_cols_in,
            weight_by,
            df_freq_pl,
            wd_bin_overlap_radius,
            uplift_pairs,
            uplift_names,
            remove_all_nulls,
        )
    else:
        if percentiles is None:
            percentiles = [5, 95]
        elif not hasattr(percentiles, "__len__") or len(percentiles) != 2:
            raise ValueError(
                "percentiles should be a two element list of the "
                + "upper and lower desired percentiles."
            )

        total_uplift_result, _ = _total_uplift_power_ratio_bootstrap(
            a_in,
            ref_cols,
            test_cols,
            wd_cols,
            ws_cols,
            wd_step,
            wd_min,
            wd_max,
            ws_step,
            ws_min,
            ws_max,
            bin_cols_in,
            weight_by,
            df_freq_pl,
            wd_bin_overlap_radius,
            uplift_pairs,
            uplift_names,
            N,
            percentiles,
            # Forward the null-handling choice so that bootstrapped runs
            # filter the data the same way as the single-pass run
            remove_all_nulls,
        )

    # Do we want some kind of more complex return object? Or are we OK
    # returning just the total_uplift_result dictionary?
    return total_uplift_result
# For backwards compatibility include a function compute_total_uplift that
# simply wraps the total_uplift_power_ratio function and adds a deprecation
# warning
[docs]
def compute_total_uplift(*args, **kwargs):
    """Deprecated function for computing the total uplift in energy production.

    Emits a DeprecationWarning and forwards all positional and keyword
    arguments unchanged to total_uplift_power_ratio.

    Args:
        *args: Positional arguments passed through to total_uplift_power_ratio.
        **kwargs: Keyword arguments passed through to total_uplift_power_ratio.

    Returns:
        Whatever total_uplift_power_ratio returns for the given arguments.
    """
    # stacklevel=2 attributes the warning to the caller's line instead of this
    # wrapper, so it is reported where the deprecated call is actually made
    warnings.warn(
        "compute_total_uplift is deprecated, please use total_uplift_power_ratio instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return total_uplift_power_ratio(*args, **kwargs)