# Source code for marmot.plottingmodules.capacity_factor

# -*- coding: utf-8 -*-
"""Generator capacity factor plots.

This module contains methods that are related to the capacity factor
of generators and average output plots.
"""

import logging
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd

import marmot.utils.mconfig as mconfig
from marmot.plottingmodules.plotutils.plot_data_helper import (
    GenCategories,
    PlotDataStoreAndProcessor,
)
from marmot.plottingmodules.plotutils.plot_exceptions import (
    MissingInputData,
    MissingZoneData,
)
from marmot.plottingmodules.plotutils.plot_library import PlotLibrary
from marmot.plottingmodules.plotutils.styles import ColorList
from marmot.plottingmodules.plotutils.timeseries_modifiers import (
    set_timestamp_date_range,
)

# Module logger, registered as a child of the "plotter" logger.
logger = logging.getLogger(f"plotter.{__name__}")

# Settings read from the Marmot config: plot data options and the
# base figure dimensions used when sizing plots in this module.
plot_data_settings: dict = mconfig.parser("plot_data")
xdimension: int = mconfig.parser("figure_size", "xdimension")
ydimension: int = mconfig.parser("figure_size", "ydimension")


class CapacityFactor(PlotDataStoreAndProcessor):
    """Generator capacity factor plots.

    The capacity_factor.py module contains methods that are
    related to the capacity factor of generators.

    CapacityFactor inherits from the PlotDataStoreAndProcessor class to
    assist in creating figures.
    """

    def __init__(
        self,
        Zones: List[str],
        Scenarios: List[str],
        AGG_BY: str,
        ordered_gen: List[str],
        marmot_solutions_folder: Path,
        gen_categories: GenCategories = GenCategories(),
        color_list: list = ColorList().colors,
        **kwargs,
    ):
        """
        Args:
            Zones (List[str]): List of regions/zones to plot.
            Scenarios (List[str]): List of scenarios to plot.
            AGG_BY (str): Informs region type to aggregate by when creating plots.
            ordered_gen (List[str]): Ordered list of generator technologies to plot,
                order defines the generator technology position in stacked bar and area plots.
            marmot_solutions_folder (Path): Directory containing Marmot solution outputs.
            gen_categories (GenCategories): Instance of GenCategories class, groups generator
                technologies into defined categories.
                Defaults to GenCategories.
            color_list (list, optional): List of colors to apply to non-gen plots.
                Defaults to ColorList().colors.
        """
        # Instantiation of PlotDataStoreAndProcessor
        super().__init__(AGG_BY, ordered_gen, marmot_solutions_folder, **kwargs)

        self.Zones = Zones
        self.Scenarios = Scenarios
        self.gen_categories = gen_categories
        self.color_list = color_list

    @staticmethod
    def _interval_count_and_duration_hours(timestamps) -> tuple:
        """Derive the resolution and length of a timeseries index.

        Args:
            timestamps: Datetime index with at least two entries. The step
                between the first two entries is used as the interval length
                for the whole series.

        Returns:
            tuple: ``(interval_count, duration_hours)`` where interval_count
            is the number of intervals in a 60 minute period and
            duration_hours is the length of the series in hours, including
            the last interval and capped at 8760.
        """
        time_delta = timestamps[1] - timestamps[0]
        # Account for last timestep.
        duration = timestamps[-1] - timestamps[0] + time_delta
        # Finds intervals in 60 minute period
        interval_count = 60 / (time_delta / np.timedelta64(1, "m"))
        # Get length of time series in hours for CF calculation.
        duration_hours = min(8760, duration / np.timedelta64(1, "h"))
        return interval_count, duration_hours

    def avg_output_when_committed(
        self,
        start_date_range: str = None,
        end_date_range: str = None,
        scenario_groupby: str = "Scenario",
        **_,
    ):
        """Creates barplots of the percentage average generation output when committed by technology type.

        Each scenario is plotted by a different colored grouped bar.

        Args:
            start_date_range (str, optional): Defines a start date at which to represent data from.
                Defaults to None.
            end_date_range (str, optional): Defines a end date at which to represent data to.
                Defaults to None.
            scenario_groupby (str, optional): Specifies whether to group data by Scenario
                or Year-Scenario. If grouping by Year-Scenario the year will be identified
                from the timestamp and appended to the scenario name. This is useful when
                plotting data which covers multiple years such as ReEDS.
                Defaults to Scenario.

                .. versionadded:: 0.10.0

        Returns:
            dict: dictionary containing the created plot and its data table.
        """
        outputs: dict = {}

        # List of properties needed by the plot, properties are a set of tuples and
        # contain 3 parts: required True/False, property name and scenarios required,
        # scenarios must be a list.
        properties = [
            (True, "generator_Generation", self.Scenarios),
            (True, "generator_Installed_Capacity", self.Scenarios),
        ]

        # Runs get_formatted_data within PlotDataStoreAndProcessor to populate
        # PlotDataStoreAndProcessor dictionary with all required properties,
        # returns a 1 if required data is missing
        check_input_data = self.get_formatted_data(properties)

        if 1 in check_input_data:
            return MissingInputData()

        for zone_input in self.Zones:
            cf_chunks = []
            logger.info(f"{self.AGG_BY} = {zone_input}")

            for scenario in self.Scenarios:
                logger.info(f"Scenario = {scenario}")

                Gen: pd.DataFrame = self["generator_Generation"].get(scenario)
                try:
                    Gen = Gen.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    logger.warning(f"No data in {zone_input}")
                    continue
                Gen = Gen.reset_index()
                Gen = self.rename_gen_techs(Gen)
                Gen["tech"] = Gen["tech"].astype("category")
                Gen["tech"] = Gen["tech"].cat.set_categories(self.ordered_gen)
                # Only thermal technologies are included in this plot.
                Gen = Gen[Gen["tech"].isin(self.gen_categories.thermal)]
                Gen = Gen.set_index("timestamp")
                Gen = Gen.rename(columns={"values": "Output (MWh)"})

                Cap: pd.DataFrame = self["generator_Installed_Capacity"].get(scenario)
                Cap = Cap.xs(zone_input, level=self.AGG_BY)
                Cap = Cap.rename(columns={"values": "Installed Capacity (MW)"})

                if pd.notna(start_date_range):
                    Cap, Gen = set_timestamp_date_range(
                        [Cap, Gen], start_date_range, end_date_range
                    )
                    if Gen.empty:
                        logger.warning("No data in selected Date Range")
                        continue

                # Capacity is matched to generation by generator name and year.
                Gen["year"] = Gen.index.year.astype(str)
                Cap["year"] = Cap.index.get_level_values("timestamp").year.astype(str)
                Gen = Gen.reset_index()
                Gen = pd.merge(Gen, Cap, on=["gen_name", "year"])
                Gen = Gen.set_index("timestamp")

                if scenario_groupby == "Year-Scenario":
                    Gen["Scenario"] = Gen.index.year.astype(str) + f"_{scenario}"
                else:
                    Gen["Scenario"] = scenario

                for scen in Gen["Scenario"].unique():
                    Gen_scen = Gen.loc[Gen["Scenario"] == scen]
                    # Calculate CF individually for each plant,
                    # since we need to take out all zero rows.
                    tech_names = Gen_scen.sort_values(["tech"])["tech"].unique()
                    CF = pd.DataFrame(columns=tech_names, index=[scen])
                    for tech_name in tech_names:
                        stt = Gen_scen.loc[Gen_scen["tech"] == tech_name]
                        # Technologies with no output at all keep their NaN entry.
                        if not (stt["Output (MWh)"] != 0).any():
                            continue

                        cfs = []
                        caps = []
                        for gen in stt["gen_name"].unique():
                            sgt = stt.loc[stt["gen_name"] == gen]
                            if not (sgt["Output (MWh)"] != 0).any():
                                continue

                            # Interval step is needed to correct for MWh of
                            # generation; both values are taken from the full
                            # series, before zero rows are removed.
                            (
                                interval_count,
                                duration_hours,
                            ) = self._interval_count_and_duration_hours(sgt.index)

                            # Remove time intervals when output is zero.
                            sgt = sgt[sgt["Output (MWh)"] != 0]
                            total_gen = sgt["Output (MWh)"].sum() / interval_count
                            cap = sgt["Installed Capacity (MW)"].mean()

                            # Calculate CF
                            # NOTE(review): duration_hours covers the whole
                            # series rather than only the non-zero intervals,
                            # and dropping zero rows does not change the sum,
                            # so this looks equivalent to a plain capacity
                            # factor - confirm this is the intended metric.
                            cfs.append(total_gen / (cap * duration_hours))
                            caps.append(cap)

                        # Find average "CF" (average output when committed)
                        # for this technology, weighted by capacity.
                        CF[tech_name] = np.average(cfs, weights=caps)
                    cf_chunks.append(CF)

            if not cf_chunks:
                outputs[zone_input] = MissingZoneData()
                continue
            CF_all_scenarios = pd.concat(cf_chunks)

            Data_Table_Out = CF_all_scenarios.T

            mplt = PlotLibrary()
            fig, ax = mplt.get_figure()

            mplt.barplot(
                CF_all_scenarios.T,
                color=self.color_list,
                custom_tick_labels=list(CF_all_scenarios.columns),
                ytick_major_fmt="percent",
            )

            ax.set_ylabel(
                "Average Output When Committed", color="black", rotation="vertical"
            )

            if plot_data_settings["plot_title_as_region"]:
                mplt.add_main_title(zone_input)
            # Add legend
            mplt.add_legend()

            outputs[zone_input] = {"fig": fig, "data_table": Data_Table_Out}
        return outputs

    def cf(
        self,
        start_date_range: str = None,
        end_date_range: str = None,
        scenario_groupby: str = "Scenario",
        **_,
    ):
        """Creates barplots of generator capacity factors by technology type.

        Each scenario is plotted by a different colored grouped bar.

        Args:
            start_date_range (str, optional): Defines a start date at which to represent data from.
                Defaults to None.
            end_date_range (str, optional): Defines a end date at which to represent data to.
                Defaults to None.
            scenario_groupby (str, optional): Specifies whether to group data by Scenario
                or Year-Scenario. If grouping by Year-Scenario the year will be identified
                from the timestamp and appended to the scenario name. This is useful when
                plotting data which covers multiple years such as ReEDS.
                Defaults to Scenario.

                .. versionadded:: 0.10.0

        Returns:
            dict: dictionary containing the created plot and its data table.
        """
        outputs: dict = {}

        # List of properties needed by the plot, properties are a set of tuples and
        # contain 3 parts: required True/False, property name and scenarios required,
        # scenarios must be a list.
        properties = [
            (True, "generator_Generation", self.Scenarios),
            (True, "generator_Installed_Capacity", self.Scenarios),
        ]

        # Runs get_formatted_data within PlotDataStoreAndProcessor to populate
        # PlotDataStoreAndProcessor dictionary with all required properties,
        # returns a 1 if required data is missing
        check_input_data = self.get_formatted_data(properties)

        if 1 in check_input_data:
            return MissingInputData()

        for zone_input in self.Zones:
            cf_scen_chunks = []
            logger.info(f"{self.AGG_BY} = {zone_input}")

            for scenario in self.Scenarios:
                logger.info(f"Scenario = {scenario}")

                Gen = self["generator_Generation"].get(scenario)
                try:
                    # Check for regions missing all generation.
                    Gen = Gen.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    logger.warning(f"No data in {zone_input}")
                    continue
                Gen = self.df_process_gen_inputs(Gen)

                Cap = self["generator_Installed_Capacity"].get(scenario)
                Cap = Cap.xs(zone_input, level=self.AGG_BY)
                Cap = self.df_process_gen_inputs(Cap)

                if pd.notna(start_date_range):
                    Cap, Gen = set_timestamp_date_range(
                        [Cap, Gen], start_date_range, end_date_range
                    )
                    if Gen.empty:
                        logger.warning("No data in selected Date Range")
                        continue

                # Interval step is needed to correct for MWh of generation.
                interval_count: float
                duration_hours: float
                (
                    interval_count,
                    duration_hours,
                ) = self._interval_count_and_duration_hours(Gen.index)

                Gen = Gen / interval_count

                Total_Gen = self.year_scenario_grouper(
                    Gen, scenario, groupby=scenario_groupby
                ).sum()
                Cap = self.year_scenario_grouper(
                    Cap, scenario, groupby=scenario_groupby
                ).sum()

                # Calculate CF
                CF = Total_Gen / (Cap * duration_hours)
                cf_scen_chunks.append(CF)

            if not cf_scen_chunks:
                outputs[zone_input] = MissingZoneData()
                continue
            CF_all_scenarios = pd.concat(cf_scen_chunks, axis=0, sort=False).T
            CF_all_scenarios = CF_all_scenarios.fillna(0, axis=0)

            Data_Table_Out = CF_all_scenarios.T

            mplt = PlotLibrary(figsize=(xdimension * 1.5, ydimension * 1.5))
            fig, ax = mplt.get_figure()

            mplt.barplot(
                CF_all_scenarios, color=self.color_list, ytick_major_fmt="percent"
            )

            ax.set_ylabel("Capacity Factor", color="black", rotation="vertical")
            # Add legend
            mplt.add_legend()
            # Add title
            if plot_data_settings["plot_title_as_region"]:
                mplt.add_main_title(zone_input)

            outputs[zone_input] = {"fig": fig, "data_table": Data_Table_Out}
        return outputs

    def time_at_min_gen(
        self,
        start_date_range: str = None,
        end_date_range: str = None,
        scenario_groupby: str = "Scenario",
        **_,
    ):
        """Creates barplots of generator percentage time at min-gen by technology type.

        Each scenario is plotted by a different colored grouped bar.

        Args:
            start_date_range (str, optional): Defines a start date at which to represent data from.
                Defaults to None.
            end_date_range (str, optional): Defines a end date at which to represent data to.
                Defaults to None.
            scenario_groupby (str, optional): Specifies whether to group data by Scenario
                or Year-Scenario. If grouping by Year-Scenario the year will be identified
                from the timestamp and appended to the scenario name. This is useful when
                plotting data which covers multiple years such as ReEDS.
                Defaults to Scenario.
                NOTE(review): this argument is accepted but not used by the
                body of this method.

                .. versionadded:: 0.10.0

        Returns:
            dict: dictionary containing the created plot and its data table.
        """
        outputs: dict = {}

        # List of properties needed by the plot, properties are a set of tuples and
        # contain 3 parts: required True/False, property name and scenarios required,
        # scenarios must be a list.
        properties = [
            (True, "generator_Generation", self.Scenarios),
            (True, "generator_Installed_Capacity", self.Scenarios),
            (True, "generator_Hours_at_Minimum", self.Scenarios),
        ]

        # Runs get_formatted_data within PlotDataStoreAndProcessor to populate
        # PlotDataStoreAndProcessor dictionary with all required properties,
        # returns a 1 if required data is missing
        check_input_data = self.get_formatted_data(properties)

        if 1 in check_input_data:
            return MissingInputData()

        for zone_input in self.Zones:
            logger.info(f"{self.AGG_BY} = {zone_input}")

            # One single-row frame per scenario, concatenated after the loop
            # (DataFrame.append is not available in pandas >= 2.0).
            time_at_min_chunks = []

            for scenario in self.Scenarios:
                logger.info(f"Scenario = {scenario}")

                Min = self["generator_Hours_at_Minimum"].get(scenario)
                try:
                    Min = Min.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    continue

                Gen = self["generator_Generation"].get(scenario)
                try:
                    # Check for regions missing all generation.
                    Gen = Gen.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    logger.warning(f"No data in {zone_input}")
                    continue

                Cap = self["generator_Installed_Capacity"].get(scenario)
                Cap = Cap.xs(zone_input, level=self.AGG_BY)

                if pd.notna(start_date_range):
                    Min, Gen, Cap = set_timestamp_date_range(
                        [Min, Gen, Cap], start_date_range, end_date_range
                    )
                    if Gen.empty:
                        logger.warning("No data in selected Date Range")
                        continue

                Min = Min.reset_index()
                Min = Min.set_index("gen_name")
                Min = Min.rename(columns={"values": "Hours at Minimum"})

                Gen = Gen.reset_index()
                Gen["tech"] = Gen["tech"].astype("category")
                Gen["tech"] = Gen["tech"].cat.set_categories(self.ordered_gen)
                Gen = Gen.rename(columns={"values": "Output (MWh)"})
                # VRE technologies are excluded from this plot.
                Gen = Gen[~Gen["tech"].isin(self.gen_categories.vre)]
                Gen.index = Gen.timestamp

                # Caps stays indexed by gen_name so the merge below keeps
                # gen_name as the index of Min, which the .loc lookup needs.
                Caps = Cap.groupby("gen_name").mean()
                Caps = Caps.rename(columns={"values": "Installed Capacity (MW)"})
                Min = pd.merge(Min, Caps, on="gen_name")

                # Find how many hours each generator was operating, for the
                # denominator of the % time at min gen.
                # So remove all zero rows.
                Gen = Gen.loc[Gen["Output (MWh)"] != 0]
                online_gens = Gen.gen_name.unique()
                Min = Min.loc[online_gens]
                Min["hours_online"] = Gen.groupby("gen_name")["Output (MWh)"].count()
                Min["fraction_at_min"] = Min["Hours at Minimum"] / Min.hours_online

                tech_names = Min.tech.unique()
                time_at_min_individ = pd.DataFrame(columns=tech_names, index=[scenario])
                for tech_name in tech_names:
                    stt = Min.loc[Min["tech"] == tech_name]
                    wgts = stt["Installed Capacity (MW)"]
                    # Fall back to equal weights when total capacity is zero,
                    # which np.average cannot normalise.
                    if wgts.sum() == 0:
                        wgts = np.ones(len(stt))
                    time_at_min_individ[tech_name] = np.average(
                        stt.fraction_at_min, weights=wgts
                    )

                time_at_min_chunks.append(time_at_min_individ)

            if time_at_min_chunks:
                time_at_min = pd.concat(time_at_min_chunks)
            else:
                time_at_min = pd.DataFrame()

            if time_at_min.empty:
                outputs[zone_input] = MissingZoneData()
                continue

            Data_Table_Out = time_at_min.T

            mplt = PlotLibrary(figsize=(xdimension * 1.5, ydimension * 1.5))
            fig, ax = mplt.get_figure()

            mplt.barplot(
                time_at_min.T,
                color=self.color_list,
                custom_tick_labels=list(time_at_min.columns),
                ytick_major_fmt="percent",
            )

            ax.set_ylabel(
                "Percentage of time online at minimum generation",
                color="black",
                rotation="vertical",
            )
            # Add legend
            mplt.add_legend()
            # Add title
            if plot_data_settings["plot_title_as_region"]:
                mplt.add_main_title(zone_input)

            outputs[zone_input] = {"fig": fig, "data_table": Data_Table_Out}
        return outputs