# Source code for marmot.plottingmodules.capacity_factor

# -*- coding: utf-8 -*-
"""Generator capacity factor plots .

This module contain methods that are related to the capacity factor 
of generators and average output plots 
"""

import logging
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd

import marmot.utils.mconfig as mconfig
from marmot.plottingmodules.plotutils.plot_data_helper import (
    GenCategories,
    PlotDataStoreAndProcessor,
)
from marmot.plottingmodules.plotutils.plot_exceptions import (
    MissingInputData,
    MissingZoneData,
)
from marmot.plottingmodules.plotutils.plot_library import PlotLibrary
from marmot.plottingmodules.plotutils.styles import ColorList
from marmot.plottingmodules.plotutils.timeseries_modifiers import (
    set_timestamp_date_range,
)

# Module logger, registered as a child of the "plotter" logger namespace.
logger = logging.getLogger(f"plotter.{__name__}")
# Plot settings and figure dimensions are read once, at import time, from the
# Marmot configuration via mconfig.parser.
plot_data_settings: dict = mconfig.parser("plot_data")
xdimension: int = mconfig.parser("figure_size", "xdimension")
ydimension: int = mconfig.parser("figure_size", "ydimension")


class CapacityFactor(PlotDataStoreAndProcessor):
    """Generator capacity factor plots.

    The capacity_factor.py module contains methods that are
    related to the capacity factor of generators.

    CapacityFactor inherits from the PlotDataStoreAndProcessor class to
    assist in creating figures.
    """

    def __init__(
        self,
        Zones: List[str],
        Scenarios: List[str],
        AGG_BY: str,
        ordered_gen: List[str],
        marmot_solutions_folder: Path,
        gen_categories: GenCategories = None,
        color_list: list = None,
        **kwargs,
    ):
        """Initialise the parent data store and record the plot selections.

        Args:
            Zones (List[str]): List of regions/zones to plot.
            Scenarios (List[str]): List of scenarios to plot.
            AGG_BY (str): Informs region type to aggregate by when creating plots.
            ordered_gen (List[str]): Ordered list of generator technologies to plot,
                order defines the generator technology position in stacked bar and area plots.
            marmot_solutions_folder (Path): Directory containing Marmot solution outputs.
            gen_categories (GenCategories, optional): Instance of GenCategories class,
                groups generator technologies into defined categories.
                Defaults to None, in which case GenCategories() is used.
            color_list (list, optional): List of colors to apply to non-gen plots.
                Defaults to None, in which case ColorList().colors is used.
            **kwargs: Passed through unchanged to
                PlotDataStoreAndProcessor.__init__.
        """
        # Instantiation of PlotDataStoreAndProcessor
        super().__init__(AGG_BY, ordered_gen, marmot_solutions_folder, **kwargs)

        self.Zones = Zones
        self.Scenarios = Scenarios
        # The defaults are resolved per call instead of once in the signature,
        # so instances do not all hold the object created at definition time.
        self.gen_categories = (
            GenCategories() if gen_categories is None else gen_categories
        )
        self.color_list = ColorList().colors if color_list is None else color_list

[docs]    def avg_output_when_committed(
        self,
        start_date_range: str = None,
        end_date_range: str = None,
        scenario_groupby: str = "Scenario",
        **_,
    ):
        """Creates barplots of the percentage average generation output when committed
        by technology type.

        Each scenario is plotted by a different colored grouped bar.

        Args:
            start_date_range (str, optional): Defines a start date at which to represent
                data from.
                Defaults to None.
            end_date_range (str, optional): Defines a end date at which to represent data to.
                Defaults to None.
            scenario_groupby (str, optional): Specifies whether to group data by Scenario
                or Year-Sceanrio. If grouping by Year-Sceanrio the year will be identified
                from the timestamp and appeneded to the sceanrio name. This is useful when
                plotting data which covers multiple years such as ReEDS.
                Defaults to Scenario.

                .. versionadded:: 0.10.0

        Returns:
            dict: dictionary containing the created plot and its data table.
        """
        outputs: dict = {}

        # List of properties needed by the plot, properties are a set of tuples and
        # contain 3 parts: required True/False, property name and scenarios required,
        # scenarios must be a list.
        properties = [
            (True, "generator_Generation", self.Scenarios),
            (True, "generator_Installed_Capacity", self.Scenarios),
        ]

        # Runs get_formatted_data within PlotDataStoreAndProcessor to populate PlotDataStoreAndProcessor
        # dictionary with all required properties, returns a 1 if required data is missing
        check_input_data = self.get_formatted_data(properties)

        if 1 in check_input_data:
            return MissingInputData()

        for zone_input in self.Zones:
            cf_chunks = []
            logger.info(f"{self.AGG_BY} = {zone_input}")

            for scenario in self.Scenarios:
                logger.info(f"Scenario = {str(scenario)}")

                Gen: pd.DataFrame = self["generator_Generation"].get(scenario)
                try:
                    Gen = Gen.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    logger.warning(f"No data in {zone_input}")
                    continue
                Gen = Gen.reset_index()
                Gen = self.rename_gen_techs(Gen)
                Gen.tech = Gen.tech.astype("category")
                Gen.tech = Gen.tech.cat.set_categories(self.ordered_gen)
                Gen = Gen[Gen["tech"].isin(self.gen_categories.thermal)]
                Gen.set_index("timestamp", inplace=True)
                Gen = Gen.rename(columns={"values": "Output (MWh)"})

                Cap: pd.DataFrame = self["generator_Installed_Capacity"].get(scenario)
                Cap = Cap.xs(zone_input, level=self.AGG_BY)
                Cap = Cap.rename(columns={"values": "Installed Capacity (MW)"})

                if pd.notna(start_date_range):
                    Cap, Gen = set_timestamp_date_range(
                        [Cap, Gen], start_date_range, end_date_range
                    )
                    if Gen.empty is True:
                        logger.warning("No data in selected Date Range")
                        continue

                Gen["year"] = Gen.index.year.astype(str)
                Cap["year"] = Cap.index.get_level_values("timestamp").year.astype(str)
                Gen = Gen.reset_index()
                Gen = pd.merge(Gen, Cap, on=["gen_name", "year"])
                Gen.set_index("timestamp", inplace=True)

                if scenario_groupby == "Year-Scenario":
                    Gen["Scenario"] = Gen.index.year.astype(str) + f"_{scenario}"
                else:
                    Gen["Scenario"] = scenario

                year_scen = Gen["Scenario"].unique()
                for scen in year_scen:
                    Gen_scen = Gen.loc[Gen["Scenario"] == scen]
                    # Calculate CF individually for each plant,
                    # since we need to take out all zero rows.
                    tech_names = Gen_scen.sort_values(["tech"])["tech"].unique()
                    CF = pd.DataFrame(columns=tech_names, index=[scen])
                    for tech_name in tech_names:
                        stt = Gen_scen.loc[Gen_scen["tech"] == tech_name]
                        if not all(stt["Output (MWh)"] == 0):

                            gen_names = stt["gen_name"].unique()
                            cfs = []
                            caps = []
                            for gen in gen_names:
                                sgt = stt.loc[stt["gen_name"] == gen]
                                if not all(sgt["Output (MWh)"] == 0):
                                    # Calculates interval step to correct for MWh of generation
                                    time_delta = sgt.index[1] - sgt.index[0]
                                    duration = sgt.index[len(sgt) - 1] - sgt.index[0]
                                    duration = (
                                        duration + time_delta
                                    )  # Account for last timestep.
                                    # Finds intervals in 60 minute period
                                    interval_count = 60 / (
                                        time_delta / np.timedelta64(1, "m")
                                    )
                                    # Get length of time series in hours for CF calculation.
                                    duration_hours = min(
                                        8760, duration / np.timedelta64(1, "h")
                                    )
                                    # Remove time intervals when output is zero.
                                    sgt = sgt[sgt["Output (MWh)"] != 0]
                                    total_gen = (
                                        sgt["Output (MWh)"].sum() / interval_count
                                    )
                                    cap = sgt["Installed Capacity (MW)"].mean()
                                    # Calculate CF
                                    cf = total_gen / (cap * duration_hours)
                                    cfs.append(cf)
                                    caps.append(cap)

                            # Find average "CF" (average output when committed)
                            # for this technology, weighted by capacity.
                            cf = np.average(cfs, weights=caps)
                            CF[tech_name] = cf
                    cf_chunks.append(CF)

            if cf_chunks:
                CF_all_scenarios = pd.concat(cf_chunks)
            else:
                outputs[zone_input] = MissingZoneData()
                continue

            Data_Table_Out = CF_all_scenarios.T

            mplt = PlotLibrary()
            fig, ax = mplt.get_figure()

            mplt.barplot(
                CF_all_scenarios.T,
                color=self.color_list,
                custom_tick_labels=list(CF_all_scenarios.columns),
                ytick_major_fmt="percent",
            )

            ax.set_ylabel(
                "Average Output When Committed", color="black", rotation="vertical"
            )

            if plot_data_settings["plot_title_as_region"]:
                mplt.add_main_title(zone_input)
            # Add legend
            mplt.add_legend()

            outputs[zone_input] = {"fig": fig, "data_table": Data_Table_Out}
        return outputs

[docs]    def cf(
        self,
        start_date_range: str = None,
        end_date_range: str = None,
        scenario_groupby: str = "Scenario",
        **_,
    ):
        """Creates barplots of generator capacity factors by technology type.

        Each scenario is plotted by a different colored grouped bar.

        Args:
            start_date_range (str, optional): Defines a start date at which to represent
                data from.
                Defaults to None.
            end_date_range (str, optional): Defines a end date at which to represent data to.
                Defaults to None.
            scenario_groupby (str, optional): Specifies whether to group data by Scenario
                or Year-Sceanrio. If grouping by Year-Sceanrio the year will be identified
                from the timestamp and appeneded to the sceanrio name. This is useful when
                plotting data which covers multiple years such as ReEDS.
                Defaults to Scenario.

                .. versionadded:: 0.10.0

        Returns:
            dict: dictionary containing the created plot and its data table.
        """

        outputs: dict = {}

        # List of properties needed by the plot, properties are a set of tuples and
        # contain 3 parts: required True/False, property name and scenarios required,
        # scenarios must be a list.
        properties = [
            (True, "generator_Generation", self.Scenarios),
            (True, "generator_Installed_Capacity", self.Scenarios),
        ]

        # Runs get_formatted_data within PlotDataStoreAndProcessor to populate PlotDataStoreAndProcessor
        # dictionary with all required properties, returns a 1 if required data is missing
        check_input_data = self.get_formatted_data(properties)

        if 1 in check_input_data:
            return MissingInputData()

        for zone_input in self.Zones:
            cf_scen_chunks = []
            logger.info(f"{self.AGG_BY} = {zone_input}")

            for scenario in self.Scenarios:

                logger.info(f"Scenario = {str(scenario)}")
                Gen = self["generator_Generation"].get(scenario)
                try:  # Check for regions missing all generation.
                    Gen = Gen.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    logger.warning(f"No data in {zone_input}")
                    continue
                Gen = self.df_process_gen_inputs(Gen)

                Cap = self["generator_Installed_Capacity"].get(scenario)
                Cap = Cap.xs(zone_input, level=self.AGG_BY)
                Cap = self.df_process_gen_inputs(Cap)

                if pd.notna(start_date_range):
                    Cap, Gen = set_timestamp_date_range(
                        [Cap, Gen], start_date_range, end_date_range
                    )
                    if Gen.empty is True:
                        logger.warning("No data in selected Date Range")
                        continue

                # Calculates interval step to correct for MWh of generation
                time_delta = Gen.index[1] - Gen.index[0]
                duration = Gen.index[len(Gen) - 1] - Gen.index[0]
                duration = duration + time_delta  # Account for last timestep.
                # Finds intervals in 60 minute period
                interval_count: int = 60 / (time_delta / np.timedelta64(1, "m"))
                # Get length of time series in hours for CF calculation.
                duration_hours: int = min(8760, duration / np.timedelta64(1, "h"))

                Gen = Gen / interval_count

                Total_Gen = self.year_scenario_grouper(
                    Gen, scenario, groupby=scenario_groupby
                ).sum()
                Cap = self.year_scenario_grouper(
                    Cap, scenario, groupby=scenario_groupby
                ).sum()
                # Calculate CF
                CF = Total_Gen / (Cap * duration_hours)
                cf_scen_chunks.append(CF)

            if cf_scen_chunks:
                CF_all_scenarios = pd.concat(cf_scen_chunks, axis=0, sort=False).T
                CF_all_scenarios = CF_all_scenarios.fillna(0, axis=0)
            else:
                outputs[zone_input] = MissingZoneData()
                continue

            Data_Table_Out = CF_all_scenarios.T

            mplt = PlotLibrary(figsize=(xdimension * 1.5, ydimension * 1.5))
            fig, ax = mplt.get_figure()

            mplt.barplot(
                CF_all_scenarios, color=self.color_list, ytick_major_fmt="percent"
            )

            ax.set_ylabel("Capacity Factor", color="black", rotation="vertical")
            # Add legend
            mplt.add_legend()
            # Add title
            if plot_data_settings["plot_title_as_region"]:
                mplt.add_main_title(zone_input)
            outputs[zone_input] = {"fig": fig, "data_table": Data_Table_Out}

        return outputs

[docs]    def time_at_min_gen(
        self,
        start_date_range: str = None,
        end_date_range: str = None,
        scenario_groupby: str = "Scenario",
        **_,
    ):
        """Creates barplots of generator percentage time at min-gen by technology type.

        Each scenario is plotted by a different colored grouped bar.

        Args:
            start_date_range (str, optional): Defines a start date at which to represent
                data from.
                Defaults to None.
            end_date_range (str, optional): Defines a end date at which to represent data to.
                Defaults to None.
            scenario_groupby (str, optional): Specifies whether to group data by Scenario
                or Year-Sceanrio. If grouping by Year-Sceanrio the year will be identified
                from the timestamp and appeneded to the sceanrio name. This is useful when
                plotting data which covers multiple years such as ReEDS.
                Defaults to Scenario.

                .. versionadded:: 0.10.0

        Returns:
            dict: dictionary containing the created plot and its data table.
        """

        outputs: dict = {}

        # List of properties needed by the plot, properties are a set of tuples and
        # contain 3 parts: required True/False, property name and scenarios required,
        # scenarios must be a list.
        properties = [
            (True, "generator_Generation", self.Scenarios),
            (True, "generator_Installed_Capacity", self.Scenarios),
            (True, "generator_Hours_at_Minimum", self.Scenarios),
        ]

        # Runs get_formatted_data within PlotDataStoreAndProcessor to populate PlotDataStoreAndProcessor dictionary
        # with all required properties, returns a 1 if required data is missing
        check_input_data = self.get_formatted_data(properties)

        if 1 in check_input_data:
            return MissingInputData()

        for zone_input in self.Zones:
            logger.info(f"{self.AGG_BY} = {zone_input}")

            time_at_min = pd.DataFrame()

            for scenario in self.Scenarios:
                logger.info(f"Scenario = {str(scenario)}")

                Min = self["generator_Hours_at_Minimum"].get(scenario)
                try:
                    Min = Min.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    continue
                Gen = self["generator_Generation"].get(scenario)
                try:  # Check for regions missing all generation.
                    Gen = Gen.xs(zone_input, level=self.AGG_BY)
                except KeyError:
                    logger.warning(f"No data in {zone_input}")
                    continue
                Cap = self["generator_Installed_Capacity"].get(scenario)
                Cap = Cap.xs(zone_input, level=self.AGG_BY)

                if pd.notna(start_date_range):
                    Min, Gen, Cap = set_timestamp_date_range(
                        [Min, Gen, Cap], start_date_range, end_date_range
                    )
                    if Gen.empty is True:
                        logger.warning("No data in selected Date Range")
                        continue

                Min = Min.reset_index()
                Min = Min.set_index("gen_name")
                Min = Min.rename(columns={"values": "Hours at Minimum"})

                Gen = Gen.reset_index()
                Gen.tech = Gen.tech.astype("category")
                Gen.tech = Gen.tech.cat.set_categories(self.ordered_gen)
                Gen = Gen.rename(columns={"values": "Output (MWh)"})
                Gen = Gen[~Gen["tech"].isin(self.gen_categories.vre)]
                Gen.index = Gen.timestamp

                Caps = Cap.groupby("gen_name").mean()
                Caps.reset_index()
                Caps = Caps.rename(columns={"values": "Installed Capacity (MW)"})
                Min = pd.merge(Min, Caps, on="gen_name")

                # Find how many hours each generator was operating, for the denominator of the % time at min gen.
                # So remove all zero rows.
                Gen = Gen.loc[Gen["Output (MWh)"] != 0]
                online_gens = Gen.gen_name.unique()
                Min = Min.loc[online_gens]
                Min["hours_online"] = Gen.groupby("gen_name")["Output (MWh)"].count()
                Min["fraction_at_min"] = Min["Hours at Minimum"] / Min.hours_online

                tech_names = Min.tech.unique()
                time_at_min_individ = pd.DataFrame(columns=tech_names, index=[scenario])
                for tech_name in tech_names:
                    stt = Min.loc[Min["tech"] == tech_name]
                    wgts = stt["Installed Capacity (MW)"]
                    if wgts.sum() == 0:
                        wgts = pd.Series([1] * len(stt))
                    output = np.average(stt.fraction_at_min, weights=wgts)
                    time_at_min_individ[tech_name] = output

                time_at_min = time_at_min.append(time_at_min_individ)

            if time_at_min.empty == True:
                outputs[zone_input] = MissingZoneData()
                continue

            Data_Table_Out = time_at_min.T

            mplt = PlotLibrary(figsize=(xdimension * 1.5, ydimension * 1.5))
            fig, ax = mplt.get_figure()

            mplt.barplot(
                time_at_min.T,
                color=self.color_list,
                custom_tick_labels=list(time_at_min.columns),
                ytick_major_fmt="percent",
            )

            ax.set_ylabel(
                "Percentage of time online at minimum generation",
                color="black",
                rotation="vertical",
            )
            # Add legend
            mplt.add_legend()
            # Add title
            if plot_data_settings["plot_title_as_region"]:
                mplt.add_main_title(zone_input)

            outputs[zone_input] = {"fig": fig, "data_table": Data_Table_Out}
        return outputs