# Source code for marmot.plottingmodules.plotutils.timeseries_modifiers
"""Collection of functions that modify, or infer information from, time series data.

@author: Daniel Levie
"""
import datetime as dt
import logging
from typing import List, Tuple, Union
import numpy as np
import pandas as pd
logger = logging.getLogger("plotter." + __name__)


def _slice_timestamp_range(
    df: pd.DataFrame, start_date: str, end_date: str
) -> pd.DataFrame:
    """Return the rows of one dataframe that fall between two dates.

    Args:
        df (pd.DataFrame): df indexed by a pd.DatetimeIndex or by a
            pd.MultiIndex that has a level named 'timestamp'.
        start_date (str): start date
        end_date (str): end date

    Raises:
        ValueError: If df.index is neither a pd.DatetimeIndex nor a
            pd.MultiIndex with level 'timestamp'.

    Returns:
        pd.DataFrame: rows selected by the label slice start_date:end_date.
    """
    index = df.index
    if isinstance(index, pd.DatetimeIndex):
        return df.loc[start_date:end_date]
    # Check the level name up front so a MultiIndex without 'timestamp'
    # raises the documented ValueError instead of a KeyError from xs().
    if isinstance(index, pd.MultiIndex) and "timestamp" in index.names:
        # drop_level=False keeps the 'timestamp' level in the result index.
        return df.xs(
            slice(start_date, end_date), level="timestamp", drop_level=False
        )
    raise ValueError(
        "'df.index' must be of type pd.DatetimeIndex or "
        "type pd.MultiIndex with level 'timestamp'"
    )


def set_timestamp_date_range(
    dfs: Union[pd.DataFrame, List[pd.DataFrame]], start_date: str, end_date: str
) -> Union[pd.DataFrame, Tuple[pd.DataFrame, ...]]:
    """Sets the timestamp date range based on start_date and end_date strings

    Takes either a single df or a list (or tuple) of dfs as input.
    The index must be a pd.DatetimeIndex or a multiindex with level timestamp.

    Args:
        dfs (Union[pd.DataFrame, List[pd.DataFrame]]): df(s) to set date range for
        start_date (str): start date
        end_date (str): end date

    Raises:
        ValueError: If df.index is not of type pd.DatetimeIndex or
            type pd.MultiIndex with level timestamp.

    Returns:
        pd.DataFrame or Tuple[pd.DataFrame]: adjusted dataframes. A single
        df in gives a single df out; a list or tuple in gives a tuple out,
        in the same order.
    """
    # One-line message with lazy %-formatting; a backslash continuation
    # inside the literal would embed the next line's indentation in the log.
    logger.info("Plotting specific date range: %s to %s", start_date, end_date)

    if isinstance(dfs, (list, tuple)):
        return tuple(
            _slice_timestamp_range(df, start_date, end_date) for df in dfs
        )
    return _slice_timestamp_range(dfs, start_date, end_date)
def get_sub_hour_interval_count(df: pd.DataFrame) -> int:
    """Detects the interval spacing of timeseries data.

    Used to adjust sums of certain variables for sub-hourly data. The
    spacing is taken from the first two distinct values of the 'timestamp'
    index level.

    Args:
        df (pd.DataFrame): pandas dataframe with timestamp in index.

    Returns:
        int: Number of intervals per 60 minutes. Returns 1 when the spacing
        is one hour or longer, or when fewer than two distinct timestamps
        are present and no spacing can be measured. A spacing that does not
        divide 60 minutes evenly is returned as a float, so the value stays
        exact when used as a scaling factor.
    """
    timestamps = df.index.get_level_values("timestamp").unique()
    # A spacing needs two distinct timestamps; fall back to hourly.
    if len(timestamps) < 2:
        return 1

    time_delta = timestamps[1] - timestamps[0]
    # Finds intervals in 60 minute period
    intervals_per_hour = float(60 / (time_delta / np.timedelta64(1, "m")))

    # If intervals are greater than 1 hour, returns 1
    if intervals_per_hour <= 1:
        return 1
    # Whole counts are returned as int to match the annotated return type.
    if intervals_per_hour.is_integer():
        return int(intervals_per_hour)
    return intervals_per_hour
def adjust_for_leapday(self, df: pd.DataFrame) -> pd.DataFrame:
    """Shifts dataframe ahead by one day.

    Use if a non-leap year time series is modeled with a leap year time index.
    Modeled year must be included in the scenario parent directory name.

    The shift is applied only when ``self.processed_hdf5_folder`` contains
    neither "2008" nor "2012" and the first 'timestamp' value is later than
    2024-02-28 00:00. The index of ``df`` is reassigned in place.

    Args:
        df (pd.DataFrame): Dataframe to process. The index is either a
            pd.MultiIndex with a 'timestamp' level or a pd.DatetimeIndex
            named 'timestamp'.

    Returns:
        pd.DataFrame: Same dataframe, with time index shifted.
    """
    # str() lets the folder be a pathlib.Path as well as a plain string.
    folder = str(self.processed_hdf5_folder)
    if "2008" in folder or "2012" in folder:
        return df

    timestamps = df.index.get_level_values("timestamp")
    # Nothing to shift in an empty frame.
    if len(timestamps) == 0:
        return df

    # NOTE(review): the cutoff date is hard-coded to 2024 - confirm this is
    # intended for every modeled leap-year index.
    if not timestamps[0] > dt.datetime(2024, 2, 28, 0, 0):
        return df

    if isinstance(df.index, pd.MultiIndex):
        position = df.index.names.index("timestamp")
        df.index = df.index.set_levels(
            df.index.levels[position].shift(1, freq="D"),
            level="timestamp",
        )
    else:
        # set_levels() only exists on MultiIndex; shift a flat index directly.
        df.index = df.index.shift(1, freq="D")
    return df
def sort_duration(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Converts a dataframe time series into a duration curve.

    Rows are ordered by ``col`` from largest to smallest, the index is
    reset to a default integer index, and the resulting 'timestamp' column
    is dropped. Any other index levels are kept as columns.

    Args:
        df (pd.DataFrame): pandas multiindex dataframe.
        col (str): Column name by which to sort.

    Returns:
        pd.DataFrame: Dataframe with values sorted from largest to smallest.
    """
    descending = df.sort_values(by=col, ascending=False)
    # reset_index() moves every index level, including 'timestamp', into
    # columns and leaves a fresh 0..n-1 index in sorted order.
    flattened = descending.reset_index()
    return flattened.drop(columns=["timestamp"])