Source code for marmot.utils.dataio

"""Module containing all data import and export functions 

Handles the reading and writing of data to h5 files and csv files

@author: Daniel Levie
"""

import logging
from pathlib import Path

import pandas as pd


def save_to_h5(
    df: pd.DataFrame,
    file_name: Path,
    key: str,
    mode: str = "a",
    complevel: int = 9,
    complib: str = "blosc:zlib",
    **kwargs,
) -> None:
    """Saves data to formatted hdf5 file.

    Args:
        df (pd.DataFrame): Dataframe to save.
        file_name (Path): Name of hdf5 file.
        key (str): Formatted property identifier, e.g. generator_Generation.
        mode (str, optional): File access mode. Defaults to "a".
        complevel (int, optional): Compression level. Defaults to 9.
        complib (str, optional): Compression library. Defaults to 'blosc:zlib'.
        **kwargs
            These parameters will be passed to the pandas.to_hdf function.
    """
    logger = logging.getLogger("formatter." + __name__)
    logger.info("Saving data to h5 file...")
    df.to_hdf(
        file_name,
        key=key,
        mode=mode,
        complevel=complevel,
        complib=complib,
        **kwargs,
    )
    logger.info("Data saved to h5 file successfully\n")
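# Hedged usage sketch (illustrative only, not part of the original module):
# appending a formatted property dataframe to a scenario h5 file. The
# dataframe contents and output file name below are hypothetical.
#
#   import pandas as pd
#   from pathlib import Path
#   from marmot.utils.dataio import save_to_h5
#
#   gen = pd.DataFrame({"values": [100.0, 120.5, 95.3]})  # placeholder data
#   save_to_h5(gen, Path("Base DA_formatted.h5"), key="generator_Generation")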
def write_metadata_to_h5(
    df: pd.DataFrame,
    file_name: Path,
    key: str,
    partition: str = "model_metadata",
    mode: str = "a",
    **kwargs,
) -> None:
    """Save metadata to formatted h5 file.

    Args:
        df (pd.DataFrame): Dataframe to save.
        file_name (Path): Name of hdf5 file.
        key (str): Metadata key, e.g. objects/generators.
        partition (str, optional): Metadata partition. Defaults to "model_metadata".
        mode (str, optional): File access mode. Defaults to "a".
        **kwargs
            These parameters will be passed to the pandas.to_hdf function.
    """
    df.to_hdf(
        file_name,
        key=f"metadata/{partition}/{key}",
        mode=mode,
        **kwargs,
    )
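# Hedged usage sketch (illustrative only): writing a metadata table under the
# metadata/model_metadata/objects/generators key of a scenario h5 file. The
# dataframe contents and file name are hypothetical.
#
#   meta = pd.DataFrame({"name": ["gen1", "gen2"], "category": ["Coal", "Wind"]})
#   write_metadata_to_h5(meta, Path("Base DA_formatted.h5"), key="objects/generators")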
def read_processed_h5file(
    processed_hdf5_folder: Path, plx_prop_name: str, scenario: str
) -> pd.DataFrame:
    """Reads data from processed h5 file.

    Args:
        processed_hdf5_folder (Path): Directory containing Marmot h5 solution files.
        plx_prop_name (str): Name of property, e.g. generator_Generation.
        scenario (str): Name of scenario.

    Returns:
        pd.DataFrame: Requested dataframe, or an empty dataframe if the
            property key is not found in the file.
    """
    logger = logging.getLogger("plotter." + __name__)
    try:
        with pd.HDFStore(
            processed_hdf5_folder.joinpath(f"{scenario}_formatted.h5"), "r"
        ) as file:
            return file[plx_prop_name]
    except KeyError:
        return pd.DataFrame()
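# Hedged usage sketch (illustrative only): reading a processed property back
# from a scenario's formatted h5 file; an empty dataframe signals a missing
# key. The folder and scenario names are hypothetical.
#
#   df = read_processed_h5file(
#       Path("Processed_HDF5_folder"), "generator_Generation", "Base DA"
#   )
#   if df.empty:
#       print("generator_Generation not found in Base DA_formatted.h5")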
def read_csv_property_file(
    csv_property_folder: Path, plx_prop_name: str, scenario: str
) -> pd.DataFrame:
    """Read formatted data from csv file.

    Allows data to be read in from a csv if it is missing from the
    formatted h5 file. Format of data must adhere to the standard Marmot
    formats for each data class, e.g. generator, line etc.

    Filename should be of the following pattern:

    - {scenario}_{plx_prop_name}.csv

    An example for line_Net_Import:

    - Base DA_line_Net_Import.csv

    The Marmot formatter will not create these files, they must be
    created manually.

    Args:
        csv_property_folder (Path): Directory containing csv property files.
        plx_prop_name (str): Name of property, e.g. generator_Generation.
        scenario (str): Name of scenario.

    Returns:
        pd.DataFrame: Requested dataframe or empty dataframe if file not found.
    """
    logger = logging.getLogger("plotter." + __name__)
    try:
        df = pd.read_csv(
            csv_property_folder.joinpath(f"{scenario}_{plx_prop_name}.csv"),
            index_col=False,
        )
        df.timestamp = pd.to_datetime(df.timestamp)
        df_cols = list(df.columns)
        df_cols.pop(df_cols.index("values"))
        df = df.set_index(df_cols)
        return df
    except FileNotFoundError:
        logger.warning(
            f"{scenario}_{plx_prop_name}.csv was not found in "
            f"{csv_property_folder}. Data is MISSING."
        )
        return pd.DataFrame()
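# Hedged usage sketch (illustrative only): falling back to a manually created
# csv property file when a property is absent from the formatted h5 file. The
# folder, property, and scenario names below are hypothetical.
#
#   df = read_csv_property_file(
#       Path("csv_properties"), "line_Net_Import", "Base DA"
#   )
#   # Expects a file named "Base DA_line_Net_Import.csv" containing a
#   # "timestamp" column and a "values" column, per the docstring above.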