Source code for r2x_core.store

"""Data Storage for managing R2X data files and their metadata."""

import json
from collections.abc import Iterable
from pathlib import Path
from typing import TYPE_CHECKING, Any

from loguru import logger

from .datafile import DataFile
from .reader import DataReader
from .utils import filter_valid_kwargs

if TYPE_CHECKING:
    from .plugin_config import PluginConfig


class DataStore:
    """Container for managing data file mappings and loading data.

    The DataStore class provides a centralized interface for managing
    collections of data files, their metadata, and coordinating data loading
    operations. It maintains a registry of DataFile instances and delegates
    actual file reading operations to a DataReader instance.

    Parameters
    ----------
    folder : str or Path, optional
        Base directory containing the data files. If None, uses the current
        working directory.
    reader : DataReader, optional
        Custom data reader instance for handling file I/O operations.
        If None, creates a default DataReader instance.

    Attributes
    ----------
    folder : Path
        Resolved absolute path to the base data directory.
    reader : DataReader
        Data reader instance used for file operations.

    Examples
    --------
    Create a basic data store:

    >>> store = DataStore(folder="/path/to/data")
    >>> data_file = DataFile(name="generators", fpath="gen_data.csv")
    >>> store.add_data_file(data_file)
    >>> data = store.read_data_file("generators")

    Load from JSON configuration:

    >>> store = DataStore.from_json("config.json", folder="/path/to/data")
    >>> files = store.list_data_files()
    >>> print(files)
    ['generators', 'transmission', 'load']

    Batch operations:

    >>> files = [DataFile(name="gen", fpath="gen.csv"), DataFile(name="load", fpath="load.csv")]
    >>> store.add_data_files(files)
    >>> store.remove_data_files(["gen", "load"])

    See Also
    --------
    DataFile : Data file metadata and configuration
    DataReader : File reading and processing operations

    Notes
    -----
    The DataStore maintains DataFile metadata in memory but delegates actual
    file reading to the DataReader, which may implement its own caching
    strategies. The store itself does not cache file contents, only the
    DataFile configurations.
    """

    def __init__(self, folder: str | Path | None = None, reader: DataReader | None = None) -> None:
        """Initialize the DataStore.

        Parameters
        ----------
        folder : str | Path | None, optional
            Base directory containing the data files. If None, uses the
            current working directory. Default is None.
        reader : DataReader | None, optional
            Custom data reader instance for handling file I/O operations.
            If None, creates a default DataReader instance. Default is None.

        Raises
        ------
        FileNotFoundError
            If the specified folder does not exist.
        """
        if folder is None:
            folder = Path.cwd()
        folder_path = Path(folder)
        if not folder_path.exists():
            raise FileNotFoundError(f"Folder does not exist: {folder_path}")
        self._reader = reader or DataReader()
        self.folder = folder_path.resolve()
        self._cache: dict[str, DataFile] = {}
        logger.debug("Initialized DataStore with folder: {}", self.folder)

    def __contains__(self, name: str) -> bool:
        """Check if a data file exists in the store.

        Parameters
        ----------
        name : str
            Name of the data file to check for.

        Returns
        -------
        bool
            True if the data file exists in the store, False otherwise.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> data_file = DataFile(name="generators", fpath="gen.csv")
        >>> store.add_data_file(data_file)
        >>> "generators" in store
        True
        >>> "missing_file" in store
        False

        Notes
        -----
        This method enables the use of the 'in' operator with DataStore
        instances, providing a convenient way to check for data file
        existence without raising exceptions.
        """
        return name in self._cache

    @classmethod
    def from_plugin_config(cls, config: "PluginConfig", folder: Path | str) -> "DataStore":
        """Create a DataStore instance from a PluginConfig.

        This is a convenience constructor that automatically discovers and
        loads the file mapping JSON associated with a plugin configuration
        class.

        Parameters
        ----------
        config : PluginConfig
            Plugin configuration instance. The file mapping path will be
            discovered from the config class using get_file_mapping_path().
        folder : Path or str
            Base directory containing the data files referenced in the
            configuration.

        Returns
        -------
        DataStore
            A new DataStore instance populated with DataFile configurations
            from the plugin's file mapping.

        Raises
        ------
        FileNotFoundError
            If the configuration file does not exist or if data files are
            missing.
        TypeError
            If the JSON file does not contain a valid array structure.
        ValidationError
            If any DataFile configuration in the JSON is invalid.

        Examples
        --------
        Simple usage:

        >>> from r2x_reeds.config import ReEDSConfig
        >>> config = ReEDSConfig(solve_year=2030, weather_year=2012)
        >>> store = DataStore.from_plugin_config(config, folder="/data/reeds")
        >>> store.list_data_files()
        ['generators', 'buses', 'transmission']

        See Also
        --------
        from_json : Create from an explicit JSON file path
        PluginConfig.get_file_mapping_path : Get the file mapping path

        Notes
        -----
        This method provides a cleaner API than manually calling
        config.get_file_mapping_path() and then DataStore.from_json(). It's
        the recommended way to create a DataStore for plugin-based workflows.
        """
        mapping_path = config.__class__.get_file_mapping_path()
        logger.info("Loading DataStore from plugin config: {}", config.__class__.__name__)
        logger.debug("File mapping path: {}", mapping_path)
        return cls.from_json(mapping_path, folder)

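    # Hedged sketch (illustrative, not part of the module): how a plugin
    # config plugs into this constructor. `MyPluginConfig` is a hypothetical
    # PluginConfig subclass whose class method get_file_mapping_path() points
    # at a packaged file-mapping JSON:
    #
    #     class MyPluginConfig(PluginConfig):
    #         solve_year: int
    #
    #     config = MyPluginConfig(solve_year=2030)
    #     store = DataStore.from_plugin_config(config, folder="/data/run")
    #
    # which is equivalent to:
    #
    #     DataStore.from_json(MyPluginConfig.get_file_mapping_path(), "/data/run")
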
    @classmethod
    def from_json(cls, fpath: Path | str, folder: Path | str) -> "DataStore":
        """Create a DataStore instance from a JSON configuration file.

        Parameters
        ----------
        fpath : Path or str
            Path to the JSON configuration file containing DataFile
            specifications.
        folder : Path or str
            Base directory containing the data files referenced in the
            configuration.

        Returns
        -------
        DataStore
            A new DataStore instance populated with DataFile configurations
            from the JSON file.

        Raises
        ------
        FileNotFoundError
            If the configuration file does not exist.
        TypeError
            If the JSON file does not contain a valid array structure.
        ValidationError
            If any DataFile configuration in the JSON is invalid (raised by
            Pydantic during DataFile creation).
        KeyError
            If any data file names are duplicated (raised during
            add_data_files).

        Examples
        --------
        Create a JSON configuration file:

        >>> config = [
        ...     {"name": "generators", "fpath": "gen_data.csv", "description": "Generator capacity data"},
        ...     {"name": "load", "fpath": "load_data.csv", "description": "Load profiles"},
        ... ]
        >>> import json
        >>> with open("config.json", "w") as f:
        ...     json.dump(config, f)

        Load the DataStore:

        >>> store = DataStore.from_json("config.json", "/path/to/data")
        >>> store.list_data_files()
        ['generators', 'load']

        See Also
        --------
        to_json : Save DataStore configuration to JSON
        DataFile : Individual data file configuration structure

        Notes
        -----
        The JSON file must contain an array of objects, where each object
        represents a valid DataFile configuration with at minimum 'name' and
        'fpath' fields.
        """
        fpath = Path(fpath)
        if not fpath.exists():
            raise FileNotFoundError(f"Configuration file not found: {fpath}")

        with open(fpath, encoding="utf-8") as f:
            data_files_json = json.load(f)

        if not isinstance(data_files_json, list):
            msg = f"JSON file `{fpath}` is not a JSON array."
            raise TypeError(msg)

        store = cls(folder=folder)

        # Check first that each file exists in the folder passed. In the
        # future we could potentially add arbitrary files.
        files_not_found = []
        for file_data in data_files_json:
            updated_fpath = Path(folder) / file_data["fpath"]
            if not updated_fpath.exists():
                logger.warning("File {} not found on: {}", file_data["name"], updated_fpath)
                files_not_found.append(file_data["name"])
                continue
            file_data["fpath"] = updated_fpath

        if files_not_found:
            msg = f"The following files {files_not_found} were not found in the specified {folder=}."
            raise FileNotFoundError(msg)

        data_files = [DataFile(**file_data) for file_data in data_files_json]
        store.add_data_files(data_files)
        logger.info("Loaded {} data files from {}", len(data_files), fpath)
        return store

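    # Hedged sketch of the on-disk layout from_json expects: `fpath` values
    # in the JSON are relative and are rewritten to `folder / fpath` before
    # the DataFile models are built. Names and files here are illustrative:
    #
    #     /path/to/data/
    #     ├── gen_data.csv
    #     └── load_data.csv
    #
    #     config.json:
    #     [
    #       {"name": "generators", "fpath": "gen_data.csv"},
    #       {"name": "load", "fpath": "load_data.csv"}
    #     ]
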
    def add_data_file(self, data_file: DataFile, overwrite: bool = False) -> None:
        """Add a single data file to the store.

        Parameters
        ----------
        data_file : DataFile
            The data file configuration to add to the store.
        overwrite : bool, optional
            Whether to overwrite an existing file with the same name.
            Default is False.

        Raises
        ------
        KeyError
            If a file with the same name already exists and overwrite is
            False.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> data_file = DataFile(name="generators", fpath="gen_data.csv")
        >>> store.add_data_file(data_file)
        >>> # Overwrite existing file
        >>> new_data_file = DataFile(name="generators", fpath="new_gen_data.csv")
        >>> store.add_data_file(new_data_file, overwrite=True)

        See Also
        --------
        add_data_files : Add multiple data files at once
        remove_data_file : Remove a data file from the store
        """
        if data_file.name in self._cache and not overwrite:
            msg = f"Data file '{data_file.name}' already exists. "
            msg += "Use overwrite=True to replace it."
            raise KeyError(msg)
        self._cache[data_file.name] = data_file
        logger.debug("Added data file '{}' to store", data_file.name)

    def add_data_files(self, data_files: Iterable[DataFile], overwrite: bool = False) -> None:
        """Add multiple data files to the store.

        Parameters
        ----------
        data_files : Iterable[DataFile]
            Collection of data file configurations to add to the store.
        overwrite : bool, optional
            Whether to overwrite existing files with the same names.
            Default is False.

        Raises
        ------
        KeyError
            If any file with the same name already exists and overwrite is
            False.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> files = [
        ...     DataFile(name="generators", fpath="gen.csv"),
        ...     DataFile(name="transmission", fpath="trans.csv"),
        ...     DataFile(name="load", fpath="load.csv")
        ... ]
        >>> store.add_data_files(files)

        See Also
        --------
        add_data_file : Add a single data file
        remove_data_files : Remove multiple data files
        """
        for data_file in data_files:
            self.add_data_file(data_file, overwrite=overwrite)

    def remove_data_file(self, name: str) -> None:
        """Remove a data file from the store.

        Parameters
        ----------
        name : str
            Name of the data file to remove.

        Raises
        ------
        KeyError
            If the specified file name is not present in the store.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> data_file = DataFile(name="generators", fpath="gen.csv")
        >>> store.add_data_file(data_file)
        >>> store.remove_data_file("generators")

        See Also
        --------
        remove_data_files : Remove multiple data files at once
        add_data_file : Add a data file to the store
        """
        if name not in self._cache:
            raise KeyError(f"Data file '{name}' not found in store.")
        del self._cache[name]
        logger.debug("Removed data file '{}' from store", name)

    def remove_data_files(self, names: Iterable[str]) -> None:
        """Remove multiple data files from the store.

        Parameters
        ----------
        names : Iterable[str]
            Collection of data file names to remove.

        Raises
        ------
        KeyError
            If any specified file name is not present in the store.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> files = [
        ...     DataFile(name="gen", fpath="gen.csv"),
        ...     DataFile(name="load", fpath="load.csv")
        ... ]
        >>> store.add_data_files(files)
        >>> store.remove_data_files(["gen", "load"])

        See Also
        --------
        remove_data_file : Remove a single data file
        add_data_files : Add multiple data files
        """
        for name in names:
            self.remove_data_file(name)

    def get_data_file_by_name(self, name: str) -> DataFile:
        """Retrieve a data file configuration by name.

        Parameters
        ----------
        name : str
            Name of the data file to retrieve.

        Returns
        -------
        DataFile
            The data file configuration object.

        Raises
        ------
        KeyError
            If the specified file name is not present in the store.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> data_file = store.get_data_file_by_name("generators")
        >>> print(data_file.fpath)
        generators.csv

        See Also
        --------
        list_data_files : Get all data file names
        read_data_file : Load the actual file contents
        """
        if name not in self._cache:
            available_files = list(self._cache.keys())
            raise KeyError(f"'{name}' not present in store. Available files: {available_files}")
        return self._cache[name]

    def list_data_files(self) -> list[str]:
        """List all data file names in the store.

        Returns
        -------
        list[str]
            Sorted list of all data file names present in the store.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> files = [
        ...     DataFile(name="generators", fpath="gen.csv"),
        ...     DataFile(name="load", fpath="load.csv")
        ... ]
        >>> store.add_data_files(files)
        >>> store.list_data_files()
        ['generators', 'load']

        See Also
        --------
        get_data_file_by_name : Get a specific data file configuration
        __contains__ : Check if a specific file exists
        """
        return sorted(self._cache.keys())

    def read_data_file(self, name: str, *, use_cache: bool = True) -> Any:
        """Load data from a file using the configured reader.

        Parameters
        ----------
        name : str
            Name of the data file to load.
        use_cache : bool, optional
            Whether to use cached data if available. Keyword-only.
            Default is True.

        Returns
        -------
        Any
            The loaded data; the type depends on the file type and reader
            configuration.

        Raises
        ------
        KeyError
            If the specified file name is not present in the store.
        FileNotFoundError
            If the file does not exist and is not marked as optional.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> data_file = DataFile(name="generators", fpath="gen.csv")
        >>> store.add_data_file(data_file)
        >>> data = store.read_data_file("generators")

        See Also
        --------
        get_data_file_by_name : Get the file configuration
        clear_cache : Clear the reader's cache
        """
        if name not in self:
            raise KeyError(f"'{name}' not present in store.")
        data_file = self._cache[name]
        return self.reader.read_data_file(self.folder, data_file, use_cache=use_cache)

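    # Hedged usage note: `use_cache` is keyword-only, so a fresh read that
    # bypasses any DataReader-level caching looks like:
    #
    #     data = store.read_data_file("generators", use_cache=False)
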
    def clear_cache(self) -> None:
        """Clear both the data reader's cache and the data store's file configurations.

        This method clears the underlying DataReader's cache of loaded file
        contents and also removes all data file configurations from the
        DataStore.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> # Load some data files...
        >>> store.clear_cache()  # Clear cached file contents and configurations

        See Also
        --------
        reader : Access the underlying DataReader instance
        """
        self.reader.clear_cache()
        self._cache.clear()
        logger.debug("Cleared data reader cache and data store configurations")

    def to_json(self, fpath: str | Path, **model_dump_kwargs: Any) -> None:
        """Save the DataStore configuration to a JSON file.

        Parameters
        ----------
        fpath : str or Path
            Path where the JSON configuration file will be saved.
        **model_dump_kwargs : Any
            Additional keyword arguments passed to the DataFile.model_dump
            method for controlling serialization behavior.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> files = [DataFile(name="generators", fpath="gen.csv"), DataFile(name="load", fpath="load.csv")]
        >>> store.add_data_files(files)
        >>> store.to_json("config.json")
        >>> # Save with custom serialization options
        >>> store.to_json("config.json", exclude_none=True)

        See Also
        --------
        from_json : Load DataStore configuration from JSON
        DataFile.model_dump : Individual file serialization method

        Notes
        -----
        The resulting JSON file will contain an array of DataFile
        configurations that can be loaded back using the `from_json` class
        method.
        """
        json_data = [
            data_file.model_dump(
                mode="json",
                round_trip=True,
                **filter_valid_kwargs(data_file.model_dump, model_dump_kwargs),
            )
            for data_file in self._cache.values()
        ]
        with open(fpath, "w", encoding="utf-8") as f:
            json.dump(json_data, f, indent=2, ensure_ascii=False)
        logger.info("Created JSON file at {}", fpath)

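    # Hedged round-trip sketch, assuming filter_valid_kwargs drops options
    # that DataFile.model_dump does not accept (rather than raising), so
    # callers may pass a superset of serialization options:
    #
    #     store.to_json("out.json", exclude_none=True)
    #     restored = DataStore.from_json("out.json", folder=store.folder)
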
    @property
    def reader(self) -> DataReader:
        """Get the data reader instance.

        Returns
        -------
        DataReader
            The configured data reader instance.

        Examples
        --------
        >>> store = DataStore("/path/to/data")
        >>> reader = store.reader
        >>> reader.clear_cache()
        """
        return self._reader

    @reader.setter
    def reader(self, reader: DataReader) -> None:
        """Set a new data reader instance.

        Parameters
        ----------
        reader : DataReader
            New data reader instance to use.

        Raises
        ------
        TypeError
            If reader is not a valid DataReader instance.
        """
        if not isinstance(reader, DataReader):
            raise TypeError("reader must be a valid DataReader instance.")
        self._reader = reader

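if __name__ == "__main__":
    # Hedged demo, not part of the library API: a minimal sketch of the
    # workflow against a temporary directory. Assumes DataFile accepts the
    # `name` and `fpath` keyword arguments shown in the docstrings above and
    # round-trips the relative fpath given here.
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        data_dir = Path(tmp)
        (data_dir / "gen.csv").write_text("name,capacity\ng1,100\n")

        store = DataStore(folder=data_dir)
        store.add_data_file(DataFile(name="generators", fpath="gen.csv"))
        assert "generators" in store
        print(store.list_data_files())  # -> ['generators']

        # Serialize the mapping and load it back.
        store.to_json(data_dir / "mapping.json")
        restored = DataStore.from_json(data_dir / "mapping.json", folder=data_dir)
        print(restored.list_data_files())  # -> ['generators']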