# Source code for tyche.IO

"""
I/O utilities for Tyche.
"""

import os     as os
import importlib as il

from inspect import getmembers, isfunction
from numpy import arange

from .DataManager import DesignsDataset, FunctionsDataset, IndicesDataset, InvestmentsDataset, ParametersDataset, ResultsDataset, TranchesDataset


def _not_in_every(*collections):
    """Return the items that are missing from at least one of *collections*."""
    sets = [set(c) for c in collections]
    if not sets:
        return set()
    return set().union(*sets) - set.intersection(*sets)


def _is_integer_dtype(dtype):
    """Return True when *dtype* is a signed or unsigned integer dtype."""
    # Comparing against the string 'int' depends on the platform's default
    # integer width; the dtype "kind" code does not.
    return getattr(dtype, 'kind', None) in ('i', 'u')


def check_tables( path, name ):
    """
    Perform validity checks on input datasets.

    All checks are run before this method terminates; that is, data errors
    are found all at once rather than one at a time from several calls to
    this method. A list of errors found is printed if any check fails. The
    errors include a summary of the check and identify the dataset that
    needs to be changed.

    Parameters
    ----------
    path : str
      Path to directory of datasets
    name : str
      Name of datasets file (XLSX)

    Returns
    -------
    Boolean: True if data is valid, False otherwise
    """

    check_list = []

    mandatory_variables = (
        'Input', 'Input efficiency', 'Input price', 'Lifetime',
        'Output efficiency', 'Output price', 'Scale',
    )
    index_types = ('Capital', 'Input', 'Output', 'Metric')

    # Get the datasets as distinct DataFrames.
    # The DataManager performs column name checks and enforces data types.
    source = os.path.join(path, name)
    indices     = IndicesDataset(source)
    functions   = FunctionsDataset(source)
    designs     = DesignsDataset(source)
    parameters  = ParametersDataset(source)
    results     = ResultsDataset(source)
    tranches    = TranchesDataset(source)
    investments = InvestmentsDataset(source)

    # Cross-check: Identical sets of Technology across designs, indices,
    # parameters, and results datasets.
    # Union minus intersection is used on purpose: chaining
    # symmetric_difference over four sets would only keep names that occur
    # in an odd number of datasets and so miss names present in exactly two.
    _odd_tech_set = _not_in_every(
        designs.index.get_level_values('Technology'),
        indices.index.get_level_values('Technology'),
        parameters.index.get_level_values('Technology'),
        results.index.get_level_values('Technology'),
    )
    if _odd_tech_set:
        check_list.append(
            ('Data Validation: Technology names are inconsistent. Check in '
             f'designs, indices, parameters, and results.\n{_odd_tech_set}\n')
        )

    # Cross-check: Lifetime-Index set in designs dataset must equal the
    # Capital-Index set in indices dataset
    _des_idx = designs.index.to_frame()
    _ind_idx = indices.index.to_frame()
    _odd_cap_set = set(
        _des_idx['Index'][_des_idx['Variable'] == 'Lifetime']
    ).symmetric_difference(
        set(_ind_idx['Index'][_ind_idx['Type'] == 'Capital'])
    )
    if _odd_cap_set:
        check_list.append(
            ('Data Validation: Capital types are inconsistent. Check in designs'
             f' and indices.\n{_odd_cap_set}\n')
        )

    # Cross-check: Category-Tranche combinations in investments must be a
    # subset of the Category-Tranche combinations in tranches
    _inv_idx = investments.index.to_frame()
    _tra_idx = tranches.index.to_frame()
    _odd_cattra_set = {
        f'{i}-{j}' for i, j in _inv_idx[['Category', 'Tranche']].values
    }.difference(
        {f'{i}-{j}' for i, j in _tra_idx[['Category', 'Tranche']].values}
    )
    if _odd_cattra_set:
        check_list.append(
            ('Data Validation: Category-Tranche combinations are inconsistent. Check '
             f'in investments and tranches.\n{_odd_cattra_set}\n')
        )

    # Cross-check: Technology-Scenario values in designs must be exactly the
    # set of Technology-Scenario values in parameters
    _par_idx = parameters.index.to_frame()
    _odd_tecsce_set = {
        f'{i}-{j}' for i, j in _des_idx[['Technology', 'Scenario']].values
    }.symmetric_difference(
        {f'{i}-{j}' for i, j in _par_idx[['Technology', 'Scenario']].values}
    )
    if _odd_tecsce_set:
        check_list.append(
            ('Data Validation: Technology-Scenario combinations are inconsistent. '
             f'Check in designs and parameters.\n{_odd_tecsce_set}\n')
        )

    # Designs check: Variable index levels are exactly Input, Input efficiency,
    # Input price, Lifetime, Output efficiency, Output price, Scale.
    # Check if something's in designs Variable index levels that shouldn't be
    _des_var_set = set(
        designs.index.get_level_values('Variable')
    ).difference(mandatory_variables)
    if _des_var_set:
        check_list.append(
            ('Data Validation: Variable column in designs has unexpected '
             f'value(s).\n{_des_var_set}\n')
        )

    # Designs check: Every Technology-Scenario combination must have
    # the same Index levels within each mandatory Variable
    # Designs check: Every Technology-Scenario combination must have
    # all mandatory Variables
    # Group the (Variable, Index) pairs by Technology-Scenario combination in
    # a single pass; the dict keeps combinations in order of first appearance.
    _varval_by_tecsce = {}
    for _row in designs.index.values:
        _varval_by_tecsce.setdefault(_row[:2], set()).add(_row[2:])
    # The set (no duplicates) of all Variable-Index pairs across all
    # Technology-Scenario combinations
    _var_val_set = set().union(*_varval_by_tecsce.values())

    for _j, _des_tecsce_varval_set in _varval_by_tecsce.items():
        _des_tecsce_var_set = {_vv[0] for _vv in _des_tecsce_varval_set}
        # Check if the Tech-Scen combo is missing any mandatory Variables
        _odd_des_tecsce_var_set = set(mandatory_variables).difference(
            _des_tecsce_var_set
        )
        # Check if the Tech-Scen combo is missing any Variable Indexes
        _odd_des_tecsce_varval_set = _var_val_set.difference(
            _des_tecsce_varval_set
        )
        if _odd_des_tecsce_var_set:
            check_list.append(
                (f'Data Validation: Technology-Scenario combination {_j} has '
                 f'missing mandatory Variables. Check in designs.\n{_odd_des_tecsce_var_set}\n')
            )
        if _odd_des_tecsce_varval_set:
            check_list.append(
                (f'Data Validation: Technology-Scenario combination {_j} has'
                 f' missing Variable Indexes. Check in designs.\n{_odd_des_tecsce_varval_set}\n')
            )

    # Functions check: All unique entries under Model must be a .py file
    # containing the methods defined in the Capital, Fixed, Production, and
    # Metrics columns
    for _tech, _meta in functions.iterrows():
        # First check that the model exists as a .py file.
        # NOTE(review): this path is resolved relative to the current working
        # directory, not relative to `path` -- confirm this is intended.
        if not os.path.exists('../' + _meta['Model'] + '.py'):
            # Record the missing file and carry on with the next model so
            # that all errors are still reported together.
            check_list.append(
                (f'Data Validation: Technology model {_tech} (.py) does not exist in '
                 'the technology directory.\n')
            )
            continue

        try:
            _model = il.import_module("." + _meta["Model"], package="technology")
        except ImportError:
            check_list.append(
                (f'Data Validation: Technology model {_tech} is not importable.\n')
            )
            # Without a module there is nothing to inspect; skipping avoids
            # using an unbound (or stale, from a previous row) _model.
            continue

        # The set of methods within the model must contain all methods named
        # in the Functions dataset, BUT can also contain additional methods.
        # Columns are selected by position: everything after the first two
        # columns and before the last one.
        _odd_model_funs = set(_meta.iloc[2:-1].values).difference(
            {f[0] for f in getmembers(_model, isfunction)}
        )
        if _odd_model_funs:
            check_list.append(
                (f'Data Validation: Technology model {_tech} has inconsistent methods. '
                 f'Revise the Functions dataset or the {_tech} model (.py).\n{_odd_model_funs}\n')
            )

    # Indices check: Type column contains exactly Capital, Input, Output, Metric
    _ind_type_odd = {
        i[1] for i in indices.index.values
    }.symmetric_difference(index_types)
    if _ind_type_odd:
        check_list.append(
            ('Data Validation: Type column in Indices is missing values or '
             f'has unexpected values.\n{_ind_type_odd}\n')
        )

    # Indices check: Offset values within each Type must be sequential
    # integers beginning at zero.
    # Step 1: Check that all Offset values are integers using column dtype
    if not _is_integer_dtype(indices['Offset'].dtype):
        check_list.append(
            'Data validation: Offset values in Indices must be integers.\n'
        )
    else:
        # Step 2: If all Offsets are integers, check for sequential values
        _ind_val = indices['Offset'].reset_index()
        _ind_val_odd = []
        for _t in index_types:
            _offsets = _ind_val['Offset'][_ind_val['Type'] == _t]
            _ind_val_odd.append(
                set(range(len(_offsets))).symmetric_difference(set(_offsets))
            )
        if any(_ind_val_odd):
            check_list.append(
                ('Data Validation: Check that Offset values in Indices are '
                 'sequential integers beginning at zero, within each Type.\n')
            )

    # Cross-check: The Index values for Output Type in the indices dataset,
    # the Index values for Output Variable in results, and the Output
    # efficiency (and Output price) Variable values in designs must be
    # identical.
    _ind_out = {i[2] for i in indices.index.values if i[1] == 'Output'}
    _res_out = {j[2] for j in results.index.values if j[1] == 'Output'}
    _des_out = {
        k[3] for k in designs.index.values
        if k[2] in {'Output price', 'Output efficiency'}
    }
    _out_val_odd = [
        _ind_out.symmetric_difference(_res_out),
        _res_out.symmetric_difference(_des_out),
    ]
    if any(_out_val_odd):
        check_list.append(
            (f'Data Validation: Output Index values {_out_val_odd} are inconsistent in one of '
             'indices, results, and designs.\n')
        )

    # Cross-check: The index values for Input Type in the indices dataset must
    # match the Index values for Input, Input price, and Input efficiency
    # Variable in designs.
    _inp_val_odd = {
        i[2] for i in indices.index.values if i[1] == 'Input'
    }.symmetric_difference(
        {j[3] for j in designs.index.values
         if j[2] in {'Input', 'Input price', 'Input efficiency'}}
    )
    if _inp_val_odd:
        check_list.append(
            (f'Data Validation: Input Index values {_inp_val_odd} are inconsistent in either '
             'indices or in designs.\n')
        )

    # Parameters check: Offset values within every Tech-Scen combo must be
    # sequential integers beginning at zero
    # Step 1: Check that all Offset values are integers using column dtype
    if not _is_integer_dtype(parameters['Offset'].dtype):
        check_list.append(
            'Data validation: Offset values in Parameters must be integers.\n'
        )
    else:
        # Step 2: If all Offsets are integers, check for sequential values
        _par_off_val = parameters['Offset'].reset_index()
        _par_off_val_odd = [
            set(range(len(_offsets))).symmetric_difference(set(_offsets))
            for _, _offsets in _par_off_val.groupby(
                ['Technology', 'Scenario'], sort=False
            )['Offset']
        ]
        if any(_par_off_val_odd):
            check_list.append(
                ('Data Validation: Check that Offset values in Parameters are '
                 'sequential integers beginning at zero, within each Technology-Scenario '
                 'combination.\n')
            )

    # Parameters check: Every Parameter Offset must be the same across
    # all Technology-Scenario combinations
    # Sorting first makes the per-combination Series comparable row by row.
    parameters.sort_index(inplace=True)
    _par_tecsce = list(dict.fromkeys(j[:2] for j in parameters.index.values))
    _par_tecsce_paroff = [parameters.loc[i, 'Offset'] for i in _par_tecsce]
    # Compare each combination with the next one.
    for _cur, _nxt in zip(_par_tecsce_paroff, _par_tecsce_paroff[1:]):
        if not _cur.equals(_nxt):
            check_list.append(
                ('Data Validation: Parameter Offsets are inconsistent. Check '
                 f'in Parameters.\n{_cur}\n')
            )

    # Results check: Every Technology must have a result where both the
    # Variable and the Index are "Cost".
    for _i in results.index.to_frame().Technology.unique():
        _n_cost = sum(
            1 for j in results.index
            if j[0] == _i and j[1:] == ('Cost', 'Cost')
        )
        if _n_cost != 1:
            check_list.append(
                (f'Data Validation: Technology {_i} in Results needs a row where both '
                 'the Variable and the Index are "Cost".\n')
            )

    # Tranches check: Within every Category, the Amounts for each Tranche
    # must be unique.
    # Both Series below are labelled by Category, so the offending names are
    # read straight from their shared index; one message per Category.
    _tra_amounts = tranches.groupby('Category')['Amount']
    _tra_count = _tra_amounts.count()
    _tra_nunique = _tra_amounts.nunique()
    for _cat in _tra_count.index[_tra_count != _tra_nunique]:
        check_list.append(
            (f'Data Validation: Category {_cat}'
             ' in Tranches has duplicate Amounts.\n')
        )

    # Cross-check: Metric Index values are identical in results and in indices
    _met_odd_val = {
        i[2] for i in indices.index.values if i[1] == 'Metric'
    }.symmetric_difference(
        {j[2] for j in results.index.values if j[1] == 'Metric'}
    )
    if _met_odd_val:
        check_list.append(
            (f'Data Validation: Metric Index values {_met_odd_val} are inconsistent either '
             'in results or in indices.\n')
        )

    # Report every problem found, then signal overall validity.
    if check_list:
        for _msg in check_list:
            print(_msg)
        return False
    return True