# -*- coding: utf-8 -*-
"""
reV econ module (lcoe-fcr, single owner, etc...)
"""
import logging
import numpy as np
import os
import pandas as pd
import pprint
from warnings import warn

from reV.config.project_points import PointsControl
from reV.generation.base import BaseGen
from reV.handlers.outputs import Outputs
from reV.SAM.econ import LCOE as SAM_LCOE
from reV.SAM.econ import SingleOwner
from reV.SAM.windbos import WindBos
from reV.utilities.exceptions import (ExecutionError,
                                      OffshoreWindInputWarning)
from reV.utilities import ModuleName

from rex.resource import Resource
from rex.multi_file_resource import MultiFileResource
from rex.utilities.utilities import check_res_file

logger = logging.getLogger(__name__)


class Econ(BaseGen):
    """Econ"""

    # Mapping of reV econ output strings to SAM econ modules
    OPTIONS = {'lcoe_fcr': SAM_LCOE,
               'ppa_price': SingleOwner,
               'project_return_aftertax_npv': SingleOwner,
               'lcoe_real': SingleOwner,
               'lcoe_nom': SingleOwner,
               'flip_actual_irr': SingleOwner,
               'gross_revenue': SingleOwner,
               'total_installed_cost': WindBos,
               'turbine_cost': WindBos,
               'sales_tax_cost': WindBos,
               'bos_cost': WindBos,
               'fixed_charge_rate': SAM_LCOE,
               'capital_cost': SAM_LCOE,
               'fixed_operating_cost': SAM_LCOE,
               'variable_operating_cost': SAM_LCOE,
               }
    """Available ``reV`` econ `output_request` options"""

    # Mapping of reV econ outputs to scale factors and units.
    # Type is scalar or array and corresponds to the SAM single-site output
    OUT_ATTRS = BaseGen.ECON_ATTRS

    def __init__(self, project_points, sam_files, cf_file, site_data=None,
                 output_request=('lcoe_fcr',), sites_per_worker=100,
                 memory_utilization_limit=0.4, append=False):
        """reV econ analysis class.

        ``reV`` econ analysis runs SAM econ calculations, typically to
        compute LCOE (using :py:class:`PySAM.Lcoefcr.Lcoefcr`), though
        :py:class:`PySAM.Singleowner.Singleowner` or
        :py:class:`PySAM.Windbos.Windbos` calculations can also be
        performed simply by requesting outputs from those computation
        modules. See the keys of
        :attr:`Econ.OPTIONS <reV.econ.econ.Econ.OPTIONS>` for all
        available econ outputs. Econ computations rely on an input
        generation (i.e. capacity factor) profile. You can request
        ``reV`` to run the analysis for one or more "sites", which
        correspond to the meta indices in the generation data.

        Parameters
        ----------
        project_points : int | list | tuple | str | dict | pd.DataFrame | slice
            Input specifying which sites to process. A single integer
            representing the GID of a site may be specified to evaluate
            reV at a single location. A list or tuple of integers
            (or slice) representing the GIDs of multiple sites can be
            specified to evaluate reV at multiple specific locations.
            A string pointing to a project points CSV file may also be
            specified. Typically, the CSV contains two columns:

                - ``gid``: Integer specifying the GID of each site.
                - ``config``: Key in the `sam_files` input dictionary
                  (see below) corresponding to the SAM configuration to
                  use for each particular site. This value can also be
                  ``None`` (or left out completely) if you specify only
                  a single SAM configuration file as the `sam_files`
                  input.

            The CSV file may also contain site-specific inputs by
            including a column named after a config keyword (e.g. a
            column called ``capital_cost`` may be included to specify a
            site-specific capital cost value for each location). Columns
            that do not correspond to a config key may also be included,
            but they will be ignored. A DataFrame following the same
            guidelines as the CSV input (or a dictionary that can be
            used to initialize such a DataFrame) may be used for this
            input as well.
        sam_files : dict | str
            A dictionary mapping SAM input configuration ID(s) to SAM
            configuration(s). Keys are the SAM config ID(s) which
            correspond to the ``config`` column in the project points
            CSV. Values for each key are either a path to a
            corresponding SAM config file or a full dictionary
            of SAM config inputs. For example::

                sam_files = {
                    "default": "/path/to/default/sam.json",
                    "onshore": "/path/to/onshore/sam_config.yaml",
                    "offshore": {
                        "sam_key_1": "sam_value_1",
                        "sam_key_2": "sam_value_2",
                        ...
                    },
                    ...
                }

            This input can also be a string pointing to a single SAM
            config file. In this case, the ``config`` column of the CSV
            points input should be set to ``None`` or left out
            completely. See the documentation for the ``reV`` SAM class
            (e.g. :class:`reV.SAM.generation.WindPower`,
            :class:`reV.SAM.generation.PvWattsv8`,
            :class:`reV.SAM.generation.Geothermal`, etc.) for
            documentation on the allowed and/or required SAM config
            file inputs.
        cf_file : str
            Path to reV output generation file containing a capacity
            factor output.

            .. Note:: If executing ``reV`` from the command line, this
              path can contain brackets ``{}`` that will be filled in
              by the `analysis_years` input. Alternatively, this input
              can be set to ``"PIPELINE"`` to parse the output of the
              previous step (``reV`` generation) and use it as input to
              this call. However, note that duplicate executions of
              ``reV`` generation within the pipeline may invalidate
              this parsing, meaning the `cf_file` input will have to be
              specified manually.

        site_data : str | pd.DataFrame, optional
            Site-specific input data for SAM calculation. If this input
            is a string, it should be a path that points to a CSV file.
            Otherwise, this input should be a DataFrame with
            pre-extracted site data. Rows in this table should match
            the input sites via a ``gid`` column. The rest of the
            columns should match configuration input keys that will
            take site-specific values. Note that some or all
            site-specific inputs can be specified via the
            `project_points` input table instead. If ``None``, no
            site-specific data is considered. By default, ``None``.
        output_request : list | tuple, optional
            List of output variables requested from SAM. Can be any of
            the parameters in the "Outputs" group of the PySAM module
            (e.g. :py:class:`PySAM.Windpower.Windpower.Outputs`,
            :py:class:`PySAM.Pvwattsv8.Pvwattsv8.Outputs`,
            :py:class:`PySAM.Geothermal.Geothermal.Outputs`, etc.)
            being executed. This list can also include a select number
            of SAM config/resource parameters to include in the output:
            any key in any of the
            `output attribute JSON files <https://tinyurl.com/4bmrpe3j/>`_
            may be requested. Time-series profiles requested via this
            input are output in UTC. By default, ``('lcoe_fcr',)``.
        sites_per_worker : int, optional
            Number of sites to run in series on a worker. ``None``
            defaults to the resource file chunk size.
            By default, ``100``.
        memory_utilization_limit : float, optional
            Memory utilization limit (fractional). Must be a value
            between 0 and 1. This input sets how many site results will
            be stored in-memory at any given time before flushing to
            disk. By default, ``0.4``.
        append : bool
            Option to append econ datasets to source `cf_file`.
            By default, ``False``.
        """
        # get a points control instance
        pc = self.get_pc(points=project_points, points_range=None,
                         sam_configs=sam_files, cf_file=cf_file,
                         sites_per_worker=sites_per_worker, append=append)

        super().__init__(pc, output_request, site_data=site_data,
                         memory_utilization_limit=memory_utilization_limit)

        self._cf_file = cf_file
        self._append = append
        self._run_attrs['cf_file'] = cf_file
        self._run_attrs['sam_module'] = self._sam_module.MODULE

    @property
    def cf_file(self):
        """Get the capacity factor output filename and path.

        Returns
        -------
        cf_file : str
            reV generation capacity factor output file with path.
        """
        return self._cf_file

    @property
    def meta(self):
        """Get meta data from the source capacity factors file.

        Returns
        -------
        _meta : pd.DataFrame
            Meta data from capacity factor outputs file.
        """
        if self._meta is None and self.cf_file is not None:
            with Outputs(self.cf_file) as cfh:
                # only take meta that belongs to this project's site list
                self._meta = cfh.meta[
                    cfh.meta['gid'].isin(self.points_control.sites)]

            if 'offshore' in self._meta:
                if self._meta['offshore'].sum() > 1:
                    w = ('Found offshore sites in econ meta data. '
                         'This functionality has been deprecated. '
                         'Please run the reV offshore module to '
                         'calculate offshore wind lcoe.')
                    warn(w, OffshoreWindInputWarning)
                    logger.warning(w)

        elif self._meta is None and self.cf_file is None:
            self._meta = pd.DataFrame({'gid': self.points_control.sites})

        return self._meta

    @property
    def time_index(self):
        """Get the generation resource time index data."""
        if self._time_index is None and self.cf_file is not None:
            with Outputs(self.cf_file) as cfh:
                if 'time_index' in cfh.datasets:
                    self._time_index = cfh.time_index

        return self._time_index

    @staticmethod
    def _econ_append_pc(pp, cf_file, sites_per_worker=None):
        """
        Generate a PointsControl instance for econ append.

        Parameters
        ----------
        pp : reV.config.project_points.ProjectPoints
            ProjectPoints to adjust gids for.
        cf_file : str
            reV generation capacity factor output file with path.
        sites_per_worker : int
            Number of sites to run in series on a worker. None defaults
            to the resource file chunk size.

        Returns
        -------
        pc : reV.config.project_points.PointsControl
            PointsControl object instance.
        """
        multi_h5_res, hsds = check_res_file(cf_file)
        if multi_h5_res:
            res_cls = MultiFileResource
            res_kwargs = {}
        else:
            res_cls = Resource
            res_kwargs = {'hsds': hsds}

        with res_cls(cf_file, **res_kwargs) as f:
            gid0 = f.meta['gid'].values[0]
            gid1 = f.meta['gid'].values[-1]

        i0 = pp.index(gid0)
        i1 = pp.index(gid1) + 1
        pc = PointsControl.split(i0, i1, pp,
                                 sites_per_split=sites_per_worker)

        return pc

    @classmethod
    def get_pc(cls, points, points_range, sam_configs, cf_file,
               sites_per_worker=None, append=False):
        """
        Get a PointsControl instance.

        Parameters
        ----------
        points : slice | list | str | reV.config.project_points.PointsControl
            Slice specifying project points, or string pointing to a
            project points csv, or a fully instantiated PointsControl
            object.
        points_range : list | None
            Optional two-entry list specifying the index range of the
            sites to analyze. To be taken from the
            reV.config.PointsControl.split_range property.
        sam_configs : dict | str | SAMConfig
            SAM input configuration ID(s) and file path(s). Keys are the
            SAM config ID(s) which map to the config column in the
            project points CSV. Values are either a JSON SAM config file
            or dictionary of SAM config inputs. Can also be a single
            config file path or a pre loaded SAMConfig object.
        cf_file : str
            reV generation capacity factor output file with path.
        sites_per_worker : int
            Number of sites to run in series on a worker. None defaults
            to the resource file chunk size.
        append : bool
            Flag to append econ datasets to source cf_file. This has
            priority over the out_fpath input.

        Returns
        -------
        pc : reV.config.project_points.PointsControl
            PointsControl object instance.
        """
        pc = super().get_pc(points, points_range, sam_configs,
                            ModuleName.ECON,
                            sites_per_worker=sites_per_worker,
                            res_file=cf_file)

        if append:
            pc = cls._econ_append_pc(pc.project_points, cf_file,
                                     sites_per_worker=sites_per_worker)

        return pc
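
    # Example (illustrative sketch; the CSV and HDF5 paths are hypothetical
    # placeholders): building a PointsControl directly via ``get_pc``. Per
    # the docstring above, ``points_range`` may be ``None`` to analyze all
    # points, or a two-entry index range taken from
    # ``PointsControl.split_range``.
    #
    #     pc = Econ.get_pc(points='/path/to/project_points.csv',
    #                      points_range=None,
    #                      sam_configs='/path/to/sam_lcoe_config.json',
    #                      cf_file='/path/to/rev_gen_output.h5',
    #                      sites_per_worker=100)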

    @staticmethod
    def _run_single_worker(pc, econ_fun, output_request, **kwargs):
        """Run the SAM econ calculation.

        Parameters
        ----------
        pc : reV.config.project_points.PointsControl
            Iterable points control object from reV config module.
            Must have project_points with df property with all
            relevant site-specific inputs and a 'gid' column.
            By passing site-specific inputs in this dataframe, which
            was split using points_control, only the data relevant to
            the current sites is passed.
        econ_fun : method
            reV_run() method from one of the econ modules (SingleOwner,
            SAM_LCOE, WindBos).
        output_request : str | list | tuple
            Economic output variable(s) requested from SAM.
        kwargs : dict
            Additional input parameters for the SAM run module.

        Returns
        -------
        out : dict
            Output dictionary from the SAM reV_run function. Data is
            scaled within this function to the datatype specified in
            Econ.OUT_ATTRS.
        """
        # make sure output request is a list
        if isinstance(output_request, str):
            output_request = [output_request]

        # Extract the site df from the project points df.
        site_df = pc.project_points.df
        site_df = site_df.set_index('gid', drop=True)

        # SAM execute econ analysis based on output request
        try:
            out = econ_fun(pc, site_df, output_request=output_request,
                           **kwargs)
        except Exception as e:
            out = {}
            logger.exception('Worker failed for PC: {}'.format(pc))
            raise e

        return out

    def _parse_output_request(self, req):
        """Set the output variables requested from generation.

        Parameters
        ----------
        req : str | list | tuple
            Output variables requested from SAM.

        Returns
        -------
        output_request : list
            Output variables requested from SAM.
        """
        output_request = self._output_request_type_check(req)

        for request in output_request:
            if request not in self.OUT_ATTRS:
                msg = ('User output request "{}" not recognized. '
                       'Will attempt to extract from PySAM.'.format(request))
                logger.debug(msg)

        modules = []
        for request in output_request:
            if request in self.OPTIONS:
                modules.append(self.OPTIONS[request])

        if not any(modules):
            msg = ('None of the user output requests were recognized. '
                   'Cannot run reV econ. '
                   'At least one of the following must be requested: {}'
                   .format(list(self.OPTIONS.keys())))
            logger.exception(msg)
            raise ExecutionError(msg)

        b1 = [m == modules[0] for m in modules]
        b2 = np.array([m == WindBos for m in modules])
        b3 = np.array([m == SingleOwner for m in modules])

        if all(b1):
            self._sam_module = modules[0]
            self._fun = modules[0].reV_run
        elif all(b2 | b3):
            self._sam_module = SingleOwner
            self._fun = SingleOwner.reV_run
        else:
            msg = ('Econ outputs requested from different SAM modules not '
                   'currently supported. Output request variables require '
                   'SAM methods: {}'.format(modules))
            raise ValueError(msg)

        return list(set(output_request))

    def _get_data_shape(self, dset, n_sites):
        """Get the output array shape based on OUT_ATTRS or PySAM.Outputs.

        This Econ get data shape method will also first check for the
        dset in the site_data table. If not found in site_data, the
        dataset will be looked for in OUT_ATTRS and PySAM.Outputs as it
        would for Generation.

        Parameters
        ----------
        dset : str
            Variable name to get shape for.
        n_sites : int
            Number of sites for this data shape.

        Returns
        -------
        shape : tuple
            1D or 2D shape tuple for dset.
        """
        if dset in self.site_data:
            data_shape = (n_sites, )
            data = self.site_data[dset].values[0]

            if isinstance(data, (list, tuple, np.ndarray, str)):
                msg = ('Cannot pass through non-scalar site_data '
                       'input key "{}" as an output_request!'.format(dset))
                logger.error(msg)
                raise ExecutionError(msg)

        else:
            data_shape = super()._get_data_shape(dset, n_sites)

        return data_shape

    def run(self, out_fpath=None, max_workers=1, timeout=1800,
            pool_size=None):
        """Execute a parallel reV econ run with smart data flushing.

        Parameters
        ----------
        out_fpath : str, optional
            Path to output file. If this class was initialized with
            ``append=True``, this input has no effect. If ``None``, no
            output file will be written. If the filepath is specified
            but the module name (econ) and/or resource data year is not
            included, the module name and/or resource data year will
            get added to the output file name. By default, ``None``.
        max_workers : int, optional
            Number of local workers to run on. By default, ``1``.
        timeout : int, optional
            Number of seconds to wait for parallel run iteration to
            complete before returning zeros. By default, ``1800``
            seconds.
        pool_size : int, optional
            Number of futures to submit to a single process pool for
            parallel futures. If ``None``, the pool size is set to
            ``os.cpu_count() * 2``. By default, ``None``.

        Returns
        -------
        str | None
            Path to output HDF5 file, or ``None`` if results were not
            written to disk.
        """
        if pool_size is None:
            pool_size = os.cpu_count() * 2

        # initialize output file or append econ data to gen file
        if self._append:
            self._out_fpath = self._cf_file
        else:
            self._init_fpath(out_fpath, ModuleName.ECON)

        self._init_h5(mode='a' if self._append else 'w')
        self._init_out_arrays()

        diff = list(set(self.points_control.sites)
                    - set(self.meta['gid'].values))
        if diff:
            raise Exception('The following analysis sites were requested '
                            'through project points for econ but are not '
                            'found in the CF file ("{}"): {}'
                            .format(self.cf_file, diff))

        # make a kwarg dict
        kwargs = {'output_request': self.output_request,
                  'cf_file': self.cf_file,
                  'year': self.year}

        logger.info('Running econ with smart data flushing '
                    'for: {}'.format(self.points_control))
        logger.debug('The following project points were specified: "{}"'
                     .format(self.project_points))
        logger.debug('The following SAM configs are available to this '
                     'run:\n{}'.format(pprint.pformat(self.sam_configs,
                                                      indent=4)))
        logger.debug('The SAM output variables have been requested:\n{}'
                     .format(self.output_request))

        try:
            kwargs['econ_fun'] = self._fun
            if max_workers == 1:
                logger.debug('Running serial econ for: {}'
                             .format(self.points_control))
                for i, pc_sub in enumerate(self.points_control):
                    self.out = self._run_single_worker(pc_sub, **kwargs)
                    logger.info('Finished reV econ serial compute for: {} '
                                '(iteration {} out of {})'
                                .format(pc_sub, i + 1,
                                        len(self.points_control)))
                self.flush()
            else:
                logger.debug('Running parallel econ for: {}'
                             .format(self.points_control))
                self._parallel_run(max_workers=max_workers,
                                   pool_size=pool_size, timeout=timeout,
                                   **kwargs)

        except Exception as e:
            logger.exception('SmartParallelJob.execute() failed for econ.')
            raise e

        return self._out_fpath
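

# Example usage (illustrative sketch; all file paths below are hypothetical
# placeholders): run the econ analysis and write results to an HDF5 file.
# With ``max_workers > 1``, sites are processed in parallel with smart data
# flushing as described in ``Econ.run`` above.
#
#     econ = Econ(project_points='/path/to/project_points.csv',
#                 sam_files={'default': '/path/to/sam_lcoe_config.json'},
#                 cf_file='/path/to/rev_gen_output.h5',
#                 output_request=('lcoe_fcr',),
#                 sites_per_worker=100)
#     out_fpath = econ.run(out_fpath='/path/to/rev_econ_output.h5',
#                          max_workers=4)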