# -*- coding: utf-8 -*-
"""reV econ module (lcoe-fcr, single owner, etc...)"""
import logging
import os
import pprint
from warnings import warn
import numpy as np
import pandas as pd
from rex.multi_file_resource import MultiFileResource
from rex.resource import Resource
from rex.utilities.utilities import check_res_file
from reV.config.project_points import PointsControl
from reV.generation.base import BaseGen
from reV.handlers.outputs import Outputs
from reV.SAM.econ import LCOE as SAM_LCOE
from reV.SAM.econ import SingleOwner
from reV.SAM.windbos import WindBos
from reV.utilities import ModuleName, ResourceMetaField
from reV.utilities.exceptions import ExecutionError, OffshoreWindInputWarning
logger = logging.getLogger(__name__)
class Econ(BaseGen):
    """reV econ analysis class.

    Runs SAM econ modules (LCOE-FCR, SingleOwner, WindBos) for the sites
    in a reV generation (capacity factor) output file.
    """

    # Mapping of reV econ output strings to SAM econ modules
    OPTIONS = {
        "lcoe_fcr": SAM_LCOE,
        "ppa_price": SingleOwner,
        "project_return_aftertax_npv": SingleOwner,
        "lcoe_real": SingleOwner,
        "lcoe_nom": SingleOwner,
        "flip_actual_irr": SingleOwner,
        "gross_revenue": SingleOwner,
        "total_installed_cost": WindBos,
        "turbine_cost": WindBos,
        "sales_tax_cost": WindBos,
        "bos_cost": WindBos,
        "fixed_charge_rate": SAM_LCOE,
        "capital_cost": SAM_LCOE,
        "fixed_operating_cost": SAM_LCOE,
        "variable_operating_cost": SAM_LCOE,
    }
    """Available ``reV`` econ `output_request` options"""

    # Mapping of reV econ outputs to scale factors and units.
    # Type is scalar or array and corresponds to the SAM single-site output
    OUT_ATTRS = BaseGen.ECON_ATTRS

    def __init__(self, project_points, sam_files, cf_file, site_data=None,
                 output_request=('lcoe_fcr',), sites_per_worker=100,
                 memory_utilization_limit=0.4, append=False):
        """ReV econ analysis class.

        ``reV`` econ analysis runs SAM econ calculations, typically to
        compute LCOE (using :py:class:`PySAM.Lcoefcr.Lcoefcr`), though
        :py:class:`PySAM.Singleowner.Singleowner` or
        :py:class:`PySAM.Windbos.Windbos` calculations can also be
        performed simply by requesting outputs from those computation
        modules. See the keys of
        :attr:`Econ.OPTIONS <reV.econ.econ.Econ.OPTIONS>` for all
        available econ outputs. Econ computations rely on an input
        generation (i.e. capacity factor) profile. You can request
        ``reV`` to run the analysis for one or more "sites", which
        correspond to the meta indices in the generation data.

        Parameters
        ----------
        project_points : int | list | tuple | str | dict | pd.DataFrame | slice
            Input specifying which sites to process. A single integer
            representing the GID of a site may be specified to evaluate
            reV at a single location. A list or tuple of integers
            (or slice) representing the GIDs of multiple sites can be
            specified to evaluate reV at multiple specific locations.
            A string pointing to a project points CSV file may also be
            specified. Typically, the CSV contains the following
            columns:

                - ``gid``: Integer specifying the generation GID of each
                  site.
                - ``config``: Key in the `sam_files` input dictionary
                  (see below) corresponding to the SAM configuration to
                  use for each particular site. This value can also be
                  ``None`` (or left out completely) if you specify only
                  a single SAM configuration file as the `sam_files`
                  input.
                - ``capital_cost_multiplier``: This is an *optional*
                  multiplier input that, if included, will be used to
                  regionally scale the ``capital_cost`` input in the SAM
                  config. If you include this column in your CSV, you
                  *do not* need to specify ``capital_cost``, unless you
                  would like that value to vary regionally and
                  independently of the multiplier (i.e. the multiplier
                  will still be applied on top of the ``capital_cost``
                  input).

            The CSV file may also contain other site-specific inputs by
            including a column named after a config keyword (e.g. a
            column called ``wind_turbine_rotor_diameter`` may be
            included to specify a site-specific turbine diameter for
            each location). Columns that do not correspond to a config
            key may also be included, but they will be ignored. A
            DataFrame following the same guidelines as the CSV input
            (or a dictionary that can be used to initialize such a
            DataFrame) may be used for this input as well.
        sam_files : dict | str
            A dictionary mapping SAM input configuration ID(s) to SAM
            configuration(s). Keys are the SAM config ID(s) which
            correspond to the ``config`` column in the project points
            CSV. Values for each key are either a path to a
            corresponding SAM config file or a full dictionary
            of SAM config inputs. For example::

                sam_files = {
                    "default": "/path/to/default/sam.json",
                    "onshore": "/path/to/onshore/sam_config.yaml",
                    "offshore": {
                        "sam_key_1": "sam_value_1",
                        "sam_key_2": "sam_value_2",
                        ...
                    },
                    ...
                }

            This input can also be a string pointing to a single SAM
            config file. In this case, the ``config`` column of the
            CSV points input should be set to ``None`` or left out
            completely. See the documentation for the ``reV`` SAM class
            (e.g. :class:`reV.SAM.generation.WindPower`,
            :class:`reV.SAM.generation.PvWattsv8`,
            :class:`reV.SAM.generation.Geothermal`, etc.) for
            documentation on the allowed and/or required SAM config file
            inputs.
        cf_file : str
            Path to reV output generation file containing a capacity
            factor output.

            .. Note:: If executing ``reV`` from the command line, this
              path can contain brackets ``{}`` that will be filled in
              by the `analysis_years` input. Alternatively, this input
              can be set to ``"PIPELINE"`` to parse the output of the
              previous step (``reV`` generation) and use it as input to
              this call. However, note that duplicate executions of
              ``reV`` generation within the pipeline may invalidate this
              parsing, meaning the `cf_file` input will have to be
              specified manually.

        site_data : str | pd.DataFrame, optional
            Site-specific input data for SAM calculation. If this input
            is a string, it should be a path that points to a CSV file.
            Otherwise, this input should be a DataFrame with
            pre-extracted site data. Rows in this table should match
            the input sites via a ``gid`` column. The rest of the
            columns should match configuration input keys that will take
            site-specific values. Note that some or all site-specific
            inputs can be specified via the `project_points` input
            table instead. If ``None``, no site-specific data is
            considered. By default, ``None``.
        output_request : list | tuple, optional
            List of output variables requested from SAM. Can be any
            of the parameters in the "Outputs" group of the PySAM module
            (e.g. :py:class:`PySAM.Windpower.Windpower.Outputs`,
            :py:class:`PySAM.Pvwattsv8.Pvwattsv8.Outputs`,
            :py:class:`PySAM.Geothermal.Geothermal.Outputs`, etc.) being
            executed. This list can also include a select number of SAM
            config/resource parameters to include in the output:
            any key in any of the
            `output attribute JSON files <https://tinyurl.com/4bmrpe3j/>`_
            may be requested. Time-series profiles requested via this
            input are output in UTC. By default, ``('lcoe_fcr',)``.
        sites_per_worker : int, optional
            Number of sites to run in series on a worker. ``None``
            defaults to the resource file chunk size.
            By default, ``100``.
        memory_utilization_limit : float, optional
            Memory utilization limit (fractional). Must be a value
            between 0 and 1. This input sets how many site results will
            be stored in-memory at any given time before flushing to
            disk. By default, ``0.4``.
        append : bool
            Option to append econ datasets to source `cf_file`.
            By default, ``False``.
        """

        # get a points control instance
        pc = self.get_pc(
            points=project_points,
            points_range=None,
            sam_configs=sam_files,
            cf_file=cf_file,
            sites_per_worker=sites_per_worker,
            append=append,
        )

        super().__init__(
            pc,
            output_request,
            site_data=site_data,
            memory_utilization_limit=memory_utilization_limit,
        )

        self._cf_file = cf_file
        self._append = append
        self._run_attrs["cf_file"] = cf_file
        # self._sam_module is assigned in _parse_output_request()
        self._run_attrs["sam_module"] = self._sam_module.MODULE

    @property
    def cf_file(self):
        """Get the capacity factor output filename and path.

        Returns
        -------
        cf_file : str
            reV generation capacity factor output file with path.
        """
        return self._cf_file

    @property
    def meta(self):
        """Get meta data from the source capacity factors file.

        The meta data is read once and cached on the instance. If no
        `cf_file` is set, a single-column table holding the project
        point GIDs is built instead.

        Returns
        -------
        _meta : pd.DataFrame
            Meta data from capacity factor outputs file.
        """
        if self._meta is None and self.cf_file is not None:
            with Outputs(self.cf_file) as cfh:
                # only take meta that belongs to this project's site list
                self._meta = cfh.meta[
                    cfh.meta[ResourceMetaField.GID].isin(
                        self.points_control.sites)]

            # warn as soon as a single offshore site is present
            if ("offshore" in self._meta
                    and self._meta["offshore"].sum() > 0):
                w = ('Found offshore sites in econ meta data. '
                     'This functionality has been deprecated. '
                     'Please run the reV offshore module to '
                     'calculate offshore wind lcoe.')
                warn(w, OffshoreWindInputWarning)
                logger.warning(w)

        elif self._meta is None and self.cf_file is None:
            self._meta = pd.DataFrame(
                {ResourceMetaField.GID: self.points_control.sites})

        return self._meta

    @property
    def time_index(self):
        """Get the generation resource time index data.

        Returns
        -------
        time_index : object | None
            The ``time_index`` of the `cf_file`, or the cached value
            (possibly ``None``) if the file has no ``time_index``
            dataset or no `cf_file` is set.
        """
        if self._time_index is None and self.cf_file is not None:
            with Outputs(self.cf_file) as cfh:
                if "time_index" in cfh.datasets:
                    self._time_index = cfh.time_index

        return self._time_index

    @staticmethod
    def _econ_append_pc(pp, cf_file, sites_per_worker=None):
        """Generate a PointsControl instance for econ append.

        The project points are restricted to the index range spanned by
        the first and last GID found in the `cf_file` meta data.

        Parameters
        ----------
        pp : reV.config.project_points.ProjectPoints
            ProjectPoints to adjust gids for
        cf_file : str
            reV generation capacity factor output file with path.
        sites_per_worker : int
            Number of sites to run in series on a worker. None defaults to the
            resource file chunk size.

        Returns
        -------
        pc : reV.config.project_points.PointsControl
            PointsControl object instance.
        """
        multi_h5_res, hsds = check_res_file(cf_file)
        if multi_h5_res:
            res_cls = MultiFileResource
            res_kwargs = {}
        else:
            res_cls = Resource
            res_kwargs = {"hsds": hsds}

        with res_cls(cf_file, **res_kwargs) as f:
            gid0 = f.meta[ResourceMetaField.GID].values[0]
            gid1 = f.meta[ResourceMetaField.GID].values[-1]

        i0 = pp.index(gid0)
        # +1 so that the site holding the last GID is included
        i1 = pp.index(gid1) + 1
        pc = PointsControl.split(i0, i1, pp, sites_per_split=sites_per_worker)

        return pc

    @classmethod
    def get_pc(
        cls,
        points,
        points_range,
        sam_configs,
        cf_file,
        sites_per_worker=None,
        append=False,
    ):
        """
        Get a PointsControl instance.

        Parameters
        ----------
        points : slice | list | str | reV.config.project_points.PointsControl
            Slice specifying project points, or string pointing to a project
            points csv, or a fully instantiated PointsControl object.
        points_range : list | None
            Optional two-entry list specifying the index range of the sites to
            analyze. To be taken from the reV.config.PointsControl.split_range
            property.
        sam_configs : dict | str | SAMConfig
            SAM input configuration ID(s) and file path(s). Keys are the SAM
            config ID(s) which map to the config column in the project points
            CSV. Values are either a JSON SAM config file or dictionary of SAM
            config inputs. Can also be a single config file path or a
            pre loaded SAMConfig object.
        cf_file : str
            reV generation capacity factor output file with path.
        sites_per_worker : int
            Number of sites to run in series on a worker. None defaults to the
            resource file chunk size.
        append : bool
            Flag to append econ datasets to source cf_file. This has priority
            over the out_fpath input.

        Returns
        -------
        pc : reV.config.project_points.PointsControl
            PointsControl object instance.
        """
        pc = super().get_pc(
            points,
            points_range,
            sam_configs,
            ModuleName.ECON,
            sites_per_worker=sites_per_worker,
            res_file=cf_file,
        )

        if append:
            # rebuild the points control from the GID range of the cf_file
            pc = cls._econ_append_pc(
                pc.project_points, cf_file, sites_per_worker=sites_per_worker
            )

        return pc

    @staticmethod
    def _run_single_worker(pc, econ_fun, output_request, **kwargs):
        """Run the SAM econ calculation.

        Parameters
        ----------
        pc : reV.config.project_points.PointsControl
            Iterable points control object from reV config module.
            Must have project_points with df property with all relevant
            site-specific inputs and a `SiteDataField.GID` column.
            By passing site-specific inputs in this dataframe, which
            was split using points_control, only the data relevant to
            the current sites is passed.
        econ_fun : method
            reV_run() method from one of the econ modules (SingleOwner,
            SAM_LCOE, WindBos).
        output_request : str | list | tuple
            Economic output variable(s) requested from SAM.
        kwargs : dict
            Additional input parameters for the SAM run module.

        Returns
        -------
        out : dict
            Output dictionary from the SAM reV_run function. Data is scaled
            within this function to the datatype specified in Econ.OUT_ATTRS.

        Raises
        ------
        Exception
            Any exception raised by `econ_fun` is logged and re-raised.
        """

        # make sure output request is a list
        if isinstance(output_request, str):
            output_request = [output_request]

        # Extract the site df from the project points df.
        site_df = pc.project_points.df
        site_df = site_df.set_index(ResourceMetaField.GID, drop=True)

        # SAM execute econ analysis based on output request
        try:
            out = econ_fun(
                pc, site_df, output_request=output_request, **kwargs
            )
        except Exception:
            logger.exception("Worker failed for PC: {}".format(pc))
            raise

        return out

    def _parse_output_request(self, req):
        """Set the output variables requested from generation.

        Besides parsing the request, this selects the SAM econ module
        (``self._sam_module``) and its run function (``self._fun``)
        based on the requested outputs found in :attr:`OPTIONS`.

        Parameters
        ----------
        req : str| list | tuple
            Output variables requested from SAM.

        Returns
        -------
        output_request : list
            Unique output variables requested from SAM, in the order
            they were requested.

        Raises
        ------
        ExecutionError
            If none of the requested outputs is a key of :attr:`OPTIONS`.
        ValueError
            If the requested outputs map to a combination of SAM modules
            that cannot be run together.
        """

        output_request = super()._parse_output_request(req)

        for request in output_request:
            if request not in self.OUT_ATTRS:
                msg = (
                    'User output request "{}" not recognized. '
                    "Will attempt to extract from PySAM.".format(request)
                )
                logger.debug(msg)

        modules = [self.OPTIONS[request] for request in output_request
                   if request in self.OPTIONS]

        if not modules:
            msg = (
                "None of the user output requests were recognized. "
                "Cannot run reV econ. "
                "At least one of the following must be requested: {}".format(
                    list(self.OPTIONS.keys())
                )
            )
            # not inside an except block, so there is no traceback to log
            logger.error(msg)
            raise ExecutionError(msg)

        single_module = all(m == modules[0] for m in modules)
        bos_or_single_owner = all(
            m in (WindBos, SingleOwner) for m in modules
        )

        if single_module:
            self._sam_module = modules[0]
            self._fun = modules[0].reV_run
        elif bos_or_single_owner:
            # a mix of WindBos and SingleOwner outputs runs via SingleOwner
            self._sam_module = SingleOwner
            self._fun = SingleOwner.reV_run
        else:
            msg = (
                "Econ outputs requested from different SAM modules not "
                "currently supported. Output request variables require "
                "SAM methods: {}".format(modules)
            )
            logger.error(msg)
            raise ValueError(msg)

        # drop duplicates while keeping a deterministic (request) order
        return list(dict.fromkeys(output_request))

    def _get_data_shape(self, dset, n_sites):
        """Get the output array shape based on OUT_ATTRS or PySAM.Outputs.

        This Econ get data shape method will also first check for the dset in
        the site_data table. If not found in site_data, the dataset will be
        looked for in OUT_ATTRS and PySAM.Outputs as it would for Generation.

        Parameters
        ----------
        dset : str
            Variable name to get shape for.
        n_sites : int
            Number of sites for this data shape.

        Returns
        -------
        shape : tuple
            1D or 2D shape tuple for dset.

        Raises
        ------
        ExecutionError
            If `dset` is a site_data column whose first value is a list,
            tuple, array or string.
        """

        if dset in self.site_data:
            data_shape = (n_sites,)
            # only the first row is inspected to decide scalar vs non-scalar
            data = self.site_data[dset].values[0]

            if isinstance(data, (list, tuple, np.ndarray, str)):
                msg = (
                    "Cannot pass through non-scalar site_data "
                    'input key "{}" as an output_request!'.format(dset)
                )
                logger.error(msg)
                raise ExecutionError(msg)
        else:
            data_shape = super()._get_data_shape(dset, n_sites)

        return data_shape

    def run(self, out_fpath=None, max_workers=1, timeout=1800, pool_size=None):
        """Execute a parallel reV econ run with smart data flushing.

        Parameters
        ----------
        out_fpath : str, optional
            Path to output file. If this class was initialized with
            ``append=True``, this input has no effect. If ``None``, no
            output file will be written. If the filepath is specified
            but the module name (econ) and/or resource data year is not
            included, the module name and/or resource data year will get
            added to the output file name. By default, ``None``.
        max_workers : int, optional
            Number of local workers to run on. By default, ``1``.
        timeout : int, optional
            Number of seconds to wait for parallel run iteration to
            complete before returning zeros. By default, ``1800``
            seconds.
        pool_size : int, optional
            Number of futures to submit to a single process pool for
            parallel futures. If ``None``, the pool size is set to
            ``os.cpu_count() * 2``. By default, ``None``.

        Returns
        -------
        str | None
            Path to output HDF5 file, or ``None`` if results were not
            written to disk.

        Raises
        ------
        Exception
            If any requested site is missing from the `cf_file` meta
            data, or if the serial/parallel execution fails.
        """
        if pool_size is None:
            # os.cpu_count() returns None when the count is undetermined
            pool_size = (os.cpu_count() or 1) * 2

        # initialize output file or append econ data to gen file
        if self._append:
            self._out_fpath = self._cf_file
        else:
            self._init_fpath(out_fpath, ModuleName.ECON)

        self._init_h5(mode="a" if self._append else "w")
        self._init_out_arrays()

        diff = list(set(self.points_control.sites)
                    - set(self.meta[ResourceMetaField.GID].values))

        if diff:
            raise Exception(
                "The following analysis sites were requested "
                "through project points for econ but are not "
                'found in the CF file ("{}"): {}'.format(self.cf_file, diff)
            )

        # make a kwarg dict
        kwargs = {
            "output_request": self.output_request,
            "cf_file": self.cf_file,
            "year": self.year,
        }

        logger.info(
            "Running econ with smart data flushing for: {}".format(
                self.points_control
            )
        )
        logger.debug(
            'The following project points were specified: "{}"'.format(
                self.project_points
            )
        )
        logger.debug(
            "The following SAM configs are available to this run:\n{}".format(
                pprint.pformat(self.sam_configs, indent=4)
            )
        )
        logger.debug(
            "The SAM output variables have been requested:\n{}".format(
                self.output_request
            )
        )

        try:
            kwargs["econ_fun"] = self._fun
            if max_workers == 1:
                logger.debug(
                    "Running serial econ for: {}".format(self.points_control)
                )
                for i, pc_sub in enumerate(self.points_control):
                    self.out = self._run_single_worker(pc_sub, **kwargs)
                    logger.info(
                        "Finished reV econ serial compute for: {} "
                        "(iteration {} out of {})".format(
                            pc_sub, i + 1, len(self.points_control)
                        )
                    )
                self.flush()
            else:
                logger.debug(
                    "Running parallel econ for: {}".format(self.points_control)
                )
                self._parallel_run(
                    max_workers=max_workers,
                    pool_size=pool_size,
                    timeout=timeout,
                    **kwargs,
                )
        except Exception:
            logger.exception("SmartParallelJob.execute() failed for econ.")
            raise

        return self._out_fpath