# Source code for reV.econ.econ

# -*- coding: utf-8 -*-
"""reV econ module (lcoe-fcr, single owner, etc...)"""
import logging
import os
import pprint
from warnings import warn

import numpy as np
import pandas as pd
from rex.multi_file_resource import MultiFileResource
from rex.resource import Resource
from rex.utilities.utilities import check_res_file

from reV.config.project_points import PointsControl
from reV.generation.base import BaseGen
from reV.handlers.outputs import Outputs
from reV.SAM.econ import LCOE as SAM_LCOE
from reV.SAM.econ import SingleOwner
from reV.SAM.windbos import WindBos
from reV.utilities import ModuleName, ResourceMetaField
from reV.utilities.exceptions import ExecutionError, OffshoreWindInputWarning

logger = logging.getLogger(__name__)


class Econ(BaseGen):
    """Econ"""

    # Mapping of reV econ output strings to SAM econ modules
    OPTIONS = {
        "lcoe_fcr": SAM_LCOE,
        "ppa_price": SingleOwner,
        "project_return_aftertax_npv": SingleOwner,
        "lcoe_real": SingleOwner,
        "lcoe_nom": SingleOwner,
        "flip_actual_irr": SingleOwner,
        "gross_revenue": SingleOwner,
        "total_installed_cost": WindBos,
        "turbine_cost": WindBos,
        "sales_tax_cost": WindBos,
        "bos_cost": WindBos,
        "fixed_charge_rate": SAM_LCOE,
        "capital_cost": SAM_LCOE,
        "fixed_operating_cost": SAM_LCOE,
        "variable_operating_cost": SAM_LCOE,
    }
    """Available ``reV`` econ `output_request` options"""

    # Mapping of reV econ outputs to scale factors and units.
    # Type is scalar or array and corresponds to the SAM single-site output
    OUT_ATTRS = BaseGen.ECON_ATTRS

    def __init__(self, project_points, sam_files, cf_file, site_data=None,
                 output_request=('lcoe_fcr',), sites_per_worker=100,
                 memory_utilization_limit=0.4, append=False):
        """ReV econ analysis class.

        ``reV`` econ analysis runs SAM econ calculations, typically to
        compute LCOE (using :py:class:`PySAM.Lcoefcr.Lcoefcr`), though
        :py:class:`PySAM.Singleowner.Singleowner` or
        :py:class:`PySAM.Windbos.Windbos` calculations can also be
        performed simply by requesting outputs from those computation
        modules. See the keys of
        :attr:`Econ.OPTIONS <reV.econ.econ.Econ.OPTIONS>` for all
        available econ outputs. Econ computations rely on an input
        generation (i.e. capacity factor) profile. You can request
        ``reV`` to run the analysis for one or more "sites", which
        correspond to the meta indices in the generation data.

        Parameters
        ----------
        project_points : int | list | tuple | str | dict | pd.DataFrame | slice
            Input specifying which sites to process. A single integer
            representing the GID of a site may be specified to evaluate
            reV at a single location. A list or tuple of integers (or
            slice) representing the GIDs of multiple sites can be
            specified to evaluate reV at multiple specific locations.
            A string pointing to a project points CSV file may also be
            specified. Typically, the CSV contains a ``gid`` column
            (generation GID of each site) and optionally a ``config``
            column whose values are keys in the `sam_files` input
            dictionary (``config`` can be ``None`` or left out
            completely if only a single SAM configuration file is given
            as the `sam_files` input). An optional
            ``capital_cost_multiplier`` column, if included, will be
            used to regionally scale the ``capital_cost`` input in the
            SAM config (the multiplier is applied on top of any
            ``capital_cost`` input). The CSV file may also contain
            other site-specific inputs by including a column named
            after a config keyword (e.g. ``wind_turbine_rotor_diameter``
            for a site-specific turbine diameter). Columns that do not
            correspond to a config key are ignored. A DataFrame
            following the same guidelines as the CSV input (or a
            dictionary that can be used to initialize such a DataFrame)
            may be used for this input as well.
        sam_files : dict | str
            A dictionary mapping SAM input configuration ID(s) to SAM
            configuration(s). Keys are the SAM config ID(s) which
            correspond to the ``config`` column in the project points
            CSV. Values for each key are either a path to a
            corresponding SAM config file or a full dictionary of SAM
            config inputs. This input can also be a string pointing to
            a single SAM config file, in which case the ``config``
            column of the CSV points input should be set to ``None``
            or left out completely. See the documentation for the
            ``reV`` SAM class (e.g.
            :class:`reV.SAM.generation.WindPower`,
            :class:`reV.SAM.generation.PvWattsv8`, etc.) for
            documentation on the allowed and/or required SAM config
            file inputs.
        cf_file : str
            Path to reV output generation file containing a capacity
            factor output.

            .. Note:: If executing ``reV`` from the command line, this
              path can contain brackets ``{}`` that will be filled in
              by the `analysis_years` input. Alternatively, this input
              can be set to ``"PIPELINE"`` to parse the output of the
              previous step (``reV`` generation) and use it as input to
              this call. However, note that duplicate executions of
              ``reV`` generation within the pipeline may invalidate
              this parsing, meaning the `cf_file` input will have to be
              specified manually.

        site_data : str | pd.DataFrame, optional
            Site-specific input data for SAM calculation. If this input
            is a string, it should be a path that points to a CSV file.
            Otherwise, this input should be a DataFrame with
            pre-extracted site data. Rows in this table should match
            the input sites via a ``gid`` column. The rest of the
            columns should match configuration input keys that will
            take site-specific values. Note that some or all
            site-specific inputs can be specified via the
            `project_points` input table instead. If ``None``, no
            site-specific data is considered. By default, ``None``.
        output_request : list | tuple, optional
            List of output variables requested from SAM. Can be any of
            the parameters in the "Outputs" group of the PySAM module
            being executed. This list can also include a select number
            of SAM config/resource parameters to include in the output.
            Time-series profiles requested via this input are output in
            UTC. By default, ``('lcoe_fcr',)``.
        sites_per_worker : int, optional
            Number of sites to run in series on a worker. ``None``
            defaults to the resource file chunk size.
            By default, ``100``.
        memory_utilization_limit : float, optional
            Memory utilization limit (fractional). Must be a value
            between 0 and 1. This input sets how many site results will
            be stored in-memory at any given time before flushing to
            disk. By default, ``0.4``.
        append : bool
            Option to append econ datasets to source `cf_file`.
            By default, ``False``.
        """
        # get a points control instance
        pc = self.get_pc(
            points=project_points,
            points_range=None,
            sam_configs=sam_files,
            cf_file=cf_file,
            sites_per_worker=sites_per_worker,
            append=append,
        )

        super().__init__(
            pc,
            output_request,
            site_data=site_data,
            memory_utilization_limit=memory_utilization_limit,
        )

        self._cf_file = cf_file
        self._append = append
        self._run_attrs["cf_file"] = cf_file
        self._run_attrs["sam_module"] = self._sam_module.MODULE

    @property
    def cf_file(self):
        """Get the capacity factor output filename and path.

        Returns
        -------
        cf_file : str
            reV generation capacity factor output file with path.
        """
        return self._cf_file

    @property
    def meta(self):
        """Get meta data from the source capacity factors file.

        Returns
        -------
        _meta : pd.DataFrame
            Meta data from capacity factor outputs file.
        """
        if self._meta is None and self.cf_file is not None:
            with Outputs(self.cf_file) as cfh:
                # only take meta that belongs to this project's site list
                self._meta = cfh.meta[
                    cfh.meta[ResourceMetaField.GID].isin(
                        self.points_control.sites)]

            # NOTE(review): warns only when more than one offshore site
            # is present (> 1, not > 0) — confirm this threshold is
            # intentional.
            if ("offshore" in self._meta
                    and self._meta["offshore"].sum() > 1):
                w = ('Found offshore sites in econ meta data. '
                     'This functionality has been deprecated. '
                     'Please run the reV offshore module to '
                     'calculate offshore wind lcoe.')
                warn(w, OffshoreWindInputWarning)
                logger.warning(w)

        elif self._meta is None and self.cf_file is None:
            # no generation file: synthesize minimal meta from the
            # requested project point gids
            self._meta = pd.DataFrame(
                {ResourceMetaField.GID: self.points_control.sites})

        return self._meta

    @property
    def time_index(self):
        """Get the generation resource time index data."""
        if self._time_index is None and self.cf_file is not None:
            with Outputs(self.cf_file) as cfh:
                if "time_index" in cfh.datasets:
                    self._time_index = cfh.time_index

        return self._time_index

    @staticmethod
    def _econ_append_pc(pp, cf_file, sites_per_worker=None):
        """Generate ProjectControls for econ append.

        Parameters
        ----------
        pp : reV.config.project_points.ProjectPoints
            ProjectPoints to adjust gids for.
        cf_file : str
            reV generation capacity factor output file with path.
        sites_per_worker : int
            Number of sites to run in series on a worker. None defaults
            to the resource file chunk size.

        Returns
        -------
        pc : reV.config.project_points.PointsControl
            PointsControl object instance.
        """
        multi_h5_res, hsds = check_res_file(cf_file)
        if multi_h5_res:
            res_cls = MultiFileResource
            res_kwargs = {}
        else:
            res_cls = Resource
            res_kwargs = {"hsds": hsds}

        # restrict the points control index range to the gids actually
        # present in the source capacity factor file
        with res_cls(cf_file, **res_kwargs) as f:
            gid0 = f.meta[ResourceMetaField.GID].values[0]
            gid1 = f.meta[ResourceMetaField.GID].values[-1]

        i0 = pp.index(gid0)
        i1 = pp.index(gid1) + 1
        pc = PointsControl.split(i0, i1, pp,
                                 sites_per_split=sites_per_worker)

        return pc
[docs] @classmethod def get_pc( cls, points, points_range, sam_configs, cf_file, sites_per_worker=None, append=False, ): """ Get a PointsControl instance. Parameters ---------- points : slice | list | str | reV.config.project_points.PointsControl Slice specifying project points, or string pointing to a project points csv, or a fully instantiated PointsControl object. points_range : list | None Optional two-entry list specifying the index range of the sites to analyze. To be taken from the reV.config.PointsControl.split_range property. sam_configs : dict | str | SAMConfig SAM input configuration ID(s) and file path(s). Keys are the SAM config ID(s) which map to the config column in the project points CSV. Values are either a JSON SAM config file or dictionary of SAM config inputs. Can also be a single config file path or a pre loaded SAMConfig object. cf_file : str reV generation capacity factor output file with path. sites_per_worker : int Number of sites to run in series on a worker. None defaults to the resource file chunk size. append : bool Flag to append econ datasets to source cf_file. This has priority over the out_fpath input. Returns ------- pc : reV.config.project_points.PointsControl PointsControl object instance. """ pc = super().get_pc( points, points_range, sam_configs, ModuleName.ECON, sites_per_worker=sites_per_worker, res_file=cf_file, ) if append: pc = cls._econ_append_pc( pc.project_points, cf_file, sites_per_worker=sites_per_worker ) return pc
@staticmethod def _run_single_worker(pc, econ_fun, output_request, **kwargs): """Run the SAM econ calculation. Parameters ---------- pc : reV.config.project_points.PointsControl Iterable points control object from reV config module. Must have project_points with df property with all relevant site-specific inputs and a `SiteDataField.GID` column. By passing site-specific inputs in this dataframe, which was split using points_control, only the data relevant to the current sites is passed. econ_fun : method reV_run() method from one of the econ modules (SingleOwner, SAM_LCOE, WindBos). output_request : str | list | tuple Economic output variable(s) requested from SAM. kwargs : dict Additional input parameters for the SAM run module. Returns ------- out : dict Output dictionary from the SAM reV_run function. Data is scaled within this function to the datatype specified in Econ.OUT_ATTRS. """ # make sure output request is a list if isinstance(output_request, str): output_request = [output_request] # Extract the site df from the project points df. site_df = pc.project_points.df site_df = site_df.set_index(ResourceMetaField.GID, drop=True) # SAM execute econ analysis based on output request try: out = econ_fun( pc, site_df, output_request=output_request, **kwargs ) except Exception as e: out = {} logger.exception("Worker failed for PC: {}".format(pc)) raise e return out def _parse_output_request(self, req): """Set the output variables requested from generation. Parameters ---------- req : str| list | tuple Output variables requested from SAM. Returns ------- output_request : list Output variables requested from SAM. """ output_request = super()._parse_output_request(req) for request in output_request: if request not in self.OUT_ATTRS: msg = ( 'User output request "{}" not recognized. 
' "Will attempt to extract from PySAM.".format(request) ) logger.debug(msg) modules = [self.OPTIONS[request] for request in output_request if request in self.OPTIONS] if not any(modules): msg = ( "None of the user output requests were recognized. " "Cannot run reV econ. " "At least one of the following must be requested: {}".format( list(self.OPTIONS.keys()) ) ) logger.exception(msg) raise ExecutionError(msg) b1 = [m == modules[0] for m in modules] b2 = np.array([m == WindBos for m in modules]) b3 = np.array([m == SingleOwner for m in modules]) if all(b1): self._sam_module = modules[0] self._fun = modules[0].reV_run elif all(b2 | b3): self._sam_module = SingleOwner self._fun = SingleOwner.reV_run else: msg = ( "Econ outputs requested from different SAM modules not " "currently supported. Output request variables require " "SAM methods: {}".format(modules) ) raise ValueError(msg) return list(set(output_request)) def _get_data_shape(self, dset, n_sites): """Get the output array shape based on OUT_ATTRS or PySAM.Outputs. This Econ get data shape method will also first check for the dset in the site_data table. If not found in site_data, the dataset will be looked for in OUT_ATTRS and PySAM.Outputs as it would for Generation. Parameters ---------- dset : str Variable name to get shape for. n_sites : int Number of sites for this data shape. Returns ------- shape : tuple 1D or 2D shape tuple for dset. """ if dset in self.site_data: data_shape = (n_sites,) data = self.site_data[dset].values[0] if isinstance(data, (list, tuple, np.ndarray, str)): msg = ( "Cannot pass through non-scalar site_data " 'input key "{}" as an output_request!'.format(dset) ) logger.error(msg) raise ExecutionError(msg) else: data_shape = super()._get_data_shape(dset, n_sites) return data_shape
[docs] def run(self, out_fpath=None, max_workers=1, timeout=1800, pool_size=None): """Execute a parallel reV econ run with smart data flushing. Parameters ---------- out_fpath : str, optional Path to output file. If this class was initialized with ``append=True``, this input has no effect. If ``None``, no output file will be written. If the filepath is specified but the module name (econ) and/or resource data year is not included, the module name and/or resource data year will get added to the output file name. By default, ``None``. max_workers : int, optional Number of local workers to run on. By default, ``1``. timeout : int, optional Number of seconds to wait for parallel run iteration to complete before returning zeros. By default, ``1800`` seconds. pool_size : int, optional Number of futures to submit to a single process pool for parallel futures. If ``None``, the pool size is set to ``os.cpu_count() * 2``. By default, ``None``. Returns ------- str | None Path to output HDF5 file, or ``None`` if results were not written to disk. 
""" if pool_size is None: pool_size = os.cpu_count() * 2 # initialize output file or append econ data to gen file if self._append: self._out_fpath = self._cf_file else: self._init_fpath(out_fpath, ModuleName.ECON) self._init_h5(mode="a" if self._append else "w") self._init_out_arrays() diff = list(set(self.points_control.sites) - set(self.meta[ResourceMetaField.GID].values)) if diff: raise Exception( "The following analysis sites were requested " "through project points for econ but are not " 'found in the CF file ("{}"): {}'.format(self.cf_file, diff) ) # make a kwarg dict kwargs = { "output_request": self.output_request, "cf_file": self.cf_file, "year": self.year, } logger.info( "Running econ with smart data flushing " "for: {}".format( self.points_control ) ) logger.debug( 'The following project points were specified: "{}"'.format( self.project_points ) ) logger.debug( "The following SAM configs are available to this run:\n{}".format( pprint.pformat(self.sam_configs, indent=4) ) ) logger.debug( "The SAM output variables have been requested:\n{}".format( self.output_request ) ) try: kwargs["econ_fun"] = self._fun if max_workers == 1: logger.debug( "Running serial econ for: {}".format(self.points_control) ) for i, pc_sub in enumerate(self.points_control): self.out = self._run_single_worker(pc_sub, **kwargs) logger.info( "Finished reV econ serial compute for: {} " "(iteration {} out of {})".format( pc_sub, i + 1, len(self.points_control) ) ) self.flush() else: logger.debug( "Running parallel econ for: {}".format(self.points_control) ) self._parallel_run( max_workers=max_workers, pool_size=pool_size, timeout=timeout, **kwargs, ) except Exception as e: logger.exception("SmartParallelJob.execute() failed for econ.") raise e return self._out_fpath