# Source code for rex.utilities.bc_parse_table

# -*- coding: utf-8 -*-
"""
rex bias correction utilities.
"""
import json
import numpy as np
import logging
from warnings import warn
import rex.bias_correction


logger = logging.getLogger(__name__)



# [docs]
def parse_bc_table(bc_df, gids):
    """Parse the bias correction table for required bc functions and kwargs

    The input ``bc_df`` is never modified: serialized list columns are
    decoded on a per-call copy of the requested rows only.

    Parameters
    ----------
    bc_df : pd.DataFrame
        DataFrame with wind or solar resource bias correction table. This
        must have columns "gid" and "method", where "gid" is the resource
        file indices, and "method" is a function name from the
        ``rex.bias_correction`` module. The table may alternatively already
        be indexed by "gid" (``bc_df.index.name == 'gid'``). Only windspeed
        or GHI+DNI+DHI are corrected, depending on the technology. See the
        ``rex.bias_correction`` module for more details on available
        bias correction methods.
    gids : list | np.ndarray
        Array of integer gids (spatial indices) from the source h5 file.
        This is used to get the correct bias correction parameters from
        ``bc_df`` table based on its ``gid`` column

    Returns
    -------
    bc_fun : function | None
        Function from ``rex.bias_correction`` to use. ``None`` if none of
        the requested ``gids`` are present in ``bc_df``.
    bc_fun_kwargs : dict
        Kwargs from ``bc_df`` to input to ``bc_fun``, one entry per column of
        ``bc_df`` (including "method"), each holding the values for the gids
        that are present in the table. This may include extra kwargs that
        are not required by ``bc_fun`` and should be cleaned before passing
        to the function. Empty if none of the requested ``gids`` are present
        in ``bc_df``.
    bool_bc : np.ndarray
        1D Boolean array with length equal to the ``gids`` input with ``True``
        where data has available bias correction inputs in ``bc_df`` and
        ``False`` where not

    Raises
    ------
    KeyError
        If ``bc_df`` has no "method" column, has no "gid" column/index, or
        if the method name is not found in ``rex.bias_correction``.
    AssertionError
        If ``bc_df`` lists more than one unique bias correction method.
    """

    if 'method' not in bc_df:
        msg = ('Bias correction table provided, but "method" column not '
               'found! Only see columns: {}. Need to specify "method" which '
               'is a function name from `rex.bias_correction`'
               .format(list(bc_df.columns)))
        logger.error(msg)
        raise KeyError(msg)

    if bc_df.index.name != 'gid':
        if 'gid' not in bc_df:
            msg = ('Bias correction table must have "gid" column but only '
                   'found: {}'.format(list(bc_df.columns)))
            logger.error(msg)
            raise KeyError(msg)
        bc_df = bc_df.set_index('gid')

    gid_arr = np.array(gids)
    bool_bc = np.isin(gid_arr, bc_df.index.values)

    # Nothing to bias correct for this set of gids
    if not bool_bc.any():
        return None, {}, bool_bc

    if not bool_bc.all():
        missing = gid_arr[~bool_bc]
        msg = ('{} sites were missing from the bias correction table, '
               'not bias correcting: {}'.format(len(missing), missing))
        logger.warning(msg)
        warn(msg)

    # NOTE: uniqueness is checked across the whole table, not only across
    # the requested gids.
    fun_name = bc_df['method'].unique()
    if len(fun_name) != 1:
        msg = ('rex bias correction currently only supports a single unique '
               'bias correction method per chunk of sites but received: {}'
               .format(fun_name))
        logger.error(msg)
        # Explicit raise instead of an ``assert`` statement so the check is
        # not stripped under ``python -O``. The exception type is kept as
        # AssertionError for backward compatibility with existing callers.
        raise AssertionError(msg)

    bc_fun = getattr(rex.bias_correction, fun_name[0], None)
    if bc_fun is None:
        avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')]
        msg = ('Could not find method name "{}" in ``rex.bias_correction`` '
               'which has the available objects: {}'
               .format(fun_name[0], avail))
        logger.error(msg)
        raise KeyError(msg)

    bc_gids = gid_arr[bool_bc]
    bc_fun_kwargs = {}
    for col in bc_df.columns:

        # Serialized lists are detected from the first row of the table
        sample = bc_df[col].values[0]
        is_serialized = (isinstance(sample, str)
                         and '[' in sample and ']' in sample)

        # Select the requested rows first and decode into a local Series.
        # Writing the decoded lists back into ``bc_df`` would mutate the
        # caller's table whenever it was passed in already indexed by gid.
        selected = bc_df.loc[bc_gids, col]
        if is_serialized:
            selected = selected.apply(json.loads)

        arr = selected.values

        # nested lists in bc_df converted to arr of shape (space, N)
        if isinstance(arr[0], (list, tuple)):
            arr = np.array(arr.tolist())

        bc_fun_kwargs[col] = arr

    return bc_fun, bc_fun_kwargs, bool_bc