# -*- coding: utf-8 -*-
"""
Created on Tues Feb 2023
@author: bbenton
"""
import logging
import pandas as pd
import numpy as np
import os
import shutil
from warnings import warn
import re
from reVX.plexos.rev_reeds_plexos import PlexosAggregation
from rex import Outputs
# Module-level logger; it is named after this module's ``__name__``.
logger = logging.getLogger(__name__)
[docs]class PrasAggregation(PlexosAggregation):
"""
Framework to aggregate reV gen profiles to PRAS zone power profiles and
overwrite existing pras file with new generation profiles. This class
takes as input rev supply curve table, and reeds buildout table (specifying
which rev sc points were built and at what capacity). The class will build
power profiles for each supply curve point and then aggregate the sc point
profiles to the nearest neighbor pras zone. The corresponding zone power
profiles in the pras output file will be overwritten with these new
aggregated generation profiles.
Examples
--------
>>> from reVX.pras.rev_reeds_pras import PrasAggregation
>>>
>>> rev_supply_curve_file = ...
>>> reeds_build_file = ...
>>> capacity_factor_file = ...
>>> pras_file = ...
>>>
>>> PrasAggregation.run(rev_supply_curve_file, reeds_build_file,
capacity_factor_file, pras_file, build_year=2047,
tech_type='upv')
This will replace all the pras generation profiles with upv tech in the
file pras_file.pras.h5 with aggregated generation profiles from cf_path.h5
>>> reeds_build_files = [...]
>>> capacity_factor_files = [...]
>>> tech_types = ['upv', 'dupv', 'wind-ons', 'wind-ofs']
>>>
>>> PrasAggregation.run(rev_supply_curve_file, reeds_build_files,
capacity_factor_files, pras_file, build_year=2047,
tech_type=tech_types)
This will run the same routine but for each tech type in tech_types. The
files in reeds_build_files and capacity_factor_files have to correspond to
the tech type in the tech_types list.
"""
def __init__(self, rev_sc, reeds_build, cf_fpath, pras_file,
             forecast_fpath=None, build_year=2050, res_class=None,
             tech_type=None, timezone='US/Central', dset_tag=None,
             max_workers=None):
    """
    Parameters
    ----------
    rev_sc : str | pd.DataFrame
        reV supply curve results table (or path to it) including SC gid,
        latitude, longitude, res_gids, gid_counts.
    reeds_build : str | pd.DataFrame
        ReEDS buildout (or path to it) with one row of built capacity (MW)
        per reV SC point. Desired columns: year, region, class,
        built_capacity, and sc_gid (the reV supply curve point gid). Some
        cleaning of legacy column names will be performed.
    cf_fpath : str
        Path to the .h5 capacity factor file (reV gen output) that the
        profiles are read from.
    pras_file : str
        Path to an existing hdf5 PRAS file (.pras or .h5 extension) that
        will be amended with the new generation data. It must contain a
        'generators' group with '_core' and 'capacity' datasets.
    forecast_fpath : str | None
        Forecasted capacity factor .h5 file path (reV results). If not
        None, the supply curve res_gids are mapped to sites in cf_fpath,
        the coordinates from cf_fpath are mapped to the nearest neighbor
        sites in forecast_fpath, and the final generation profiles are
        retrieved from there.
    build_year : int, optional
        ReEDS year of interest, by default 2050
    res_class : int | None
        Optional resource class used to filter supply curve points, e.g.
        res_class=3 keeps only the class 3 points in the sc_build table.
        Corresponds to the 'class' column of the reeds_build table.
    tech_type : str | None
        Technology type. Pras files carry the tech type in the
        ['generation/_core']['name'] entry, which is used to select the
        correct output index.
    timezone : str
        Timezone of the output generation profiles, passed to
        pytz.timezone(), e.g. US/Pacific, US/Mountain, US/Central,
        US/Eastern, or UTC. See pytz.all_timezones for all options.
    dset_tag : str
        Tag appended to the dataset names in the cf profile file, e.g.
        dset_tag="-2008" selects cf_mean-2008, cf_profile-2008, etc. from
        a multi year file.
    max_workers : int | None
        Max workers for parallel profile aggregation. None uses all
        available workers. 1 will run in serial.
    """
    # Selection criteria (read by _init_pras_build_zones below)
    self._tech_type = tech_type
    self._res_class = res_class
    self._build_year = build_year
    self._dset_tag = "" if dset_tag is None else dset_tag

    # File locations; the output / qa / profile paths are finalized in
    # _init_output_files
    self._pras_file = pras_file
    self._output_file = pras_file
    self._qa_file = None
    self._prof_file = None

    # Lazily populated caches behind the properties of this class
    self._pras_meta = None
    self._complete_pras_meta = None
    self._sc_build_indices = None
    self._pras_indices = None

    # The zone table stands in for the plexos node table of the parent
    self._pras_build_zones = self._init_pras_build_zones(reeds_build)
    super().__init__(self._pras_build_zones, rev_sc,
                     reeds_build, cf_fpath,
                     forecast_fpath=forecast_fpath,
                     build_year=build_year,
                     res_class=res_class,
                     timezone=timezone,
                     dset_tag=self._dset_tag,
                     max_workers=max_workers)

    self._init_output_files(pras_file)

    run_msg = ('Running aggregation for tech_type={}, res_class={}, '
               'rev_sc={}, reeds_build={}, cf_fpath={}, dset_tag={}, '
               'pras_file={}.')
    logger.info(run_msg.format(self._tech_type, self._res_class, rev_sc,
                               reeds_build, cf_fpath, dset_tag,
                               pras_file))

    count_msg = 'Found {} pras_zones and {} sc_build_zones'
    logger.info(count_msg.format(len(self.pras_zones),
                                 len(self.sc_build_zones)))

    self.missing_zones = self.get_missing_zones()
    self.found_zones = list(set(self.pras_zones) - set(self.missing_zones))
def _init_pras_build_zones(self, reeds_build):
"""
Build pras_build_zones table from reeds_build. This returns a table
with all available zones in the reeds build table and assigns a
plexos_id to each zone so that this can be used as a plexos node table
for calling super().__init__()
Parameters
----------
reeds_build : str | pd.DataFrame
ReEDS buildout with rows for built capacity (MW) at each reV SC
point. This should have columns: reeds_year, built_capacity, and
sc_gid (corresponding to the reV supply curve point gid). Some
cleaning of the column names will be performed for legacy tables
but these are the column headers that are desired.
Returns
-------
pras_build_zones : pd.DataFrame
pras zones DataFrame with all available zones in reeds build table
and corresponding plexos_id
"""
pras_build_zones = reeds_build
if isinstance(pras_build_zones, str):
pras_build_zones = pd.read_csv(pras_build_zones)
class_mask = pras_build_zones['class'] == self._res_class
pras_build_zones = pras_build_zones[class_mask]
year_mask = pras_build_zones['year'] == self._build_year
pras_build_zones = pras_build_zones[year_mask]
pras_build_zones = pras_build_zones[['latitude', 'longitude',
'region', 'built_capacity']]
pras_build_zones_grp = pras_build_zones.groupby(by='region')
pras_build_zones = pras_build_zones_grp.mean()
cap = pras_build_zones_grp.sum()['built_capacity']
pras_build_zones['built_capacity'] = cap
pras_build_zones = pras_build_zones.reset_index()
pras_build_zones['plexos_id'] = np.arange(len(pras_build_zones))
return pras_build_zones
def _init_output_files(self, pras_file):
"""
Initialize the output file to be written with new generation profiles
and define the qa file name.
Parameters
----------
pras_file : str
Path to original pras file containing generation profiles to be
updated. The file will be copied to a new path (given by
_output_file) and then updated with the new profiles
"""
if not self._pras_file.endswith('.h5'):
self._output_file += '.h5'
self._output_file = pras_file.replace('.h5', '_updated.h5')
msg = ("pras_file={} does not exist. This file must come from a "
"previous ReEDS2PRAS run").format(self._pras_file)
assert os.path.exists(self._pras_file), msg
if not os.path.exists(self._output_file):
shutil.copy(self._pras_file, self._output_file)
self._qa_file = self._output_file.replace(
'.h5', '_qa_{}_{}.csv'.format(self._tech_type, self._res_class))
self._prof_file = self._output_file.replace(
'.h5', '_{}_{}.csv'.format(self._tech_type, self._res_class))
@classmethod
def get_pras_classes(cls, pras_file, tech_type):
    """
    Get the tech classes present in the pras file for the requested
    technology type.

    A pras category belongs to the technology when it starts with
    '<tech_type>_' (the same prefix used by enforce_tech_constraint); the
    class is the integer after the last underscore of the category.

    Parameters
    ----------
    pras_file : str
        Path to the pras file being overwritten with new generation
        profiles
    tech_type : str
        Technology type for which profiles are being overwritten

    Returns
    -------
    class_list : np.ndarray
        Sorted unique classes found for tech_type
    """
    pras_meta = cls.get_pras_meta(pras_file)

    # Include the separator in the prefix so that a technology whose name
    # is itself a prefix of another technology does not pick up the other
    # technology's classes.
    prefix = '{}_'.format(tech_type)
    class_list = np.unique([int(category.split('_')[-1])
                            for category in pras_meta['category']
                            if category.startswith(prefix)])

    logger.info('Found classes {} for tech_type {}'
                .format(class_list, tech_type))
    return class_list
@property
def zone_index_map(self):
"""
Get the map between zone index and zone label. i.e. zone_index=1,
zone_label=p10
"""
zone_index_map = dict(
zip(self._pras_build_zones['region'].values,
self._pras_build_zones['plexos_id'].values))
return zone_index_map
@property
def sc_build_zones(self):
"""
Get the labels for the zones in the zone_map. For example, the zone_map
might have an index of 1 for the region but this corresponds to the
label of p10.
"""
build_zones = self._pras_build_zones['region'].unique()
return build_zones
@property
def complete_pras_meta(self):
"""
Get pras 'meta' without filtering for tech_type and class
Returns
-------
pd.DataFrame
"""
if self._complete_pras_meta is None:
self._complete_pras_meta = self.get_pras_meta(self._pras_file)
return self._complete_pras_meta
@property
def pras_meta(self):
"""
Get pras 'meta' which describes the tech type and class at each zone
Returns
-------
pd.DataFrame
"""
if self._pras_meta is None:
self._pras_meta = self.enforce_tech_constraint(
self.complete_pras_meta)
return self._pras_meta
@property
def pras_zones(self):
"""
Get list of all available zones in pras output file for the requested
tech and class. For upv and dupv regions are labeled p1, p2, etc. For
wind and csp regions are labeled s1, s2, etc.
Returns
-------
List
"""
return self.pras_meta['region'].values
@property
def pras_indices(self):
"""
Get the list of pras file indices for the pras zones available in the
supply curve zone list.
Returns
-------
np.ndarray
"""
if self._pras_indices is None:
zone_mask = np.isin(self.pras_zones, self.sc_build_zones)
self._pras_indices = list(self.pras_meta.index[zone_mask])
return self._pras_indices
@property
def pras_names(self):
"""
Get the list of pras entry names for the pras zones available in the
supply curve zone list.
Returns
-------
np.ndarray
"""
return self.pras_meta['name'].loc[self.pras_indices]
@property
def found_pras_zones(self):
"""
List of pras zones found in the reeds build out for the requested tech
and class
Returns
-------
np.ndarray
"""
return self.pras_meta['region'].loc[self.pras_indices]
@property
def found_build_zones(self):
"""
List of sc build zones found in the pras output file for the requested
tech and class
Returns
-------
np.ndarray
"""
return self.sc_build_zones[self.sc_build_indices]
@property
def sc_build_indices(self):
"""
Get the list of profile indices corresponding to the sc_build_zones -
used to write the new profiles to the pras output.
Returns
-------
List
"""
if self._sc_build_indices is None:
zones = self.sc_build_zones.tolist()
self._sc_build_indices = [zones.index(x) for x
in self.found_pras_zones]
return self._sc_build_indices
@property
def built_capacity(self):
"""
Get the built capacity for all zones found in the pras output file and
the reeds build file.
Returns
-------
np.ndarray
"""
indices = [np.where(self._pras_build_zones['region'] == x)[0][0]
for x in self.found_pras_zones]
cap = self._pras_build_zones['built_capacity'].iloc[indices].values
return cap
[docs] def enforce_tech_constraint(self, meta):
"""
Filter the pras generator meta for the requested technology type and
class
Parameters
----------
meta : pd.DataFrame
Full pras 'meta' without tech_type or class filtering
Returns
-------
pd.DataFrame
pras meta DataFrame filtered by tech_type and class
"""
tech_key = '{}_'.format(self._tech_type)
if self._res_class is not None:
tech_key += '{}'.format(self._res_class)
category_mask = [x.startswith(tech_key) for x in meta['category']]
out_meta = meta[category_mask]
if out_meta.empty:
msg = ('Found zero pras zones with requested tech_key {}.'
.format(tech_key))
logger.error(msg)
raise RuntimeError(msg)
return out_meta
def get_missing_zones(self):
    """
    Find the zones of the pras output file (for the requested tech and
    class) that are absent from the supply curve build zones. A warning is
    emitted when any zone is missing and the mean / min / max capacity
    stored in the pras file for the missing zones is logged.

    Returns
    -------
    missing_zones : list
        Sorted unique labels of the missing zones
    """
    pras_zones = self.pras_zones
    build_zones = self.sc_build_zones

    missing_zones = sorted(zone for zone in np.unique(pras_zones)
                           if zone not in build_zones)

    if np.isin(pras_zones, build_zones).all():
        logger.info('All {} pras zones found in sc_build_zones'
                    .format(len(pras_zones)))
    else:
        msg = ('{} / {} pras zones not found in sc_build_zones, for '
               'tech_key {}_{}. Missing zones: {}.').format(
                   len(missing_zones), len(pras_zones), self._tech_type,
                   self._res_class, missing_zones)
        logger.warning(msg)
        warn(msg)

    if missing_zones:
        is_missing = self.pras_meta['region'].isin(missing_zones)
        missing_indices = self.pras_meta.index[is_missing].tolist()
        with Outputs(self._pras_file, mode='r') as out:
            missing_cap = out['generators/capacity', :, missing_indices]

        stats = (np.mean(missing_cap), np.min(missing_cap),
                 np.max(missing_cap))
        logger.info('The mean/min/max capacity for the missing zones: '
                    '{} / {} / {} MW.'
                    .format(*stats))

    return missing_zones
def qa(self, profiles):
    """
    Simple QA comparing the mean / max of the capacity currently stored in
    the pras file with the mean / max of the new generation profiles. The
    comparison table is logged and written to the qa csv file.

    Parameters
    ----------
    profiles : np.ndarray
        Generation profile timeseries in MW at each plant / pras zone.

    Returns
    -------
    new_cap : np.ndarray
        The columns of profiles selected by sc_build_indices, cast to the
        dtype of the capacity dataset in the pras file
    """
    with Outputs(self._pras_file, mode='r') as out:
        old_cap = out['generators/capacity', :, self.pras_indices]

    selected = profiles[:, self.sc_build_indices]
    new_cap = selected.astype(old_cap.dtype)

    df = pd.DataFrame()
    df['region'] = self.found_pras_zones
    df['name'] = self.pras_names
    for label, reducer in (('mean', np.mean), ('max', np.max)):
        df['old_{}'.format(label)] = reducer(old_cap, axis=0)
        df['new_{}'.format(label)] = reducer(new_cap, axis=0)
    df['built_capacity'] = self.built_capacity

    summary = ('The old and new mean/max capacity (MW) for the {}_{} '
               'zones:\n{}.')
    logger.info(summary.format(self._tech_type, self._res_class, df))

    df.to_csv(self._qa_file)
    logger.info('Wrote pras QA to: {}'.format(self._qa_file))

    return new_cap
def export(self, time_index, profiles):
    """Write the new generation profiles to the pras output h5 file and to
    a csv file with one column per pras entry name

    Parameters
    ----------
    time_index : pd.datetimeindex
        Time index for the profiles.
    profiles : np.ndarray
        Generation profile timeseries in MW at each plant / pras zone.
    """
    target = '{}_{}'.format(self._tech_type, self._res_class)
    logger.info('Saving profiles for {} to file: {}'
                .format(target, self._output_file))

    local_profiles = self.tz_convert_profiles(profiles, self._timezone)
    new_cap = self.qa(local_profiles)

    with Outputs(self._output_file, mode='a') as out:
        out['generators/capacity', :, self.pras_indices] = new_cap

    # the csv index carries no timezone information
    naive_index = time_index.tz_convert(None)
    df_pras = pd.DataFrame(new_cap, columns=self.pras_names,
                           index=naive_index)
    df_pras.index.name = 'DATETIME'
    df_pras.to_csv(self._prof_file)

    logger.info('Wrote pras formatted profiles to: {}'
                .format(self._prof_file))
def _make_node_map(self):
"""Map built rev SC points to pras zones.
Returns
-------
pras_zone_index : np.ndarray
(n, 1) array of pras zone indices mapped to the SC builds where n
is the number of SC points built. Each value in this array gives
the pras zone index that the sc point is mapped to. So
self.zone_map[10] yields the pras zone index for
self.sc_build[10].
"""
if 'region' in self.sc_build:
pras_zone_index = self.sc_build['region'].values
pras_zone_index = [self.zone_index_map[k] for k
in pras_zone_index]
pras_zone_index = np.array(pras_zone_index)
if len(pras_zone_index.shape) == 1:
pras_zone_index = pras_zone_index.reshape(
(len(pras_zone_index), 1))
else:
msg = '"region" column not found in sc_build.'
logger.error(msg)
raise RuntimeError(msg)
return pras_zone_index
@classmethod
def run(cls, rev_sc, reeds_build, cf_fpath, pras_file,
        forecast_fpath=None, build_year=2050, res_class=None,
        tech_type=None, timezone='US/Central', dset_tag=None,
        max_workers=None):
    """Run pras aggregation and output for the requested tech types. This
    will aggregate the generation profiles over region specific supply
    curve points found in rev_sc, after filtering rev_sc for the requested
    tech type, and resource class. These aggregated generation profiles
    will then be written to the pras output file in the location
    corresponding to the region, tech type, and class.

    Parameters
    ----------
    rev_sc : str | pd.DataFrame | list
        reV supply curve results table including SC gid, latitude,
        longitude, res_gids, gid_counts. Or file path to reV supply
        curve table. When several tech types are requested this is either
        a list with one entry per tech type or a single table / path that
        is used for every tech type.
    reeds_build : str | pd.DataFrame | list
        ReEDS buildout with rows for built capacity (MW) at each reV SC
        point. This should have columns: reeds_year, built_capacity, and
        sc_gid (corresponding to the reV supply curve point gid). Some
        cleaning of the column names will be performed for legacy tables
        but these are the column headers that are desired. This input can
        also include "plexos_node_gid" which will explicitly assign a
        supply curve point buildout to a single pras zone. If included,
        all points must be assigned to pras zones. If providing a list
        of tech types this also needs to be a list of the corresponding
        reeds buildout files for each tech type.
    cf_fpath : str | list
        File path to capacity factor file (reV gen output) to
        get profiles from. If providing a list of tech types this also
        needs to be a list of the corresponding capacity factor files.
    pras_file : str
        File path to existing PRAS file which will be amended to include
        generation data from given resource data.
    forecast_fpath : str | None
        Forecasted capacity factor .h5 file path (reV results). If not
        None, the supply curve res_gids are mapped to sites in the
        cf_fpath, then the coordinates from cf_fpath are mapped to the
        nearest neighbor sites in the forecast_fpath, where the final
        generation profiles are retrieved from.
    build_year : int, optional
        REEDS year of interest, by default 2050
    res_class : int | None
        Optional resource class to use to filter supply curve points.
        For example, if res_class = 3 then only supply curve points with
        class 3 will be kept in the sc_build table. If None then the
        aggregation will be run for each class found in the pras output
        file
    tech_type : str | list
        Tech type for which to overwrite the generation profiles in the
        pras output file. Pras files will have the tech type in the
        ['generation/_core']['name'] entry, which is used to select the
        correct output index
    timezone : str
        Timezone for output generation profiles. This is a string that will
        be passed to pytz.timezone() e.g. US/Pacific, US/Mountain,
        US/Central, US/Eastern, or UTC. For a list of all available
        timezones, see pytz.all_timezones
    dset_tag : str
        Dataset tag to append to dataset names in cf profile file. e.g. If
        the cf profile file is a multi year file using dset_tag="-2008"
        will enable us to select the corresponding datasets
        (cf_mean-2008, cf_profile-2008, etc)
    max_workers : int | None
        Max workers for parallel profile aggregation. None uses all
        available workers. 1 will run in serial.
    """
    def _as_list(value):
        """Wrap anything that is not already a list in a list"""
        return value if isinstance(value, list) else [value]

    tech_type = _as_list(tech_type)
    reeds_build = _as_list(reeds_build)
    cf_fpath = _as_list(cf_fpath)
    rev_sc = _as_list(rev_sc)

    # A single supply curve table may be shared by all tech types
    if len(rev_sc) == 1:
        rev_sc = rev_sc * len(tech_type)

    missing_zones = []
    for i, tech in enumerate(tech_type):
        # NOTE: the keyword must be forecast_fpath to match the signature
        # of _run_single
        missing_zones += cls._run_single(tech, rev_sc[i], reeds_build[i],
                                         cf_fpath[i], pras_file,
                                         forecast_fpath=forecast_fpath,
                                         build_year=build_year,
                                         res_class=res_class,
                                         timezone=timezone,
                                         dset_tag=dset_tag,
                                         max_workers=max_workers)

    unique_missing = set(missing_zones)
    logger.info('Missing {} zones across all classes: {}'
                .format(len(unique_missing), unique_missing))
@classmethod
def _run_single(cls, tech, rev_sc_fp, reeds_build_fp, cf_fp, pras_file,
forecast_fpath=None, build_year=2050, res_class=None,
timezone='US/Central', dset_tag=None, max_workers=None):
"""Run pras aggregation and output for a single requested tech type.
This will aggregate the generation profiles over region specific supply
curve points found in rev_sc, after filtering rev_sc for the requested
tech type, and resource class. These aggregated generation profiles
will then be written to the pras output file in the location
corresponding to the region, tech type, and class.
Parameters
----------
tech : str
Tech type for which to overwrite the generation profiles in the
pras output file. Pras files will have the tech type in the
['generation/_core']['name'] entry, which is used to select the
correct output index
rev_sc_fp : str | pd.DataFrame
reV supply curve results table including SC gid, latitude,
longitude, res_gids, gid_counts. Or file path to reV supply
curve table.
reeds_build_fp : str | pd.DataFrame
ReEDS buildout with rows for built capacity (MW) at each reV SC
point. This should have columns: reeds_year, built_capacity, and
sc_gid (corresponding to the reV supply curve point gid). Some
cleaning of the column names will be performed for legacy tables
but these are the column headers that are desired. This input can
also include "plexos_node_gid" which will explicitly assign a
supply curve point buildout to a single pras zone. If included,
all points must be assigned to pras zones.
cf_fp : str
File path to capacity factor file (reV gen output) to
get profiles from.
pras_file : str
File path to existing PRAS file which will be amended to include
generation data from given resource data.
forecast_fpath : str | None
Forecasted capacity factor .h5 file path (reV results). If not
None, the supply curve res_gids are mapped to sites in the
cf_fpath, then the coordinates from cf_fpath are mapped to the
nearest neighbor sites in the forecast_fpath, where the final
generation profiles are retrieved from.
build_year : int, optional
REEDS year of interest, by default 2050
res_class : int | None
Optional resource class to use to filter supply curve points.
For example, if res_class = 3 then only supply curve points with
class 3 will be kept in the sc_build table. If None then the
aggregation will be run for each class found in the pras output
file
timezone : str
Timezone for output generation profiles. This is a string that will
be passed to pytz.timezone() e.g. US/Pacific, US/Mountain,
US/Central, US/Eastern, or UTC. For a list of all available
timezones, see pytz.all_timezones
dset_tag : str
Dataset tag to append to dataset names in cf profile file. e.g. If
the cf profile file is a multi year file using dset_tag="-2008"
will enable us to select the corresponding datasets
(cf_mean-2008, cf_profile-2008, etc)
max_workers : int | None
Max workers for parallel profile aggregation. None uses all
available workers. 1 will run in serial.
"""
missing_zones = []
if res_class is None:
res_classes = cls.get_pras_classes(pras_file, tech)
else:
res_classes = [res_class]
for res_class in res_classes:
pa = cls(rev_sc_fp, reeds_build_fp, cf_fp, pras_file,
forecast_fpath=forecast_fpath,
build_year=build_year,
tech_type=tech,
res_class=res_class,
timezone=timezone,
dset_tag=dset_tag,
max_workers=max_workers)
profiles = pa.make_profiles()
pa.export(pa.time_index, profiles)
missing_zones += pa.missing_zones
return missing_zones