"""
Class to compile MODIS dry land albedo and IMS snow data into composite albedo.
Created on Jan 23 2020
@author: mbannist
"""
import logging
import os
from concurrent.futures import as_completed
from datetime import datetime as dt
from multiprocessing import cpu_count
from typing import ClassVar
import h5py
import numpy as np
from rex.utilities.execution import SpawnProcessPool
from scipy import ndimage
from scipy.spatial import cKDTree
from nsrdb.albedo import ims, modis
from nsrdb.albedo import temperature_model as tm
# Value for NODATA cells in composite albedo
ALBEDO_NODATA = 0
# TIFF world file. Values were extracted from MODIS data with QGIS and may
# be slightly off.
WORLD = """0.00833333
0.00000000
0.00000000
-0.00833333
-179.99583333
89.99583333
"""
# Number of times to dialate (buffer) IMS snow/no-snow boundary. Larger numbers
# results in a larger KD tree. Smaller number risk "shark's teeth" artifacts
# at snow edge in composite albedo.
IMS_EDGE_BUFFFER = 6
# MODIS is clipped to IMS extent before calculating nearest neighbors.
# CLIP_MARGIN is a small buffer around the IMS extent to prevent the MODIS
# data from being clipped slightly too small due to mismatched projections.
CLIP_MARGIN = 0.1 # degrees lat/long
logger = logging.getLogger(__name__)
[docs]
class AlbedoError(Exception):
"""Exceptions for albedo related errors"""
[docs]
class CompositeAlbedoDay:
"""
Combine IMS and MODIS data to create composite Albedo.
This class is intended to be used with the run() class method, which
acquires IMS and MODIS data, merges the two, and a returns an instance
of the class. Exporting albedo data to HDF5 or TIFF must be explicitly
requested by the user using appropriate methods.
The composite albedo data is created by mapping the MODIS data to the IMS
data using a KD tree to perform a nearest neighbor analysis using
concurrent futures. Passing a KD tree based on the full IMS data to the
futures proved problematic due to memory issues, so the IMS data is first
reduced to the snow/no-snow boundary areas first, which reduces the size
of the tree significantly.
Combining the two data sources takes roughly 15 minutes for older, 4 km
IMS data, and 50 minutes for 1 km IMS data running on a 36 core HPC node.
Methods
-------
run - Load all data and create composite albedo.
write_albedo - Write albedo data to HDF5 file.
write_tiff - Write albedo to georeferenced TIFF.
write_tiff_from_h5 - Load existing composite albedo data from HDF5 and
write to TIFF.
"""
# Value for snow/sea ice in IMS. In thousandths, e.g. 867 == 0.867
SNOW_ALBEDO = 867
ALBEDO_ATTRS: ClassVar = {'units': 'unitless', 'scale_factor': 100}
CHUNKS = (2000, 1000)
[docs]
@classmethod
def run(
cls,
date,
modis_path,
ims_path,
albedo_path,
merra_path=None,
ims_shape=None,
modis_shape=None,
max_workers=None,
ims_buffer=IMS_EDGE_BUFFFER,
):
"""
Merge MODIS and IMS data for one day.
Parameters
----------
date : datetime instance
Date to calculate composite albedo for
modis_path : str
File path to MODIS hdf (NetCDF) data files. Data is downloaded and
stored at this location if not present.
ims_path : str
File path to IMS data and metadata files. Data is downloaded and
stored at this location if not present.
albedo_path : str
Path for composite albedo data files (output)
merra_path : str | None
Path for merra data to use in albedo calculation. If None albedo
will use constant value instead of calculating.
ims_shape : (int, int)
Shape of IMS data (rows, cols). Defaults to typical shape of IMS
data. Should be None unless testing.
modis_shape : (int, int)
Shape of MODIS data (rows, cols). Defaults to typical shape of
MODIS data. Should be None unless testing.
max_workers : int | None
Max number of workers for concurrent futures, None is all
ims_buffer : int
Number of times to buffer points used to define IMS snow/no snow
edge in KD tree.
Returns
-------
Class instance
"""
cad = cls(
date, modis_path, ims_path, albedo_path, merra_path, max_workers
)
logger.info(f'Loading MODIS data for {cad.date}')
cad._modis = modis.ModisDay(
cad.date, cad._modis_path, shape=modis_shape
)
logger.info(f'Loading IMS data {cad.date}')
cad._ims = ims.ImsDay(cad.date, cad._ims_path, shape=ims_shape)
cad.albedo = cad._calc_albedo(ims_buffer=ims_buffer)
return cad
def __init__(
self,
date,
modis_path,
ims_path,
albedo_path,
merra_path=None,
max_workers=None,
):
"""
Parameters
----------
date : datetime instance
Date to calculate composite albedo for
modis_path : str
File path to MODIS hdf (NetCDF) data files. Data is downloaded and
stored at this location if it not present.
ims_path : str
File path to IMS data and metadata files. Data is downloaded and
stored at this location if it not present.
albedo_path : str
Path for composite albedo data files (output)
merra_path : str | None
Path for merra data to use in albedo calculation. If None albedo
will use constant value instead of calculating.
max_workers : int | None
Max number of workers for concurrent futures, None is all
"""
self.date = date
self._modis_path = modis_path
self._ims_path = ims_path
self.albedo_path = albedo_path
self._merra_path = merra_path
self._max_workers = max_workers
self._modis = None # ModisDay object
self._ims = None # ImsDay object
self.albedo = None # numpy array of albedo data, same shape as MODIS
self._merra_data = None # temperature data for albedo calculation
self._mask = None # snow_no_snow mask
[docs]
def write_albedo(self):
"""
Write albedo data to HDF5 file. Albedo data is assumed to be scaled
to np.uint8.
Parameters
----------
path : string
Location to save albedo data to
"""
assert self.albedo.dtype == np.uint8
day = str(self.date.timetuple().tm_yday).zfill(3)
year = self.date.year
outfilename = os.path.join(
self.albedo_path, f'nsrdb_albedo_{year}_{day}.h5'
)
if (
self.albedo.shape[0] > self.CHUNKS[0]
and self.albedo.shape[1] > self.CHUNKS[1]
):
logger.info(f'Using a chunk size of {self.CHUNKS}')
chunks = self.CHUNKS
else:
logger.warning(
f'Albedo data is smaller than {self.CHUNKS}, using '
'automatic chunk size.'
)
chunks = True
logger.info(f'Writing albedo data to {outfilename}')
with h5py.File(outfilename, 'w') as f:
f.create_dataset(
'surface_albedo',
shape=self.albedo.shape,
dtype=self.albedo.dtype,
chunks=chunks,
data=self.albedo,
)
for k, v in self.ALBEDO_ATTRS.items():
f['surface_albedo'].attrs[k] = v
f.create_dataset(
'latitude',
shape=self._modis.lat.shape,
dtype=self._modis.lat.dtype,
data=self._modis.lat,
)
f.create_dataset(
'longitude',
shape=self._modis.lon.shape,
dtype=self._modis.lon.dtype,
data=self._modis.lon,
)
[docs]
def write_tiff(self, outfilename=None):
"""
Write albedo data to TIFF and world file. Geo referencing appears to be
off by 5 meters.
Parameters
----------
outfilename : string | None
Path and file name for TIFF and world file. Uses default if None.
"""
if outfilename is None:
day = str(self.date.timetuple().tm_yday).zfill(3)
year = self.date.year
outfilename = os.path.join(
self.albedo_path, f'nsrdb_albedo_{year}_{day}.tif'
)
self._create_tiff(self.albedo, outfilename)
[docs]
@classmethod
def write_tiff_from_h5(cls, infilename, outfilename=None):
"""
Write albedo data to TIFF and world file. Geo referencing appears to be
off by 5 meters.
Parameters
----------
infilename : string
Path and file name of albedo h5 file
outfilename : string
Path and file name for TIFF and world file. Defaults to infilename
with .tif extension
"""
if outfilename is None:
outfilename = os.path.splitext(infilename)[0] + '.tif'
with h5py.File(infilename, 'r') as f:
data = np.array(f['surface_albedo'])
cls._create_tiff(data, outfilename)
@staticmethod
def _create_tiff(data, filename):
"""
Write albedo data to TIFF and world file. Geo referencing appears to be
off by 5 meters.
Parameters
----------
data : np.array() (int16)
Albedo data
filename : string
File name and full path for TIFF
"""
import libtiff
tif = libtiff.TIFF.open(filename, mode='w')
tif.write_image(data)
tif.close()
with open(
os.path.splitext(filename)[0] + '.tfw', 'w', encoding='utf-8'
) as f:
f.write(WORLD)
logger.debug(f'Write to file {filename} complete')
def _calc_albedo(self, ims_buffer=IMS_EDGE_BUFFFER):
"""
Calculate composite albedo by merging MODIS and IMS
Parameters
----------
ims_buffer : int
Number of times to buffer points used to define IMS snow/no snow
edge in KD tree.
Returns
-------
albedo : 2D numpy array (np.uint8)
MODIS data overlayed with IMS snow. Array has same shape/projection
as MODIS
"""
if self._modis is None or self._ims is None:
raise AlbedoError(
'MODIS/IMS data must be loaded before running' ' calc_albedo()'
)
logger.info(f'Calculating composite albedo for {self.date}')
# Clip MODIS data to IMS boundary
mc = ModisClipper(self._modis, self._ims)
# Find snow/no snow region boundaries of IMS
logger.info('Determining IMS snow/no snow region boundaries')
ims_bin_mskd, ims_pts = self._get_ims_boundary(buffer=ims_buffer)
# Create cKDTree to map MODIS points onto IMS regions
logger.info('Creating KD Tree')
ims_tree = cKDTree(ims_pts)
# Map MODIS pixels to IMS data
logger.info('Mapping MODIS to IMS data. This might take a while.')
modis_pts = self._get_modis_pts(mc.mlon_clip, mc.mlat_clip)
if self._max_workers != 1:
if self._max_workers is not None:
logging.warning(
f'Processing albedo with {self._max_workers}' ' workers'
)
ind = self._run_futures(ims_tree, modis_pts)
else:
logging.warning('Processing albedo with a single worker')
ind = self._run_single_tree(ims_tree, modis_pts)
# Project nearest neighbors from IMS to MODIS.
# Array is on same grid as clipped MODIS,
# but has snow/no snow values from binary IMS.
snow_no_snow = ims_bin_mskd[ind].reshape(
len(mc.mlat_clip), len(mc.mlon_clip)
)
self._mask = snow_no_snow
logger.info(f'Shape of snow/no snow grid is {snow_no_snow.shape}.')
# Update MODIS albedo for cells w/ snow
mclip_albedo = mc.modis_clip
if self._merra_path is not None:
logger.info(
f'Loading Merra data for {self.date}. '
'This might take a while'
)
if self._max_workers != 1:
if self._max_workers is not None:
logging.warning(
f'Loading MERRA data with {self._max_workers}'
' workers'
)
else:
logging.warning('Loading MERRA data with a single worker')
self._merra_data = tm.DataHandler.get_data(
self.date,
self._merra_path,
self._mask,
mc.mlat_clip,
mc.mlon_clip,
max_workers=self._max_workers,
)
msg = 'Calculating temperature dependent '
msg += f'snowy albedo for {self.date}'
logger.info(msg)
mclip_albedo = tm.TemperatureModel.update_snow_albedo(
mclip_albedo, self._mask, self._merra_data
)
else:
mclip_albedo[snow_no_snow == 1] = self.SNOW_ALBEDO
# Merge clipped composite albedo with full MODIS data
albedo = self._modis.data
albedo[mc.modis_idx] = mclip_albedo
# Reset NODATA values
albedo[albedo == modis.MODIS_NODATA] = ALBEDO_NODATA
# Check bounds
if albedo[albedo < 0].any() or albedo[albedo > 1000].any():
raise AlbedoError(
'Composite albedo data has values greater than'
' 1000 or less than 0, before reducing scale '
'factor.'
)
# MODIS data has a scaling factor of 1000, reduce to 100
albedo /= 10
albedo = np.round(albedo)
albedo = albedo.astype(np.uint8)
return albedo
@staticmethod
def _run_single_tree(tree, pts):
"""
Map MODIS pixels to IMS binary data on one core.
Parameters
----------
tree : cKDTree
KD tree created using IMS region boundary pixels
pts : 2D numpy array
Lon/lat locations for chunk of MODIS data cells
Returns
-------
ind : 1D numpy array (int)
Indices mapping MODIS cells to ims_bin_mskd
"""
_, ind = tree.query(pts)
return ind
def _run_futures(self, ims_tree, modis_pts):
"""
Split mapping MODIS to IMS across multiple cores.
Parameters
----------
ims_tree : cKDTree
KD tree created using IMS region boundary pixels
modis_pts : 2D numpy array
Lon/lat locations for MODIS data cells
Returns
-------
ind : 1D numpy array (int)
Indices mapping MODIS cells to ims_bin_mskd
"""
futures = {}
chunks = np.array_split(modis_pts, cpu_count())
now = dt.now()
loggers = ['nsrdb']
with SpawnProcessPool(
loggers=loggers, max_workers=self._max_workers
) as exe:
for i, chunk in enumerate(chunks):
future = exe.submit(self._run_single_tree, ims_tree, chunk)
meta = {'id': i}
ct = chunk.T
meta['lon_min'] = ct[0].min()
meta['lon_max'] = ct[0].max()
meta['lat_min'] = ct[1].min()
meta['lat_max'] = ct[1].max()
meta['size'] = ct.size
futures[future] = meta
logger.info(f'Started all futures in {dt.now() - now}')
for i, future in enumerate(as_completed(futures)):
logger.info(
f'Future {futures[future]} completed in '
f'{dt.now() - now}.'
)
logger.info(
f'{i + 1} out of {len(futures)} futures ' f'completed'
)
logger.info('done processing')
# Merge all returned indices
ind = np.empty((len(modis_pts)), dtype=int)
pos = 0
for key in futures:
size = len(key.result())
ind[pos : pos + size] = key.result()
pos += size
return ind
def _get_ims_boundary(self, buffer=5):
"""
Create IMS boundary layer which represents the pixels that form the
boundary between snow and no snow.
Parameters
----------
buffer : int
Number of times to buffer initial edge detection mask. Greater
values result in a "thicker" edge layer and will more reliablely
classify locations at higher latitudes as snow or no-snow, at the
expense of the boundary using more points.
Returns
-------
ims_bin_mskd : 1D numpy array
IMS data, represented as 0 (no snow) or 1 (snow/sea ice), for
region boundary pixels.
ims_pts : 2D numpy array
Lon/lat points for ims_bin_mskd
"""
# Create binary IMS layer. Same size as original IMS.
ims_bin = self._ims.data
ims_bin[ims_bin < 3] = 0 # Dry land, water
ims_bin[ims_bin > 2] = 1 # Snow, sea ice
# Create and buffer edge mask
logger.debug(
'Performing IMS edge detection and dilating edge ' '%s times',
buffer,
)
ims_mask = ims_bin - ndimage.morphology.binary_dilation(ims_bin)
ims_mask = ndimage.morphology.binary_dilation(
ims_mask, iterations=buffer
)
ims_mask = ims_mask.flatten()
# Mask data and lon/lat to boundary edges
ims_bin_mskd = ims_bin.flatten()[ims_mask]
ilon = self._ims.lon[ims_mask]
ilat = self._ims.lat[ims_mask]
# Combine lat and lon to create pts for KD tree
ims_pts = np.vstack((ilon, ilat)).T
return ims_bin_mskd, ims_pts
@staticmethod
def _get_modis_pts(lon_pts, lat_pts):
"""
Create 2D numpy array representing lon/lats of MODIS pixels
Parameters
----------
lon_pts : numpy array
Longitude points for MODIS data
lat_pts : numpy array
Latitude points for MODIS data
Returns
-------
modis_pts : 2d numpy array
Array of lon/lat points corresponding to all pixels in
self._modis.data
"""
new_mg = np.meshgrid(lon_pts, lat_pts)
n_mg_v = np.vstack((new_mg[0].reshape(-1), new_mg[1].reshape(-1)))
modis_pts = n_mg_v.T
return modis_pts
[docs]
class ModisClipper:
"""
Clip MODIS data to extent of valid IMS data. This prevents
assigning IMS values to MODIS outside of the IMS extents, and
speeds NN mapping.
Attributes
----------
modis_idx : 2D numpy boolean array
2D indices of MODIS values w/n IMS extent
modis_clip : 2D numpy int16
MODIS data clipped to IMS extent
mlat_clip : 1D numpy float32
MODIS latitudes clipped to IMS extent
mlon_clip : 1D numpy float32
MODIS longigutes clipped to IMS extent
"""
def __init__(self, modis, ims):
"""
Parameters
----------
modis : ModisDay instance
MODIS data to clip
ims : ImsDay instance
IMS data to clip MODIS extent to
"""
self._modis = modis
self._ims = ims
# Ignore IMS Nodata pixels
valid_meta = (~np.isnan(self._ims.lat)) & (~np.isnan(self._ims.lon))
valid_ims = (self._ims.data.flatten() > 0) & valid_meta
ilat_good = self._ims.lat[valid_ims]
ilon_good = self._ims.lon[valid_ims]
# Get MODIS mask from IMS extents
logger.info(
f'Boundaries of valid IMS data: {ilon_good.min()} - '
f'{ilon_good.max()} long, {ilat_good.min()} - '
f'{ilat_good.max()} lat'
)
mlat_idx = (ilat_good.min() - CLIP_MARGIN <= self._modis.lat) & (
self._modis.lat <= ilat_good.max() + CLIP_MARGIN
)
mlon_idx = (ilon_good.min() - CLIP_MARGIN <= self._modis.lon) & (
self._modis.lon <= ilon_good.max() + CLIP_MARGIN
)
self.modis_idx = np.ix_(mlat_idx, mlon_idx)
# Clip out MODIS that matches IMS extents
self.modis_clip = self._modis.data[self.modis_idx]
self.mlat_clip = self._modis.lat[mlat_idx]
self.mlon_clip = self._modis.lon[mlon_idx]
logger.info(
'Boundaries of clipped MODIS data: '
f'{self.mlon_clip.min()} - '
f'{self.mlon_clip.max()} long, {self.mlat_clip.min()} - '
f'{self.mlat_clip.max()} lat'
)
logger.info(f'Shape of clipped MODIS data is {self.modis_clip.shape}')