Source code for reVX.utilities.utilities

# -*- coding: utf-8 -*-
"""reVX Utilities"""
import addfips
import geopandas as gpd
import pandas as pd
import numpy as np
import pyproj
import rasterio
import shapely
import skimage
import sklearn
from sklearn.metrics.pairwise import haversine_distances

from reV.utilities import log_versions as reV_log_versions
from reVX.version import __version__

def coordinate_distance(coords1, coords2):
    """
    Compute the haversine distance between the two sets of coordinates.
    Results are in km

    If ``coords1`` holds a single coordinate, the distance from that
    coordinate to every coordinate in ``coords2`` is returned. Otherwise
    the distance is computed pair-wise, i.e. between ``coords1[i]`` and
    ``coords2[i]``. If the two sets differ in length, only the leading
    ``min(len(coords1), len(coords2))`` pairs are used.

    Parameters
    ----------
    coords1 : ndarray
        First set of (lat, lon) coordinates
    coords2 : ndarray
        Second set of (lat, lon) coordinates

    Returns
    -------
    dist : ndarray
        Vector of haversine distances between coordinate set 1 and set 2 in km

    Raises
    ------
    ValueError
        If the pair-wise inputs are not 2D arrays with two columns.
    """
    # radius of the earth in km
    R = 6371.0

    if len(coords1) == 1:
        # One-to-many: a single row of the distance matrix is all we need
        dist = haversine_distances(np.radians(coords1), np.radians(coords2))
        return dist.ravel() * R

    rad1 = np.radians(np.asarray(coords1, dtype=float))
    rad2 = np.radians(np.asarray(coords2, dtype=float))
    for rad in (rad1, rad2):
        if rad.ndim != 2 or rad.shape[1] != 2:
            raise ValueError("Coordinates must be 2D arrays of (lat, lon) "
                             "pairs, but got shape {}".format(rad.shape))

    # Only the diagonal of the pairwise distance matrix is wanted, so
    # compute it directly instead of building the full N x N matrix.
    n_pairs = min(len(rad1), len(rad2))
    lat1, lon1 = rad1[:n_pairs, 0], rad1[:n_pairs, 1]
    lat2, lon2 = rad2[:n_pairs, 0], rad2[:n_pairs, 1]

    sin_dlat = np.sin(0.5 * (lat1 - lat2))
    sin_dlon = np.sin(0.5 * (lon1 - lon2))
    hav = sin_dlat ** 2 + np.cos(lat1) * np.cos(lat2) * sin_dlon ** 2

    # Guard against floating point overshoot before taking the arcsin
    dist = 2 * np.arcsin(np.sqrt(np.clip(hav, 0.0, 1.0)))

    return dist * R
def log_versions(logger):
    """Log package versions:

    - rex, reV, and reVX to info
    - h5py, numpy, pandas, scipy, PySAM, geopandas, pyproj, rasterio,
      shapely, sklearn, and skimage to debug

    The reVX version is logged here at info level; ``reV_log_versions``
    is then called with the same logger, followed by debug-level messages
    for the geospatial and scikit packages imported by this module.

    Parameters
    ----------
    logger : logging.Logger
        Logger object to log version messages to.
    """
    logger.info('Running with reVX version {}'.format(__version__))
    reV_log_versions(logger)
    logger.debug('- geopandas version {}'.format(gpd.__version__))
    logger.debug('- pyproj version {}'.format(pyproj.__version__))
    logger.debug('- rasterio version {}'.format(rasterio.__version__))
    logger.debug('- shapely version {}'.format(shapely.__version__))
    logger.debug('- scikit-image version {}'.format(skimage.__version__))
    logger.debug('- scikit-learn version {}'.format(sklearn.__version__))
def to_geo(data_frame, lat_col="latitude", lon_col="longitude",
           crs="epsg:4326"):
    """Convert a Pandas DataFrame to a GeoPandas GeoDataFrame.

    The input DataFrame must have latitude and longitude columns, which
    get converted to a point geometry in the outputs GeoDataFrame.

    Note that a ``"geometry"`` column is added to the input
    ``data_frame`` in place before the GeoDataFrame is built.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        A pandas data frame with latitude and longitude coordinates.
    lat_col : str, optional
        The name of the latitude column. By default, ``"latitude"``.
    lon_col : str, optional
        The name of the longitude column. By default, ``"longitude"``.
    crs : str, optional
        The Coordinate Reference System of the output DataFrame
        represented as a string. By default, ``"epsg:4326"``.

    Returns
    -------
    geopandas.GeoDataFrame
        A GeoPandas GeoDataFrame object with points representing the
        lat/lon positions as the geometry.

    Raises
    ------
    KeyError
        If ``lat_col`` and/or ``lon_col`` are not columns of the input
        DataFrame.
    """
    missing = {col for col in [lat_col, lon_col] if col not in data_frame}
    # Test the set itself, not ``any(missing)``: the latter inspects the
    # truthiness of the column labels and misses falsy ones (0, "")
    if missing:
        raise KeyError("Input DataFrame missing the following required keys: "
                       "{}".format(missing))

    # Points are (x, y) == (lon, lat); build them in one vectorized call
    data_frame["geometry"] = gpd.points_from_xy(data_frame[lon_col],
                                                data_frame[lat_col])
    return gpd.GeoDataFrame(data_frame, geometry="geometry", crs=crs)
def load_fips_to_state_map():
    """Generate a FIPS to state name mapping.

    The keys of the returned dictionary are two-digit FIPS codes (as
    strings) and the values are the state names.

    Returns
    -------
    dict
        Dictionary mapping two-digit FIPS codes (as strings) to state
        names.
    """
    # pylint: disable=import-outside-toplevel
    from pathlib import Path

    # NOTE(review): the directory holding "data/states.csv" was missing
    # from this expression; it is presumed to be the installed ``addfips``
    # package directory -- confirm against the upstream source.
    states_fp = Path(addfips.__file__).parent / "data" / "states.csv"
    cdf = pd.read_csv(states_fp)

    # Zero-pad the integer codes so they become two-character strings
    cdf["fips"] = cdf["fips"].apply(lambda x: f"{x:02d}")
    return dict(zip(cdf["fips"], cdf["name"]))