Source code for fied.scc.scc_unit_id


import pandas as pd
import numpy as np
import re
import logging
import requests
from io import BytesIO


[docs] class SCC_ID: """ Use descriptions of SCC code levels to identify unit type and fuel type indicated by a complete SCC code (e.g., 30190003). The U.S. EPA uses Source Classification Codes (SCCs) to classify different types of activities that generate emissions. Each SCC represents a unique source category-specific process or function that emits air pollutants. The SCCs are used as a primary identifying data element in EPA’s WebFIRE (where SCCs are used to link emissions factors to an emission process), the National Emissions Inventory (NEI), and other EPA databases. Eight digit SCC codes, such as ABBCCCDD, are structured as follows: A: Level 1 BB: Level 2 CCC: Level 3 DD: Level 4 See SCC documentation for additional information: https://sor-scc-api.epa.gov/sccwebservices/sccsearch/docs/SCC-IntroToSCCs_2021.pdf """ def __init__(self): self._complete_scc_filepath = './data/SCC/SCCDownload.csv' logging.basicConfig(level=logging.INFO)
[docs] def get_complete_scc(self): """ Download full set of SCC codes from EPA and save to `self._complete_scc_filepath`. Note that downloading directly from website assignes filename for csv based as 'SCCDownload-{y}-{md}-{t}.csv' """ payload = { 'format': 'CSV', 'sortFacet': 'scc level one', 'filename': 'SCCDownload.csv' } url = 'https://sor-scc-api.epa.gov/sccwebservices/v1/SCC?' try: r = requests.get(url, params=payload) except requests.exceptions.RequestException as e: logging.ERROR(e.response.text) else: all_scc = pd.read_csv( BytesIO(r.content) ) all_scc.to_csv(self._complete_scc_filepath) return all_scc
[docs] def load_complete_scc(self): """ Complete list of SCC codes (available from https://sor-scc-api.epa.gov/sccwebservices/sccsearch/) have been manually downloaded. Returns ------- all_scc : pandas.DataFrame """ try: all_scc = pd.read_csv(self._complete_scc_filepath) except FileNotFoundError: all_scc = self.get_complete_scc() all_scc.columns = [c.replace(' ', '_') for c in all_scc.columns] return all_scc
[docs] @staticmethod def scc_query_split(scc): """ Uses EPA SCC Web Services to get level information for an 8- or 10-digit SCC. Parameters ---------- scc : int Eight or 10-digit Source Classification Code. Returns ------- scc_levels : dict Dictionary of all four level names """ base_url = 'http://sor-scc-api.epa.gov:80/sccwebservices/v1/SCC/' try: r = requests.get(base_url+f'{scc}') r.raise_for_status() except requests.exceptions.HTTPError as err: logging.error(f'{err}') levels = [f'scc level {n}' for n in ['one', 'two', 'three', 'four']] scc_levels = {} try: for k in levels: scc_levels[k] = r.json()['attributes'][k]['text'] except TypeError: logging.error(f'SCC {scc} cannot be found') scc_levels = None return scc_levels
[docs] def build_id(self): """ Identify all relevant unit types and fuel types in SCCs. Returns ------- all_scc : pandas.DataFrame Complete SCC codes with added columns of unit types and fuel types. """ all_scc = self.load_complete_scc() all_scc.loc[:, 'unit_type'] = np.nan all_scc.loc[:, 'fuel_type'] = np.nan id_meth = [ self.id_external_combustion, self.id_stationary_fuel_combustion, self.id_ice, self.id_chemical_evaporation, self.id_industrial_processes, ] ids = pd.concat( [f(all_scc) for f in id_meth], axis=0, ignore_index=False, sort=True ) all_scc.update(ids) all_scc.dropna(subset=['unit_type', 'fuel_type'], how='all', inplace=True) # all_scc = self.ft_clean_up(all_scc) return all_scc
[docs] def id_external_combustion(self, all_scc): """ Method for identifying relevant unit and fuel types under SCC Level 1 External Combustion (1) Parameters ---------- all_scc : pandas.DataFrame Complete list of SCCs. Returns ------- scc_exc : pandas.DataFrame SCC for external combustion (SCC Level 1 == 1) with unit type and fuel type descriptions. """ scc_exc = all_scc.query("scc_level_one=='External Combustion'") unit_types_detail = [] fuel_types = [] other_fuels = [ 'Shredded', 'Specify Percent Butane in Comments', 'Specify in Comments', 'Specify Waste Material in Comments', 'Refuse Derived Fuel', 'Sewage Grease Skimmings', 'Waste Oil', 'Sludge Waste', 'Digester Gas', 'Agricultural Byproducts (rice or peanut hulls, shells, cow manure, etc)', 'Paper Pellets', 'Black Liquor', 'Red Liquor', 'Spent Sulfite Liquor', 'Tall Oil', 'Wood/Wood Waste Liquid', 'Off-gas Ejectors', 'Pulverized Coal', 'Salable Animal Fat', 'Natural Gas', 'Process Gas', 'Wet Slurry', 'Distillate Oil', 'Residual Oil', 'Petroleum Refinery Gas', 'Grade 4 Oil', 'Grade 5 Oil', 'Grade 6 Oil', 'Blast Furnace Gas', 'Coke Oven Gas', 'Landfill Gas', 'Biomass i[2]Solids' ] for i, r in scc_exc.iterrows(): if r['scc_level_two'] == 'Space Heaters': if ':' in r['scc_level_three']: unit_types_detail.append(r['scc_level_four'].split(': ')[-1]) fuel_types.append(r['scc_level_four'].split(': ')[0]) else: unit_types_detail.append('Space heater') fuel_types.append(r['scc_level_four'].split(': ')[0]) elif 'boiler' in r['scc_level_two'].lower(): fuel_types.append(r['scc_level_three']) if 'boiler' in r['scc_level_four'].lower(): ut = r['scc_level_four'] else: ut = f'Boiler, {r.scc_level_four}' unit_types_detail.append(ut) # split_ut = r['scc_level_four'].split(': ') # if len(split_ut) == 2: # ut = split_ut[-1] # if ut in other_fuels: # unit_types_detail.append('Boiler') # else: # unit_types_detail.append(ut) # elif len(split_ut) > 2: # unit_types_detail.append((' '.join(split_ut[1:]))) # else: # if 'boiler' in r['scc_level_four'].lower(): # unit_types_detail.append(r['scc_level_four']) # else: # unit_types_detail.append('Boiler') scc_exc.loc[:, 'unit_type'] = unit_types_detail scc_exc.loc[:, 'fuel_type'] = fuel_types return scc_exc
[docs] def id_ice(self, all_scc): """ Method for identifying relevant unit and fuel types under SCC Level 1 Internal Combustion Engines (2) Parameters ---------- all_scc : pandas.DataFrame Complete list of SCCs. Returns ------- scc_ice : pandas.DataFrame SCC for external combustion (SCC Level 1 == 2) with unit type and fuel type descriptions. """ scc_ice = all_scc.query("scc_level_one=='Internal Combustion Engines'") scc_ice = scc_ice[scc_ice.scc_level_two.isin( ['Electric Generation', 'Industrial', 'Commercial/Institutional'] )] unit_types_detail = [] fuel_types = [] other = ['Geysers/Geothermal', 'Equipment Leaks', 'Wastewater, Aggregate', 'Wastewater, Points of Generation', 'Flares'] types = [ 'Turbine', 'Reciprocating', 'Turbine: Cogeneration', 'Reciprocating: Cogeneration', 'Refinery Gas: Turbine', 'Refinery Gas: Reciprocating Engine', 'Propane: Reciprocating', 'Butane: Reciprocating', 'Reciprocating Engine', 'Reciprocating Engine: Cogeneration' ] for i, r in scc_ice.iterrows(): if r['scc_level_three'] in other: ut = None ft = None elif r['scc_level_four'] in types: ft = r['scc_level_three'] ut = r['scc_level_four'] unit_types_detail.append(ut) fuel_types.append(ft) scc_ice.loc[:, 'unit_type'] = unit_types_detail scc_ice.loc[:, 'fuel_type'] = fuel_types return scc_ice
[docs] def id_stationary_fuel_combustion(self, all_scc): """ Method for identifying relevant unit and fuel types under SCC Level 1 Stationary Source Fuel Combustion (21; note this is a 10-digit SCC code) Parameters ---------- all_scc : pandas.DataFrame Complete list of SCCs. Returns ------- scc_ice : pandas.DataFrame SCC for external combustion (SCC Level 1 == 2) with unit type and fuel type descriptions """ scc_sta = all_scc.query("scc_level_one == 'Stationary Source Fuel Combustion'") scc_sta = scc_sta[~scc_sta.scc_level_two.isin( ['Residential'] )] unit_types_detail = [] fuel_types = [] for i, r in scc_sta.iterrows(): fuel_types.append(r['scc_level_three']) if 'All Boiler Types' in r['scc_level_four']: unit_types_detail.append('Boiler') elif 'Boilers and IC Engines' in r['scc_level_four']: unit_types_detail.append('Boilers and IC Engines') elif 'All IC Engine Types' in r['scc_level_four']: unit_types_detail.append('IC Engine') elif 'All Heater Types' in r['scc_level_four']: unit_types_detail.append('Heater') else: unit_types_detail.append(r['scc_level_four']) scc_sta.loc[:, 'unit_type'] = unit_types_detail scc_sta.loc[:, 'fuel_type'] = fuel_types return scc_sta
[docs] def id_chemical_evaporation(self, all_scc): """ Method for identifying relevant unit and fuel types under SCC Level 1 Chemical Evaporation (4) Parameters ---------- all_scc : pandas.DataFrame Complete list of SCCs. Returns ------- scc_chee : pandas.DataFrame SCC for Chemical Evaporation (SCC Level 1 == 4) with unit type and fuel type descriptions """ scc_chee = all_scc.query("scc_level_one == 'Chemical Evaporation'") fuel_types = [] unit_types_detail = [] for i, r in scc_chee.iterrows(): if (r['scc_level_two'] == 'Surface Coating Operations') & \ (('dryer' in r['scc_level_four'].lower()) | \ ('drying' in r['scc_level_four'].lower())): unit_types_detail.append(r['scc_level_four']) fuel_types.append(None) elif r['scc_level_three'] == 'Coating Oven - General': if ('<' in r['scc_level_four']) | ('>' in r['scc_level_four']): unit_types_detail.append('Coating Oven') else: unit_types_detail.append(r['scc_level_four']) fuel_types.append(None) elif r['scc_level_three'] == 'Coating Oven Heater': unit_types_detail.append('Coating Oven Heater') fuel_types.append(r['scc_level_four']) elif (r['scc_level_three'] == 'Fuel Fired Equipment') & \ (r['scc_level_two'] == 'Surface Coating Operations'): unit_types_detail.append(r['scc_level_four'].split(': ')[1]) fuel_types.append(r['scc_level_four'].split(': ')[0]) elif (r['scc_level_three'] == 'Fuel Fired Equipment') & \ (r['scc_level_two']=='Organic Solvent Evaporation'): unit_types_detail.append(r['scc_level_four'].split(': ')[0]) fuel_types.append(r['scc_level_four'].split(': ')[1]) elif r['scc_level_three'] == 'Drying': unit_types_detail.append(r['scc_level_four']) fuel_types.append(None) elif (r['scc_level_three'] != 'Drying') & \ (r['scc_level_four'] in ['Dryer', 'Drying', 'Drying/Curing']): unit_types_detail.append(r['scc_level_four']) fuel_types.append(None) # Skipping dry cleaning operations. Unsure whether "drying" # includes application of heat else: unit_types_detail.append(None) fuel_types.append(None) scc_chee.loc[:, 'unit_type'] = unit_types_detail scc_chee.loc[:, 'fuel_type'] = fuel_types return scc_chee
[docs] def id_industrial_processes(self, all_scc): """ Method for identifying relevant unit and fuel types under SCC Level 1 Industrial Processes (3) Parameters ---------- all_scc : pandas.DataFrame Complete list of SCCs. Returns ------- scc_ind : pandas.DataFrame SCC for Industrial Processes (SCC Level 1 == 3) with unit type and fuel type descriptions """ scc_ind = all_scc.query("scc_level_one == 'Industrial Processes'") unit_types_detail = [] fuel_types = [] other_counter = [] for i, r in scc_ind.iterrows(): ft = None ut = None if 'Commercial Cooking' in r['scc_level_three']: ut = r['scc_level_four'] ft = None elif (r['scc_level_two'] == 'In-process Fuel Use') & \ (r['scc_level_four'] != 'Total') & \ ('Fuel Storage' not in r['scc_level_three']): ut = r['scc_level_four'] ft = r['scc_level_three'] other_counter.append(i) elif r['scc_level_three'] == 'Ammonia Production': if ':' in r['scc_level_four']: ut = r['scc_level_four'].split(': ')[0] ft = r['scc_level_four'].split(': ')[1] if ft == 'Natural Gas Fired': ft = 'natural gas' else: pass else: ut = r['scc_level_four'] ft = None elif (r['scc_level_two'] != 'In-process Fuel Use') & \ (any([x in r['scc_level_four'].lower() for x in [ 'calciner', 'evaporator', 'furnace', 'dryer', 'kiln', 'oven', 'flares', 'incinerators', 'turbines', 'turbine', 'engines', 'engine', 'incinerator', 'distillation', 'heater', 'broil', 'flare', 'stove', 'steam' ]])): if r['scc_level_three'] == 'Fuel Fired Equipment': x, y = r['scc_level_four'].split(': ') if any([z in x for z in ['Distillate', 'Residual', 'Gas', 'Liquid', 'Propane']]): ft = x ut = y else: ft = y ut = x else: ut = r['scc_level_four'] if 'fired' in ut.lower(): try: ft = re.search( r'(\w+ \w+)(?=-fired)|(\w+)(?=-fired)|(\w+ \w+ \w+)(?=-fired)', ut ).group() except AttributeError: try: ft = re.search( r'cbm|nat gas|natural gas|distillate oil|residual oil|#2 oil|#6 oil|propane|coal|process gas', ut.lower() ).group() except AttributeError: ft = None else: if ft.lower() == 'nat gas': ft = 'Natural Gas' elif ft.lower() == 'cbm': ft = 'Natural Gas' elif ft.lower() == "#2 oil": ft = 'Diesel' elif ft.lower() == "#6 oil": ft = 'Residual Fuel Oil' elif ft == 'natural gas': ft = 'Natural Gas' else: if 'fired' in ft.lower(): ft = ft.lower().split(' fired')[0] elif 'direct' in ft.lower(): try: ft = ft.lower().split('direct ')[1] except IndexError: ft = None else: if ft == 'ng': ft = 'Natural Gas' else: pass elif ('and' in ft.lower()) | ('or' in ft.lower()): ft = ft.split(' ')[1] # Assume that "...Gas-Fired..." equipment refers to natural gas. elif ft.lower() == 'gas': ft = 'Natural Gas' elif ft.lower() == 'oil': ft = 'Residual Fuel Oil' else: if 'diesel' in ut.lower(): ft = 'Diesel' elif ':' in ut: try: x, y = ut.split(': ') except ValueError: ft = None else: if any([z in x for z in ['Distillate', 'Residual', 'Gas', 'Liquid', 'Propane']]): ft = x ut = y elif any([z in y for z in ['Distillate', 'Residual', 'Gas', 'Liquid', 'Propane']]): ft = y ut = x else: ft = None else: ft = None elif r['scc_level_three'] == 'Fuel Fired Equipment': if r['sector'] == 'Industrial Processes - Chemical Manuf': try: ft = r['scc_level_four'].split(': ')[1] ut = r['scc_level_four'].split(': ')[0] except IndexError: ft = None ut = r['scc_level_four'] else: ft = r['scc_level_four'].split(': ')[0] ut = r['scc_level_four'].split(': ')[1] logging.info(f'Last fuel type is {fuel_types[-1]}\nLast unit type is {unit_types_detail[-1]} ') # elif r['tier_1_description'] == 'Fuel Comb. Industrial': # ft = f'{r["tier_3_description"]} {"tier_2_description"}' # Cludge for catching technologies that use electricity try: fte = re.search(r'(elec)', ut.lower()) except AttributeError: pass else: if fte: ft = 'electricity' else: pass fuel_types.append(ft) unit_types_detail.append(ut) scc_ind.loc[:, 'unit_type'] = unit_types_detail scc_ind.loc[:, 'fuel_type'] = fuel_types return scc_ind
[docs] def main(self): id_scc = SCC_ID() id_scc_df = id_scc.build_id() id_scc_df.to_csv('./scc/iden_scc.csv')
if __name__ == '__main__': id_scc = SCC_ID() id_scc_df = id_scc.build_id() id_scc_df.to_csv('./scc/iden_scc.csv')