Source code for buildingmotif.label_parsing.combinators

import logging
import re
from typing import List

from rdflib import URIRef

from buildingmotif.label_parsing.parser import Parser
from buildingmotif.label_parsing.tokens import (
    Constant,
    Delimiter,
    Identifier,
    Null,
    Token,
    TokenOrConstructor,
    TokenResult,
    ensure_token,
)
from buildingmotif.namespaces import BRICK

# Use a module-scoped logger rather than the root logger so applications can
# configure or filter this module's output independently.
logger = logging.getLogger(__name__)


class string(Parser):
    """Parser that succeeds when the target begins with a fixed string.

    On success a single result is produced whose value is the fixed string
    and whose length is the length of that string. Otherwise a single
    result carrying a ``Null`` token and an error message is produced.
    """

    def __init__(self, s: str, type_name: TokenOrConstructor, id=None):
        self.s = s
        self.type_name = type_name
        self.id = id

    def __call__(self, target: str) -> List[TokenResult]:
        expected = self.s
        if not target.startswith(expected):
            # The message shows the slice of the target that was compared.
            failure = TokenResult(
                None,
                Null(),
                0,
                f"Expected {self.s}, got {target[:len(self.s)]}",
                id=self.id,
            )
            return [failure]

        token = ensure_token(self.type_name, expected)
        return [TokenResult(expected, token, len(expected), id=self.id)]
class rest(Parser):
    """Parser that consumes whatever remains of the target string."""

    def __init__(self, type_name: TokenOrConstructor, id=None):
        self.type_name = type_name
        self.id = id

    def __call__(self, target: str) -> List[TokenResult]:
        # The whole target becomes the value, so the consumed length is the
        # full length of the target.
        token = ensure_token(self.type_name, target)
        consumed = len(target)
        return [TokenResult(target, token, consumed, id=self.id)]
class substring_n(Parser):
    """Parser that takes exactly the first ``length`` characters of the target.

    Produces an error result when the target is shorter than ``length``.
    """

    def __init__(self, length: int, type_name: TokenOrConstructor, id=None):
        self.length = length
        self.type_name = type_name
        self.id = id

    def __call__(self, target: str) -> List[TokenResult]:
        wanted = self.length
        if len(target) < wanted:
            # Not enough characters left: report what was available.
            return [
                TokenResult(
                    None,
                    Null(),
                    0,
                    f"Expected {self.length} characters, got {target[:self.length]}",
                    id=self.id,
                )
            ]

        value = target[:wanted]
        token = ensure_token(self.type_name, value)
        return [TokenResult(value, token, wanted, id=self.id)]
class regex(Parser):
    """Parser that matches a regular expression at the start of the target.

    The pattern is applied with ``re.match``, so it is anchored at the
    beginning of the target. The matched text becomes the result value.
    """

    def __init__(self, r: str, type_name: TokenOrConstructor, id=None):
        self.r = r
        self.type_name = type_name
        self.id = id

    def __call__(self, target: str) -> List[TokenResult]:
        found = re.match(self.r, target)
        if found is None:
            return [
                TokenResult(
                    None,
                    Null(),
                    0,
                    f"Expected {self.r}, got {target[:len(self.r)]}",
                    id=self.id,
                )
            ]

        value = found.group()
        token = ensure_token(self.type_name, value)
        return [TokenResult(value, token, len(value), id=self.id)]
class choice(Parser):
    """Constructs a choice combinator of parsers.

    The parsers are tried in the order given; the results of the first one
    that produces a non-empty, error-free result are returned. If none
    succeeds, a single ``Null`` result is returned whose error joins the
    error message reported by each failing alternative with ``" | "``.
    """

    def __init__(self, *parsers: Parser, id=None):
        self.parsers = parsers
        self.id = id

    def __call__(self, target: str) -> List[TokenResult]:
        errors: List[str] = []
        for p in self.parsers:
            result = p(target)
            if not result:
                continue
            # The failing token is not necessarily the first one (a sequence
            # may fail part-way through), so look for the first real error
            # instead of assuming result[0] carries it. Joining a None error
            # would raise a TypeError.
            failure = next((r.error for r in result if r.error), None)
            if failure is None:
                return result
            errors.append(failure)

        # An empty message would be falsy and be mistaken for success by
        # callers that test ``r.error``, so always report something.
        message = " | ".join(errors) or "No parser matched"
        # NOTE(review): the failure result is emitted with id=None rather than
        # self.id, as before; confirm whether that is intentional.
        return [TokenResult(None, Null(), 0, message, id=None)]
class constant(Parser):
    """Parser that always yields a fixed token and consumes no input."""

    def __init__(self, type_name: Token, id=None):
        self.type_name = type_name
        self.id = id

    def __call__(self, target: str) -> List[TokenResult]:
        # The target is ignored: the result has no value and zero length.
        fixed = TokenResult(None, self.type_name, 0, id=self.id)
        return [fixed]
class abbreviations(Parser):
    """Constructs a choice combinator of string matching based on a dictionary.

    Each key of ``patterns`` is an abbreviation to match at the start of the
    target; the corresponding value is wrapped in a ``URIRef`` and emitted as
    a ``Constant`` token when the abbreviation matches.
    """

    def __init__(self, patterns: dict, id=None):
        # Try longer abbreviations first. The alternatives are prefix matches
        # tried in order, so a short key such as "CO" would otherwise shadow
        # a longer key it is a prefix of, such as "CO2". sorted() is stable,
        # so keys of equal length keep their dictionary order.
        ordered = sorted(
            patterns.items(), key=lambda item: len(item[0]), reverse=True
        )
        parsers = [string(s, Constant(URIRef(t))) for s, t in ordered]
        self.choice = choice(*parsers)
        self.id = id

    def __call__(self, target: str):
        return self.choice(target)
class sequence(Parser):
    """Applies parsers in sequence. All parsers must match consecutively.

    Each parser is applied to what remains of the target after the previous
    parsers' results have been consumed. The accumulated results are
    returned; if a parser reports an error, the results gathered so far
    (including the failing ones) are returned immediately.

    Raises:
        Exception: if a parser returns an empty result list.
    """

    def __init__(self, *parsers: Parser, id=None):
        self.parsers = parsers
        self.id = id

    def __call__(self, target: str) -> List[TokenResult]:
        results: List[TokenResult] = []
        for p in self.parsers:
            result = p(target)
            if not result:
                raise Exception("Expected result")
            results.extend(result)
            # if there are any errors, return the results
            if any(r.error for r in result):
                return results
            # TODO: how to handle error?
            # Advance past everything this parser consumed.
            consumed_length = sum(r.length for r in result)
            target = target[consumed_length:]
        return results
class many(Parser):
    """Applies the given sequence parser repeatedly until it stops matching.

    Repetition stops when the parser returns nothing, when the first result
    it returns has no value, or when a round consumes no input at all. The
    results of all successful rounds are concatenated and returned.
    """

    def __init__(self, seq_parser: Parser, id=None):
        self.seq_parser = seq_parser
        self.id = id

    def __call__(self, target):
        results = []
        while True:
            part = self.seq_parser(target)
            if not part or part[0].value is None:
                break
            # add up the length of all the tokens
            total_length = sum(r.length for r in part)
            if total_length == 0:
                # The parser succeeded without consuming anything (e.g. an
                # empty regex match, or ``rest`` on an empty target). The
                # target would never shrink, so repeating would loop forever.
                break
            results.extend(part)
            target = target[total_length:]
        return results
class maybe(Parser):
    """Makes a parser optional.

    Returns the wrapped parser's results when they are non-empty and free of
    errors; otherwise returns a single zero-length ``Null`` result with no
    error, so the failure is not reported to the caller.
    """

    def __init__(self, parser: Parser, id=None):
        self.parser = parser
        self.id = id

    def __call__(self, target):
        result = self.parser(target)
        failed = (not result) or any(r.error for r in result)
        if failed:
            # Swallow the failure: emit a null token that consumes nothing.
            return [TokenResult(None, Null(), 0, id=self.id)]
        return result
class until(Parser):
    """
    Constructs a parser that matches everything until the given parser matches.
    Starts with a prefix length of 1 and increments it until the given parser
    matches the remainder of the target after that prefix.
    """

    def __init__(self, parser: Parser, type_name: TokenOrConstructor, id=None):
        self.type_name = type_name
        self.parser = parser
        self.id = id

    def __call__(self, target):
        length = 1
        while length <= len(target):
            # Probe the text that follows the candidate prefix.
            lookahead = self.parser(target[length:])
            matched = bool(lookahead) and not any(r.error for r in lookahead)
            if matched:
                prefix = target[:length]
                token = ensure_token(self.type_name, prefix)
                return [TokenResult(prefix, token, length, id=self.id)]
            length += 1

        # No prefix length left the remainder matching the given parser.
        return [
            TokenResult(
                None,
                Null(),
                0,
                f"Expected {self.type_name}, got {target[:length]}",
                id=self.id,
            )
        ]
class extend_if_match(Parser):
    """Appends an extra token to the wrapped parser's results when it matches.

    When the wrapped parser's results are non-empty and error-free, a
    zero-length result carrying ``type_name`` is appended to that same list.
    Otherwise the wrapped parser's results are returned unchanged.
    """

    def __init__(self, parser: Parser, type_name: Token, id=None):
        self.parser = parser
        self.type_name = type_name
        self.id = id

    def __call__(self, target):
        result = self.parser(target)
        matched = bool(result) and not any(r.error for r in result)
        if matched:
            # Zero length so the extra token does not count as consumed input.
            result.append(TokenResult(None, self.type_name, 0, id=self.id))
        return result
def as_identifier(parser):
    """
    Wrap a parser so that, when it matches, every Constant token in its
    result is followed by a new Identifier token carrying the same string
    value. Results that are empty or contain an error are passed through
    unchanged.
    """

    def as_identifier_parser(target):
        result = parser(target)
        if not result or any(r.error for r in result):
            return result

        expanded = []
        for item in result:
            expanded.append(item)
            if isinstance(item.token, Constant):
                # The added token gets length 0 so the substring it shares
                # with the Constant token is not counted twice.
                expanded.append(TokenResult(item.value, Identifier(item.value), 0))
        return expanded

    return as_identifier_parser
# Equipment abbreviations mapped to Brick classes.
COMMON_EQUIP_ABBREVIATIONS_BRICK = {
    "AHU": BRICK.Air_Handling_Unit,
    "FCU": BRICK.Fan_Coil_Unit,
    "VAV": BRICK.Variable_Air_Volume_Box,
    "CRAC": BRICK.Computer_Room_Air_Conditioner,
    "HX": BRICK.Heat_Exchanger,
    "PMP": BRICK.Pump,
    "RVAV": BRICK.Variable_Air_Volume_Box_With_Reheat,
    "HP": BRICK.Heat_Pump,
    "RTU": BRICK.Rooftop_Unit,
    "DMP": BRICK.Damper,
    "STS": BRICK.Status,
    "VLV": BRICK.Valve,
    "CHVLV": BRICK.Chilled_Water_Valve,
    "HWVLV": BRICK.Hot_Water_Valve,
    "VFD": BRICK.Variable_Frequency_Drive,
    "CT": BRICK.Cooling_Tower,
    "MAU": BRICK.Makeup_Air_Unit,
    "R": BRICK.Room,
    "A": BRICK.Air_Handling_Unit,
}

# Point abbreviations mapped to Brick classes.
COMMON_POINT_ABBREVIATIONS = {
    "ART": BRICK.Room_Temperature_Sensor,
    "TSP": BRICK.Air_Temperature_Setpoint,
    "HSP": BRICK.Air_Temperature_Heating_Setpoint,
    "CSP": BRICK.Air_Temperature_Cooling_Setpoint,
    "SP": BRICK.Setpoint,
    "CHWST": BRICK.Leaving_Chilled_Water_Temperature_Sensor,
    "CHWRT": BRICK.Entering_Chilled_Water_Temperature_Sensor,
    "HWST": BRICK.Leaving_Hot_Water_Temperature_Sensor,
    "HWRT": BRICK.Entering_Hot_Water_Temperature_Sensor,
    "CO": BRICK.CO_Sensor,
    "CO2": BRICK.CO2_Sensor,
    "T": BRICK.Temperature_Sensor,
    "FS": BRICK.Flow_Sensor,
    "PS": BRICK.Pressure_Sensor,
    "DPS": BRICK.Differential_Pressure_Sensor,
}

# Parser over the union of both tables; on a duplicate key the point entry
# would replace the equipment entry because it is unpacked last.
COMMON_ABBREVIATIONS = abbreviations(
    {**COMMON_EQUIP_ABBREVIATIONS_BRICK, **COMMON_POINT_ABBREVIATIONS}
)

# common parser combinators
equip_abbreviations = abbreviations(COMMON_EQUIP_ABBREVIATIONS_BRICK)
point_abbreviations = abbreviations(COMMON_POINT_ABBREVIATIONS)
# A single separator character: '.', '_', ':', '/', '-' or a space.
delimiters = regex(r"[._:/\- ]", Delimiter)
# One or more ASCII letters or digits.
identifier = regex(r"[a-zA-Z0-9]+", Identifier)
# An abbreviation, an optional delimiter, then an identifier.
named_equip = sequence(equip_abbreviations, maybe(delimiters), identifier)
named_point = sequence(point_abbreviations, maybe(delimiters), identifier)