Source code for buildingmotif.dataclasses.library

import logging
import pathlib
import tempfile
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Union

import pygit2
import rdflib
import sqlalchemy
import yaml
from pkg_resources import resource_exists, resource_filename
from rdflib.exceptions import ParserError
from rdflib.plugins.parsers.notation3 import BadSyntax
from rdflib.util import guess_format

from buildingmotif import get_building_motif
from buildingmotif.database.tables import DBLibrary, DBTemplate
from buildingmotif.dataclasses.shape_collection import ShapeCollection
from buildingmotif.dataclasses.template import Template
from buildingmotif.schemas import validate_libraries_yaml
from buildingmotif.template_compilation import compile_template_spec
from buildingmotif.utils import (
    copy_graph,
    get_ontology_files,
    get_template_parts_from_shape,
    shacl_inference,
    skip_uri,
)

if TYPE_CHECKING:
    from buildingmotif import BuildingMOTIF


@dataclass
class _template_dependency:
    """Represents early-bound (template_id) or late-bound (template_name and
    library) dependency of a template on another template.
    """

    template_name: str
    bindings: Dict[str, Any]
    library: str
    template_id: Optional[int] = None

    def __repr__(self):
        return (
            f"dep<name={self.template_name} bindings={self.bindings} "
            f"library={self.library} id={self.template_id}>"
        )

    @classmethod
    def from_dict(
        cls, d: Dict[str, Any], dependent_library_name: str
    ) -> "_template_dependency":
        """Creates a py:class:`_template_dependency` from a dictionary.

        :param d: dictionary
        :type d: Dict[str, Any]
        :param dependent_library_name: library name
        :type dependent_library_name: str
        :return: the _template_dependency from the dict
        :rtype: _template_dependency
        """
        template_name = d["template"]
        bindings = d.get("args", {})
        library = d.get("library", dependent_library_name)
        template_id = d.get("template_id")
        return cls(template_name, bindings, library, template_id)

    def to_template(self, id_lookup: Dict[str, int]) -> Template:
        """Resolve this dependency to a template.

        :param id_lookup: a local cache of {name: id} for uncommitted templates
        :type id_lookup: Dict[str, int]
        :return: the template instance this dependency points to
        :rtype: Template
        """
        # direct lookup if id is provided
        if self.template_id is not None:
            return Template.load(self.template_id)
        # if id is not provided, look at our local 'cache' of to-be-committed
        # templates for the id (id_lookup)
        if self.template_name in id_lookup:
            return Template.load(id_lookup[self.template_name])
        # if not in the local cache, then search the database for the template
        # within the given library
        library = Library.load(name=self.library)
        return library.get_template_by_name(self.template_name)


@dataclass
class Library:
    """This class mirrors :py:class:`database.tables.DBLibrary`.

    A library groups templates and owns one shape collection. Instances are
    thin handles: they hold the database id and name of the library plus the
    BuildingMOTIF instance used to reach the database.
    """

    # database id of the library (read-only through the ``id`` property)
    _id: int
    # library name (renaming through the ``name`` property updates the database)
    _name: str
    # BuildingMOTIF instance providing the table and graph connections
    _bm: "BuildingMOTIF"

    @classmethod
    def create(cls, name: str, overwrite: Optional[bool] = True) -> "Library":
        """Create new Library.

        If a library with this name already exists it is reused: with
        ``overwrite`` its templates are deleted first, otherwise a warning is
        logged and the existing library is returned unchanged.

        :param name: library name
        :type name: str
        :param overwrite: if True, overwrite the existing copy of the library.
        :type overwrite: Optional[bool]
        :return: new library
        :rtype: Library
        """
        bm = get_building_motif()
        try:
            db_library = bm.table_connection.get_db_library_by_name(name)
        except sqlalchemy.exc.NoResultFound:
            db_library = bm.table_connection.create_db_library(name)
        else:
            if overwrite:
                cls._clear_library(db_library)
            else:
                logging.warning(
                    f'Library {name} already exists in database. To overwrite load library with "overwrite=True"'  # noqa
                )

        return cls(_id=db_library.id, _name=db_library.name, _bm=bm)

    @classmethod
    def _clear_library(cls, library: DBLibrary) -> None:
        """Clear contents of a library by deleting all of its templates.

        :param library: library to clear
        :type library: DBLibrary
        """
        bm = get_building_motif()
        for template in library.templates:  # type: ignore
            bm.session.delete(template)

    # TODO: load library from URI? Does the URI identify the library uniquely?
    # TODO: can we deduplicate shape graphs? use hash of graph?

    @classmethod
    def load(
        cls,
        db_id: Optional[int] = None,
        ontology_graph: Optional[Union[str, rdflib.Graph]] = None,
        directory: Optional[str] = None,
        name: Optional[str] = None,
        overwrite: Optional[bool] = True,
        infer_templates: Optional[bool] = True,
        run_shacl_inference: Optional[bool] = True,
    ) -> "Library":
        """Loads a library from the database or an external source.

        Exactly one source is used, checked in this order: ``db_id``,
        ``ontology_graph``, ``directory``, ``name``. When specifying a path to
        load a library or ontology_graph from, paths within the
        buildingmotif.libraries module will be prioritized if they resolve.

        :param db_id: the unique id of the library in the database,
            defaults to None
        :type db_id: Optional[int], optional
        :param ontology_graph: a path to a serialized RDF graph, or an
            already-parsed rdflib graph. Supports remote ontology URLs,
            defaults to None
        :type ontology_graph: Optional[str|rdflib.Graph], optional
        :param directory: a path to a directory containing a library,
            defaults to None
        :type directory: Optional[str], optional
        :param name: the name of the library inside the database,
            defaults to None
        :type name: Optional[str], optional
        :param overwrite: if true, replace any existing copy of the
            library, defaults to True
        :type overwrite: Optional[bool], optional
        :param infer_templates: if true, infer shapes from the ontology graph,
            defaults to True
        :type infer_templates: Optional[bool], optional
        :param run_shacl_inference: if true, run SHACL inference on the
            ontology graph, using the BuildingMOTIF SHACL engine,
            defaults to True
        :type run_shacl_inference: Optional[bool], optional
        :return: the loaded library
        :rtype: Library
        :raises Exception: if the library cannot be loaded
        """
        if db_id is not None:
            return cls._load_from_db(db_id)
        elif ontology_graph is not None:
            if isinstance(ontology_graph, str):
                ontology_graph_path = ontology_graph
                # prefer a library bundled with buildingmotif, if one matches
                if resource_exists("buildingmotif.libraries", ontology_graph_path):
                    logging.debug(f"Loading builtin library: {ontology_graph_path}")
                    ontology_graph_path = resource_filename(
                        "buildingmotif.libraries", ontology_graph_path
                    )
                ontology_graph = rdflib.Graph()
                ontology_graph.parse(
                    ontology_graph_path, format=guess_format(ontology_graph_path)
                )
            return cls._load_from_ontology(
                ontology_graph,
                overwrite=overwrite,
                infer_templates=infer_templates,
                run_shacl_inference=run_shacl_inference,
            )
        elif directory is not None:
            # prefer a library bundled with buildingmotif, if one matches
            if resource_exists("buildingmotif.libraries", directory):
                logging.debug(f"Loading builtin library: {directory}")
                src = pathlib.Path(
                    resource_filename("buildingmotif.libraries", directory)
                )
            else:
                src = pathlib.Path(directory)
                if not src.exists():
                    raise Exception(f"Directory {src} does not exist")
            return cls._load_from_directory(
                src,
                overwrite=overwrite,
                infer_templates=infer_templates,
                run_shacl_inference=run_shacl_inference,
            )
        elif name is not None:
            bm = get_building_motif()
            db_library = bm.table_connection.get_db_library_by_name(name)
            return cls(_id=db_library.id, _name=db_library.name, _bm=bm)
        else:
            raise Exception("No library information provided")

    @classmethod
    def _load_from_db(cls, id: int) -> "Library":
        """Load library from database by id.

        :param id: id of library
        :type id: int
        :return: library
        :rtype: Library
        """
        bm = get_building_motif()
        db_library = bm.table_connection.get_db_library(id)
        return cls(_id=db_library.id, _name=db_library.name, _bm=bm)

    @classmethod
    def _load_from_ontology(
        cls,
        ontology: rdflib.Graph,
        overwrite: Optional[bool] = True,
        infer_templates: Optional[bool] = True,
        run_shacl_inference: Optional[bool] = True,
    ) -> "Library":
        """
        Load a library from an ontology graph. This proceeds as follows.
        First, get all entities in the graph that are instances of *both*
        owl:Class and sh:NodeShape. (this is "candidates")

        For each candidate, use the utility function to parse the NodeShape
        and turn it into a Template.

        :param ontology: the graph to load into BuildingMOTIF and interpret
            as a Library
        :type ontology: rdflib.Graph
        :param overwrite: if true, overwrite the existing copy of the Library
        :type overwrite: bool
        :param infer_templates: if true, infer shapes from the ontology graph
        :type infer_templates: bool
        :param run_shacl_inference: if true, run SHACL inference on the
            ontology graph
        :type run_shacl_inference: bool
        :return: the loaded Library
        :rtype: "Library"
        """
        # get the name of the ontology; this will be the name of the library
        # any=False will raise an error if there is more than one ontology
        # defined in the graph
        ontology_name = ontology.value(
            predicate=rdflib.RDF.type, object=rdflib.OWL.Ontology, any=False
        ) or rdflib.URIRef("urn:unnamed/")

        if not overwrite and cls._library_exists(ontology_name):
            logging.warning(
                f'Library "{ontology_name}" already exists in database and "overwrite=False". Returning existing library.'  # noqa
            )
            return Library.load(name=ontology_name)

        # expand the ontology graph before we insert it into the database.
        # This will ensure that the output of compiled models will not
        # contain triples that really belong to the ontology
        if run_shacl_inference:
            ontology = shacl_inference(
                ontology, engine=get_building_motif().shacl_engine
            )

        lib = cls.create(ontology_name, overwrite=overwrite)

        if infer_templates:
            # infer shapes from any class/nodeshape candidates in the graph
            lib._infer_templates_from_graph(ontology)

        # load the ontology graph as a shape_collection
        shape_col_id = lib.get_shape_collection().id
        assert shape_col_id is not None  # should always pass
        shape_col = ShapeCollection.load(shape_col_id)
        shape_col.add_graph(ontology)

        return lib

    def _infer_templates_from_graph(self, graph: rdflib.Graph):
        """Infer templates from a graph (by interpreting shapes)
        and add them to this library.

        :param graph: graph to infer templates from
        :type graph: rdflib.Graph
        """
        # add all imports to the same graph so we can resolve everything
        imports_closure = copy_graph(graph)
        # get all imports from the graph
        for dependency in graph.objects(predicate=rdflib.OWL.imports):
            # attempt to load from BuildingMOTIF
            try:
                lib = Library.load(name=str(dependency))
                imports_closure += lib.get_shape_collection().graph
            except Exception as e:
                # TODO: replace with a more specific exception
                logging.warning(
                    f"An ontology could not resolve a dependency on {dependency} ({e}). "
                    "Check this is loaded into BuildingMOTIF"
                )
                continue

        # candidates are typed as both owl:Class and sh:NodeShape
        class_candidates = set(graph.subjects(rdflib.RDF.type, rdflib.OWL.Class))
        shape_candidates = set(graph.subjects(rdflib.RDF.type, rdflib.SH.NodeShape))
        candidates = class_candidates.intersection(shape_candidates)

        template_id_lookup: Dict[str, int] = {}
        dependency_cache: Dict[int, List[Dict[Any, Any]]] = {}
        for candidate in candidates:
            assert isinstance(candidate, rdflib.URIRef)
            # TODO: mincount 0 (or unspecified) should be optional args on the
            # generated template
            partial_body, deps = get_template_parts_from_shape(
                candidate, imports_closure
            )
            templ = self.create_template(str(candidate), partial_body)
            dependency_cache[templ.id] = deps
            template_id_lookup[str(candidate)] = templ.id

        self._resolve_template_dependencies(template_id_lookup, dependency_cache)

    def _load_shapes_from_directory(
        self,
        directory: pathlib.Path,
        infer_templates: Optional[bool] = True,
        run_shacl_inference: Optional[bool] = True,
    ):
        """Helper method to read all graphs in the given directory into this
        library.

        :param directory: directory containing graph files
        :type directory: pathlib.Path
        :param infer_templates: if true, infer shapes from the ontology graph
        :type infer_templates: bool
        :param run_shacl_inference: if true, run SHACL inference on the
            ontology graph
        :type run_shacl_inference: bool
        :raises ParserError: if a graph file cannot be parsed
        :raises BadSyntax: if a graph file cannot be parsed
        """
        shape_col_id = self.get_shape_collection().id
        assert shape_col_id is not None  # this should always pass
        shape_col = ShapeCollection.load(shape_col_id)
        for filename in get_ontology_files(directory):
            try:
                shape_col.graph.parse(filename, format=guess_format(filename))
            except (ParserError, BadSyntax) as e:
                logging.getLogger(__name__).error(
                    f"Could not parse file {filename}: {e}"
                )
                raise
        if run_shacl_inference:
            shape_col.graph = shacl_inference(
                shape_col.graph, engine=get_building_motif().shacl_engine
            )
        # infer shapes from any class/nodeshape candidates in the graph
        if infer_templates:
            self._infer_templates_from_graph(shape_col.graph)

    @classmethod
    def _load_from_directory(
        cls,
        directory: pathlib.Path,
        overwrite: Optional[bool] = True,
        infer_templates: Optional[bool] = True,
        run_shacl_inference: Optional[bool] = True,
    ) -> "Library":
        """
        Load a library from a directory.

        Templates are read from YML files in the directory. The name of the
        library is given by the name of the directory.

        :param directory: directory containing a library
        :type directory: pathlib.Path
        :param overwrite: if true, overwrite the existing copy of the Library
        :type overwrite: bool
        :param infer_templates: if true, infer shapes from the ontology graph
        :type infer_templates: bool
        :param run_shacl_inference: if true, run SHACL inference on the
            ontology graph
        :type run_shacl_inference: bool
        :raises Exception: if a template cannot be created
        :raises Exception: if dependencies cannot be resolved
        :return: library
        :rtype: Library
        """
        if not overwrite and cls._library_exists(directory.name):
            logging.warning(
                f'Library "{directory.name}" already exists in database and "overwrite=False". Returning existing library.'  # noqa
            )
            return Library.load(name=directory.name)

        lib = cls.create(directory.name, overwrite=overwrite)

        # setup caches for reading templates
        template_id_lookup: Dict[str, int] = {}
        dependency_cache: Dict[int, List[_template_dependency]] = {}

        # read all .yml files
        for file in directory.rglob("*.yml"):
            # if .ipynb_checkpoints, skip; these are cached files that
            # Jupyter creates
            if ".ipynb_checkpoints" in file.parts:
                continue
            lib._read_yml_file(file, template_id_lookup, dependency_cache)
        # now that we have all the templates, we can populate the dependencies
        lib._resolve_template_dependencies(template_id_lookup, dependency_cache)
        # load shape collections from all ontology files in the directory;
        # forward the caller's flags so that passing False is honoured
        lib._load_shapes_from_directory(
            directory,
            infer_templates=infer_templates,
            run_shacl_inference=run_shacl_inference,
        )

        return lib

    @classmethod
    def load_from_libraries_yml(cls, filename: str):
        """
        Loads *multiple* libraries from a properly-formatted 'libraries.yml'
        file. Does not return a Library! You will need to load the libraries
        by name in order to get the dataclasses.Library object. We recommend
        loading libraries directly, one-by-one, in most cases. This method is
        here to support the commandline tool.

        :param filename: the filename of the YAML file to load library names
            from
        :type filename: str
        :rtype: None
        """
        # NOTE(review): yaml.FullLoader is kept as-is; consider yaml.safe_load
        # if this file can come from an untrusted source.
        with open(filename, "r") as libraries_file:
            libraries = yaml.load(libraries_file, Loader=yaml.FullLoader)
        validate_libraries_yaml(libraries)  # raises exception
        for description in libraries:
            _resolve_library_definition(description)

    @staticmethod
    def _library_exists(library_name: str) -> bool:
        """Checks whether a library with the given name exists in the database."""
        bm = get_building_motif()
        try:
            bm.table_connection.get_db_library_by_name(library_name)
            return True
        except sqlalchemy.exc.NoResultFound:
            return False

    def _resolve_dependency(
        self,
        template: Template,
        dep: Union[_template_dependency, dict],
        template_id_lookup: Dict[str, int],
    ):
        """Resolve a dependency and attach it to ``template``.

        Nothing is returned. When the dependee is found it is registered via
        ``template.add_dependency``; when it cannot be found a warning is
        logged instead.

        :param template: template to resolve dependency for
        :type template: Template
        :param dep: dependency
        :type dep: Union[_template_dependency, dict]
        :param template_id_lookup: a local cache of {name: id} for uncommitted
            templates
        :type template_id_lookup: Dict[str, int]
        """
        # if dep is a _template_dependency, turn it into a template
        if isinstance(dep, _template_dependency):
            dependee = dep.to_template(template_id_lookup)
            template.add_dependency(dependee, dep.bindings)
            return

        # now, we know that dep is a dict; "args" is optional, mirroring
        # _template_dependency.from_dict
        dep_name = dep["template"]
        dep_args = dep.get("args", {})

        # if dependency names a library explicitly, load that library and get
        # the template by name
        if "library" in dep:
            dependee = Library.load(name=dep["library"]).get_template_by_name(
                dep_name
            )
            template.add_dependency(dependee, dep_args)
            return

        # if no library is provided, try to resolve the dependency from this
        # library
        if dep_name in template_id_lookup:
            dependee = Template.load(template_id_lookup[dep_name])
            template.add_dependency(dependee, dep_args)
            return

        # check documentation for skip_uri for what URIs get skipped
        if skip_uri(dep_name):
            return

        # if the dependency is not in the local cache, then search through
        # this library's imports for the template
        for imp in self.graph_imports:
            try:
                library = Library.load(name=str(imp))
                dependee = library.get_template_by_name(dep_name)
                template.add_dependency(dependee, dep_args)
                return
            except Exception as e:
                logging.debug(
                    f"Could not find dependee {dep_name} in library {imp}: {e}"
                )
        logging.warning(
            f"Warning: could not find dependee {dep_name} in libraries {self.graph_imports}"
        )

    def _resolve_template_dependencies(
        self,
        template_id_lookup: Dict[str, int],
        dependency_cache: Mapping[int, Union[List[_template_dependency], List[dict]]],
    ):
        """Resolve all dependencies for all templates in this library.

        :param template_id_lookup: a local cache of {name: id} for uncommitted
            templates
        :type template_id_lookup: Dict[str, int]
        :param dependency_cache: dependencies to resolve, keyed by template id
        :type dependency_cache: Mapping[int, list]
        """
        # two phases here: first, add all of the templates and their
        # dependencies to the database but *don't* check that the
        # dependencies are valid yet
        for template in self.get_templates():
            if template.id not in dependency_cache:
                continue
            for dep in dependency_cache[template.id]:
                self._resolve_dependency(template, dep, template_id_lookup)
        # check that all dependencies are valid (use parameters that exist, etc)
        for template in self.get_templates():
            template.check_dependencies()

    def _read_yml_file(
        self,
        file: pathlib.Path,
        template_id_lookup: Dict[str, int],
        dependency_cache: Dict[int, List[_template_dependency]],
    ):
        """Read a YML file into this library. Utility function for
        `_load_from_directory`.

        Each top-level key of the file is a template name and its value the
        template specification. Both caches are updated in place.

        :param file: YML file to read
        :type file: pathlib.Path
        :param template_id_lookup: filled with {template name: template id}
        :type template_id_lookup: Dict[str, int]
        :param dependency_cache: filled with {template id: dependencies}
        :type dependency_cache: Dict[int, List[_template_dependency]]
        :raises Exception: if a template cannot be created
        """
        with open(file, "r") as yml_file:
            # an empty document loads as None; treat it as "no templates"
            contents = yaml.load(yml_file, Loader=yaml.FullLoader) or {}
        for templ_name, templ_spec in contents.items():
            # compile the template body using its rules
            templ_spec = compile_template_spec(templ_spec)
            # input name of template
            templ_spec.update({"name": templ_name})
            # remove dependencies so we can resolve them to their IDs later
            deps = [
                _template_dependency.from_dict(d, self.name)
                for d in templ_spec.pop("dependencies", [])
            ]
            templ_spec["optional_args"] = templ_spec.pop("optional", [])
            try:
                templ = self.create_template(**templ_spec)
            except Exception as e:
                logging.error(
                    f"Error creating template {templ_name} from file {file}: {e}"
                )
                raise
            dependency_cache[templ.id] = deps
            template_id_lookup[templ.name] = templ.id

    @property
    def id(self) -> Optional[int]:
        """Database id of the library (read-only)."""
        return self._id

    @id.setter
    def id(self, new_id):
        raise AttributeError("Cannot modify db id")

    @property
    def name(self) -> str:
        """Name of the library; assigning a new name also updates the database."""
        return self._name

    @name.setter
    def name(self, new_name: str):
        self._bm.table_connection.update_db_library_name(self._id, new_name)
        self._name = new_name

    @property
    def graph_imports(self) -> List[rdflib.URIRef]:
        """
        Get the list of owl:imports for this library's shape collection
        """
        shape_col = self.get_shape_collection()
        return [
            i
            for i in shape_col.graph.objects(None, rdflib.OWL.imports)
            if isinstance(i, rdflib.URIRef)
        ]

    def create_template(
        self,
        name: str,
        body: Optional[rdflib.Graph] = None,
        optional_args: Optional[List[str]] = None,
    ) -> Template:
        """Create template in this library.

        :param name: name
        :type name: str
        :param body: template body; a new empty graph is used when omitted
        :type body: rdflib.Graph
        :param optional_args: optional parameters for the template
        :type optional_args: list[str]
        :return: created template
        :rtype: Template
        """
        db_template = self._bm.table_connection.create_db_template(name, self._id)
        body = self._bm.graph_connection.create_graph(
            db_template.body_id, body if body else rdflib.Graph()
        )
        # ensure the "param" namespace is bound to the graph
        body.namespace_manager = self._bm.template_ns_mgr
        if optional_args is None:
            optional_args = []
        self._bm.table_connection.update_db_template_optional_args(
            db_template.id, optional_args
        )

        return Template(
            _id=db_template.id,
            _name=db_template.name,
            body=body,
            optional_args=optional_args,
            _bm=self._bm,
        )

    def get_templates(self) -> List[Template]:
        """Get templates from library.

        :return: list of templates
        :rtype: List[Template]
        """
        db_library = self._bm.table_connection.get_db_library(self._id)
        templates: List[DBTemplate] = db_library.templates
        return [Template.load(t.id) for t in templates]

    def get_shape_collection(self) -> ShapeCollection:
        """Get ShapeCollection from library.

        :return: library's shape collection
        :rtype: ShapeCollection
        """
        # TODO: we should save the libraries shape_collection to a class attr
        # on load/create. That way we wont need an additional db query each
        # time we call this function.
        db_library = self._bm.table_connection.get_db_library(self._id)

        return ShapeCollection.load(db_library.shape_collection.id)

    def get_template_by_name(self, name: str) -> Template:
        """Get template by name from library.

        :param name: template name
        :type name: str
        :raises ValueError: if template not in library
        :return: template
        :rtype: Template
        """
        dbt = self._bm.table_connection.get_db_template_by_name(name)
        if dbt.library_id != self._id:
            raise ValueError(f"Template {name} not in library {self._name}")
        return Template.load(dbt.id)


def _resolve_library_definition(desc: Dict[str, Any]): """ Loads a library from a description in libraries.yml """ if "directory" in desc: spath = pathlib.Path(desc["directory"]).absolute() if spath.exists() and spath.is_dir(): logging.info(f"Load local library {spath} (directory)") Library.load(directory=str(spath)) else: raise Exception(f"{spath} is not an existing directory") elif "ontology" in desc: ont = desc["ontology"] g = rdflib.Graph().parse(ont, format=rdflib.util.guess_format(ont)) logging.info(f"Load library {ont} as ontology graph") Library.load(ontology_graph=g) elif "git" in desc: repo = desc["git"]["repo"] branch = desc["git"]["branch"] path = desc["git"]["path"] logging.info(f"Load library {path} from git repository: {repo}@{branch}") with tempfile.TemporaryDirectory() as temp_loc: pygit2.clone_repository( repo, temp_loc, checkout_branch=branch ) # , depth=1) new_path = pathlib.Path(temp_loc) / pathlib.Path(path) if new_path.is_dir(): _resolve_library_definition({"directory": new_path}) else: _resolve_library_definition({"ontology": new_path})