from dataclasses import dataclass
from functools import cached_property
from typing import Dict, List, Optional
import pandas as pd
import rdflib
import rdflib.query
from rdflib import URIRef
from buildingmotif.dataclasses.model import Model
from buildingmotif.dataclasses.shape_collection import ShapeCollection
from buildingmotif.dataclasses.validation import ValidationContext
from buildingmotif.namespaces import OWL, SH, A
from buildingmotif.utils import (
copy_graph,
rewrite_shape_graph,
shacl_inference,
shacl_validate,
skolemize_shapes,
)
[docs]@dataclass
class CompiledModel:
"""
This class represents a model that has been compiled against a set of ShapeCollections.
"""
model: Model
shape_collections: List[ShapeCollection]
_compiled_graph: rdflib.Graph
def __init__(
self,
model: Model,
shape_collections: List[ShapeCollection],
compiled_graph: rdflib.Graph,
shacl_engine: str = "default",
):
self.model = model
self.shape_collections = shape_collections
ontology_graph = rdflib.Graph()
for shape_collection in shape_collections:
ontology_graph += shape_collection.graph
ontology_graph = skolemize_shapes(ontology_graph)
shacl_engine = (
self.model._bm.shacl_engine
if (shacl_engine == "default" or not shacl_engine)
else shacl_engine
)
self._compiled_graph = shacl_inference(
compiled_graph, ontology_graph, shacl_engine
)
@cached_property
def graph(self) -> rdflib.Graph:
g = copy_graph(self._compiled_graph)
for shape_collection in self.shape_collections:
g += shape_collection.graph
return g
[docs] def validate_model_against_shapes(
self,
shapes_to_test: List[rdflib.URIRef],
target_class: rdflib.URIRef,
) -> Dict[rdflib.URIRef, "ValidationContext"]:
"""Validates the model against a list of shapes and generates a
validation report for each.
:param shapes_to_test: list of shape URIs to validate the model against
:type shapes_to_test: List[URIRef]
:param target_class: the class upon which to run the selected shapes
:type target_class: URIRef
:return: a dictionary that relates each shape to test URIRef to a
ValidationContext
:rtype: Dict[URIRef, ValidationContext]
"""
model_graph = copy_graph(self._compiled_graph)
results = {}
targets = model_graph.query(
f"""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?target
WHERE {{
?target rdf:type/rdfs:subClassOf* <{target_class}>
}}
"""
)
# skolemize the shape graph so we have consistent identifiers across
# validation through the interpretation of the validation report
ontology_graph = self.graph.skolemize()
for shape_uri in shapes_to_test:
temp_model_graph = copy_graph(model_graph)
for (s,) in targets:
temp_model_graph.add((URIRef(s), A, shape_uri))
valid, report_g, report_str = shacl_validate(
temp_model_graph, ontology_graph, engine=self.model._bm.shacl_engine
)
results[shape_uri] = ValidationContext(
self.shape_collections,
ontology_graph,
valid,
report_g,
report_str,
self.model,
)
return results
[docs] def validate(
self,
error_on_missing_imports: bool = True,
) -> "ValidationContext":
"""Validates this model against the given list of ShapeCollections.
If no list is provided, the model will be validated against the model's "manifest".
If a list of shape collections is provided, the manifest will *not* be automatically
included in the set of shape collections.
Loads all of the ShapeCollections into a single graph.
:param error_on_missing_imports: if True, raises an error if any of the dependency
ontologies are missing (i.e. they need to be loaded into BuildingMOTIF), defaults
to True
:type error_on_missing_imports: bool, optional
:return: An object containing useful properties/methods to deal with
the validation results
:rtype: ValidationContext
"""
# TODO: determine the return types; At least a bool for valid/invalid,
# but also want a report. Is this the base pySHACL report? Or a useful
# transformation, like a list of deltas for potential fixes?
shapeg = copy_graph(self._compiled_graph)
# aggregate shape graphs
for sc in self.shape_collections:
shapeg += sc.resolve_imports(
error_on_missing_imports=error_on_missing_imports
).graph
# inline sh:node for interpretability
shapeg = rewrite_shape_graph(shapeg)
# remove imports from sg
shapeg.remove((None, OWL.imports, None))
# skolemize the shape graph so we have consistent identifiers across
# validation through the interpretation of the validation report
shapeg = skolemize_shapes(shapeg)
# remove imports from data graph
shapeg.remove((None, OWL.imports, None))
# validate the data graph
valid, report_g, report_str = shacl_validate(
shapeg, engine=self.model._bm.shacl_engine
)
return ValidationContext(
self.shape_collections,
shapeg,
valid,
report_g,
report_str,
self.model,
)
[docs] def defining_shape_collection(
self, shape: rdflib.URIRef
) -> Optional[ShapeCollection]:
"""
Given a shape, return the ShapeCollection that defines it. The search is limited to the
ShapeCollections that were used to compile this model.
:param shape: the shape to search for
:type shape: rdflib.URIRef
:return: the ShapeCollection that defines the shape, or None if the shape is not defined
:rtype: Optional[ShapeCollection]
"""
for sc in self.shape_collections:
if (shape, A, SH.NodeShape) in sc.graph:
return sc
return None
[docs] def shape_to_table(self, shape: rdflib.URIRef, table: str, conn):
"""
Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a table.
:param shape: the shape to query
:type shape: rdflib.URIRef
:param table: the name of the table to store the results in
:type table: str
:param conn: the connection to the database
:type conn: sqlalchemy.engine.base.Connection
"""
metadata = self.shape_to_df(shape)
metadata.to_sql(table, conn, if_exists="replace", index=False)
[docs] def shape_to_df(self, shape: rdflib.URIRef) -> pd.DataFrame:
"""
Turn the shape into a SPARQL query and execute it on the model's graph, storing the results in a dataframe.
:param shape: the shape to query
:type shape: rdflib.URIRef
:return: the results of the query
:rtype: pd.DataFrame
"""
defining_sc = self.defining_shape_collection(shape)
if defining_sc is None:
raise ValueError(
f"Shape {shape} is not defined in any of the shape collections"
)
query = defining_sc.shape_to_query(shape)
metadata = pd.DataFrame(self.graph.query(query).bindings, dtype="string")
# metadata.columns will be rdflib.term.Variable objects, so we need to convert them to strings
metadata.columns = [str(col) for col in metadata.columns]
# convert the rdflib terms to Python types
return metadata.map(lambda x: x.toPython())