@@ -71,6 +71,104 @@ def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool:
7171 ) or concept_iri .startswith ("https://ontology.caseontology.org/" )
7272
7373
def get_invalid_cdo_concepts(
    data_graph: rdflib.Graph, ontology_graph: rdflib.Graph
) -> Set[rdflib.URIRef]:
    """
    Get the set of CDO concepts used in the data graph that are not defined by the ontology graph (or by the bundled list of historical ontology and version IRIs).

    A "CDO concept" here is any IRI for which ``concept_is_cdo_concept`` returns true.  IRIs outside of that scope, such as custom extension properties, are never reported.

    :param data_graph: The data graph to validate.
    :param ontology_graph: The ontology graph to use for validation.
    :return: The set of CDO concepts that appear in the data graph (as a subject, predicate, object, or literal datatype) but are not known from the ontology graph.

    >>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE
    >>> from rdflib import Graph, Literal, Namespace, URIRef
    >>> # Define a namespace for a knowledge base, and a namespace for custom extensions.
    >>> ns_kb = Namespace("http://example.org/kb/")
    >>> ns_ex = Namespace("http://example.org/ontology/")
    >>> dg = Graph()
    >>> og = Graph()
    >>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property.
    >>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class))
    >>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty))
    >>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty))
    >>> # Define an individual.
    >>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"]
    >>> n_uco_object
    rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c')
    >>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK.
    >>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject))
    >>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test")))
    >>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value")))
    >>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og)
    >>> invalid_cdo_concepts
    {rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')}
    >>> # Note that the property "ourCustomProperty" was typo'd in the data graph, but this was not reported.
    >>> assert ns_ex.ourCustomProperty not in invalid_cdo_concepts
    """
    # Construct set of CDO concepts for data graph concept-existence review.
    cdo_concepts: Set[rdflib.URIRef] = set()

    # Any IRI-identified node declared in the ontology graph with one of
    # these structural types counts as a defined concept, provided it is
    # in CDO scope.  Blank nodes (e.g. anonymous shapes) are skipped.
    for n_structural_class in [
        NS_OWL.Class,
        NS_OWL.AnnotationProperty,
        NS_OWL.DatatypeProperty,
        NS_OWL.ObjectProperty,
        NS_RDFS.Datatype,
        NS_SH.NodeShape,
        NS_SH.PropertyShape,
        NS_SH.Shape,
    ]:
        for n_subject, _, _ in ontology_graph.triples(
            (None, NS_RDF.type, n_structural_class)
        ):
            if not isinstance(n_subject, rdflib.URIRef):
                continue
            if concept_is_cdo_concept(n_subject):
                cdo_concepts.add(n_subject)

    # Ontology IRIs and version IRIs referenced through ontology-level
    # predicates are accepted on both ends of the triple.  Non-IRI members
    # are skipped (rather than asserted against) so that this loop behaves
    # like its siblings and does not change behavior under ``python -O``.
    for n_ontology_predicate in [
        NS_OWL.backwardCompatibleWith,
        NS_OWL.imports,
        NS_OWL.incompatibleWith,
        NS_OWL.priorVersion,
        NS_OWL.versionIRI,
    ]:
        for n_subject, _, n_object in ontology_graph.triples(
            (None, n_ontology_predicate, None)
        ):
            for n_member in (n_subject, n_object):
                if isinstance(n_member, rdflib.URIRef):
                    cdo_concepts.add(n_member)

    # Every IRI typed as an owl:Ontology is accepted as well.
    for n_subject, _, _ in ontology_graph.triples(
        (None, NS_RDF.type, NS_OWL.Ontology)
    ):
        if not isinstance(n_subject, rdflib.URIRef):
            continue
        cdo_concepts.add(n_subject)

    # Also load historical ontology and version IRIs.
    ontology_and_version_iris_data = importlib.resources.read_text(
        case_utils.ontology, "ontology_and_version_iris.txt"
    )
    # The resource is read one IRI per line; splitlines() copes with any
    # line-ending convention, and blank lines are ignored.
    for line in ontology_and_version_iris_data.splitlines():
        cleaned_line = line.strip()
        if cleaned_line:
            cdo_concepts.add(rdflib.URIRef(cleaned_line))

    # Collect every CDO-scoped IRI the data graph uses, in any triple
    # position, including the datatype IRIs of typed literals.
    data_cdo_concepts: Set[rdflib.URIRef] = set()
    for data_triple in data_graph.triples((None, None, None)):
        for data_triple_member in data_triple:
            if isinstance(data_triple_member, rdflib.URIRef):
                if concept_is_cdo_concept(data_triple_member):
                    data_cdo_concepts.add(data_triple_member)
            elif isinstance(data_triple_member, rdflib.Literal):
                n_datatype = data_triple_member.datatype
                if isinstance(n_datatype, rdflib.URIRef):
                    if concept_is_cdo_concept(n_datatype):
                        data_cdo_concepts.add(n_datatype)

    # Whatever the data graph uses that the ontology does not define is
    # reported as invalid.
    return data_cdo_concepts - cdo_concepts
170+
171+
74172def main () -> None :
75173 parser = argparse .ArgumentParser (
76174 description = "CASE wrapper to pySHACL command line tool."
@@ -181,67 +279,9 @@ def main() -> None:
181279 _logger .debug ("arg_ontology_graph = %r." , arg_ontology_graph )
182280 ontology_graph .parse (arg_ontology_graph )
183281
184- # Construct set of CDO concepts for data graph concept-existence review.
185- cdo_concepts : Set [rdflib .URIRef ] = set ()
186-
187- for n_structural_class in [
188- NS_OWL .Class ,
189- NS_OWL .AnnotationProperty ,
190- NS_OWL .DatatypeProperty ,
191- NS_OWL .ObjectProperty ,
192- NS_RDFS .Datatype ,
193- NS_SH .NodeShape ,
194- NS_SH .PropertyShape ,
195- NS_SH .Shape ,
196- ]:
197- for ontology_triple in ontology_graph .triples (
198- (None , NS_RDF .type , n_structural_class )
199- ):
200- if not isinstance (ontology_triple [0 ], rdflib .URIRef ):
201- continue
202- if concept_is_cdo_concept (ontology_triple [0 ]):
203- cdo_concepts .add (ontology_triple [0 ])
204- for n_ontology_predicate in [
205- NS_OWL .backwardCompatibleWith ,
206- NS_OWL .imports ,
207- NS_OWL .incompatibleWith ,
208- NS_OWL .priorVersion ,
209- NS_OWL .versionIRI ,
210- ]:
211- for ontology_triple in ontology_graph .triples (
212- (None , n_ontology_predicate , None )
213- ):
214- assert isinstance (ontology_triple [0 ], rdflib .URIRef )
215- assert isinstance (ontology_triple [2 ], rdflib .URIRef )
216- cdo_concepts .add (ontology_triple [0 ])
217- cdo_concepts .add (ontology_triple [2 ])
218- for ontology_triple in ontology_graph .triples ((None , NS_RDF .type , NS_OWL .Ontology )):
219- if not isinstance (ontology_triple [0 ], rdflib .URIRef ):
220- continue
221- cdo_concepts .add (ontology_triple [0 ])
222-
223- # Also load historical ontology and version IRIs.
224- ontology_and_version_iris_data = importlib .resources .read_text (
225- case_utils .ontology , "ontology_and_version_iris.txt"
226- )
227- for line in ontology_and_version_iris_data .split ("\n " ):
228- cleaned_line = line .strip ()
229- if cleaned_line == "" :
230- continue
231- cdo_concepts .add (rdflib .URIRef (cleaned_line ))
232-
233- data_cdo_concepts : Set [rdflib .URIRef ] = set ()
234- for data_triple in data_graph .triples ((None , None , None )):
235- for data_triple_member in data_triple :
236- if isinstance (data_triple_member , rdflib .URIRef ):
237- if concept_is_cdo_concept (data_triple_member ):
238- data_cdo_concepts .add (data_triple_member )
239- elif isinstance (data_triple_member , rdflib .Literal ):
240- if isinstance (data_triple_member .datatype , rdflib .URIRef ):
241- if concept_is_cdo_concept (data_triple_member .datatype ):
242- data_cdo_concepts .add (data_triple_member .datatype )
282+ # Get the list of undefined CDO concepts in the graph
283+ undefined_cdo_concepts = get_invalid_cdo_concepts (data_graph , ontology_graph )
243284
244- undefined_cdo_concepts = data_cdo_concepts - cdo_concepts
245285 for undefined_cdo_concept in sorted (undefined_cdo_concepts ):
246286 warnings .warn (undefined_cdo_concept , NonExistentCDOConceptWarning )
247287 undefined_cdo_concepts_message = (
0 commit comments