|
32 | 32 | __version__ = "0.3.0" |
33 | 33 |
|
34 | 34 | import argparse |
35 | | -import importlib.resources |
36 | 35 | import logging |
37 | 36 | import os |
38 | 37 | import sys |
39 | 38 | import warnings |
40 | | -from typing import Any, Dict, List, Optional, Set, Tuple, Union |
| 39 | +from typing import Any, Dict, List, Optional, Tuple, Union |
41 | 40 |
|
42 | 41 | import pyshacl # type: ignore |
43 | 42 | import rdflib |
44 | 43 | from rdflib import Graph |
45 | 44 |
|
46 | | -import case_utils.ontology |
| 45 | +from case_utils.case_validate.validate_types import ( |
| 46 | + NonExistentCDOConceptWarning, |
| 47 | + ValidationResult, |
| 48 | +) |
| 49 | +from case_utils.case_validate.validate_utils import ( |
| 50 | + get_invalid_cdo_concepts, |
| 51 | + get_ontology_graph, |
| 52 | +) |
47 | 53 | from case_utils.ontology.version_info import ( |
48 | 54 | CURRENT_CASE_VERSION, |
49 | 55 | built_version_choices_list, |
50 | 56 | ) |
51 | 57 |
|
52 | | -NS_OWL = rdflib.OWL |
53 | | -NS_RDF = rdflib.RDF |
54 | | -NS_RDFS = rdflib.RDFS |
55 | | -NS_SH = rdflib.SH |
56 | | - |
57 | 58 | _logger = logging.getLogger(os.path.basename(__file__)) |
58 | 59 |
|
59 | 60 |
|
60 | | -class NonExistentCDOConceptWarning(UserWarning): |
61 | | - """ |
62 | | - This class is used when a concept is encountered in the data graph that is not part of CDO ontologies, according to the --built-version flags and --ontology-graph flags. |
63 | | - """ |
64 | | - |
65 | | - pass |
66 | | - |
67 | | - |
68 | | -class NonExistentCASEVersionError(Exception): |
69 | | - """ |
70 | | - This class is used when an invalid CASE version is requested that is not supported by the library. |
71 | | - """ |
72 | | - |
73 | | - pass |
74 | | - |
75 | | - |
76 | | -class ValidationResult: |
77 | | - def __init__( |
78 | | - self, |
79 | | - conforms: bool, |
80 | | - graph: Union[Exception, bytes, str, rdflib.Graph], |
81 | | - text: str, |
82 | | - undefined_concepts: Set[rdflib.URIRef], |
83 | | - ) -> None: |
84 | | - self.conforms = conforms |
85 | | - self.graph = graph |
86 | | - self.text = text |
87 | | - self.undefined_concepts = undefined_concepts |
88 | | - |
89 | | - |
90 | | -def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool: |
91 | | - """ |
92 | | - Determine if a concept is part of the CDO ontology. |
93 | | -
|
94 | | - :param n_concept: The concept to check. |
95 | | - :return: whether the concept is part of the CDO ontologies. |
96 | | - """ |
97 | | - concept_iri = str(n_concept) |
98 | | - return concept_iri.startswith( |
99 | | - "https://ontology.unifiedcyberontology.org/" |
100 | | - ) or concept_iri.startswith("https://ontology.caseontology.org/") |
101 | | - |
102 | | - |
103 | | -def get_invalid_cdo_concepts( |
104 | | - data_graph: rdflib.Graph, ontology_graph: rdflib.Graph |
105 | | -) -> Set[rdflib.URIRef]: |
106 | | - """ |
107 | | - Get the set of concepts in the data graph that are not part of the CDO ontologies as specified with the ontology_graph argument. |
108 | | -
|
109 | | - :param data_graph: The data graph to validate. |
110 | | - :param ontology_graph: The ontology graph to use for validation. |
111 | | - :return: The set of concepts in the data graph that are not part of the CDO ontologies. |
112 | | -
|
113 | | - >>> from case_utils.namespace import NS_RDF, NS_OWL, NS_UCO_CORE |
114 | | - >>> from rdflib import Graph, Literal, Namespace, URIRef |
115 | | - >>> # Define a namespace for a knowledge base, and a namespace for custom extensions. |
116 | | - >>> ns_kb = Namespace("http://example.org/kb/") |
117 | | - >>> ns_ex = Namespace("http://example.org/ontology/") |
118 | | - >>> dg = Graph() |
119 | | - >>> og = Graph() |
120 | | - >>> # Use an ontology graph in review that includes only a single class and a single property excerpted from UCO, but also a single custom property. |
121 | | - >>> _ = og.add((NS_UCO_CORE.UcoObject, NS_RDF.type, NS_OWL.Class)) |
122 | | - >>> _ = og.add((NS_UCO_CORE.name, NS_RDF.type, NS_OWL.DatatypeProperty)) |
123 | | - >>> _ = og.add((ns_ex.ourCustomProperty, NS_RDF.type, NS_OWL.DatatypeProperty)) |
124 | | - >>> # Define an individual. |
125 | | - >>> n_uco_object = ns_kb["UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c"] |
126 | | - >>> n_uco_object |
127 | | - rdflib.term.URIRef('http://example.org/kb/UcoObject-f494d239-d9fd-48da-bc07-461ba86d8c6c') |
128 | | - >>> # Review a data graph that includes only the single individual, class typo'd (capitalized incorrectly), but property OK. |
129 | | - >>> _ = dg.add((n_uco_object, NS_RDF.type, NS_UCO_CORE.UCOObject)) |
130 | | - >>> _ = dg.add((n_uco_object, NS_UCO_CORE.name, Literal("Test"))) |
131 | | - >>> _ = dg.add((n_uco_object, ns_ex.customProperty, Literal("Custom Value"))) |
132 | | - >>> invalid_cdo_concepts = get_invalid_cdo_concepts(dg, og) |
133 | | - >>> invalid_cdo_concepts |
134 | | - {rdflib.term.URIRef('https://ontology.unifiedcyberontology.org/uco/core/UCOObject')} |
135 | | - >>> # Note that the property "ourCustomProperty" was typo'd in the data graph, but this was not reported. |
136 | | - >>> assert ns_ex.ourCustomProperty not in invalid_cdo_concepts |
137 | | - """ |
138 | | - # Construct set of CDO concepts for data graph concept-existence review. |
139 | | - cdo_concepts: Set[rdflib.URIRef] = set() |
140 | | - |
141 | | - for n_structural_class in [ |
142 | | - NS_OWL.Class, |
143 | | - NS_OWL.AnnotationProperty, |
144 | | - NS_OWL.DatatypeProperty, |
145 | | - NS_OWL.ObjectProperty, |
146 | | - NS_RDFS.Datatype, |
147 | | - NS_SH.NodeShape, |
148 | | - NS_SH.PropertyShape, |
149 | | - NS_SH.Shape, |
150 | | - ]: |
151 | | - for ontology_triple in ontology_graph.triples( |
152 | | - (None, NS_RDF.type, n_structural_class) |
153 | | - ): |
154 | | - if not isinstance(ontology_triple[0], rdflib.URIRef): |
155 | | - continue |
156 | | - if concept_is_cdo_concept(ontology_triple[0]): |
157 | | - cdo_concepts.add(ontology_triple[0]) |
158 | | - for n_ontology_predicate in [ |
159 | | - NS_OWL.backwardCompatibleWith, |
160 | | - NS_OWL.imports, |
161 | | - NS_OWL.incompatibleWith, |
162 | | - NS_OWL.priorVersion, |
163 | | - NS_OWL.versionIRI, |
164 | | - ]: |
165 | | - for ontology_triple in ontology_graph.triples( |
166 | | - (None, n_ontology_predicate, None) |
167 | | - ): |
168 | | - assert isinstance(ontology_triple[0], rdflib.URIRef) |
169 | | - assert isinstance(ontology_triple[2], rdflib.URIRef) |
170 | | - cdo_concepts.add(ontology_triple[0]) |
171 | | - cdo_concepts.add(ontology_triple[2]) |
172 | | - for ontology_triple in ontology_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)): |
173 | | - if not isinstance(ontology_triple[0], rdflib.URIRef): |
174 | | - continue |
175 | | - cdo_concepts.add(ontology_triple[0]) |
176 | | - |
177 | | - # Also load historical ontology and version IRIs. |
178 | | - ontology_and_version_iris_data = importlib.resources.read_text( |
179 | | - case_utils.ontology, "ontology_and_version_iris.txt" |
180 | | - ) |
181 | | - for line in ontology_and_version_iris_data.split("\n"): |
182 | | - cleaned_line = line.strip() |
183 | | - if cleaned_line == "": |
184 | | - continue |
185 | | - cdo_concepts.add(rdflib.URIRef(cleaned_line)) |
186 | | - |
187 | | - data_cdo_concepts: Set[rdflib.URIRef] = set() |
188 | | - for data_triple in data_graph.triples((None, None, None)): |
189 | | - for data_triple_member in data_triple: |
190 | | - if isinstance(data_triple_member, rdflib.URIRef): |
191 | | - if concept_is_cdo_concept(data_triple_member): |
192 | | - data_cdo_concepts.add(data_triple_member) |
193 | | - elif isinstance(data_triple_member, rdflib.Literal): |
194 | | - if isinstance(data_triple_member.datatype, rdflib.URIRef): |
195 | | - if concept_is_cdo_concept(data_triple_member.datatype): |
196 | | - data_cdo_concepts.add(data_triple_member.datatype) |
197 | | - |
198 | | - return data_cdo_concepts - cdo_concepts |
199 | | - |
200 | | - |
201 | | -def get_ontology_graph( |
202 | | - case_version: Optional[str] = None, supplemental_graphs: Optional[List[str]] = None |
203 | | -) -> rdflib.Graph: |
204 | | - """ |
205 | | - Get the ontology graph for the given case_version and any supplemental graphs. |
206 | | -
|
207 | | - :param case_version: the version of the CASE ontology to use. If None (i.e. null), the most recent version will be used. If "none" (the string), no pre-built version of CASE will be used. |
208 | | - :param supplemental_graphs: a list of supplemental graphs to use. If None, no supplemental graphs will be used. |
209 | | - :return: the ontology graph against which to validate the data graph. |
210 | | - """ |
211 | | - ontology_graph = rdflib.Graph() |
212 | | - |
213 | | - if case_version != "none": |
214 | | - # Load bundled CASE ontology at requested version. |
215 | | - if case_version is None or case_version == "": |
216 | | - case_version = CURRENT_CASE_VERSION |
217 | | - # If the first character of case_version is numeric, prepend "case-" to it. This allows for the version to be passed |
218 | | - # by the library as both case-1.2.0 and 1.2.0 |
219 | | - if case_version[0].isdigit(): |
220 | | - case_version = "case-" + case_version |
221 | | - ttl_filename = case_version + ".ttl" |
222 | | - _logger.debug("ttl_filename = %r.", ttl_filename) |
223 | | - # Ensure the requested version of the CASE ontology is available and if not, throw an appropriate exception |
224 | | - # that can be returned in a user-friendly message. |
225 | | - if not importlib.resources.is_resource(case_utils.ontology, ttl_filename): |
226 | | - raise NonExistentCASEVersionError( |
227 | | - f"The requested version ({case_version}) of the CASE ontology is not available. Please choose a " |
228 | | - f"different version. The latest supported version is: {CURRENT_CASE_VERSION}" |
229 | | - ) |
230 | | - ttl_data = importlib.resources.read_text(case_utils.ontology, ttl_filename) |
231 | | - ontology_graph.parse(data=ttl_data, format="turtle") |
232 | | - |
233 | | - if supplemental_graphs: |
234 | | - for arg_ontology_graph in supplemental_graphs: |
235 | | - _logger.debug("arg_ontology_graph = %r.", arg_ontology_graph) |
236 | | - ontology_graph.parse(arg_ontology_graph) |
237 | | - |
238 | | - return ontology_graph |
239 | | - |
240 | | - |
241 | 61 | def validate( |
242 | 62 | input_file: str, |
243 | 63 | *args: Any, |
|
0 commit comments