55import hashlib
66import argparse
77import logging
8+ import typing
9+ import uuid
10+ import warnings
811
12+ import case_utils .inherent_uuid
913import case_utils .local_uuid
1014import exifread
1115import rdflib
2327ns_kb = rdflib .Namespace ("http://example.org/kb/" )
2428
2529
def get_node_iri(
    ns: rdflib.Namespace,
    prefix: str,
    *args: typing.Any,
    facet_class: typing.Optional[rdflib.URIRef] = None,
    uco_object_node: typing.Optional[rdflib.URIRef] = None,
    use_deterministic_uuids: bool = False,
    **kwargs: typing.Any
) -> rdflib.URIRef:
    """
    Mint an IRI in the given namespace of the form ``<ns><prefix><uuid>``.
    :param ns: Namespace the new IRI is created in
    :param prefix: String placed between the namespace and the UUID (e.g. "filefacet-")
    :param facet_class: Class IRI of the Facet being minted; needed for a deterministic UUID
    :param uco_object_node: IRI of the UcoObject that owns the Facet; needed for a deterministic UUID
    :param use_deterministic_uuids: If True, try to derive the UUID via case_utils.inherent_uuid
    :return: The new node IRI. Falls back to case_utils.local_uuid (after a warning) when a
        deterministic UUID was requested but facet_class or uco_object_node is missing.
    """
    node_id: typing.Optional[rdflib.URIRef] = None
    if use_deterministic_uuids:
        if uco_object_node is None:
            warnings.warn("get_node_iri() called requesting deterministic UUIDs, but no UcoObject node was provided.")
        elif facet_class is None:
            # This branch must test facet_class (not uco_object_node again),
            # otherwise a missing Facet class is passed through to the helper.
            warnings.warn("get_node_iri() called requesting deterministic UUIDs, but no Facet class node was provided.")
        else:
            _node_id = case_utils.inherent_uuid.get_facet_uriref(uco_object_node, facet_class, namespace=ns)
            # Swap in the requested prefix value: keep only the trailing
            # 36-character UUID of the helper's IRI.
            _node_uuid = str(_node_id)[-36:]
            node_id = ns[f"{prefix}{_node_uuid}"]

    if node_id is None:
        # Deterministic path not requested, or not possible: use a local UUID.
        node_id = ns[f"{prefix}{case_utils.local_uuid.local_uuid()}"]
    return node_id
2955
3056
@@ -72,17 +98,18 @@ def create_exif_dict(tags):
7298 return exif
7399
74100
75- def n_cyber_object_to_node (graph ):
101+ def n_cyber_object_to_node (graph , * args : typing . Any , use_deterministic_uuids : bool = False , ** kwargs : typing . Any ):
76102 """
77103 Initial function to create nodes for each of the file's facet nodes
78104 :param graph: rdflib graph object for adding nodes to
79105 :return: The four nodes for each of the other functions to fill
80106 """
81107 cyber_object = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "observableobject-" ))
82- n_raster_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "rasterpicture-" ))
83- n_file_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "filefacet-" ))
84- n_content_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "contentfacet-" ))
85- n_exif_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "exiffacet-" ))
108+
109+ n_raster_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "rasterpicture-" , facet_class = NS_UCO_OBSERVABLE .RasterPictureFacet , uco_object_node = cyber_object , use_deterministic_uuids = use_deterministic_uuids ))
110+ n_file_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "filefacet-" , facet_class = NS_UCO_OBSERVABLE .FileFacet , uco_object_node = cyber_object , use_deterministic_uuids = use_deterministic_uuids ))
111+ n_content_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "contentfacet-" , facet_class = NS_UCO_OBSERVABLE .ContentDataFacet , uco_object_node = cyber_object , use_deterministic_uuids = use_deterministic_uuids ))
112+ n_exif_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "exiffacet-" , facet_class = NS_UCO_OBSERVABLE .EXIFFacet , uco_object_node = cyber_object , use_deterministic_uuids = use_deterministic_uuids ))
86113 graph .add ((
87114 cyber_object ,
88115 NS_RDF .type ,
@@ -111,15 +138,14 @@ def n_cyber_object_to_node(graph):
111138 return n_exif_facet , n_raster_facet , n_file_facet , n_content_facet
112139
113140
114- def filecontent_object_to_node (graph , n_content_facet , file_information ):
141+ def filecontent_object_to_node (graph , n_content_facet , file_information , * args : typing . Any , use_deterministic_uuids : bool = False , ** kwargs : typing . Any ):
115142 """
116143 Unused: Create a node that will add the file content facet node to the graph
117144 :param graph: rdflib graph object for adding nodes to
118- :param n_content_facet: Blank node to contain all content facet information
145+ :param n_content_facet: Node to contain all content facet information
119146 :param file_information: Dictionary containing information about file being analysed
120147 :return: None
121148 """
122- file_hash_facet = rdflib .URIRef (get_node_iri (ns = ns_kb , prefix = "hash-" ))
123149 graph .add ((
124150 n_content_facet ,
125151 NS_RDF .type ,
@@ -143,16 +169,38 @@ def filecontent_object_to_node(graph, n_content_facet, file_information):
143169 rdflib .term .Literal (file_information ["size" ],
144170 datatype = NS_XSD .integer )
145171 ))
146- graph .add ((
147- n_content_facet ,
148- NS_UCO_OBSERVABLE .hash ,
149- file_hash_facet
150- ))
151- graph .add ((
152- file_hash_facet ,
153- NS_RDF .type ,
154- NS_UCO_TYPES .Hash
155- ))
172+
173+ if "SHA256" in file_information :
174+ hash_method = rdflib .Literal ("SHA256" , datatype = NS_UCO_VOCABULARY .HashNameVocab )
175+ hash_value = rdflib .Literal (file_information ["SHA256" ], datatype = NS_XSD .hexBinary )
176+
177+ file_hash : rdflib .URIRef
178+ if use_deterministic_uuids :
179+ file_hash_uuid : uuid .UUID = case_utils .inherent_uuid .hash_method_value_uuid (hash_method , hash_value , namespace = ns_kb )
180+ file_hash = ns_kb ["hash-" + str (file_hash_uuid )]
181+ else :
182+ file_hash = get_node_iri (ns = ns_kb , prefix = "hash-" )
183+
184+ graph .add ((
185+ n_content_facet ,
186+ NS_UCO_OBSERVABLE .hash ,
187+ file_hash
188+ ))
189+ graph .add ((
190+ file_hash ,
191+ NS_RDF .type ,
192+ NS_UCO_TYPES .Hash
193+ ))
194+ graph .add ((
195+ file_hash ,
196+ NS_UCO_TYPES .hashMethod ,
197+ hash_method
198+ ))
199+ graph .add ((
200+ file_hash ,
201+ NS_UCO_TYPES .hashValue ,
202+ hash_value
203+ ))
156204
157205
158206def filefacets_object_to_node (graph , n_file_facet , file_information ):
@@ -309,6 +357,11 @@ def main():
309357 """
310358 parser = argparse .ArgumentParser ()
311359 parser .add_argument ("file" , help = "file to extract exif data from" )
360+ parser .add_argument (
361+ "--use-deterministic-uuids" ,
362+ action = "store_true" ,
363+ help = "Use UUIDs computed using the case_utils.inherent_uuid module." ,
364+ )
312365 args = parser .parse_args ()
313366 local_file = args .file
314367 file_info = get_file_info (local_file )
0 commit comments