diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f97a949..63ff0db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: [ 3.6, 3.8 ] steps: - uses: actions/checkout@v2 @@ -38,4 +38,4 @@ jobs: - name: Start from clean state run: make clean - name: Run tests - run: make check + run: make PYTHON3=python check diff --git a/Makefile b/Makefile index a8ed1b9..4661b8e 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,8 @@ SHELL := /bin/bash +PYTHON3 ?= $(shell which python3.9 2>/dev/null || which python3.8 2>/dev/null || which python3.7 2>/dev/null || which python3.6 2>/dev/null || which python3) + all: .PHONY: \ @@ -38,6 +40,7 @@ all: check: \ .git_submodule_init.done.log $(MAKE) \ + PYTHON3=$(PYTHON3) \ --directory tests \ check @@ -45,6 +48,12 @@ clean: @$(MAKE) \ --directory tests \ clean + @rm -f \ + .git_submodule_init.done.log + @#Remove flag files that are normally set after deeper submodules and rdf-toolkit are downloaded. + @rm -f \ + dependencies/CASE-Examples-QC/.git_submodule_init.done.log \ + dependencies/CASE-Examples-QC/.lib.done.log distclean: \ clean diff --git a/README.md b/README.md index 8d1e38a..4bd97e5 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,36 @@ case_file --disable-hashes sample.txt.json sample.txt ``` +### SPARQL executors + +Two commands are provided to generate output from a SPARQL query and one or more input graphs. Input graphs can be any graph, such as instance data or supplementary ontology files that supply custom class definitions or other external ontologies. + + +#### `case_sparql_construct` + +To use a SPARQL `CONSTRUCT` query to make a supplementary graph file from one or more input graphs: + +```bash +case_sparql_construct output.json input.sparql input.json [input-2.json ...] 
+``` + + +#### `case_sparql_select` + +To use a SPARQL `SELECT` query to make a table from one or more input graphs: + +```bash +# HTML output with Bootstrap classes +# (e.g. for Jekyll-backed websites) +case_sparql_select output.html input.sparql input.json [input-2.json ...] + +# Markdown, Github-flavored +case_sparql_select output.md input.sparql input.json [input-2.json ...] +``` + +Note that `case_sparql_select` is not guaranteed to function with Pythons below version 3.7. + + ### `local_uuid` This [module](case_utils/local_uuid.py) provides a wrapper UUID generator, `local_uuid()`. Its main purpose is making example data generate consistent identifiers, and intentionally includes mechanisms to make it difficult to activate this mode without awareness of the caller. @@ -58,8 +88,8 @@ This project follows [SEMVER 2.0.0](https://semver.org/) where versions are decl This repository supports the ontology versions that are linked as submodules in the [CASE Examples QC](https://github.com/ajnelson-nist/CASE-Examples-QC) repository. Currently, the ontology versions are: -* CASE - 0.3.0 -* UCO - 0.5.0 +* CASE - 0.4.0 +* UCO - 0.6.0 ## Repository locations diff --git a/case_utils/__init__.py b/case_utils/__init__.py index 6ec055e..a7184e2 100644 --- a/case_utils/__init__.py +++ b/case_utils/__init__.py @@ -11,7 +11,7 @@ # # We would appreciate acknowledgement if the software is used. 
-__version__ = "0.1.0" +__version__ = "0.2.0" import rdflib.util @@ -37,6 +37,8 @@ def guess_format(fpath, fmap=None): updated_fmap = {key:rdflib.util.SUFFIX_FORMAT_MAP[key] for key in rdflib.util.SUFFIX_FORMAT_MAP} if not "json" in updated_fmap: updated_fmap["json"] = "json-ld" + if not "jsonld" in updated_fmap: + updated_fmap["jsonld"] = "json-ld" else: updated_fmap = {k:fmap[k] for k in fmap} diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 174b539..7d07afb 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -87,7 +87,7 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, graph.add(( n_file_facet, NS_UCO_OBSERVABLE.sizeInBytes, - rdflib.Literal(file_stat.st_size, datatype=NS_XSD.long) + rdflib.Literal(int(file_stat.st_size)) )) graph.add(( n_file, @@ -174,7 +174,7 @@ def create_file_node(graph, filepath, node_iri=None, node_prefix=DEFAULT_PREFIX, graph.add(( n_contentdata_facet, NS_UCO_OBSERVABLE.sizeInBytes, - rdflib.Literal(successful_hashdict["filesize"], datatype=NS_XSD.long) + rdflib.Literal(successful_hashdict["filesize"]) )) # Add confirmed hashes into graph. diff --git a/case_utils/case_sparql_construct/__init__.py b/case_utils/case_sparql_construct/__init__.py new file mode 100644 index 0000000..4eaeceb --- /dev/null +++ b/case_utils/case_sparql_construct/__init__.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. 
+# +# We would appreciate acknowledgement if the software is used. + +""" +This script executes a SPARQL CONSTRUCT query, returning a graph of the generated triples. +""" + +__version__ = "0.1.0" + +import argparse +import os +import logging + +import rdflib.plugins.sparql + +import case_utils + +_logger = logging.getLogger(os.path.basename(__file__)) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--debug", action="store_true") + parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.") + parser.add_argument("--output-format", help="Override extension-based format guesser.") + parser.add_argument("out_graph") + parser.add_argument("in_sparql") + parser.add_argument("in_graph", nargs="+") + args = parser.parse_args() + + logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) + + in_graph = rdflib.Graph() + for in_graph_filename in args.in_graph: + in_graph.parse(in_graph_filename, format=case_utils.guess_format(in_graph_filename)) + _logger.debug("len(in_graph) = %d.", len(in_graph)) + + out_graph = rdflib.Graph() + + # Inherit prefixes defined in input context dictionary. + nsdict = {k:v for (k,v) in in_graph.namespace_manager.namespaces()} + for prefix in sorted(nsdict.keys()): + out_graph.bind(prefix, nsdict[prefix]) + + _logger.debug("Running query in %r." % args.in_sparql) + construct_query_text = None + with open(args.in_sparql, "r") as in_fh: + construct_query_text = in_fh.read().strip() + assert not construct_query_text is None + + construct_query_object = rdflib.plugins.sparql.prepareQuery(construct_query_text, initNs=nsdict) + + # https://rdfextras.readthedocs.io/en/latest/working_with.html + construct_query_result = in_graph.query(construct_query_object) + _logger.debug("type(construct_query_result) = %r." % type(construct_query_result)) + _logger.debug("len(construct_query_result) = %d." 
% len(construct_query_result)) + for (row_no, row) in enumerate(construct_query_result): + if row_no == 0: + _logger.debug("row[0] = %r." % (row,)) + out_graph.add(row) + + output_format = None + if args.output_format is None: + output_format = case_utils.guess_format(args.out_graph) + else: + output_format = args.output_format + + serialize_kwargs = { + "format": output_format + } + if output_format == "json-ld": + context_dictionary = {k:v for (k,v) in out_graph.namespace_manager.namespaces()} + serialize_kwargs["context"] = context_dictionary + + out_graph.serialize(args.out_graph, **serialize_kwargs) + +if __name__ == "__main__": + main() diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py new file mode 100644 index 0000000..357e3b0 --- /dev/null +++ b/case_utils/case_sparql_select/__init__.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +""" +This script executes a SPARQL SELECT query, returning a table representation. The design of the workflow is based on this example built on SPARQLWrapper: +https://lawlesst.github.io/notebook/sparql-dataframe.html + +Note that this assumes a limited syntax style in the outer SELECT clause of the query - only named variables, no aggregations, and a single space character separating all variable names. E.g.: + +SELECT ?x ?y ?z +WHERE +{ ... } + +The word "DISTINCT" will also be cut from the query, if present. 
+ +Should a more complex query be necessary, an outer, wrapping SELECT query would let this script continue to function. +""" + +__version__ = "0.3.0" + +import argparse +import binascii +import os +import logging + +import pandas as pd +import rdflib.plugins.sparql + +import case_utils + +NS_XSD = rdflib.XSD + +_logger = logging.getLogger(os.path.basename(__file__)) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--debug", action="store_true") + parser.add_argument("--disallow-empty-results", action="store_true", help="Raise error if no results are returned for query.") + parser.add_argument("out_table", help="Expected extensions are .html for HTML tables or .md for Markdown tables.") + parser.add_argument("in_sparql") + parser.add_argument("in_graph", nargs="+") + args = parser.parse_args() + + logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) + + graph = rdflib.Graph() + for in_graph_filename in args.in_graph: + graph.parse(in_graph_filename, format=case_utils.guess_format(in_graph_filename)) + + # Inherit prefixes defined in input context dictionary. + nsdict = {k:v for (k,v) in graph.namespace_manager.namespaces()} + + select_query_text = None + with open(args.in_sparql, "r") as in_fh: + select_query_text = in_fh.read().strip() + _logger.debug("select_query_text = %r." % select_query_text) + + # Build columns list from SELECT line. 
+ select_query_text_lines = select_query_text.split("\n") + select_line = [line for line in select_query_text_lines if line.startswith("SELECT ")][0] + variables = select_line.replace(" DISTINCT", "").replace("SELECT ", "").split(" ") + + tally = 0 + records = [] + select_query_object = rdflib.plugins.sparql.prepareQuery(select_query_text, initNs=nsdict) + for (row_no, row) in enumerate(graph.query(select_query_object)): + tally = row_no + 1 + record = [] + for (column_no, column) in enumerate(row): + if column is None: + column_value = "" + elif isinstance(column, rdflib.term.Literal) and column.datatype == NS_XSD.hexBinary: + # Use hexlify to convert xsd:hexBinary to ASCII. + # The render to ASCII is in support of this script rendering results for website viewing. + # .decode() is because hexlify returns bytes. + column_value = binascii.hexlify(column.toPython()).decode() + else: + column_value = column.toPython() + if row_no == 0: + _logger.debug("row[0]column[%d] = %r." % (column_no, column_value)) + record.append(column_value) + records.append(record) + if tally == 0: + if args.disallow_empty_results: + raise ValueError("Failed to return any results.") + + df = pd.DataFrame(records, columns=variables) + + table_text = None + if args.out_table.endswith(".html"): + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html + # Add CSS classes for CASE website Bootstrap support. + table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) + elif args.out_table.endswith(".md"): + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html + # https://pypi.org/project/tabulate/ + # Assume Github-flavored Markdown. 
+ table_text = df.to_markdown(tablefmt="github") + if table_text is None: + raise NotImplementedError("Unsupported output extension for output filename %r.", args.out_table) + + with open(args.out_table, "w") as out_fh: + out_fh.write(table_text) + +if __name__ == "__main__": + main() diff --git a/dependencies/CASE-Examples-QC b/dependencies/CASE-Examples-QC index 574a6a1..7a678bc 160000 --- a/dependencies/CASE-Examples-QC +++ b/dependencies/CASE-Examples-QC @@ -1 +1 @@ -Subproject commit 574a6a14920f43c7a7b06db1cfca5c96b37cf1b7 +Subproject commit 7a678bc0bb99b9a722b6e5ba4b05ec4cd7525bd3 diff --git a/setup.cfg b/setup.cfg index a6c4bb9..5837731 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,12 +18,18 @@ classifiers = # TODO The constraint on pyparsing can be removed when rdflib Issue #1190 is resolved. # https://github.com/RDFLib/rdflib/issues/1190 install_requires = + # Note that numpy (pandas dependency) is only supported in Python >= 3.7. + pandas;python_version>='3.7' pyparsing < 3.0.0 rdflib-jsonld requests + tabulate packages = find: python_requires = >=3.6 [options.entry_points] console_scripts = case_file = case_utils.case_file:main + case_sparql_construct = case_utils.case_sparql_construct:main + # Note that numpy (pandas dependency, and pandas is dependency of case_sparql_select) is only supported in Python >= 3.7. 
+ case_sparql_select = case_utils.case_sparql_select:main diff --git a/tests/Makefile b/tests/Makefile index 8a83ec6..7857855 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -20,19 +20,23 @@ srcdir := $(shell pwd) PYTHON3 ?= $(shell which python3.9 2>/dev/null || which python3.8 2>/dev/null || which python3.7 2>/dev/null || which python3.6 2>/dev/null || which python3) all: \ - all-case_file + all-case_file \ + all-case_sparql_construct \ + all-case_sparql_select .PHONY: \ all-case_file \ + all-case_sparql_construct \ + all-case_sparql_select \ check-case_file \ + check-case_sparql_construct \ + check-case_sparql_select \ check-isomorphic_diff \ download .venv.done.log: \ $(top_srcdir)/.git_submodule_init.done.log \ $(top_srcdir)/case_utils/__init__.py \ - $(top_srcdir)/case_utils/case_file/__init__.py \ - $(top_srcdir)/case_utils/local_uuid.py \ $(top_srcdir)/setup.cfg \ $(top_srcdir)/setup.py \ requirements.txt @@ -50,7 +54,9 @@ all: \ --requirement requirements.txt source venv/bin/activate \ && cd $(top_srcdir) \ - && python3 setup.py install + && pip install \ + --editable \ + . touch $@ all-case_file: \ @@ -58,13 +64,30 @@ all-case_file: \ $(MAKE) \ --directory case_file +all-case_sparql_construct: \ + .venv.done.log + $(MAKE) \ + --directory case_sparql_construct + +all-case_sparql_select: \ + .venv.done.log + # Only descend if python>=3.7, due to pandas dependency unsatisfiable in 3.6.x. + # Boolean explanation: sys.exit(False) has exit status 0. + venv/bin/python3 -c 'import sys ; sys.exit(not (sys.version_info < (3, 7)))' \ + || $(MAKE) \ + --directory case_sparql_select + # These check calls are provided in preferred run-order. 
check: \ check-isomorphic_diff \ - check-case_file + check-case_file \ + check-case_sparql_construct \ + check-case_sparql_select source venv/bin/activate \ && pytest \ --ignore case_file \ + --ignore case_sparql_construct \ + --ignore case_sparql_select \ --log-level=DEBUG check-case_file: \ @@ -73,6 +96,21 @@ check-case_file: \ --directory case_file \ check +check-case_sparql_construct: \ + .venv.done.log + $(MAKE) \ + --directory case_sparql_construct \ + check + +check-case_sparql_select: \ + .venv.done.log + # Only descend if python>=3.7, due to pandas dependency unsatisfiable in 3.6.x. + # Boolean explanation: sys.exit(False) has exit status 0. + venv/bin/python3 -c 'import sys ; sys.exit(not (sys.version_info < (3, 7)))' \ + || $(MAKE) \ + --directory case_sparql_select \ + check + check-isomorphic_diff: \ .venv.done.log $(MAKE) \ @@ -80,6 +118,12 @@ check-isomorphic_diff: \ check clean: + @$(MAKE) \ + --directory case_sparql_select \ + clean + @$(MAKE) \ + --directory case_sparql_construct \ + clean @$(MAKE) \ --directory case_file \ clean diff --git a/tests/case_file/Makefile b/tests/case_file/Makefile index 71961a1..0f91a02 100644 --- a/tests/case_file/Makefile +++ b/tests/case_file/Makefile @@ -19,9 +19,7 @@ tests_srcdir := $(top_srcdir)/tests qc_srcdir := $(top_srcdir)/dependencies/CASE-Examples-QC -case_srcdir := $(qc_srcdir)/dependencies/CASE-Examples/dependencies/CASE - -RDF_TOOLKIT_JAR := $(case_srcdir)/lib/rdf-toolkit.jar +RDF_TOOLKIT_JAR := $(qc_srcdir)/dependencies/CASE-Examples/dependencies/CASE-0.3.0/CASE/lib/rdf-toolkit.jar COMM ?= $(shell which gcomm 2>/dev/null || which comm) ifeq ($(COMM),) @@ -96,14 +94,19 @@ kb.ttl: \ mv _$@ $@ sample.txt.done.log: \ + $(tests_srcdir)/.venv.done.log \ sample_txt.py - python3 sample_txt.py sample.txt + source $(tests_srcdir)/venv/bin/activate \ + && python3 sample_txt.py \ + sample.txt touch $@ # Display difference between rdflib default output and compacted output. 
sample.txt.json: \ $(tests_srcdir)/src/compact.py \ $(tests_srcdir)/src/isomorphic_diff.py \ + $(top_srcdir)/case_utils/case_file/__init__.py \ + $(top_srcdir)/case_utils/local_uuid.py \ sample.txt-nocompact.json rm -f $@ _$@ __$@ export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ @@ -134,6 +137,7 @@ sample.txt.ttl: \ $(RDF_TOOLKIT_JAR) \ $(tests_srcdir)/.venv.done.log \ $(top_srcdir)/case_utils/case_file/__init__.py \ + $(top_srcdir)/case_utils/local_uuid.py \ sample.txt.done.log rm -f _$@ __$@ export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ @@ -155,6 +159,7 @@ sample.txt-disable_hashes.ttl: \ $(RDF_TOOLKIT_JAR) \ $(tests_srcdir)/.venv.done.log \ $(top_srcdir)/case_utils/case_file/__init__.py \ + $(top_srcdir)/case_utils/local_uuid.py \ sample.txt.done.log rm -f _$@ __$@ export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ @@ -178,6 +183,7 @@ sample.txt-nocompact.json: \ $(tests_srcdir)/.venv.done.log \ $(tests_srcdir)/src/isomorphic_diff.py \ $(top_srcdir)/case_utils/case_file/__init__.py \ + $(top_srcdir)/case_utils/local_uuid.py \ sample.txt.done.log rm -f _$@ export DEMO_UUID_REQUESTING_NONRANDOM=NONRANDOM_REQUESTED \ diff --git a/tests/case_file/kb.json b/tests/case_file/kb.json index 1739ecc..a2d427a 100644 --- a/tests/case_file/kb.json +++ b/tests/case_file/kb.json @@ -4,7 +4,8 @@ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "uco-core": "https://unifiedcyberontology.org/ontology/uco/core#", "uco-observable": "https://unifiedcyberontology.org/ontology/uco/observable#", - "uco-types": "https://unifiedcyberontology.org/ontology/uco/types#" + "uco-types": "https://unifiedcyberontology.org/ontology/uco/types#", + "xsd": "http://www.w3.org/2001/XMLSchema#" }, "@graph": [ { @@ -18,7 +19,7 @@ "@value": "2010-01-02T03:04:56+00:00" }, "uco-observable:sizeInBytes": { - "@type": "xsd:long", + "@type": "xsd:integer", "@value": "4" } } @@ -76,7 +77,7 @@ } ], "uco-observable:sizeInBytes": { - "@type": "xsd:long", + "@type": 
"xsd:integer", "@value": "4" } }, @@ -88,7 +89,7 @@ "@value": "2010-01-02T03:04:56+00:00" }, "uco-observable:sizeInBytes": { - "@type": "xsd:long", + "@type": "xsd:integer", "@value": "4" } } diff --git a/tests/case_file/kb.ttl b/tests/case_file/kb.ttl index ccd47b8..4e0b8a8 100644 --- a/tests/case_file/kb.ttl +++ b/tests/case_file/kb.ttl @@ -14,7 +14,7 @@ kb:file-16d49634-ba5d-5f46-ab4e-7a577a4e096d a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; - uco-observable:sizeInBytes "4"^^xsd:long ; + uco-observable:sizeInBytes "4"^^xsd:integer ; ] ; . @@ -45,13 +45,13 @@ kb:file-57400969-69d0-5d5d-95c4-9dd7de330d3d uco-types:hashValue "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff"^^xsd:hexBinary ; ] ; - uco-observable:sizeInBytes "4"^^xsd:long ; + uco-observable:sizeInBytes "4"^^xsd:integer ; ] , [ a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; - uco-observable:sizeInBytes "4"^^xsd:long ; + uco-observable:sizeInBytes "4"^^xsd:integer ; ] ; . diff --git a/tests/case_file/sample.txt-disable_hashes.ttl b/tests/case_file/sample.txt-disable_hashes.ttl index 97a0180..4462c33 100644 --- a/tests/case_file/sample.txt-disable_hashes.ttl +++ b/tests/case_file/sample.txt-disable_hashes.ttl @@ -12,7 +12,7 @@ kb:file-16d49634-ba5d-5f46-ab4e-7a577a4e096d a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; - uco-observable:sizeInBytes "4"^^xsd:long ; + uco-observable:sizeInBytes "4"^^xsd:integer ; ] ; . 
diff --git a/tests/case_file/sample.txt-nocompact.json b/tests/case_file/sample.txt-nocompact.json index 3b2a1fb..563291d 100644 --- a/tests/case_file/sample.txt-nocompact.json +++ b/tests/case_file/sample.txt-nocompact.json @@ -40,10 +40,7 @@ "@id": "_:Na99d1f2a83814ac491c9b02662ea8587" } ], - "https://unifiedcyberontology.org/ontology/uco/observable#sizeInBytes": { - "@type": "http://www.w3.org/2001/XMLSchema#long", - "@value": "4" - } + "https://unifiedcyberontology.org/ontology/uco/observable#sizeInBytes": 4 }, { "@id": "_:Nad6171b4933a4a84b83faa5d48fd4ddb", @@ -101,10 +98,7 @@ "@type": "http://www.w3.org/2001/XMLSchema#dateTime", "@value": "2010-01-02T03:04:56+00:00" }, - "https://unifiedcyberontology.org/ontology/uco/observable#sizeInBytes": { - "@type": "http://www.w3.org/2001/XMLSchema#long", - "@value": "4" - } + "https://unifiedcyberontology.org/ontology/uco/observable#sizeInBytes": 4 } ] -} \ No newline at end of file +} diff --git a/tests/case_file/sample.txt.json b/tests/case_file/sample.txt.json index f56d602..dfc3075 100644 --- a/tests/case_file/sample.txt.json +++ b/tests/case_file/sample.txt.json @@ -40,10 +40,7 @@ "@id": "_:N77e79676ee5449ea9f6db813fb9dc095" } ], - "uco-observable:sizeInBytes": { - "@type": "xsd:long", - "@value": "4" - } + "uco-observable:sizeInBytes": 4 }, { "@id": "_:N839986b2d63a4963beafbb65016449da", @@ -101,10 +98,7 @@ "@type": "xsd:dateTime", "@value": "2010-01-02T03:04:56+00:00" }, - "uco-observable:sizeInBytes": { - "@type": "xsd:long", - "@value": "4" - } + "uco-observable:sizeInBytes": 4 } ] -} \ No newline at end of file +} diff --git a/tests/case_file/sample.txt.ttl b/tests/case_file/sample.txt.ttl index f9ecfa8..f5d275b 100644 --- a/tests/case_file/sample.txt.ttl +++ b/tests/case_file/sample.txt.ttl @@ -35,13 +35,13 @@ kb:file-57400969-69d0-5d5d-95c4-9dd7de330d3d uco-types:hashValue 
"ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff"^^xsd:hexBinary ; ] ; - uco-observable:sizeInBytes "4"^^xsd:long ; + uco-observable:sizeInBytes "4"^^xsd:integer ; ] , [ a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; - uco-observable:sizeInBytes "4"^^xsd:long ; + uco-observable:sizeInBytes "4"^^xsd:integer ; ] ; . diff --git a/tests/case_file/sample_txt.py b/tests/case_file/sample_txt.py index ac6722e..625caa9 100644 --- a/tests/case_file/sample_txt.py +++ b/tests/case_file/sample_txt.py @@ -17,13 +17,14 @@ Mtime should be 2010-01-02T03:04:56Z. """ -import datetime import os import sys +import dateutil.parser + with open(sys.argv[1], "w") as out_fh: out_fh.write("test") -target_datetime = datetime.datetime.fromisoformat("2010-01-02T03:04:56+00:00") +target_datetime = dateutil.parser.isoparse("2010-01-02T03:04:56+00:00") target_timestamp = target_datetime.timestamp() os.utime(sys.argv[1], (target_timestamp, target_timestamp)) diff --git a/tests/case_sparql_construct/.gitignore b/tests/case_sparql_construct/.gitignore new file mode 100644 index 0000000..6553b72 --- /dev/null +++ b/tests/case_sparql_construct/.gitignore @@ -0,0 +1,2 @@ +output.json +output.ttl diff --git a/tests/case_sparql_construct/Makefile b/tests/case_sparql_construct/Makefile new file mode 100644 index 0000000..55343fc --- /dev/null +++ b/tests/case_sparql_construct/Makefile @@ -0,0 +1,49 @@ +#!/usr/bin/make -f + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. 
NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +SHELL := /bin/bash + +top_srcdir := $(shell cd ../.. ; pwd) + +tests_srcdir := $(top_srcdir)/tests + +all: \ + output.ttl + +check: \ + output.json \ + output.ttl + source $(tests_srcdir)/venv/bin/activate \ + && pytest \ + --log-level=DEBUG + +clean: + @rm -rf \ + __pycache__ + @rm -f \ + output.* \ + _* + +output.%: \ + $(tests_srcdir)/.venv.done.log \ + $(top_srcdir)/case_utils/case_sparql_construct/__init__.py \ + input-1.sparql \ + input-2.ttl \ + input-3.json + source $(tests_srcdir)/venv/bin/activate \ + && case_sparql_construct \ + _$@ \ + input-1.sparql \ + input-2.ttl \ + input-3.json + mv _$@ $@ diff --git a/tests/case_sparql_construct/README.md b/tests/case_sparql_construct/README.md new file mode 100644 index 0000000..bab41ac --- /dev/null +++ b/tests/case_sparql_construct/README.md @@ -0,0 +1,16 @@ +# Test of CASE SPARQL CONSTRUCT query runner + + +## Test procedure + +The tests in this directory confirms `case_sparql_construct` satisfies a base set of expected command line functionality. +1. Inputs - `input-2.ttl` and `input-3.json` contain a small graph split across two files, and `input-1.sparql` contains a SPARQL `CONSTRUCT` query. +2. Outputs - `output.ttl` is generated by using `case_sparql_construct` to run `input-1.sparql` against the two `input-*.*` graph files. This affirms that `case_sparql_construct` can read multiple input files of differing formats. +3. Output verification - two name-pairs should have vcard records generated. The test `test_templates_with_blank_nodes_result()` confirms those pairs are in the output graph. 
+ + +## References + +The data and query used in `input-2.ttl`, `input-3.json` and `input-1.sparql` are copied from ["SPARQL Query Language for RDF", Section 10.2.1](https://www.w3.org/TR/rdf-sparql-query/#tempatesWithBNodes), with these modifications: +* `input-2.ttl` contains the original example's `_:a` (Alice) records, but drops the `_:b` (Bob) records. +* `input-3.json` is a conversion of the original Turtle example's `_:b` records to JSON-LD. diff --git a/tests/case_sparql_construct/input-1.sparql b/tests/case_sparql_construct/input-1.sparql new file mode 100644 index 0000000..aee09cc --- /dev/null +++ b/tests/case_sparql_construct/input-1.sparql @@ -0,0 +1,14 @@ +# Query source: +# https://www.w3.org/TR/rdf-sparql-query/#tempatesWithBNodes + +PREFIX foaf: <http://xmlns.com/foaf/0.1/> +PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> + +CONSTRUCT { ?x vcard:N _:v . + _:v vcard:givenName ?gname . + _:v vcard:familyName ?fname } +WHERE + { + { ?x foaf:firstname ?gname } UNION { ?x foaf:givenname ?gname } . + { ?x foaf:surname ?fname } UNION { ?x foaf:family_name ?fname } . + } diff --git a/tests/case_sparql_construct/input-2.ttl b/tests/case_sparql_construct/input-2.ttl new file mode 100644 index 0000000..8ac286f --- /dev/null +++ b/tests/case_sparql_construct/input-2.ttl @@ -0,0 +1,4 @@ +@prefix foaf: <http://xmlns.com/foaf/0.1/> . + +_:a foaf:givenname "Alice" . +_:a foaf:family_name "Hacker" .
diff --git a/tests/case_sparql_construct/input-3.json b/tests/case_sparql_construct/input-3.json new file mode 100644 index 0000000..6b78699 --- /dev/null +++ b/tests/case_sparql_construct/input-3.json @@ -0,0 +1,11 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/" + }, + "@graph": [ + { + "foaf:firstname": "Bob", + "foaf:surname": "Hacker" + } + ] +} diff --git a/tests/case_sparql_construct/test_case_sparql_construct.py b/tests/case_sparql_construct/test_case_sparql_construct.py new file mode 100644 index 0000000..9eb2002 --- /dev/null +++ b/tests/case_sparql_construct/test_case_sparql_construct.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +import rdflib.plugins.sparql + +import case_utils + +def _test_templates_with_blank_nodes_result(filename): + ground_truth_positive = { + ("Alice", "Hacker"), + ("Bob", "Hacker") + } + ground_truth_negative = set() + + graph = rdflib.Graph() + graph.parse(filename, format=case_utils.guess_format(filename)) + + computed = set() + query_string = """\ +PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#> + +SELECT ?lGivenName ?lFamilyName +WHERE { + ?nNode + vcard:givenName ?lGivenName ; + vcard:familyName ?lFamilyName ; + .
+} +""" + for result in graph.query(query_string): + ( + l_given_name, + l_family_name + ) = result + computed.add(( + l_given_name.toPython(), + l_family_name.toPython() + )) + assert computed == ground_truth_positive + +def test_templates_with_blank_nodes_result_json(): + _test_templates_with_blank_nodes_result("output.json") +def test_templates_with_blank_nodes_result_turtle(): + _test_templates_with_blank_nodes_result("output.ttl") diff --git a/tests/case_sparql_select/.check-output.html b/tests/case_sparql_select/.check-output.html new file mode 100644 index 0000000..aff9beb --- /dev/null +++ b/tests/case_sparql_select/.check-output.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
?name?mbox
0Johnny Lee Outlawmailto:jlow@example.com
1Peter Goodguymailto:peter@example.org
\ No newline at end of file diff --git a/tests/case_sparql_select/.check-output.md b/tests/case_sparql_select/.check-output.md new file mode 100644 index 0000000..77b05f4 --- /dev/null +++ b/tests/case_sparql_select/.check-output.md @@ -0,0 +1,4 @@ +| | ?name | ?mbox | +|----|-------------------|--------------------------| +| 0 | Johnny Lee Outlaw | mailto:jlow@example.com | +| 1 | Peter Goodguy | mailto:peter@example.org | \ No newline at end of file diff --git a/tests/case_sparql_select/.gitignore b/tests/case_sparql_select/.gitignore new file mode 100644 index 0000000..a85ef3b --- /dev/null +++ b/tests/case_sparql_select/.gitignore @@ -0,0 +1,2 @@ +output.html +output.md diff --git a/tests/case_sparql_select/Makefile b/tests/case_sparql_select/Makefile new file mode 100644 index 0000000..f88732d --- /dev/null +++ b/tests/case_sparql_select/Makefile @@ -0,0 +1,65 @@ +#!/usr/bin/make -f + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +SHELL := /bin/bash + +top_srcdir := $(shell cd ../.. 
; pwd) + +tests_srcdir := $(top_srcdir)/tests + +all: \ + output.html \ + output.md + +.PHONY: \ + check-html \ + check-markdown + +.PRECIOUS: \ + output.% + +check: \ + check-html \ + check-markdown + +check-html: \ + .check-output.html \ + output.html + diff $^ + +check-markdown: \ + .check-output.md \ + output.md + diff $^ + +clean: + @rm -rf \ + __pycache__ + @rm -f \ + *.html \ + *.md \ + _* + +output.%: \ + $(tests_srcdir)/.venv.done.log \ + $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ + input-1.sparql \ + input-2.ttl \ + input-3.json + source $(tests_srcdir)/venv/bin/activate \ + && case_sparql_select \ + _$@ \ + input-1.sparql \ + input-2.ttl \ + input-3.json + mv _$@ $@ diff --git a/tests/case_sparql_select/input-1.sparql b/tests/case_sparql_select/input-1.sparql new file mode 100644 index 0000000..ab97ec5 --- /dev/null +++ b/tests/case_sparql_select/input-1.sparql @@ -0,0 +1,9 @@ +# Query source: +# https://www.w3.org/TR/rdf-sparql-query/#MultipleMatches + +PREFIX foaf: +SELECT ?name ?mbox +WHERE + { ?x foaf:name ?name . + ?x foaf:mbox ?mbox } +ORDER BY ?name ?mbox diff --git a/tests/case_sparql_select/input-2.ttl b/tests/case_sparql_select/input-2.ttl new file mode 100644 index 0000000..d1ccff9 --- /dev/null +++ b/tests/case_sparql_select/input-2.ttl @@ -0,0 +1,5 @@ +@prefix foaf: . + +_:a foaf:name "Johnny Lee Outlaw" . +_:a foaf:mbox . +_:c foaf:mbox . 
diff --git a/tests/case_sparql_select/input-3.json b/tests/case_sparql_select/input-3.json new file mode 100644 index 0000000..8ddfbf4 --- /dev/null +++ b/tests/case_sparql_select/input-3.json @@ -0,0 +1,13 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/" + }, + "@graph": [ + { + "foaf:name": "Peter Goodguy", + "foaf:mbox": { + "@id": "mailto:peter@example.org" + } + } + ] +} diff --git a/tests/case_utils/test_guess_format.py b/tests/case_utils/test_guess_format.py index a62504f..d54f4aa 100644 --- a/tests/case_utils/test_guess_format.py +++ b/tests/case_utils/test_guess_format.py @@ -18,6 +18,7 @@ PATH_TO_TTL = "/nonexistent/foo.ttl" PATH_TO_JSON = "/nonexistent/foo.json" +PATH_TO_JSONLD = "/nonexistent/foo.jsonld" PATH_TO_XHTML = "/nonexistent/foo.xhtml" FMAP_XHTML_GRDDL = {"xhtml": "grddl"} @@ -41,6 +42,10 @@ def test_rdflib_util_guess_format_ttl_fmap(): def test_rdflib_util_guess_format_json(): assert rdflib.util.guess_format(PATH_TO_JSON) == "json-ld", "Failed to recognize .json RDF file extension" +@pytest.mark.xfail(reason="rdflib 5.0.0 known to not recognize .jsonld", strict=True) +def test_rdflib_util_guess_format_jsonld(): + assert rdflib.util.guess_format(PATH_TO_JSONLD) == "json-ld", "Failed to recognize .jsonld RDF file extension" + def test_case_utils_guess_format_ttl_default(): assert case_utils.guess_format(PATH_TO_TTL) == "turtle", "Failed to recognize .ttl RDF file extension" @@ -54,3 +59,10 @@ def test_case_utils_guess_format_json_default(): @pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) def test_case_utils_guess_format_json_fmap(): assert case_utils.guess_format(PATH_TO_JSON, FMAP_XHTML_GRDDL) == "json-ld", "Failed to recognize .json RDF file extension when using fmap" + +def test_case_utils_guess_format_jsonld_default(): + assert case_utils.guess_format(PATH_TO_JSONLD) == "json-ld", "Failed to recognize .jsonld RDF file extension" + 
+@pytest.mark.xfail(reason="Preserving behavior - rdflib 5.0.0 guess_format fmap argument overwrites base module's extension map", strict=True) +def test_case_utils_guess_format_jsonld_fmap(): + assert case_utils.guess_format(PATH_TO_JSONLD, FMAP_XHTML_GRDDL) == "json-ld", "Failed to recognize .jsonld RDF file extension when using fmap" diff --git a/tests/hexbinary/test_hexbinary.py b/tests/hexbinary/test_hexbinary.py new file mode 100644 index 0000000..04784bc --- /dev/null +++ b/tests/hexbinary/test_hexbinary.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +""" +This test suite tests some assumptions that might be made about hexBinary value comparison in Python's rdflib and its SPARQL engine. + +This script is expected to have pytest exit in a success state, reporting some tests passing, and some tests XFailing (i.e. being expected to fail). + +The overall finding is: in rdflib and rdflib's SPARQL engine, xsd:hexBinaryCanonical is not given any support not given to arbitrary string datatypes. This, and more specific, findings are affirmed by the tests: + +* Some of the tests serve as syntax reminders for SPARQL and pytest. + - test_sparql_syntax_bind_boolean + - test_pytest_syntax_xfail + - test_sparql_syntax_integer_coercion + - test_sparql_syntax_integer_cast +* SPARQL Literal datatype-casting can coerce known types, but will not cast strings of unknown datatypes. 
+ - test_sparql_syntax_integer_cast + - test_sparql_cast_custom_type +* rdflib WILL match xsd:hexBinary data as casing-insensitive. So, Literals with values "ab" and "AB" match if both have the datatype xsd:hexBinary. + - test_rdflib_literal_hexbinary +* rdflib WILL NOT match xsd:hexBinaryCanonical data with xsd:hexBinary data, either as Literal objects or with a call to .toPython(). + - test_rdflib_literal_hexbinarycanonical + - test_rdflib_literal_topython_hexbinarycanonical +* The rdflib SPARQL engine WILL match xsd:hexBinary data as casing-insensitive. So, "ab" and "AB" match if both have the datatype xsd:hexBinary. + - test_sparql_compare_hexbinary_matchcase + - test_sparql_compare_hexbinary_mixcase + - test_graph_repeat + - test_graph_all_hexbinary_literals +* The rdflib SPARQL engine WILL match xsd:hexBinaryCanonical data with xsd:hexBinaryCanonical data, when casing matches. + - test_sparql_compare_hexbinarycanonical_matchcase +* The rdflib SPARQL engine WILL NOT match xsd:hexBinaryCanonical data with xsd:hexBinaryCanonical data, when casing does not match. + - test_sparql_compare_hexbinarycanonical_mixcase +* The rdflib SPARQL engine WILL NOT compare xsd:hexBinaryCanonical data with xsd:hexBinary data. + - test_sparql_compare_hb_hbc_mixcase + - test_sparql_compare_hb_hbc_mixcase_cast + - test_graph_hexbinarycanonical +""" + +import logging +import os + +import pytest +import rdflib.plugins.sparql + +_logger = logging.getLogger(os.path.basename(__file__)) + +# Variables used in several tests. 
+l_hb_lowercase = rdflib.Literal("ab", datatype=rdflib.XSD.hexBinary) +l_hb_uppercase = rdflib.Literal("AB", datatype=rdflib.XSD.hexBinary) +l_hbc_uppercase = rdflib.Literal("AB", datatype=rdflib.XSD.hexBinaryCanonical) +n_canonical1 = rdflib.URIRef("urn:example:canonical1") +n_lowercase1 = rdflib.URIRef("urn:example:lowercase1") +n_lowercase2 = rdflib.URIRef("urn:example:lowercase2") +n_uppercase1 = rdflib.URIRef("urn:example:uppercase1") +p_predicate = rdflib.URIRef("urn:example:predicate1") + +def test_sparql_syntax_bind_boolean(): + """ + This test serves as a syntax reminder for binding boolean values. + """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( 1 = 1 AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +@pytest.mark.xfail(reason="hard-coded failure") +def test_pytest_syntax_xfail(): + """ + This test serves as a syntax reminder for the XFail decorator. + """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( 1 = 2 AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +def test_sparql_syntax_integer_coercion(): + """ + This test serves as a syntax reminder for type coercions. + """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( 1 = "1"^^xsd:integer AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +def test_sparql_syntax_integer_cast(): + """ + This test serves as a syntax reminder for the casting form of type coercions. 
+ """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( 1 = xsd:integer("1") AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +@pytest.mark.xfail +def test_sparql_cast_custom_type(): + """ + This test checks for nonexistent literal-datatype assignments. + """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( 1 = xsd:integer("1"^^xsd:hexBinaryTypoXXXX) AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +def test_sparql_compare_hexbinary_mixcase(): + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( "ab"^^xsd:hexBinary = "AB"^^xsd:hexBinary AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +def test_sparql_compare_hexbinary_matchcase(): + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( "AB"^^xsd:hexBinary = "AB"^^xsd:hexBinary AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +def test_sparql_compare_hexbinarycanonical_matchcase(): + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( "AB"^^xsd:hexBinaryCanonical = "AB"^^xsd:hexBinaryCanonical AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +@pytest.mark.xfail +def test_sparql_compare_hexbinarycanonical_mixcase(): + """ + This test shows hexBinaryCanonical does not induce a casing-insensitive comparison. 
+ """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( "ab"^^xsd:hexBinaryCanonical = "AB"^^xsd:hexBinaryCanonical AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +@pytest.mark.xfail +def test_sparql_compare_hb_hbc_mixcase(): + """ + This test confirms that literal-comparison takes into account datatype when one type is unknown. + """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( "AB"^^xsd:hexBinary = "AB"^^xsd:hexBinaryCanonical AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +@pytest.mark.xfail +def test_sparql_compare_hb_hbc_mixcase_cast(): + """ + This test is a bit redundant with test_sparql_cast_custom_type, but is here as an explicit demonstration of failure to cast a hexBinary value. + """ + confirmed = None + graph = rdflib.Graph() + for result in graph.query("""\ +SELECT ?lValue +WHERE { + BIND( "ab"^^xsd:hexBinary = xsd:hexBinary("AB"^^xsd:hexBinaryCanonical) AS ?lValue ) +} +"""): + (l_value,) = result + confirmed = l_value.toPython() + assert confirmed + +def test_rdflib_literal_hexbinary(): + _logger.debug("l_hb_lowercase = %r." % l_hb_lowercase) + _logger.debug("l_hb_uppercase = %r." % l_hb_uppercase) + _logger.debug("l_hb_lowercase.toPython() = %r." % l_hb_lowercase.toPython()) + _logger.debug("l_hb_uppercase.toPython() = %r." % l_hb_uppercase.toPython()) + + assert l_hb_lowercase == l_hb_lowercase + assert l_hb_lowercase.toPython() == l_hb_lowercase.toPython() + + assert l_hb_lowercase == l_hb_uppercase + assert l_hb_lowercase.toPython() == l_hb_uppercase.toPython() + +@pytest.mark.xfail +def test_rdflib_literal_hexbinarycanonical(): + _logger.debug("l_hb_uppercase = %r." % l_hb_uppercase) + _logger.debug("l_hbc_uppercase = %r." 
% l_hbc_uppercase) + + assert l_hb_uppercase == l_hbc_uppercase + +@pytest.mark.xfail +def test_rdflib_literal_topython_hexbinarycanonical(): + _logger.debug("l_hb_lowercase.toPython() = %r." % l_hb_lowercase.toPython()) + _logger.debug("l_hb_uppercase.toPython() = %r." % l_hb_uppercase.toPython()) + + assert l_hb_uppercase.toPython() == l_hbc_uppercase.toPython() + +def _query_all_value_matches(graph): + """ + Return set of all node names (as strings) that have a matching value, where + "matching" is determined by the SPARQL engine's type and data coercions. + """ + computed = set() + for result in graph.query("""\ +SELECT ?nNode1 ?nNode2 +WHERE { + ?nNode1 ?p ?lValue . + ?nNode2 ?p ?lValue . + FILTER ( ?nNode1 != ?nNode2 ) +}"""): + (n_node1, n_node2) = result + computed.add(n_node1.toPython()) + computed.add(n_node2.toPython()) + return computed + +def test_graph_repeat(): + """ + Two nodes are given the same literal value, and are found to match on literal values. + """ + graph = rdflib.Graph() + graph.add(( + n_lowercase1, + p_predicate, + l_hb_lowercase + )) + graph.add(( + n_lowercase2, + p_predicate, + l_hb_lowercase + )) + expected = { + "urn:example:lowercase1", + "urn:example:lowercase2" + } + computed = _query_all_value_matches(graph) + assert computed == expected + +def test_graph_all_hexbinary_literals(): + """ + Two nodes with the same literal value, and another node with the uppercase of the literal hexBinary value, are found to match on literal values. 
+ """ + graph = rdflib.Graph() + graph.add(( + n_lowercase1, + p_predicate, + l_hb_lowercase + )) + graph.add(( + n_lowercase2, + p_predicate, + l_hb_lowercase + )) + graph.add(( + n_uppercase1, + p_predicate, + l_hb_uppercase + )) + + expected = { + "urn:example:lowercase1", + "urn:example:lowercase2", + "urn:example:uppercase1" + } + + computed = _query_all_value_matches(graph) + assert computed == expected + +@pytest.mark.xfail +def test_graph_hexbinarycanonical(): + graph = rdflib.Graph() + graph.add(( + n_lowercase1, + p_predicate, + l_hb_lowercase + )) + graph.add(( + n_lowercase2, + p_predicate, + l_hb_lowercase + )) + graph.add(( + n_uppercase1, + p_predicate, + l_hb_uppercase + )) + graph.add(( + n_canonical1, + p_predicate, + l_hbc_uppercase + )) + + expected = { + "urn:example:canonical1", + "urn:example:lowercase1", + "urn:example:lowercase2", + "urn:example:uppercase1" + } + + computed = _query_all_value_matches(graph) + assert computed == expected diff --git a/tests/requirements.txt b/tests/requirements.txt index aed3c61..b31bac3 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,2 +1,3 @@ PyLD pytest +python-dateutil diff --git a/tests/src/compact.py b/tests/src/compact.py index 24b1509..16c25ce 100644 --- a/tests/src/compact.py +++ b/tests/src/compact.py @@ -64,6 +64,10 @@ def _accrue_local_context(doc_object): _logger.debug("total_context = %r." % total_context) compacted = pyld.jsonld.compact(doc, total_context) + + # Add xsd prefix back in to context dictionary. .compact() removes it, and this causes some xsd definitions like xsd:long to no longer resolve in SPARQL queries. + compacted["@context"]["xsd"] = "http://www.w3.org/2001/XMLSchema#" + out_fh.write(json.dumps(compacted, indent=4)) if __name__ == "__main__":