From ec87d99b36fcce4871790765ea0afecfae74fcd0 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 08:53:51 -0500 Subject: [PATCH 01/19] Add and test --use-prefixes flag for case_sparql_select A follow-on patch will regenerate Make-managed files. Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 10 ++++++ .../.check-prefixed_results.html | 18 +++++++++++ .../.check-prefixed_results.md | 4 +++ tests/case_utils/case_sparql_select/Makefile | 32 +++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results.html create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results.md diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index b753664..dcc704e 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -70,6 +70,11 @@ def main() -> None: action="store_true", help="Raise error if no results are returned for query.", ) + parser.add_argument( + "--use-prefixes", + action="store_true", + help="Abbreviate node IDs according to graph's encoded prefixes. (This will use prefixes in the graph, not the query.)", + ) parser.add_argument( "out_table", help="Expected extensions are .html for HTML tables or .md for Markdown tables.", @@ -124,6 +129,11 @@ def main() -> None: # The render to ASCII is in support of this script rendering results for website viewing. # .decode() is because hexlify returns bytes. 
column_value = binascii.hexlify(column.toPython()).decode() + elif isinstance(column, rdflib.URIRef): + if args.use_prefixes: + column_value = graph.namespace_manager.qname(column.toPython()) + else: + column_value = column.toPython() else: column_value = column.toPython() if row_no == 0: diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.html b/tests/case_utils/case_sparql_select/.check-prefixed_results.html new file mode 100644 index 0000000..10e210b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
?nFile
0kb:file-1
1kb:file-2
\ No newline at end of file diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.md b/tests/case_utils/case_sparql_select/.check-prefixed_results.md new file mode 100644 index 0000000..f49a624 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.md @@ -0,0 +1,4 @@ +| | ?nFile | +|----|-----------| +| 0 | kb:file-1 | +| 1 | kb:file-2 | \ No newline at end of file diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 22a849b..4f36399 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -18,6 +18,8 @@ top_srcdir := $(shell cd ../../.. ; pwd) tests_srcdir := $(top_srcdir)/tests all: \ + prefixed_results.html \ + prefixed_results.md \ subclass-explicit-none.md \ subclass-implicit-any.md \ w3-output.html \ @@ -37,8 +39,23 @@ all: \ check: \ check-w3-html \ check-w3-markdown \ + check-prefixed_results \ check-subclass +check-prefixed_results: \ + check-prefixed_results-html \ + check-prefixed_results-md + +check-prefixed_results-html: \ + .check-prefixed_results.html \ + prefixed_results.html + diff $^ + +check-prefixed_results-md: \ + .check-prefixed_results.md \ + prefixed_results.md + diff $^ + check-subclass: \ check-subclass-explicit-none \ check-subclass-implicit-any @@ -71,6 +88,21 @@ clean: *.md \ _* +prefixed_results.%: \ + $(tests_srcdir)/.venv.done.log \ + $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ + $(top_srcdir)/case_utils/ontology/__init__.py \ + $(top_srcdir)/case_utils/ontology/version_info.py \ + subclass.json \ + subclass.sparql + source $(tests_srcdir)/venv/bin/activate \ + && case_sparql_select \ + --use-prefixes \ + _$@ \ + subclass.sparql \ + subclass.json + mv _$@ $@ + subclass-explicit-none.md: \ $(tests_srcdir)/.venv.done.log \ $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ From 46a9b8033df0ca752641d9664731c607b1f1d7b3 Mon Sep 17 00:00:00 2001 From: Alex Nelson 
Date: Tue, 6 Dec 2022 08:54:12 -0500 Subject: [PATCH 02/19] Regenerate Make-managed files Signed-off-by: Alex Nelson --- .../case_sparql_select/prefixed_results.html | 18 ++++++++++++++++++ .../case_sparql_select/prefixed_results.md | 4 ++++ 2 files changed, 22 insertions(+) create mode 100644 tests/case_utils/case_sparql_select/prefixed_results.html create mode 100644 tests/case_utils/case_sparql_select/prefixed_results.md diff --git a/tests/case_utils/case_sparql_select/prefixed_results.html b/tests/case_utils/case_sparql_select/prefixed_results.html new file mode 100644 index 0000000..10e210b --- /dev/null +++ b/tests/case_utils/case_sparql_select/prefixed_results.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
?nFile
0kb:file-1
1kb:file-2
\ No newline at end of file diff --git a/tests/case_utils/case_sparql_select/prefixed_results.md b/tests/case_utils/case_sparql_select/prefixed_results.md new file mode 100644 index 0000000..f49a624 --- /dev/null +++ b/tests/case_utils/case_sparql_select/prefixed_results.md @@ -0,0 +1,4 @@ +| | ?nFile | +|----|-----------| +| 0 | kb:file-1 | +| 1 | kb:file-2 | \ No newline at end of file From 66becce691871c25123ba28b78c27f32934504f8 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 10:19:50 -0500 Subject: [PATCH 03/19] Have case_sparql_select output end with newline No effects were observed on Make-managed files. Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 3 +++ .../case_sparql_select/.check-subclass-explicit-none.md | 2 +- .../case_sparql_select/.check-subclass-implicit-any.md | 2 +- tests/case_utils/case_sparql_select/.check-w3-output.html | 2 +- tests/case_utils/case_sparql_select/.check-w3-output.md | 2 +- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index b753664..92c0e6b 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -153,6 +153,9 @@ def main() -> None: with open(args.out_table, "w") as out_fh: out_fh.write(table_text) + if table_text[-1] != "\n": + # End file with newline. 
+ out_fh.write("\n") if __name__ == "__main__": diff --git a/tests/case_utils/case_sparql_select/.check-subclass-explicit-none.md b/tests/case_utils/case_sparql_select/.check-subclass-explicit-none.md index 5d9d1ef..587732b 100644 --- a/tests/case_utils/case_sparql_select/.check-subclass-explicit-none.md +++ b/tests/case_utils/case_sparql_select/.check-subclass-explicit-none.md @@ -1,3 +1,3 @@ | | ?nFile | |----|------------------------------| -| 0 | http://example.org/kb/file-1 | \ No newline at end of file +| 0 | http://example.org/kb/file-1 | diff --git a/tests/case_utils/case_sparql_select/.check-subclass-implicit-any.md b/tests/case_utils/case_sparql_select/.check-subclass-implicit-any.md index 251fcf6..c94f185 100644 --- a/tests/case_utils/case_sparql_select/.check-subclass-implicit-any.md +++ b/tests/case_utils/case_sparql_select/.check-subclass-implicit-any.md @@ -1,4 +1,4 @@ | | ?nFile | |----|------------------------------| | 0 | http://example.org/kb/file-1 | -| 1 | http://example.org/kb/file-2 | \ No newline at end of file +| 1 | http://example.org/kb/file-2 | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output.html b/tests/case_utils/case_sparql_select/.check-w3-output.html index aff9beb..bee5944 100644 --- a/tests/case_utils/case_sparql_select/.check-w3-output.html +++ b/tests/case_utils/case_sparql_select/.check-w3-output.html @@ -18,4 +18,4 @@ mailto:peter@example.org - \ No newline at end of file + diff --git a/tests/case_utils/case_sparql_select/.check-w3-output.md b/tests/case_utils/case_sparql_select/.check-w3-output.md index 77b05f4..af68b84 100644 --- a/tests/case_utils/case_sparql_select/.check-w3-output.md +++ b/tests/case_utils/case_sparql_select/.check-w3-output.md @@ -1,4 +1,4 @@ | | ?name | ?mbox | |----|-------------------|--------------------------| | 0 | Johnny Lee Outlaw | mailto:jlow@example.com | -| 1 | Peter Goodguy | mailto:peter@example.org | \ No newline at end of file +| 1 | Peter Goodguy | 
mailto:peter@example.org | From 9a2eb226939eb21222e851a6fb1149f3c61f8794 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 10:40:42 -0500 Subject: [PATCH 04/19] Designate new test files PRECIOUS Signed-off-by: Alex Nelson --- tests/case_utils/case_sparql_select/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 4f36399..6431be7 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -33,6 +33,7 @@ all: \ check-w3-markdown .PRECIOUS: \ + prefixed_results.% \ subclass-% \ w3-output.% From b13d78a97f6c6001bd95f0ad2da0c4520418c68e Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 10:59:37 -0500 Subject: [PATCH 05/19] Add case_sparql_select CSV and TSV output No effects were observed on Make-managed files. Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 18 ++++++++++++--- .../case_sparql_select/.check-w3-output.csv | 3 +++ .../case_sparql_select/.check-w3-output.tsv | 3 +++ tests/case_utils/case_sparql_select/Makefile | 22 +++++++++++++++++-- 4 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output.tsv diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index b753664..5b9e601 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -33,6 +33,7 @@ import logging import os import sys +import typing import pandas as pd # type: ignore import rdflib.plugins.sparql @@ -72,7 +73,7 @@ def main() -> None: ) parser.add_argument( "out_table", - help="Expected extensions are .html for HTML tables or .md for Markdown tables.", + help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated 
values, and .tsv for tab-separated values.", ) parser.add_argument( "in_sparql", @@ -136,8 +137,19 @@ def main() -> None: df = pd.DataFrame(records, columns=variables) - table_text = None - if args.out_table.endswith(".html"): + table_text: typing.Optional[str] = None + if args.out_table.endswith(".csv") or args.out_table.endswith(".tsv"): + sep: str + if args.out_table.endswith(".csv"): + sep = "," + elif args.out_table.endswith(".tsv"): + sep = "\t" + else: + raise NotImplementedError( + "Output extension not implemented in CSV-style output." + ) + table_text = df.to_csv(sep=sep) + elif args.out_table.endswith(".html"): # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html # Add CSS classes for CASE website Bootstrap support. table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) diff --git a/tests/case_utils/case_sparql_select/.check-w3-output.csv b/tests/case_utils/case_sparql_select/.check-w3-output.csv new file mode 100644 index 0000000..063e950 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output.csv @@ -0,0 +1,3 @@ +,?name,?mbox +0,Johnny Lee Outlaw,mailto:jlow@example.com +1,Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output.tsv b/tests/case_utils/case_sparql_select/.check-w3-output.tsv new file mode 100644 index 0000000..a4fdfca --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output.tsv @@ -0,0 +1,3 @@ + ?name ?mbox +0 Johnny Lee Outlaw mailto:jlow@example.com +1 Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 22a849b..fe74e32 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -20,23 +20,29 @@ tests_srcdir := $(top_srcdir)/tests all: \ subclass-explicit-none.md \ subclass-implicit-any.md \ + w3-output.csv \ w3-output.html \ - w3-output.md + 
w3-output.md \ + w3-output.tsv .PHONY: \ check-subclass \ check-subclass-explicit-none \ check-subclass-implicit-any \ + check-w3-csv \ check-w3-html \ - check-w3-markdown + check-w3-markdown \ + check-w3-tsv .PRECIOUS: \ subclass-% \ w3-output.% check: \ + check-w3-csv \ check-w3-html \ check-w3-markdown \ + check-w3-tsv \ check-subclass check-subclass: \ @@ -53,6 +59,11 @@ check-subclass-implicit-any: \ subclass-implicit-any.md diff $^ +check-w3-csv: \ + .check-w3-output.csv \ + w3-output.csv + diff $^ + check-w3-html: \ .check-w3-output.html \ w3-output.html @@ -63,12 +74,19 @@ check-w3-markdown: \ w3-output.md diff $^ +check-w3-tsv: \ + .check-w3-output.tsv \ + w3-output.tsv + diff $^ + clean: @rm -rf \ __pycache__ @rm -f \ + *.csv \ *.html \ *.md \ + *.tsv \ _* subclass-explicit-none.md: \ From 894d7b171cdcadf3e52659d686ca1ecc46ee61b8 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 11:26:02 -0500 Subject: [PATCH 06/19] Update documentation comment Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index 5c50e78..424848c 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -164,7 +164,7 @@ def main() -> None: with open(args.out_table, "w") as out_fh: out_fh.write(table_text) if table_text[-1] != "\n": - # End file with newline. + # End file with newline. CSV and TSV modes end with a built-in newline. out_fh.write("\n") From 22306b9bd8437d0e714cb131fd8812ea687a8e73 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 13:29:03 -0500 Subject: [PATCH 07/19] Align test files with new behavior A follow-on patch will address Make-managed files. 
Signed-off-by: Alex Nelson --- .../case_utils/case_sparql_select/.check-prefixed_results.html | 2 +- tests/case_utils/case_sparql_select/.check-prefixed_results.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.html b/tests/case_utils/case_sparql_select/.check-prefixed_results.html index 10e210b..4b41f69 100644 --- a/tests/case_utils/case_sparql_select/.check-prefixed_results.html +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.html @@ -15,4 +15,4 @@ kb:file-2 - \ No newline at end of file + diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.md b/tests/case_utils/case_sparql_select/.check-prefixed_results.md index f49a624..f07d435 100644 --- a/tests/case_utils/case_sparql_select/.check-prefixed_results.md +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.md @@ -1,4 +1,4 @@ | | ?nFile | |----|-----------| | 0 | kb:file-1 | -| 1 | kb:file-2 | \ No newline at end of file +| 1 | kb:file-2 | From c4a9f584b2f6cc4c41b92e4c8e5df73124ad72ca Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 13:31:04 -0500 Subject: [PATCH 08/19] Remove Make-managed files This directory had not been tracking the non-dot files. 
Signed-off-by: Alex Nelson --- tests/case_utils/case_sparql_select/.gitignore | 1 + .../case_sparql_select/prefixed_results.html | 18 ------------------ .../case_sparql_select/prefixed_results.md | 4 ---- 3 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 tests/case_utils/case_sparql_select/prefixed_results.html delete mode 100644 tests/case_utils/case_sparql_select/prefixed_results.md diff --git a/tests/case_utils/case_sparql_select/.gitignore b/tests/case_utils/case_sparql_select/.gitignore index 0ae4849..9d94989 100644 --- a/tests/case_utils/case_sparql_select/.gitignore +++ b/tests/case_utils/case_sparql_select/.gitignore @@ -1,2 +1,3 @@ +prefixed* subclass-*.md w3-output.* diff --git a/tests/case_utils/case_sparql_select/prefixed_results.html b/tests/case_utils/case_sparql_select/prefixed_results.html deleted file mode 100644 index 10e210b..0000000 --- a/tests/case_utils/case_sparql_select/prefixed_results.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - -
?nFile
0kb:file-1
1kb:file-2
\ No newline at end of file diff --git a/tests/case_utils/case_sparql_select/prefixed_results.md b/tests/case_utils/case_sparql_select/prefixed_results.md deleted file mode 100644 index f49a624..0000000 --- a/tests/case_utils/case_sparql_select/prefixed_results.md +++ /dev/null @@ -1,4 +0,0 @@ -| | ?nFile | -|----|-----------| -| 0 | kb:file-1 | -| 1 | kb:file-2 | \ No newline at end of file From 70ed1e54185e0d76875a0532aa5407421af0ea44 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 13:42:57 -0500 Subject: [PATCH 09/19] Run pre-commit autoupdate, pinning flake8 at 5.0.4 `flake8` 6.0.0 requires Python >= 3.8. Python 3.7 goes EOL on 2023-06-27, so we will stay with flake8 < 6.0.0 until then. The `flake8` issue was previously seen in case-prov PR 55. References: * https://github.com/casework/CASE-Implementation-PROV-O/pull/55 * https://www.python.org/downloads/ * https://peps.python.org/pep-0537/ Signed-off-by: Alex Nelson --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c952e8e..d6274fe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,10 @@ repos: - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 22.10.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 4.0.1 + rev: 5.0.4 hooks: - id: flake8 - repo: https://github.com/pycqa/isort From d1fc533ccfb13fb787ec7dc1860bf863fcbda591 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 13:53:56 -0500 Subject: [PATCH 10/19] Link documentation as with other if-elif branches Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index fc45de2..56fea8b 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -149,6 +149,7 @@ def main() -> None: 
table_text: typing.Optional[str] = None if args.out_table.endswith(".csv") or args.out_table.endswith(".tsv"): + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html sep: str if args.out_table.endswith(".csv"): sep = "," From e9ab5607695257510bd087f83244b94b2d8ce09b Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 6 Dec 2022 15:39:51 -0500 Subject: [PATCH 11/19] Cross CSV and TSV output testing with prefix testing No effects were observed on Make-managed files. Signed-off-by: Alex Nelson --- .../.check-prefixed_results.csv | 3 +++ .../.check-prefixed_results.tsv | 3 +++ tests/case_utils/case_sparql_select/Makefile | 16 +++++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results.csv create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results.tsv diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.csv b/tests/case_utils/case_sparql_select/.check-prefixed_results.csv new file mode 100644 index 0000000..60d4b78 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.csv @@ -0,0 +1,3 @@ +,?nFile +0,kb:file-1 +1,kb:file-2 diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.tsv b/tests/case_utils/case_sparql_select/.check-prefixed_results.tsv new file mode 100644 index 0000000..9dac11d --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.tsv @@ -0,0 +1,3 @@ + ?nFile +0 kb:file-1 +1 kb:file-2 diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 23d082d..68f11ec 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -18,8 +18,10 @@ top_srcdir := $(shell cd ../../.. 
; pwd) tests_srcdir := $(top_srcdir)/tests all: \ + prefixed_results.csv \ prefixed_results.html \ prefixed_results.md \ + prefixed_results.tsv \ subclass-explicit-none.md \ subclass-implicit-any.md \ w3-output.csv \ @@ -50,8 +52,15 @@ check: \ check-subclass check-prefixed_results: \ + check-prefixed_results-csv \ check-prefixed_results-html \ - check-prefixed_results-md + check-prefixed_results-md \ + check-prefixed_results-tsv + +check-prefixed_results-csv: \ + .check-prefixed_results.csv \ + prefixed_results.csv + diff $^ check-prefixed_results-html: \ .check-prefixed_results.html \ @@ -63,6 +72,11 @@ check-prefixed_results-md: \ prefixed_results.md diff $^ +check-prefixed_results-tsv: \ + .check-prefixed_results.tsv \ + prefixed_results.tsv + diff $^ + check-subclass: \ check-subclass-explicit-none \ check-subclass-implicit-any From 98d4c6b7bf0081b18ad8e135167efcec5e35235a Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 15:42:43 -0500 Subject: [PATCH 12/19] Refactor case_sparql_select code This patch is code-motion to give function names to chunks of `case_sparql_select:main`. Some upcoming patch series are going to add features that, when taken together, introduce non-trivial parameter-value cross-dependencies. Moving functionality to functions enables combinatoric testing in a `pytest` space, rather than resorting to copying, pasting, and tweaking many Makefile lines. A future patch series will add the `pytest` script. 
Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 195 ++++++++++++++-------- 1 file changed, 127 insertions(+), 68 deletions(-) diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index eaa98cb..8b25f30 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -49,74 +49,44 @@ _logger = logging.getLogger(os.path.basename(__file__)) -def main() -> None: - parser = argparse.ArgumentParser() - - # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser. - logging.basicConfig( - level=logging.DEBUG - if ("--debug" in sys.argv or "-d" in sys.argv) - else logging.INFO - ) - - parser.add_argument("-d", "--debug", action="store_true") - parser.add_argument( - "--built-version", - choices=tuple(built_version_choices_list), - default="case-" + CURRENT_CASE_VERSION, - help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release. Passing 'none' will mean no pre-built CASE ontology versions accompanying this tool will be included in the analysis.", - ) - parser.add_argument( - "--disallow-empty-results", - action="store_true", - help="Raise error if no results are returned for query.", - ) - parser.add_argument( - "--use-prefixes", - action="store_true", - help="Abbreviate node IDs according to graph's encoded prefixes. (This will use prefixes in the graph, not the query.)", - ) - parser.add_argument( - "out_table", - help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.", - ) - parser.add_argument( - "in_sparql", - help="File containing a SPARQL SELECT query. 
Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", - ) - parser.add_argument("in_graph", nargs="+") - args = parser.parse_args() - - graph = rdflib.Graph() - for in_graph_filename in args.in_graph: - graph.parse(in_graph_filename) - - # Inherit prefixes defined in input context dictionary. - nsdict = {k: v for (k, v) in graph.namespace_manager.namespaces()} - - select_query_text = None - with open(args.in_sparql, "r") as in_fh: - select_query_text = in_fh.read().strip() - _logger.debug("select_query_text = %r." % select_query_text) - - if "subClassOf" in select_query_text: - case_utils.ontology.load_subclass_hierarchy( - graph, built_version=args.built_version - ) - +def query_text_to_variables(select_query_text: str) -> typing.List[str]: # Build columns list from SELECT line. select_query_text_lines = select_query_text.split("\n") select_line = [ line for line in select_query_text_lines if line.startswith("SELECT ") ][0] variables = select_line.replace(" DISTINCT", "").replace("SELECT ", "").split(" ") + return variables + + +def graph_and_query_to_data_frame( + graph: rdflib.Graph, + select_query_text: str, + *args: typing.Any, + built_version: str = "case-" + CURRENT_CASE_VERSION, + disallow_empty_results: bool = False, + use_prefixes: bool = False, + **kwargs: typing.Any, +) -> pd.DataFrame: + # Inherit prefixes defined in input context dictionary. + nsdict = {k: v for (k, v) in graph.namespace_manager.namespaces()} + + # Avoid side-effects on input parameter. 
+ if "subClassOf" in select_query_text: + _graph = rdflib.Graph() + _graph += graph + case_utils.ontology.load_subclass_hierarchy(_graph, built_version=built_version) + else: + _graph = graph + + variables = query_text_to_variables(select_query_text) tally = 0 records = [] select_query_object = rdflib.plugins.sparql.processor.prepareQuery( select_query_text, initNs=nsdict ) - for (row_no, row) in enumerate(graph.query(select_query_object)): + for (row_no, row) in enumerate(_graph.query(select_query_object)): tally = row_no + 1 record = [] for (column_no, column) in enumerate(row): @@ -131,7 +101,7 @@ def main() -> None: # .decode() is because hexlify returns bytes. column_value = binascii.hexlify(column.toPython()).decode() elif isinstance(column, rdflib.URIRef): - if args.use_prefixes: + if use_prefixes: column_value = graph.namespace_manager.qname(column.toPython()) else: column_value = column.toPython() @@ -141,39 +111,128 @@ def main() -> None: _logger.debug("row[0]column[%d] = %r." % (column_no, column_value)) record.append(column_value) records.append(record) + if tally == 0: - if args.disallow_empty_results: + if disallow_empty_results: raise ValueError("Failed to return any results.") df = pd.DataFrame(records, columns=variables) + return df + +def data_frame_to_table_text( + df: pd.DataFrame, + *args: typing.Any, + output_mode: str, + **kwargs: typing.Any, +) -> str: table_text: typing.Optional[str] = None - if args.out_table.endswith(".csv") or args.out_table.endswith(".tsv"): - # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html + + if output_mode in {"csv", "tsv"}: sep: str - if args.out_table.endswith(".csv"): + if output_mode == "csv": sep = "," - elif args.out_table.endswith(".tsv"): + elif output_mode == "tsv": sep = "\t" else: raise NotImplementedError( "Output extension not implemented in CSV-style output." 
) table_text = df.to_csv(sep=sep) - elif args.out_table.endswith(".html"): + elif output_mode == "html": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html # Add CSS classes for CASE website Bootstrap support. table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) - elif args.out_table.endswith(".md"): + elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html # https://pypi.org/project/tabulate/ # Assume Github-flavored Markdown. + table_text = df.to_markdown(tablefmt="github") - if table_text is None: - raise NotImplementedError( - "Unsupported output extension for output filename %r.", args.out_table - ) + else: + if table_text is None: + raise NotImplementedError("Unimplemented output mode: %r." % output_mode) + assert table_text is not None + + return table_text + + +def main() -> None: + parser = argparse.ArgumentParser() + + # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser. + logging.basicConfig( + level=logging.DEBUG + if ("--debug" in sys.argv or "-d" in sys.argv) + else logging.INFO + ) + + parser.add_argument("-d", "--debug", action="store_true") + parser.add_argument( + "--built-version", + choices=tuple(built_version_choices_list), + default="case-" + CURRENT_CASE_VERSION, + help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release. Passing 'none' will mean no pre-built CASE ontology versions accompanying this tool will be included in the analysis.", + ) + parser.add_argument( + "--disallow-empty-results", + action="store_true", + help="Raise error if no results are returned for query.", + ) + parser.add_argument( + "--use-prefixes", + action="store_true", + help="Abbreviate node IDs according to graph's encoded prefixes. 
(This will use prefixes in the graph, not the query.)", + ) + parser.add_argument( + "out_table", + help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.", + ) + parser.add_argument( + "in_sparql", + help="File containing a SPARQL SELECT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", + ) + + parser.add_argument("in_graph", nargs="+") + args = parser.parse_args() + output_mode: str + if args.out_table.endswith(".csv"): + output_mode = "csv" + elif args.out_table.endswith(".html"): + output_mode = "html" + elif args.out_table.endswith(".json"): + output_mode = "json" + elif args.out_table.endswith(".md"): + output_mode = "md" + elif args.out_table.endswith(".tsv"): + output_mode = "tsv" + else: + raise NotImplementedError("Output file extension not implemented.") + + graph = rdflib.Graph() + for in_graph_filename in args.in_graph: + graph.parse(in_graph_filename) + + select_query_text: typing.Optional[str] = None + with open(args.in_sparql, "r") as in_fh: + select_query_text = in_fh.read().strip() + if select_query_text is None: + raise ValueError("Failed to load query.") + _logger.debug("select_query_text = %r." % select_query_text) + + df = graph_and_query_to_data_frame( + graph, + select_query_text, + built_version=args.built_version, + disallow_empty_results=args.disallow_empty_results is True, + use_prefixes=args.use_prefixes is True, + ) + + table_text = data_frame_to_table_text( + df, + output_mode=output_mode, + ) with open(args.out_table, "w") as out_fh: out_fh.write(table_text) if table_text[-1] != "\n": From e69959272993d009bc9983724a7b8a162359f8c3 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 16:36:32 -0500 Subject: [PATCH 13/19] Add case_sparql_select JSON output No effects were observed on Make-managed files. 
Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 23 ++++++- .../.check-prefixed_results-indented.json | 6 ++ .../.check-prefixed_results.json | 1 + .../.check-w3-output-indented.json | 10 +++ .../case_sparql_select/.check-w3-output.json | 1 + .../case_utils/case_sparql_select/.gitignore | 2 +- tests/case_utils/case_sparql_select/Makefile | 68 +++++++++++++++++++ 7 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json create mode 100644 tests/case_utils/case_sparql_select/.check-prefixed_results.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-indented.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output.json diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index 8b25f30..16d3c67 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -123,6 +123,8 @@ def graph_and_query_to_data_frame( def data_frame_to_table_text( df: pd.DataFrame, *args: typing.Any, + json_indent: typing.Optional[int] = None, + json_orient: str, output_mode: str, **kwargs: typing.Any, ) -> str: @@ -143,6 +145,12 @@ def data_frame_to_table_text( # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html # Add CSS classes for CASE website Bootstrap support. 
table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) + elif output_mode == "json": + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html + + table_text = df.to_json( + indent=json_indent, orient=json_orient, date_format="iso" + ) elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html # https://pypi.org/project/tabulate/ @@ -179,6 +187,17 @@ def main() -> None: action="store_true", help="Raise error if no results are returned for query.", ) + parser.add_argument( + "--json-indent", + type=int, + help="Number of whitespace characters to use for indentation. Only applicable for JSON output.", + ) + parser.add_argument( + "--json-orient", + default="columns", + choices=("columns", "index", "records", "split", "table", "values"), + help="Orientation to use for Pandas DataFrame JSON output. Only applicable for JSON output.", + ) parser.add_argument( "--use-prefixes", action="store_true", @@ -186,7 +205,7 @@ def main() -> None: ) parser.add_argument( "out_table", - help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.", + help="Expected extensions are .html for HTML tables, .json for JSON tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values. 
Note that JSON is a Pandas output JSON format (chosen by '--json-orient'), and not JSON-LD.", ) parser.add_argument( "in_sparql", @@ -231,6 +250,8 @@ def main() -> None: table_text = data_frame_to_table_text( df, + json_indent=args.json_indent, + json_orient=args.json_orient, output_mode=output_mode, ) with open(args.out_table, "w") as out_fh: diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json b/tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json new file mode 100644 index 0000000..c356eca --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results-indented.json @@ -0,0 +1,6 @@ +{ + "?nFile":{ + "0":"kb:file-1", + "1":"kb:file-2" + } +} diff --git a/tests/case_utils/case_sparql_select/.check-prefixed_results.json b/tests/case_utils/case_sparql_select/.check-prefixed_results.json new file mode 100644 index 0000000..63a9d7f --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-prefixed_results.json @@ -0,0 +1 @@ +{"?nFile":{"0":"kb:file-1","1":"kb:file-2"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-indented.json b/tests/case_utils/case_sparql_select/.check-w3-output-indented.json new file mode 100644 index 0000000..8f426c6 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-indented.json @@ -0,0 +1,10 @@ +{ + "?name":{ + "0":"Johnny Lee Outlaw", + "1":"Peter Goodguy" + }, + "?mbox":{ + "0":"mailto:jlow@example.com", + "1":"mailto:peter@example.org" + } +} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output.json b/tests/case_utils/case_sparql_select/.check-w3-output.json new file mode 100644 index 0000000..840035b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output.json @@ -0,0 +1 @@ +{"?name":{"0":"Johnny Lee Outlaw","1":"Peter Goodguy"},"?mbox":{"0":"mailto:jlow@example.com","1":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.gitignore 
b/tests/case_utils/case_sparql_select/.gitignore index 9d94989..122f73b 100644 --- a/tests/case_utils/case_sparql_select/.gitignore +++ b/tests/case_utils/case_sparql_select/.gitignore @@ -1,3 +1,3 @@ prefixed* subclass-*.md -w3-output.* +w3-output* diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 68f11ec..70bb0ec 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -20,12 +20,16 @@ tests_srcdir := $(top_srcdir)/tests all: \ prefixed_results.csv \ prefixed_results.html \ + prefixed_results.json \ + prefixed_results-indented.json \ prefixed_results.md \ prefixed_results.tsv \ subclass-explicit-none.md \ subclass-implicit-any.md \ w3-output.csv \ w3-output.html \ + w3-output.json \ + w3-output-indented.json \ w3-output.md \ w3-output.tsv @@ -35,6 +39,8 @@ all: \ check-subclass-implicit-any \ check-w3-csv \ check-w3-html \ + check-w3-json \ + check-w3-json-indented \ check-w3-markdown \ check-w3-tsv @@ -46,6 +52,7 @@ all: \ check: \ check-w3-csv \ check-w3-html \ + check-w3-json \ check-w3-markdown \ check-w3-tsv \ check-prefixed_results \ @@ -54,6 +61,7 @@ check: \ check-prefixed_results: \ check-prefixed_results-csv \ check-prefixed_results-html \ + check-prefixed_results-json \ check-prefixed_results-md \ check-prefixed_results-tsv @@ -67,6 +75,19 @@ check-prefixed_results-html: \ prefixed_results.html diff $^ +check-prefixed_results-json: \ + check-prefixed_results-json-indented \ + .check-prefixed_results.json \ + prefixed_results.json + diff \ + .check-prefixed_results.json \ + prefixed_results.json + +check-prefixed_results-json-indented: \ + .check-prefixed_results-indented.json \ + prefixed_results-indented.json + diff $^ + check-prefixed_results-md: \ .check-prefixed_results.md \ prefixed_results.md @@ -101,6 +122,19 @@ check-w3-html: \ w3-output.html diff $^ +check-w3-json: \ + .check-w3-output.json \ + check-w3-json-indented \ + 
w3-output.json + diff \ + .check-w3-output.json \ + w3-output.json + +check-w3-json-indented: \ + .check-w3-output-indented.json \ + w3-output-indented.json + diff $^ + check-w3-markdown: \ .check-w3-output.md \ w3-output.md @@ -119,6 +153,7 @@ clean: *.html \ *.md \ *.tsv \ + *output*.json \ _* prefixed_results.%: \ @@ -136,6 +171,22 @@ prefixed_results.%: \ subclass.json mv _$@ $@ +prefixed_results-indented.json: \ + $(tests_srcdir)/.venv.done.log \ + $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ + $(top_srcdir)/case_utils/ontology/__init__.py \ + $(top_srcdir)/case_utils/ontology/version_info.py \ + subclass.json \ + subclass.sparql + source $(tests_srcdir)/venv/bin/activate \ + && case_sparql_select \ + --json-indent 4 \ + --use-prefixes \ + _$@ \ + subclass.sparql \ + subclass.json + mv _$@ $@ + subclass-explicit-none.md: \ $(tests_srcdir)/.venv.done.log \ $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ @@ -180,3 +231,20 @@ w3-output.%: \ w3-input-2.ttl \ w3-input-3.json mv _$@ $@ + +w3-output-indented.json: \ + $(tests_srcdir)/.venv.done.log \ + $(top_srcdir)/case_utils/case_sparql_select/__init__.py \ + $(top_srcdir)/case_utils/ontology/__init__.py \ + $(top_srcdir)/case_utils/ontology/version_info.py \ + w3-input-1.sparql \ + w3-input-2.ttl \ + w3-input-3.json + source $(tests_srcdir)/venv/bin/activate \ + && case_sparql_select \ + --json-indent 4 \ + _$@ \ + w3-input-1.sparql \ + w3-input-2.ttl \ + w3-input-3.json + mv _$@ $@ From b1bcad32ec2aa8939f38272d3252e8f642706f43 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 17:13:45 -0500 Subject: [PATCH 14/19] Add options to case_sparql_select to disable headers and index numbers The test pattern for these features encourages an enumerative approach, so a `pytest` script exercising the freshly broken-out functions is now included. No effects were observed on Make-managed files. 
Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 70 ++++++++++++++++- tests/case_utils/Makefile | 1 + ...check-w3-output-with_header-with_index.csv | 3 + ...heck-w3-output-with_header-with_index.html | 21 ++++++ ....check-w3-output-with_header-with_index.md | 4 + ...check-w3-output-with_header-with_index.tsv | 3 + ...ck-w3-output-with_header-without_index.csv | 3 + ...k-w3-output-with_header-without_index.html | 18 +++++ ...eck-w3-output-with_header-without_index.md | 4 + ...ck-w3-output-with_header-without_index.tsv | 3 + ...ck-w3-output-without_header-with_index.csv | 2 + ...k-w3-output-without_header-with_index.html | 14 ++++ ...eck-w3-output-without_header-with_index.md | 3 + ...ck-w3-output-without_header-with_index.tsv | 2 + ...w3-output-without_header-without_index.csv | 2 + ...3-output-without_header-without_index.html | 12 +++ ...-w3-output-without_header-without_index.md | 3 + ...w3-output-without_header-without_index.tsv | 2 + tests/case_utils/case_sparql_select/Makefile | 3 + .../test_data_frame_to_table_text_json.py | 75 +++++++++++++++++++ 20 files changed, 245 insertions(+), 3 deletions(-) create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv create mode 100644 
tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv create mode 100644 tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index 8b25f30..2d8581c 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -124,10 +124,25 @@ def data_frame_to_table_text( df: pd.DataFrame, *args: typing.Any, output_mode: str, + use_header: bool, + use_index: bool, **kwargs: typing.Any, ) -> str: table_text: typing.Optional[str] = None + # Set up kwargs dicts. One kwarg behaves slightly differently for Markdown vs. other formats. + general_kwargs: typing.Dict[str, typing.Any] = dict() + md_kwargs: typing.Dict[str, typing.Any] = dict() + + # Note some output modes will drop 'header' from general_kwargs, due to alternate support or lack of support. 
+ if use_header: + general_kwargs["header"] = True + else: + general_kwargs["header"] = False + md_kwargs["headers"] = tuple() + + general_kwargs["index"] = use_index + if output_mode in {"csv", "tsv"}: sep: str if output_mode == "csv": @@ -138,17 +153,22 @@ def data_frame_to_table_text( raise NotImplementedError( "Output extension not implemented in CSV-style output." ) - table_text = df.to_csv(sep=sep) + table_text = df.to_csv(sep=sep, **general_kwargs) elif output_mode == "html": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html # Add CSS classes for CASE website Bootstrap support. - table_text = df.to_html(classes=("table", "table-bordered", "table-condensed")) + table_text = df.to_html( + classes=("table", "table-bordered", "table-condensed"), **general_kwargs + ) elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html # https://pypi.org/project/tabulate/ # Assume Github-flavored Markdown. - table_text = df.to_markdown(tablefmt="github") + # Drop unsupported kwarg. + del general_kwargs["header"] + + table_text = df.to_markdown(tablefmt="github", **general_kwargs, **md_kwargs) else: if table_text is None: raise NotImplementedError("Unimplemented output mode: %r." % output_mode) @@ -193,6 +213,30 @@ def main() -> None: help="File containing a SPARQL SELECT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.", ) + parser_header_group = parser.add_mutually_exclusive_group(required=False) + parser_header_group.add_argument( + "--header", + action="store_true", + help="Print column labels. 
This is the default behavior.", + ) + parser_header_group.add_argument( + "--no-header", + action="store_true", + help="Do not print column labels.", + ) + + parser_index_group = parser.add_mutually_exclusive_group(required=False) + parser_index_group.add_argument( + "--index", + action="store_true", + help="Print index (auto-incrementing row labels as left untitled column). This is the default behavior.", + ) + parser_index_group.add_argument( + "--no-index", + action="store_true", + help="Do not print index.", + ) + parser.add_argument("in_graph", nargs="+") args = parser.parse_args() @@ -221,6 +265,24 @@ def main() -> None: raise ValueError("Failed to load query.") _logger.debug("select_query_text = %r." % select_query_text) + # Process --header and --no-header. + use_header: bool + if args.header is True: + use_header = True + if args.no_header is True: + use_header = False + else: + use_header = True + + # Process --index and --no-index. + use_index: bool + if args.index is True: + use_index = True + if args.no_index is True: + use_index = False + else: + use_index = True + df = graph_and_query_to_data_frame( graph, select_query_text, @@ -232,6 +294,8 @@ def main() -> None: table_text = data_frame_to_table_text( df, output_mode=output_mode, + use_header=use_header, + use_index=use_index, ) with open(args.out_table, "w") as out_fh: out_fh.write(table_text) diff --git a/tests/case_utils/Makefile b/tests/case_utils/Makefile index e77c927..3c65a40 100644 --- a/tests/case_utils/Makefile +++ b/tests/case_utils/Makefile @@ -65,6 +65,7 @@ check: \ && pytest \ --ignore case_file \ --ignore case_sparql_construct \ + --ignore case_sparql_select \ --ignore case_validate \ --log-level=DEBUG diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv new file mode 100644 index 0000000..063e950 --- /dev/null +++ 
b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv @@ -0,0 +1,3 @@ +,?name,?mbox +0,Johnny Lee Outlaw,mailto:jlow@example.com +1,Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html new file mode 100644 index 0000000..bee5944 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
?name?mbox
0Johnny Lee Outlawmailto:jlow@example.com
1Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md new file mode 100644 index 0000000..af68b84 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md @@ -0,0 +1,4 @@ +| | ?name | ?mbox | +|----|-------------------|--------------------------| +| 0 | Johnny Lee Outlaw | mailto:jlow@example.com | +| 1 | Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv new file mode 100644 index 0000000..a4fdfca --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv @@ -0,0 +1,3 @@ + ?name ?mbox +0 Johnny Lee Outlaw mailto:jlow@example.com +1 Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv new file mode 100644 index 0000000..6bd60fb --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv @@ -0,0 +1,3 @@ +?name,?mbox +Johnny Lee Outlaw,mailto:jlow@example.com +Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html new file mode 100644 index 0000000..041fd3b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
?name?mbox
Johnny Lee Outlawmailto:jlow@example.com
Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md new file mode 100644 index 0000000..3aa8a01 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md @@ -0,0 +1,4 @@ +| ?name | ?mbox | +|-------------------|--------------------------| +| Johnny Lee Outlaw | mailto:jlow@example.com | +| Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv new file mode 100644 index 0000000..dd1e81d --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv @@ -0,0 +1,3 @@ +?name ?mbox +Johnny Lee Outlaw mailto:jlow@example.com +Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv new file mode 100644 index 0000000..7933d39 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv @@ -0,0 +1,2 @@ +0,Johnny Lee Outlaw,mailto:jlow@example.com +1,Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html new file mode 100644 index 0000000..b6a842b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +
0Johnny Lee Outlawmailto:jlow@example.com
1Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md new file mode 100644 index 0000000..c5ee8c8 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md @@ -0,0 +1,3 @@ +|---|-------------------|--------------------------| +| 0 | Johnny Lee Outlaw | mailto:jlow@example.com | +| 1 | Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv new file mode 100644 index 0000000..992efe2 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv @@ -0,0 +1,2 @@ +0 Johnny Lee Outlaw mailto:jlow@example.com +1 Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv new file mode 100644 index 0000000..a4c2c82 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv @@ -0,0 +1,2 @@ +Johnny Lee Outlaw,mailto:jlow@example.com +Peter Goodguy,mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html new file mode 100644 index 0000000..6dbc7c3 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html @@ -0,0 +1,12 @@ + + + + + + + + + + + +
Johnny Lee Outlawmailto:jlow@example.com
Peter Goodguymailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md new file mode 100644 index 0000000..6ad505c --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md @@ -0,0 +1,3 @@ +|-------------------|--------------------------| +| Johnny Lee Outlaw | mailto:jlow@example.com | +| Peter Goodguy | mailto:peter@example.org | diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv new file mode 100644 index 0000000..833da47 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv @@ -0,0 +1,2 @@ +Johnny Lee Outlaw mailto:jlow@example.com +Peter Goodguy mailto:peter@example.org diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile index 68f11ec..0523c8a 100644 --- a/tests/case_utils/case_sparql_select/Makefile +++ b/tests/case_utils/case_sparql_select/Makefile @@ -50,6 +50,9 @@ check: \ check-w3-tsv \ check-prefixed_results \ check-subclass + source $(tests_srcdir)/venv/bin/activate \ + && pytest \ + --log-level=DEBUG check-prefixed_results: \ check-prefixed_results-csv \ diff --git a/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py new file mode 100644 index 0000000..00d20c3 --- /dev/null +++ b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. 
Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +import pathlib +import typing + +import pytest +import rdflib + +import case_utils.case_sparql_select + +SRCDIR = pathlib.Path(__file__).parent + +GRAPH = rdflib.Graph() +GRAPH.parse(str(SRCDIR / "w3-input-2.ttl")) +GRAPH.parse(str(SRCDIR / "w3-input-3.json")) +assert len(GRAPH) > 0 + +SELECT_QUERY_TEXT: typing.Optional[str] = None +with (SRCDIR / "w3-input-1.sparql").open("r") as _fh: + SELECT_QUERY_TEXT = _fh.read().strip() +assert SELECT_QUERY_TEXT is not None + +DATA_FRAME = case_utils.case_sparql_select.graph_and_query_to_data_frame( + GRAPH, SELECT_QUERY_TEXT +) + + +def make_data_frame_to_json_table_text_parameters() -> typing.Iterator[ + typing.Tuple[str, bool, bool] +]: + for use_header in [False, True]: + for use_index in [False, True]: + for output_mode in ["csv", "html", "md", "tsv"]: + yield (output_mode, use_header, use_index) + + +@pytest.mark.parametrize( + "output_mode, use_header, use_index", + make_data_frame_to_json_table_text_parameters(), +) +def test_data_frame_to_table_text_json( + output_mode: str, + use_header: bool, + use_index: bool, +) -> None: + table_text = case_utils.case_sparql_select.data_frame_to_table_text( + DATA_FRAME, + output_mode=output_mode, + use_header=use_header, + use_index=use_index, + ) + + output_filename_template = ".check-w3-output-%s_header-%s_index.%s" + header_part = "with" if use_header else "without" + index_part = "with" if use_index else "without" + output_filename = output_filename_template % ( + header_part, + index_part, + output_mode, + ) + with (SRCDIR / output_filename).open("w") as out_fh: + 
out_fh.write(table_text) + if table_text[-1] != "\n": + out_fh.write("\n") From ed44477f1ae35747b963d40b4ab61087273ac0cc Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 7 Dec 2022 17:26:03 -0500 Subject: [PATCH 15/19] Merge JSON output and --no-(header,index) flag branches This patch addresses issues with some parameter-values being compatible with some of the DataFrame table rendering functions, but not with certain `orient` parameters of the JSON rendering function. The incompatible pairings are now caught at parameter-parsing time. Signed-off-by: Alex Nelson --- case_utils/case_sparql_select/__init__.py | 16 +++++++-- ...with_header-with_index-orient-columns.json | 1 + ...t-with_header-with_index-orient-index.json | 1 + ...with_header-with_index-orient-records.json | 1 + ...t-with_header-with_index-orient-split.json | 1 + ...t-with_header-with_index-orient-table.json | 1 + ...-with_header-with_index-orient-values.json | 1 + ...ith_header-without_index-orient-split.json | 1 + ...ith_header-without_index-orient-table.json | 1 + ...hout_header-with_index-orient-columns.json | 1 + ...ithout_header-with_index-orient-index.json | 1 + ...hout_header-with_index-orient-records.json | 1 + ...ithout_header-with_index-orient-split.json | 1 + ...ithout_header-with_index-orient-table.json | 1 + ...thout_header-with_index-orient-values.json | 1 + ...out_header-without_index-orient-split.json | 1 + ...out_header-without_index-orient-table.json | 1 + .../test_data_frame_to_table_text_json.py | 33 ++++++++++++++++--- 18 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json create mode 100644 
tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json create mode 100644 tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index eb41f96..870e44c 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -165,8 +165,11 @@ def data_frame_to_table_text( elif output_mode == "json": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_json.html + # Drop unsupported kwarg. 
+ del general_kwargs["header"] + table_text = df.to_json( - indent=json_indent, orient=json_orient, date_format="iso" + indent=json_indent, orient=json_orient, date_format="iso", **general_kwargs ) elif output_mode == "md": # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html @@ -253,7 +256,7 @@ def main() -> None: parser_index_group.add_argument( "--no-index", action="store_true", - help="Do not print index.", + help="Do not print index. If output is JSON, --json-orient must be 'split' or 'table'.", ) parser.add_argument("in_graph", nargs="+") @@ -302,6 +305,15 @@ def main() -> None: else: use_index = True + if ( + output_mode == "json" + and use_index is False + and args.json_orient not in {"split", "table"} + ): + raise ValueError( + "For JSON output, --no-index flag requires --json-orient to be either 'split' or 'table'." + ) + df = graph_and_query_to_data_frame( graph, select_query_text, diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json new file mode 100644 index 0000000..840035b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-columns.json @@ -0,0 +1 @@ +{"?name":{"0":"Johnny Lee Outlaw","1":"Peter Goodguy"},"?mbox":{"0":"mailto:jlow@example.com","1":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json new file mode 100644 index 0000000..717d3ae --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-index.json @@ -0,0 +1 @@ +{"0":{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},"1":{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}} diff --git 
a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json new file mode 100644 index 0000000..f053616 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-records.json @@ -0,0 +1 @@ +[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json new file mode 100644 index 0000000..0401af1 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"index":[0,1],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json new file mode 100644 index 0000000..92f005c --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-table.json @@ -0,0 +1 @@ +{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":0,"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"index":1,"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json new file mode 100644 index 0000000..e49c3eb --- /dev/null +++ 
b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index-orient-values.json @@ -0,0 +1 @@ +[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json new file mode 100644 index 0000000..885bc91 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json new file mode 100644 index 0000000..c9e7c13 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index-orient-table.json @@ -0,0 +1 @@ +{"schema":{"fields":[{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"pandas_version":"1.4.0"},"data":[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json new file mode 100644 index 0000000..840035b --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-columns.json @@ -0,0 +1 @@ +{"?name":{"0":"Johnny Lee Outlaw","1":"Peter Goodguy"},"?mbox":{"0":"mailto:jlow@example.com","1":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json 
b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json new file mode 100644 index 0000000..717d3ae --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-index.json @@ -0,0 +1 @@ +{"0":{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},"1":{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json new file mode 100644 index 0000000..f053616 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-records.json @@ -0,0 +1 @@ +[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json new file mode 100644 index 0000000..0401af1 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"index":[0,1],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json new file mode 100644 index 0000000..92f005c --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-table.json @@ -0,0 +1 @@ 
+{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":0,"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"index":1,"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json new file mode 100644 index 0000000..e49c3eb --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index-orient-values.json @@ -0,0 +1 @@ +[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]] diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json new file mode 100644 index 0000000..885bc91 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-split.json @@ -0,0 +1 @@ +{"columns":["?name","?mbox"],"data":[["Johnny Lee Outlaw","mailto:jlow@example.com"],["Peter Goodguy","mailto:peter@example.org"]]} diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json new file mode 100644 index 0000000..c9e7c13 --- /dev/null +++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index-orient-table.json @@ -0,0 +1 @@ +{"schema":{"fields":[{"name":"?name","type":"string"},{"name":"?mbox","type":"string"}],"pandas_version":"1.4.0"},"data":[{"?name":"Johnny Lee Outlaw","?mbox":"mailto:jlow@example.com"},{"?name":"Peter Goodguy","?mbox":"mailto:peter@example.org"}]} diff --git 
a/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py index 00d20c3..9e5cc24 100644 --- a/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py +++ b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py @@ -37,36 +37,59 @@ def make_data_frame_to_json_table_text_parameters() -> typing.Iterator[ - typing.Tuple[str, bool, bool] + typing.Tuple[str, str, bool, bool] ]: for use_header in [False, True]: for use_index in [False, True]: - for output_mode in ["csv", "html", "md", "tsv"]: - yield (output_mode, use_header, use_index) + for output_mode in ["csv", "html", "json", "md", "tsv"]: + if output_mode == "json": + for json_orient in [ + "columns", + "index", + "records", + "split", + "table", + "values", + ]: + # Handle incompatible parameter pairings for JSON mode. + if use_index is False: + if json_orient not in {"split", "table"}: + continue + + yield (json_orient, output_mode, use_header, use_index) + else: + yield ("columns", output_mode, use_header, use_index) @pytest.mark.parametrize( - "output_mode, use_header, use_index", + "json_orient, output_mode, use_header, use_index", make_data_frame_to_json_table_text_parameters(), ) def test_data_frame_to_table_text_json( + json_orient: str, output_mode: str, use_header: bool, use_index: bool, ) -> None: table_text = case_utils.case_sparql_select.data_frame_to_table_text( DATA_FRAME, + json_orient=json_orient, output_mode=output_mode, use_header=use_header, use_index=use_index, ) - output_filename_template = ".check-w3-output-%s_header-%s_index.%s" + output_filename_template = ".check-w3-output-%s_header-%s_index%s.%s" header_part = "with" if use_header else "without" index_part = "with" if use_index else "without" + if output_mode == "json": + json_orient_part = "-orient-" + json_orient + else: + json_orient_part = "" output_filename = output_filename_template % ( header_part, 
index_part, + json_orient_part, output_mode, ) with (SRCDIR / output_filename).open("w") as out_fh: From b7a61fe54952384aa259db52b4ca590253fa3d77 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 8 Dec 2022 13:21:07 -0500 Subject: [PATCH 16/19] Add SHACL shapes to set of CDO concepts This patch adds a test to reproduce the discovery of Issue 85. A follow-on patch will regenerate Make-managed files. References: * https://github.com/casework/CASE-Utilities-Python/issues/85 Signed-off-by: Alex Nelson --- case_utils/case_validate/__init__.py | 4 ++ tests/case_utils/case_validate/Makefile | 20 ++++++- .../case_validate/shape_disabling/Makefile | 52 +++++++++++++++++++ .../shape_disabling/disable_shape.ttl | 11 ++++ .../case_validate/shape_disabling/example.ttl | 11 ++++ 5 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 tests/case_utils/case_validate/shape_disabling/Makefile create mode 100644 tests/case_utils/case_validate/shape_disabling/disable_shape.ttl create mode 100644 tests/case_utils/case_validate/shape_disabling/example.ttl diff --git a/case_utils/case_validate/__init__.py b/case_utils/case_validate/__init__.py index a97e0ff..34a317d 100644 --- a/case_utils/case_validate/__init__.py +++ b/case_utils/case_validate/__init__.py @@ -51,6 +51,7 @@ NS_OWL = rdflib.OWL NS_RDF = rdflib.RDF NS_RDFS = rdflib.RDFS +NS_SH = rdflib.SH _logger = logging.getLogger(os.path.basename(__file__)) @@ -189,6 +190,9 @@ def main() -> None: NS_OWL.DatatypeProperty, NS_OWL.ObjectProperty, NS_RDFS.Datatype, + NS_SH.NodeShape, + NS_SH.PropertyShape, + NS_SH.Shape, ]: for ontology_triple in ontology_graph.triples( (None, NS_RDF.type, n_structural_class) diff --git a/tests/case_utils/case_validate/Makefile b/tests/case_utils/case_validate/Makefile index b6bf821..aa5a489 100644 --- a/tests/case_utils/case_validate/Makefile +++ b/tests/case_utils/case_validate/Makefile @@ -27,14 +27,17 @@ tests_srcdir := $(top_srcdir)/tests all: \ all-cli \ all-case_test_examples \ - 
all-uco_test_examples + all-uco_test_examples \ + all-shape_disabling .PHONY: \ all-case_test_examples \ all-cli \ + all-shape_disabling \ all-uco_test_examples \ check-case_test_examples \ check-cli \ + check-shape_disabling \ check-uco_test_examples all-case_test_examples: @@ -45,6 +48,10 @@ all-cli: $(MAKE) \ --directory cli +all-shape_disabling: + $(MAKE) \ + --directory shape_disabling + all-uco_test_examples: $(MAKE) \ --directory uco_test_examples @@ -52,7 +59,8 @@ all-uco_test_examples: check: \ check-cli \ check-case_test_examples \ - check-uco_test_examples + check-uco_test_examples \ + check-shape_disabling check-case_test_examples: $(MAKE) \ @@ -64,6 +72,11 @@ check-cli: --directory cli \ check +check-shape_disabling: + $(MAKE) \ + --directory shape_disabling \ + check + check-uco_test_examples: \ uco_monolithic.ttl $(MAKE) \ @@ -71,6 +84,9 @@ check-uco_test_examples: \ check clean: + @$(MAKE) \ + --directory shape_disabling \ + clean @$(MAKE) \ --directory case_test_examples \ clean diff --git a/tests/case_utils/case_validate/shape_disabling/Makefile b/tests/case_utils/case_validate/shape_disabling/Makefile new file mode 100644 index 0000000..b9603ac --- /dev/null +++ b/tests/case_utils/case_validate/shape_disabling/Makefile @@ -0,0 +1,52 @@ +#!/usr/bin/make -f + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +SHELL := /bin/bash + +top_srcdir := $(shell cd ../../../.. 
; pwd) + +tests_srcdir := $(top_srcdir)/tests + +all: \ + validation_with_uuid_shape_disabled.txt \ + validation_with_uuid_shape_enabled.txt + +check: \ + validation_with_uuid_shape_disabled.txt \ + validation_with_uuid_shape_enabled.txt + +clean: + @rm -f \ + *.txt \ + _* + +validation_with_uuid_shape_disabled.txt: \ + $(tests_srcdir)/.venv.done.log \ + disable_shape.ttl \ + example.ttl + source $(tests_srcdir)/venv/bin/activate \ + && case_validate \ + --ontology disable_shape.ttl \ + example.ttl \ + > _$@ + mv _$@ $@ + +validation_with_uuid_shape_enabled.txt: \ + $(tests_srcdir)/.venv.done.log \ + example.ttl + source $(tests_srcdir)/venv/bin/activate \ + && case_validate \ + --allow-infos \ + example.ttl \ + > _$@ + mv _$@ $@ diff --git a/tests/case_utils/case_validate/shape_disabling/disable_shape.ttl b/tests/case_utils/case_validate/shape_disabling/disable_shape.ttl new file mode 100644 index 0000000..e9e9bf7 --- /dev/null +++ b/tests/case_utils/case_validate/shape_disabling/disable_shape.ttl @@ -0,0 +1,11 @@ +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix sh: . +@prefix uco-core: . +@prefix xsd: . + +uco-core:UcoThing-identifier-regex-shape + sh:deactivated "true"^^xsd:boolean ; + . + diff --git a/tests/case_utils/case_validate/shape_disabling/example.ttl b/tests/case_utils/case_validate/shape_disabling/example.ttl new file mode 100644 index 0000000..9f7a89c --- /dev/null +++ b/tests/case_utils/case_validate/shape_disabling/example.ttl @@ -0,0 +1,11 @@ +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix uco-core: . +@prefix xsd: . + + + a uco-core:UcoThing ; + rdfs:comment "This node's IRI is designed to trigger a UUID review shape."@en ; + . 
+ From 8534c13b9c29bca8ffbfd29681593f531521e35a Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 8 Dec 2022 13:29:26 -0500 Subject: [PATCH 17/19] Regenerate Make-managed files References: * https://github.com/casework/CASE-Utilities-Python/issues/85 Signed-off-by: Alex Nelson --- .../validation_with_uuid_shape_disabled.txt | 2 ++ .../validation_with_uuid_shape_enabled.txt | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_disabled.txt create mode 100644 tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_enabled.txt diff --git a/tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_disabled.txt b/tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_disabled.txt new file mode 100644 index 0000000..0c16da6 --- /dev/null +++ b/tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_disabled.txt @@ -0,0 +1,2 @@ +Validation Report +Conforms: True diff --git a/tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_enabled.txt b/tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_enabled.txt new file mode 100644 index 0000000..1e6b23f --- /dev/null +++ b/tests/case_utils/case_validate/shape_disabling/validation_with_uuid_shape_enabled.txt @@ -0,0 +1,23 @@ +Validation Report +Conforms: True +Results (1): +Validation Result in SPARQLConstraintComponent (http://www.w3.org/ns/shacl#SPARQLConstraintComponent): + Severity: sh:Info + Source Shape: core:UcoThing-identifier-regex-shape + Focus Node: + Value Node: + Source Constraint: [ rdf:type sh:SPARQLConstraint ; rdfs:seeAlso ; sh:message Literal("UcoThings are suggested to end with a UUID.", lang=en) ; sh:select Literal(" + PREFIX uco-core: + SELECT $this + WHERE { + $this a/rdfs:subClassOf* uco-core:UcoThing . + FILTER ( + ! 
REGEX ( + STR($this), + "[0-9a-f]{8}-[0-9a-f]{4}-[0-5][0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$", + "i" + ) + ) + } + ") ] + Message: UcoThings are suggested to end with a UUID. From fa77296ab70f2b17925fcd3245c19acecc1ae19b Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 8 Dec 2022 13:40:59 -0500 Subject: [PATCH 18/19] Add CODEOWNERS file References: * https://github.com/usnistgov/opensource-repo/#codeowners Signed-off-by: Alex Nelson --- .github/CODEOWNERS | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..b6a76fb --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,10 @@ +# This file lists the contributors responsible for the +# repository content. They will also be automatically +# asked to review any pull request made in this repository. + +# Each line is a file pattern followed by one or more owners. +# The sequence matters: later patterns take precedence. + +# FILES OWNERS +* @casework/maintainers-global +* @casework/maintainers-case-python-utilities From be829b18306b553bc82131e2aa6b9be073b08a59 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 8 Dec 2022 17:10:30 -0500 Subject: [PATCH 19/19] Bump versions Signed-off-by: Alex Nelson --- case_utils/__init__.py | 2 +- case_utils/case_sparql_select/__init__.py | 2 +- case_utils/case_validate/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/case_utils/__init__.py b/case_utils/__init__.py index 99dc0f8..2af6aa1 100644 --- a/case_utils/__init__.py +++ b/case_utils/__init__.py @@ -11,6 +11,6 @@ # # We would appreciate acknowledgement if the software is used. -__version__ = "0.8.0" +__version__ = "0.9.0" from . 
import local_uuid # noqa: F401 diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py index 870e44c..1e94265 100644 --- a/case_utils/case_sparql_select/__init__.py +++ b/case_utils/case_sparql_select/__init__.py @@ -26,7 +26,7 @@ Should a more complex query be necessary, an outer, wrapping SELECT query would let this script continue to function. """ -__version__ = "0.4.4" +__version__ = "0.5.0" import argparse import binascii diff --git a/case_utils/case_validate/__init__.py b/case_utils/case_validate/__init__.py index 34a317d..5ef2952 100644 --- a/case_utils/case_validate/__init__.py +++ b/case_utils/case_validate/__init__.py @@ -29,7 +29,7 @@ details.) """ -__version__ = "0.2.0" +__version__ = "0.3.0" import argparse import importlib.resources