From 8e937e00a45d869614df6b27b884040137c92095 Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Wed, 29 Nov 2017 22:29:18 -0800 Subject: [PATCH 1/5] term.py sort Literals with no total order This commit provides basic infrastructure for sorting Literals by value where the underlying type has no total ordering. This provides a more consistent solution to issues like https://github.com/RDFLib/rdflib/issues/648, https://github.com/RDFLib/rdflib/issues/630, and https://github.com/RDFLib/rdflib/issues/613, where workarounds are implemented in the serializers. Those workarounds massively increase code complexity in the serializers to compensate for the fact that Literals do not support a total ordering, because some of the underlying Python datatypes do not. The only datatype that I know of that causes this issue at the moment is datetime, and I have implemented a fix for that. If other types are found to have this issue, the solution is to add an entry to _NO_TOTAL_ORDER_TYPES that includes a function that partitions the type into subtypes that do have total orders. 
--- rdflib/term.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rdflib/term.py b/rdflib/term.py index a3cfc72e0..27eeb4223 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -812,6 +812,9 @@ def __gt__(self, other): return self.language > other.language if self.value != None and other.value != None: + if dtself in _NO_TOTAL_ORDER_TYPES: + comparator = _NO_TOTAL_ORDER_TYPES[dtself] + return comparator(self.value) > comparator(other.value) return self.value > other.value if text_type(self) != text_type(other): @@ -1397,6 +1400,12 @@ def _writeXML(xmlnode): _XSD_DECIMAL, ) +# these are not guranteed to sort because it is not possible +# to calculate a total order over all valid members of the type +# the function must partition the type into subtypes that do have total orders +_NO_TOTAL_ORDER_TYPES = { + _XSD_DATETIME:lambda value:bool(value.tzinfo), +} def _castPythonToLiteral(obj): """ From 7c971027f27cc0a06e2013c120d0af997f09c81f Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Thu, 30 Nov 2017 01:15:17 -0800 Subject: [PATCH 2/5] term.py added _XSD_TIME to _NO_TOTAL_ORDER_TYPES gYearMonth and gYear (as well as the other g* xsd types) should also be included, but their current implementation does not match the xsd spec so they are commented out. If the g* types are implemented as durations or time intervals, sorting will likely be a much more complicated issue. 
--- rdflib/term.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rdflib/term.py b/rdflib/term.py index 27eeb4223..a7c98a75b 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -1362,6 +1362,9 @@ def _writeXML(xmlnode): _XSD_DATE = URIRef(_XSD_PFX + 'date') _XSD_TIME = URIRef(_XSD_PFX + 'time') +_XSD_GYEARMONTH = URIRef(_XSD_PFX + 'gYearMonth') +_XSD_GYEAR = URIRef(_XSD_PFX + 'gYear') + # TODO: duration, gYearMonth, gYear, gMonthDay, gDay, gMonth _NUMERIC_LITERAL_TYPES = ( @@ -1405,6 +1408,10 @@ def _writeXML(xmlnode): # the function must partition the type into subtypes that do have total orders _NO_TOTAL_ORDER_TYPES = { _XSD_DATETIME:lambda value:bool(value.tzinfo), + #_XSD_DATE:lambda value:bool(value.tzinfo), # TODO: xsd spec allows tz + _XSD_TIME:lambda value:bool(value.tzinfo), + #_XSD_GYEARMONTH:lambda value:bool(value.tzinfo), # TODO: spec allows tz + #_XSD_GYEAR:lambda value:bool(value.tzinfo), # TODO: spec allows tz } def _castPythonToLiteral(obj): From 9bfa17b6fe4b9d10ecda4ef5f62a2944461b1c91 Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Thu, 30 Nov 2017 01:22:05 -0800 Subject: [PATCH 3/5] test_term.py added test_total_order to make sure that all Literals can be sorted --- test/test_term.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/test_term.py b/test/test_term.py index f96985189..7301164a0 100644 --- a/test/test_term.py +++ b/test/test_term.py @@ -55,6 +55,28 @@ def test_base64_values(self): self.assertEqual(lit.value, decoded_b64msg) self.assertEqual(str(lit), b64msg) + def test_total_order(self): + types = { + XSD.dateTime:('0001-01-01T00:00:00', '0001-01-01T00:00:00Z', + '0001-01-01T00:00:00-00:00'), + XSD.date:('0001-01-01', '0001-01-01Z', '0001-01-01-00:00'), + XSD.time:('00:00:00', '00:00:00Z', '00:00:00-00:00'), + XSD.gYear:('0001', '0001Z', '0001-00:00'), # interval + XSD.gYearMonth:('0001-01', '0001-01Z', '0001-01-00:00'), + } + literals = [Literal(literal, datatype=type) + for type, literals 
in types.items() + for literal in literals] + try: + sorted(literals) + orderable = True + except TypeError as e: + for l in literals: + print(repr(l), repr(l.value)) + print(e) + orderable = False + self.assertTrue(orderable) + class TestValidityFunctions(unittest.TestCase): From 186e75ab68401df7f256ecb2dc76f86a3fd47e73 Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Thu, 30 Nov 2017 01:34:16 -0800 Subject: [PATCH 4/5] term.py _NO_TOTAL_ORDER_TYPES now tests against native python types --- rdflib/term.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/rdflib/term.py b/rdflib/term.py index a7c98a75b..ec59802a7 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -812,8 +812,8 @@ def __gt__(self, other): return self.language > other.language if self.value != None and other.value != None: - if dtself in _NO_TOTAL_ORDER_TYPES: - comparator = _NO_TOTAL_ORDER_TYPES[dtself] + if type(self.value) in _NO_TOTAL_ORDER_TYPES: + comparator = _NO_TOTAL_ORDER_TYPES[type(self.value)] return comparator(self.value) > comparator(other.value) return self.value > other.value @@ -1362,9 +1362,6 @@ def _writeXML(xmlnode): _XSD_DATE = URIRef(_XSD_PFX + 'date') _XSD_TIME = URIRef(_XSD_PFX + 'time') -_XSD_GYEARMONTH = URIRef(_XSD_PFX + 'gYearMonth') -_XSD_GYEAR = URIRef(_XSD_PFX + 'gYear') - # TODO: duration, gYearMonth, gYear, gMonthDay, gDay, gMonth _NUMERIC_LITERAL_TYPES = ( @@ -1407,11 +1404,8 @@ def _writeXML(xmlnode): # to calculate a total order over all valid members of the type # the function must partition the type into subtypes that do have total orders _NO_TOTAL_ORDER_TYPES = { - _XSD_DATETIME:lambda value:bool(value.tzinfo), - #_XSD_DATE:lambda value:bool(value.tzinfo), # TODO: xsd spec allows tz - _XSD_TIME:lambda value:bool(value.tzinfo), - #_XSD_GYEARMONTH:lambda value:bool(value.tzinfo), # TODO: spec allows tz - #_XSD_GYEAR:lambda value:bool(value.tzinfo), # TODO: spec allows tz + datetime:lambda value:bool(value.tzinfo), + time:lambda 
value:bool(value.tzinfo), } def _castPythonToLiteral(obj): From 9dc15a423819fee1907f2fc4b1efd2d8019a0560 Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Thu, 7 Dec 2017 21:32:50 -0800 Subject: [PATCH 5/5] term.py added total ordering rule for xml.dom.minidom.Document --- rdflib/term.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rdflib/term.py b/rdflib/term.py index ec59802a7..783703551 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -1406,6 +1406,7 @@ def _writeXML(xmlnode): _NO_TOTAL_ORDER_TYPES = { datetime:lambda value:bool(value.tzinfo), time:lambda value:bool(value.tzinfo), + xml.dom.minidom.Document:lambda value:value.toxml(), } def _castPythonToLiteral(obj):