diff --git a/docs/source/_images/core_type_mappings.png b/docs/source/_images/core_type_mappings.png index 180e1fb26..e69de29bb 100644 Binary files a/docs/source/_images/core_type_mappings.png and b/docs/source/_images/core_type_mappings.png differ diff --git a/docs/source/_images/core_type_mappings.svg b/docs/source/_images/core_type_mappings.svg index 84e06d927..5b29bba3b 100644 --- a/docs/source/_images/core_type_mappings.svg +++ b/docs/source/_images/core_type_mappings.svg @@ -1 +1,3484 @@ - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/api.rst b/docs/source/api.rst index e522425fe..478f54c81 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -1091,7 +1091,7 @@ The core types with their general mappings are listed below: +------------------------+---------------------------------------------------------------------------------------------------------------------------+ | String | :class:`str` | 
+------------------------+---------------------------------------------------------------------------------------------------------------------------+ -| Bytes :sup:`[1]` | :class:`bytearray` | +| Bytes :sup:`[1]` | :class:`bytes` | +------------------------+---------------------------------------------------------------------------------------------------------------------------+ | List | :class:`list` | +------------------------+---------------------------------------------------------------------------------------------------------------------------+ @@ -1110,6 +1110,57 @@ The diagram below illustrates the actual mappings between the various layers, fr :target: ./_images/core_type_mappings.svg +Extended Data Types +=================== + +The driver supports serializing more types (as input parameters). +However, they will have to be mapped to the existing Bolt types (see above) when they are sent to the server. +This means that the driver will never return these types in results. + +When in doubt, you can test the type conversion like so:: + + import neo4j + + + with neo4j.GraphDatabase.driver(URI, auth=AUTH) as driver: + with driver.session() as session: + type_in = ("foo", "bar") + result = session.run("RETURN $x", x=type_in) + type_out = result.single()[0] + print(type(type_out)) + print(type_out) + +Which in this case would yield:: + + <class 'list'> + ['foo', 'bar'] + + ++-----------------------------------+---------------------------------+---------------------------------------+ +| Parameter Type | Bolt Type | Result Type | ++===================================+=================================+=======================================+ +| :class:`tuple` | List | :class:`list` | ++-----------------------------------+---------------------------------+---------------------------------------+ +| :class:`bytearray` | Bytes | :class:`bytes` | ++-----------------------------------+---------------------------------+---------------------------------------+ +| numpy\ :sup:`[2]` 
``ndarray`` | (nested) List | (nested) :class:`list` | ++-----------------------------------+---------------------------------+---------------------------------------+ +| pandas\ :sup:`[3]` ``DataFrame`` | Map[str, List[_]] :sup:`[4]` | :class:`dict` | ++-----------------------------------+---------------------------------+---------------------------------------+ +| pandas ``Series`` | List | :class:`list` | ++-----------------------------------+---------------------------------+---------------------------------------+ +| pandas ``Array`` | List | :class:`list` | ++-----------------------------------+---------------------------------+---------------------------------------+ + +.. Note:: + + 2. ``void`` and ``complexfloating`` typed numpy ``ndarray``\s are not supported. + 3. ``Period``, ``Interval``, and ``pyarrow`` pandas types are not supported. + 4. A pandas ``DataFrame`` will be serialized as Map with the column names mapping to the column values (as Lists). + Just like with ``dict`` objects, the column names need to be :class:`str` objects. 
+ + + **************** Graph Data Types **************** diff --git a/pyproject.toml b/pyproject.toml index f16454683..08387ce8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,10 +45,17 @@ dynamic = ["version", "readme"] Homepage = "https://github.com/neo4j/neo4j-python-driver" [project.optional-dependencies] -pandas = ["pandas>=1.0.0"] +numpy = ["numpy >= 1.7.0, < 2.0.0"] +pandas = [ + "pandas >= 1.1.0, < 2.0.0", + "numpy >= 1.7.0, < 2.0.0", +] [build-system] -requires = ["setuptools~=65.6", "tomlkit~=0.11.6"] +requires = [ + "setuptools~=65.6", + "tomlkit~=0.11.6", +] build-backend = "setuptools.build_meta" # still in beta diff --git a/requirements-dev.txt b/requirements-dev.txt index 04528c610..b070002c1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -18,7 +18,9 @@ tomlkit~=0.11.6 # needed for running tests coverage[toml]>=5.5 mock>=4.0.3 +numpy>=1.7.0 pandas>=1.0.0 +pyarrow>=1.0.0 pytest>=6.2.5 pytest-asyncio>=0.16.0 pytest-benchmark>=3.4.1 diff --git a/src/neo4j/_codec/hydration/__init__.py b/src/neo4j/_codec/hydration/__init__.py index 1df9aaaa0..42e44f98a 100644 --- a/src/neo4j/_codec/hydration/__init__.py +++ b/src/neo4j/_codec/hydration/__init__.py @@ -17,6 +17,7 @@ from ._common import ( BrokenHydrationObject, + DehydrationHooks, HydrationScope, ) from ._interface import HydrationHandlerABC @@ -24,6 +25,7 @@ __all__ = [ "BrokenHydrationObject", + "DehydrationHooks", "HydrationHandlerABC", "HydrationScope", ] diff --git a/src/neo4j/_codec/hydration/_common.py b/src/neo4j/_codec/hydration/_common.py index 2632f2117..f278dc77f 100644 --- a/src/neo4j/_codec/hydration/_common.py +++ b/src/neo4j/_codec/hydration/_common.py @@ -16,12 +16,51 @@ # limitations under the License. 
+import typing as t from copy import copy +from dataclasses import dataclass from ...graph import Graph from ..packstream import Structure +@dataclass +class DehydrationHooks: + exact_types: t.Dict[t.Type, t.Callable[[t.Any], t.Any]] + subtypes: t.Dict[t.Type, t.Callable[[t.Any], t.Any]] + + def update(self, exact_types=None, subtypes=None): + exact_types = exact_types or {} + subtypes = subtypes or {} + self.exact_types.update(exact_types) + self.subtypes.update(subtypes) + + def extend(self, exact_types=None, subtypes=None): + exact_types = exact_types or {} + subtypes = subtypes or {} + return DehydrationHooks( + exact_types={**self.exact_types, **exact_types}, + subtypes={**self.subtypes, **subtypes}, + ) + + def get_transformer(self, item): + type_ = type(item) + transformer = self.exact_types.get(type_) + if transformer is not None: + return transformer + transformer = next( + ( + f + for super_type, f in self.subtypes.items() + if isinstance(item, super_type) + ), + None, + ) + if transformer is not None: + return transformer + return None + + class BrokenHydrationObject: """ Represents an object from the server, not understood by the driver. 
@@ -68,7 +107,7 @@ def __init__(self, hydration_handler, graph_hydrator): list: self._hydrate_list, dict: self._hydrate_dict, } - self.dehydration_hooks = hydration_handler.dehydration_functions + self.dehydration_hooks = hydration_handler.dehydration_hooks def _hydrate_structure(self, value): f = self._struct_hydration_functions.get(value.tag) diff --git a/src/neo4j/_codec/hydration/_interface/__init__.py b/src/neo4j/_codec/hydration/_interface/__init__.py index 5092d5e0d..093b98597 100644 --- a/src/neo4j/_codec/hydration/_interface/__init__.py +++ b/src/neo4j/_codec/hydration/_interface/__init__.py @@ -18,11 +18,14 @@ import abc +from .._common import DehydrationHooks + class HydrationHandlerABC(abc.ABC): def __init__(self): self.struct_hydration_functions = {} - self.dehydration_functions = {} + self.dehydration_hooks = DehydrationHooks(exact_types={}, + subtypes={}) @abc.abstractmethod def new_hydration_scope(self): diff --git a/src/neo4j/_codec/hydration/v1/hydration_handler.py b/src/neo4j/_codec/hydration/v1/hydration_handler.py index 89839503f..b4bb09658 100644 --- a/src/neo4j/_codec/hydration/v1/hydration_handler.py +++ b/src/neo4j/_codec/hydration/v1/hydration_handler.py @@ -23,6 +23,10 @@ timedelta, ) +from ...._optional_deps import ( + np, + pd, +) from ....graph import ( Graph, Node, @@ -159,8 +163,7 @@ def __init__(self): b"d": temporal.hydrate_datetime, # no time zone b"E": temporal.hydrate_duration, } - self.dehydration_functions = { - **self.dehydration_functions, + self.dehydration_hooks.update(exact_types={ Point: spatial.dehydrate_point, CartesianPoint: spatial.dehydrate_point, WGS84Point: spatial.dehydrate_point, @@ -172,7 +175,19 @@ def __init__(self): datetime: temporal.dehydrate_datetime, Duration: temporal.dehydrate_duration, timedelta: temporal.dehydrate_timedelta, - } + }) + if np is not None: + self.dehydration_hooks.update(exact_types={ + np.datetime64: temporal.dehydrate_np_datetime, + np.timedelta64: temporal.dehydrate_np_timedelta, + 
}) + if pd is not None: + self.dehydration_hooks.update(exact_types={ + pd.Timestamp: temporal.dehydrate_pandas_datetime, + pd.Timedelta: temporal.dehydrate_pandas_timedelta, + type(pd.NaT): lambda _: None, + }) + def patch_utc(self): from ..v2 import temporal as temporal_v2 @@ -186,10 +201,18 @@ def patch_utc(self): b"i": temporal_v2.hydrate_datetime, }) - self.dehydration_functions.update({ + self.dehydration_hooks.update(exact_types={ DateTime: temporal_v2.dehydrate_datetime, datetime: temporal_v2.dehydrate_datetime, }) + if np is not None: + self.dehydration_hooks.update(exact_types={ + np.datetime64: temporal_v2.dehydrate_np_datetime, + }) + if pd is not None: + self.dehydration_hooks.update(exact_types={ + pd.Timestamp: temporal_v2.dehydrate_pandas_datetime, + }) def new_hydration_scope(self): self._created_scope = True diff --git a/src/neo4j/_codec/hydration/v1/temporal.py b/src/neo4j/_codec/hydration/v1/temporal.py index c36eda5d3..d47967551 100644 --- a/src/neo4j/_codec/hydration/v1/temporal.py +++ b/src/neo4j/_codec/hydration/v1/temporal.py @@ -22,10 +22,17 @@ timedelta, ) +from ...._optional_deps import ( + np, + pd, +) from ....time import ( Date, DateTime, Duration, + MAX_YEAR, + MIN_YEAR, + NANO_SECONDS, Time, ) from ...packstream import Structure @@ -171,6 +178,50 @@ def seconds_and_nanoseconds(dt): int(tz.utcoffset(value).total_seconds())) +if np is not None: + def dehydrate_np_datetime(value): + """ Dehydrator for `numpy.datetime64` values. + + :param value: + :type value: numpy.datetime64 + :returns: + """ + if np.isnat(value): + return None + year = value.astype("datetime64[Y]").astype(int) + 1970 + if not 0 < year <= 9999: + # while we could encode years outside the range, they would fail + # when retrieved from the database. 
+ raise ValueError(f"Year out of range ({MIN_YEAR:d}..{MAX_YEAR:d}) " + f"found {year}") + seconds = value.astype(np.dtype("datetime64[s]")).astype(int) + nanoseconds = (value.astype(np.dtype("datetime64[ns]")).astype(int) + % NANO_SECONDS) + return Structure(b"d", seconds, nanoseconds) + + +if pd is not None: + def dehydrate_pandas_datetime(value): + """ Dehydrator for `pandas.Timestamp` values. + + :param value: + :type value: pandas.Timestamp + :returns: + """ + return dehydrate_datetime( + DateTime( + value.year, + value.month, + value.day, + value.hour, + value.minute, + value.second, + value.microsecond * 1000 + value.nanosecond, + value.tzinfo, + ) + ) + + def hydrate_duration(months, days, seconds, nanoseconds): """ Hydrator for `Duration` values. @@ -205,3 +256,50 @@ def dehydrate_timedelta(value): seconds = value.seconds nanoseconds = 1000 * value.microseconds return Structure(b"E", months, days, seconds, nanoseconds) + + +if np is not None: + _NUMPY_DURATION_UNITS = { + "Y": "years", + "M": "months", + "W": "weeks", + "D": "days", + "h": "hours", + "m": "minutes", + "s": "seconds", + "ms": "milliseconds", + "us": "microseconds", + "ns": "nanoseconds", + } + + def dehydrate_np_timedelta(value): + """ Dehydrator for `numpy.timedelta64` values. + + :param value: + :type value: numpy.timedelta64 + :returns: + """ + if np.isnat(value): + return None + unit, step_size = np.datetime_data(value) + numer = int(value.astype(int)) + # raise RuntimeError((type(numer), type(step_size))) + kwarg = _NUMPY_DURATION_UNITS.get(unit) + if kwarg is not None: + return dehydrate_duration(Duration(**{kwarg: numer * step_size})) + return dehydrate_duration(Duration( + nanoseconds=value.astype("timedelta64[ns]").astype(int) + )) + + +if pd is not None: + def dehydrate_pandas_timedelta(value): + """ Dehydrator for `pandas.Timedelta` values. 
+ + :param value: + :type value: pandas.Timedelta + :returns: + """ + return dehydrate_duration(Duration( + nanoseconds=value.value + )) diff --git a/src/neo4j/_codec/hydration/v2/hydration_handler.py b/src/neo4j/_codec/hydration/v2/hydration_handler.py index 167fab991..83348b3b7 100644 --- a/src/neo4j/_codec/hydration/v2/hydration_handler.py +++ b/src/neo4j/_codec/hydration/v2/hydration_handler.py @@ -37,8 +37,7 @@ def __init__(self): b"d": temporal.hydrate_datetime, # no time zone b"E": temporal.hydrate_duration, } - self.dehydration_functions = { - **self.dehydration_functions, + self.dehydration_hooks.update(exact_types={ Point: spatial.dehydrate_point, CartesianPoint: spatial.dehydrate_point, WGS84Point: spatial.dehydrate_point, @@ -50,7 +49,18 @@ def __init__(self): datetime: temporal.dehydrate_datetime, Duration: temporal.dehydrate_duration, timedelta: temporal.dehydrate_timedelta, - } + }) + if np is not None: + self.dehydration_hooks.update(exact_types={ + np.datetime64: temporal.dehydrate_np_datetime, + np.timedelta64: temporal.dehydrate_np_timedelta, + }) + if pd is not None: + self.dehydration_hooks.update(exact_types={ + pd.Timestamp: temporal.dehydrate_pandas_datetime, + pd.Timedelta: temporal.dehydrate_pandas_timedelta, + type(pd.NaT): lambda _: None, + }) def new_hydration_scope(self): self._created_scope = True diff --git a/src/neo4j/_codec/hydration/v2/temporal.py b/src/neo4j/_codec/hydration/v2/temporal.py index bc3644587..d15b37536 100644 --- a/src/neo4j/_codec/hydration/v2/temporal.py +++ b/src/neo4j/_codec/hydration/v2/temporal.py @@ -90,3 +90,49 @@ def seconds_and_nanoseconds(dt): "UTC offsets.") offset_seconds = offset.days * 86400 + offset.seconds return Structure(b"I", seconds, nanoseconds, offset_seconds) + + +if pd is not None: + def dehydrate_pandas_datetime(value): + """ Dehydrator for `pandas.Timestamp` values. 
+ + :param value: + :type value: pandas.Timestamp + :returns: + """ + seconds, nanoseconds = divmod(value.value, NANO_SECONDS) + + import pytz + + tz = value.tzinfo + if tz is None: + # without time zone + return Structure(b"d", seconds, nanoseconds) + elif hasattr(tz, "zone") and tz.zone and isinstance(tz.zone, str): + # with named pytz time zone + return Structure(b"i", seconds, nanoseconds, tz.zone) + elif hasattr(tz, "key") and tz.key and isinstance(tz.key, str): + # with named zoneinfo (Python 3.9+) time zone + return Structure(b"i", seconds, nanoseconds, tz.key) + else: + # with time offset + offset = tz.utcoffset(value) + if offset.microseconds: + raise ValueError("Bolt protocol does not support sub-second " + "UTC offsets.") + offset_seconds = offset.days * 86400 + offset.seconds + return Structure(b"I", seconds, nanoseconds, offset_seconds) + + # simpler but slower alternative + # return dehydrate_datetime( + # DateTime( + # value.year, + # value.month, + # value.day, + # value.hour, + # value.minute, + # value.second, + # value.microsecond * 1000 + value.nanosecond, + # value.tzinfo, + # ) + # ) diff --git a/src/neo4j/_codec/packstream/v1/__init__.py b/src/neo4j/_codec/packstream/v1/__init__.py index d2f9caf4d..360cd25b4 100644 --- a/src/neo4j/_codec/packstream/v1/__init__.py +++ b/src/neo4j/_codec/packstream/v1/__init__.py @@ -16,6 +16,7 @@ # limitations under the License. +import typing as t from codecs import decode from contextlib import contextmanager from struct import ( @@ -23,9 +24,40 @@ unpack as struct_unpack, ) +from ...._optional_deps import ( + np, + pd, +) +from ...hydration import DehydrationHooks from .._common import Structure +NONE_VALUES: t.Tuple = (None,) +TRUE_VALUES: t.Tuple = (True,) +FALSE_VALUES: t.Tuple = (False,) +INT_TYPES: t.Tuple[t.Type, ...] = (int,) +FLOAT_TYPES: t.Tuple[t.Type, ...] 
= (float,) +# we can't put tuple here because spatial types subclass tuple, +# and we don't want to treat them as sequences +SEQUENCE_TYPES: t.Tuple[t.Type, ...] = (list,) +MAPPING_TYPES: t.Tuple[t.Type, ...] = (dict,) +BYTES_TYPES: t.Tuple[t.Type, ...] = (bytes, bytearray) + + +if np is not None: + TRUE_VALUES = (*TRUE_VALUES, np.bool_(True)) + FALSE_VALUES = (*FALSE_VALUES, np.bool_(False)) + INT_TYPES = (*INT_TYPES, np.integer) + FLOAT_TYPES = (*FLOAT_TYPES, np.floating) + SEQUENCE_TYPES = (*SEQUENCE_TYPES, np.ndarray) + +if pd is not None: + NONE_VALUES = (*NONE_VALUES, pd.NA) + SEQUENCE_TYPES = (*SEQUENCE_TYPES, pd.Series, pd.Categorical, + pd.core.arrays.ExtensionArray) + MAPPING_TYPES = (*MAPPING_TYPES, pd.DataFrame) + + PACKED_UINT_8 = [struct_pack(">B", value) for value in range(0x100)] PACKED_UINT_16 = [struct_pack(">H", value) for value in range(0x10000)] @@ -42,29 +74,47 @@ def __init__(self, stream): self.stream = stream self._write = self.stream.write - def pack_raw(self, data): + def _pack_raw(self, data): self._write(data) - def pack(self, value, dehydration_hooks=None): + def pack(self, data, dehydration_hooks=None): + self._pack(data, + dehydration_hooks=self._inject_hooks(dehydration_hooks)) + + @classmethod + def _inject_hooks(cls, dehydration_hooks=None): + if dehydration_hooks is None: + return DehydrationHooks( + exact_types={tuple: list}, + subtypes={} + ) + return dehydration_hooks.extend( + exact_types={tuple: list}, + subtypes={} + ) + + + def _pack(self, value, dehydration_hooks=None): write = self._write # None - if value is None: + if any(value is v for v in NONE_VALUES): write(b"\xC0") # NULL # Boolean - elif value is True: + elif any(value is v for v in TRUE_VALUES): write(b"\xC3") - elif value is False: + elif any(value is v for v in FALSE_VALUES): write(b"\xC2") # Float (only double precision is supported) - elif isinstance(value, float): + elif isinstance(value, FLOAT_TYPES): write(b"\xC1") write(struct_pack(">d", value)) # 
Integer - elif isinstance(value, int): + elif isinstance(value, INT_TYPES): + value = int(value) if -0x10 <= value < 0x80: write(PACKED_UINT_8[value % 0x100]) elif -0x80 <= value < -0x10: @@ -85,42 +135,46 @@ def pack(self, value, dehydration_hooks=None): # String elif isinstance(value, str): encoded = value.encode("utf-8") - self.pack_string_header(len(encoded)) - self.pack_raw(encoded) + self._pack_string_header(len(encoded)) + self._pack_raw(encoded) # Bytes - elif isinstance(value, (bytes, bytearray)): - self.pack_bytes_header(len(value)) - self.pack_raw(value) + elif isinstance(value, BYTES_TYPES): + self._pack_bytes_header(len(value)) + self._pack_raw(value) # List - elif isinstance(value, list): - self.pack_list_header(len(value)) + elif isinstance(value, SEQUENCE_TYPES): + self._pack_list_header(len(value)) for item in value: - self.pack(item, dehydration_hooks=dehydration_hooks) + self._pack(item, dehydration_hooks) # Map - elif isinstance(value, dict): - self.pack_map_header(len(value)) + elif isinstance(value, MAPPING_TYPES): + self._pack_map_header(len(value.keys())) for key, item in value.items(): if not isinstance(key, str): raise TypeError( "Map keys must be strings, not {}".format(type(key)) ) - self.pack(key, dehydration_hooks=dehydration_hooks) - self.pack(item, dehydration_hooks=dehydration_hooks) + self._pack(key, dehydration_hooks) + self._pack(item, dehydration_hooks) # Structure elif isinstance(value, Structure): self.pack_struct(value.tag, value.fields) - # Other - elif dehydration_hooks and type(value) in dehydration_hooks: - self.pack(dehydration_hooks[type(value)](value)) + # Other if in dehydration hooks else: + if dehydration_hooks: + transformer = dehydration_hooks.get_transformer(value) + if transformer is not None: + self._pack(transformer(value), dehydration_hooks) + return + raise ValueError("Values of type %s are not supported" % type(value)) - def pack_bytes_header(self, size): + def _pack_bytes_header(self, size): write = 
self._write if size < 0x100: write(b"\xCC") @@ -134,7 +188,7 @@ def pack_bytes_header(self, size): else: raise OverflowError("Bytes header size out of range") - def pack_string_header(self, size): + def _pack_string_header(self, size): write = self._write if size <= 0x0F: write(bytes((0x80 | size,))) @@ -150,7 +204,7 @@ def pack_string_header(self, size): else: raise OverflowError("String header size out of range") - def pack_list_header(self, size): + def _pack_list_header(self, size): write = self._write if size <= 0x0F: write(bytes((0x90 | size,))) @@ -166,7 +220,7 @@ def pack_list_header(self, size): else: raise OverflowError("List header size out of range") - def pack_map_header(self, size): + def _pack_map_header(self, size): write = self._write if size <= 0x0F: write(bytes((0xA0 | size,))) @@ -183,6 +237,12 @@ def pack_map_header(self, size): raise OverflowError("Map header size out of range") def pack_struct(self, signature, fields, dehydration_hooks=None): + self._pack_struct( + signature, fields, + dehydration_hooks=self._inject_hooks(dehydration_hooks) + ) + + def _pack_struct(self, signature, fields, dehydration_hooks=None): if len(signature) != 1 or not isinstance(signature, bytes): raise ValueError("Structure signature must be a single byte value") write = self._write @@ -193,7 +253,7 @@ def pack_struct(self, signature, fields, dehydration_hooks=None): raise OverflowError("Structure size out of range") write(signature) for field in fields: - self.pack(field, dehydration_hooks=dehydration_hooks) + self._pack(field, dehydration_hooks) @staticmethod def new_packable_buffer(): diff --git a/src/neo4j/_optional_deps/__init__.py b/src/neo4j/_optional_deps/__init__.py new file mode 100644 index 000000000..17aa1b61d --- /dev/null +++ b/src/neo4j/_optional_deps/__init__.py @@ -0,0 +1,22 @@ +import typing as t + + +np: t.Any = None + +try: + import numpy as np # type: ignore[no-redef] +except ImportError: + pass + +pd: t.Any = None + +try: + import pandas as pd 
# type: ignore[no-redef] +except ImportError: + pass + + +__all__ = [ + "np", + "pd", +] diff --git a/src/neo4j/time/__init__.py b/src/neo4j/time/__init__.py index 0ddc84289..faf8061c8 100644 --- a/src/neo4j/time/__init__.py +++ b/src/neo4j/time/__init__.py @@ -412,7 +412,7 @@ def __new__( + d * AVERAGE_SECONDS_IN_DAY + s - (1 if ns < 0 else 0)) - if avg_total_seconds < MIN_INT64 or avg_total_seconds > MAX_INT64: + if not MIN_INT64 <= avg_total_seconds <= MAX_INT64: raise ValueError("Duration value out of range: %r", tuple.__repr__((mo, d, s, ns))) return tuple.__new__(cls, (mo, d, s, ns)) diff --git a/tests/unit/common/codec/hydration/v1/test_hydration_handler.py b/tests/unit/common/codec/hydration/v1/test_hydration_handler.py index 908678c9d..6c00005d1 100644 --- a/tests/unit/common/codec/hydration/v1/test_hydration_handler.py +++ b/tests/unit/common/codec/hydration/v1/test_hydration_handler.py @@ -23,9 +23,14 @@ timedelta, ) +import numpy as np +import pandas as pd import pytest -from neo4j._codec.hydration import HydrationScope +from neo4j._codec.hydration import ( + DehydrationHooks, + HydrationScope, +) from neo4j._codec.hydration.v1 import HydrationHandler from neo4j._codec.packstream import Structure from neo4j.graph import Graph @@ -64,12 +69,15 @@ def test_scope_hydration_keys(self, hydration_scope): def test_scope_dehydration_keys(self, hydration_scope): hooks = hydration_scope.dehydration_hooks - assert isinstance(hooks, dict) - assert set(hooks.keys()) == { + assert isinstance(hooks, DehydrationHooks) + assert set(hooks.exact_types.keys()) == { date, datetime, time, timedelta, Date, DateTime, Duration, Time, - CartesianPoint, Point, WGS84Point + CartesianPoint, Point, WGS84Point, + np.datetime64, np.timedelta64, + pd.Timestamp, pd.Timedelta, type(pd.NaT) } + assert not hooks.subtypes def test_scope_get_graph(self, hydration_scope): graph = hydration_scope.get_graph() diff --git a/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py 
b/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py index 6486cea52..05c190457 100644 --- a/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py +++ b/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py @@ -34,40 +34,49 @@ class TestSpatialDehydration(HydrationHandlerTestBase): def hydration_handler(self): return HydrationHandler() - def test_cartesian_2d(self, hydration_scope): + @pytest.fixture + def transformer(self, hydration_scope): + def transformer(value): + transformer_ = \ + hydration_scope.dehydration_hooks.get_transformer(value) + assert callable(transformer_) + return transformer_(value) + return transformer + + def test_cartesian_2d(self, transformer): point = CartesianPoint((1, 3.1)) - struct = hydration_scope.dehydration_hooks[type(point)](point) + struct = transformer(point) assert struct == Structure(b"X", 7203, 1.0, 3.1) assert all(isinstance(f, float) for f in struct.fields[1:]) - def test_cartesian_3d(self, hydration_scope): + def test_cartesian_3d(self, transformer): point = CartesianPoint((1, -2, 3.1)) - struct = hydration_scope.dehydration_hooks[type(point)](point) + struct = transformer(point) assert struct == Structure(b"Y", 9157, 1.0, -2.0, 3.1) assert all(isinstance(f, float) for f in struct.fields[1:]) - def test_wgs84_2d(self, hydration_scope): + def test_wgs84_2d(self, transformer): point = WGS84Point((1, 3.1)) - struct = hydration_scope.dehydration_hooks[type(point)](point) + struct = transformer(point) assert struct == Structure(b"X", 4326, 1.0, 3.1) assert all(isinstance(f, float) for f in struct.fields[1:]) - def test_wgs84_3d(self, hydration_scope): + def test_wgs84_3d(self, transformer): point = WGS84Point((1, -2, 3.1)) - struct = hydration_scope.dehydration_hooks[type(point)](point) + struct = transformer(point) assert struct == Structure(b"Y", 4979, 1.0, -2.0, 3.1) assert all(isinstance(f, float) for f in struct.fields[1:]) - def test_custom_point_2d(self, hydration_scope): + def 
test_custom_point_2d(self, transformer): point = Point((1, 3.1)) point.srid = 12345 - struct = hydration_scope.dehydration_hooks[type(point)](point) + struct = transformer(point) assert struct == Structure(b"X", 12345, 1.0, 3.1) assert all(isinstance(f, float) for f in struct.fields[1:]) - def test_custom_point_3d(self, hydration_scope): + def test_custom_point_3d(self, transformer): point = Point((1, -2, 3.1)) point.srid = 12345 - struct = hydration_scope.dehydration_hooks[type(point)](point) + struct = transformer(point) assert struct == Structure(b"Y", 12345, 1.0, -2.0, 3.1) assert all(isinstance(f, float) for f in struct.fields[1:]) diff --git a/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py index 078fc6e7f..c783cefed 100644 --- a/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py +++ b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py @@ -18,15 +18,21 @@ import datetime +import numpy as np +import pandas as pd import pytest import pytz from neo4j._codec.hydration.v1 import HydrationHandler from neo4j._codec.packstream import Structure from neo4j.time import ( + AVERAGE_SECONDS_IN_DAY, Date, DateTime, Duration, + MAX_INT64, + MIN_INT64, + NANO_SECONDS, Time, ) @@ -38,156 +44,220 @@ class TestTimeDehydration(HydrationHandlerTestBase): def hydration_handler(self): return HydrationHandler() - def test_date(self, hydration_scope): + @pytest.fixture + def transformer(self, hydration_scope): + def transformer(value): + transformer_ = \ + hydration_scope.dehydration_hooks.get_transformer(value) + assert callable(transformer_) + return transformer_(value) + return transformer + + @pytest.fixture + def assert_transforms(self, transformer): + def assert_(value, expected): + struct = transformer(value) + assert struct == expected + return assert_ + + def test_date(self, assert_transforms): date = Date(1991, 8, 24) - struct = 
hydration_scope.dehydration_hooks[type(date)](date) - assert struct == Structure(b"D", 7905) + assert_transforms(date, Structure(b"D", 7905)) - def test_native_date(self, hydration_scope): + def test_native_date(self, assert_transforms): date = datetime.date(1991, 8, 24) - struct = hydration_scope.dehydration_hooks[type(date)](date) - assert struct == Structure(b"D", 7905) + assert_transforms(date, Structure(b"D", 7905)) - def test_time(self, hydration_scope): + def test_time(self, assert_transforms): time = Time(1, 2, 3, 4, pytz.FixedOffset(60)) - struct = hydration_scope.dehydration_hooks[type(time)](time) - assert struct == Structure(b"T", 3723000000004, 3600) + assert_transforms(time, Structure(b"T", 3723000000004, 3600)) - def test_native_time(self, hydration_scope): + def test_native_time(self, assert_transforms): time = datetime.time(1, 2, 3, 4, pytz.FixedOffset(60)) - struct = hydration_scope.dehydration_hooks[type(time)](time) - assert struct == Structure(b"T", 3723000004000, 3600) + assert_transforms(time, Structure(b"T", 3723000004000, 3600)) - def test_local_time(self, hydration_scope): + def test_local_time(self, assert_transforms): time = Time(1, 2, 3, 4) - struct = hydration_scope.dehydration_hooks[type(time)](time) - assert struct == Structure(b"t", 3723000000004) + assert_transforms(time, Structure(b"t", 3723000000004)) - def test_local_native_time(self, hydration_scope): + def test_local_native_time(self, assert_transforms): time = datetime.time(1, 2, 3, 4) - struct = hydration_scope.dehydration_hooks[type(time)](time) - assert struct == Structure(b"t", 3723000004000) + assert_transforms(time, Structure(b"t", 3723000004000)) + + def test_local_date_time(self, assert_transforms): + dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862) + assert_transforms(dt, Structure(b"d", 1539344261, 474716862)) + + def test_native_local_date_time(self, assert_transforms): + dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716) + assert_transforms(dt, 
Structure(b"d", 1539344261, 474716000)) + + def test_numpy_local_date_time(self, assert_transforms): + dt = np.datetime64("2018-10-12T11:37:41.474716862") + assert_transforms(dt, Structure(b"d", 1539344261, 474716862)) + + def test_numpy_nat_local_date_time(self, assert_transforms): + dt = np.datetime64("NaT") + assert_transforms(dt, None) + + @pytest.mark.parametrize(("value", "error"), ( + (np.datetime64(10000 - 1970, "Y"), ValueError), + (np.datetime64("+10000-01-01"), ValueError), + (np.datetime64(-1970, "Y"), ValueError), + (np.datetime64("0000-12-31"), ValueError), - def test_date_time(self, hydration_scope): + )) + def test_numpy_invalid_local_date_time(self, value, error, transformer): + with pytest.raises(error): + transformer(value) + + def test_pandas_local_date_time(self, assert_transforms): + dt = pd.Timestamp("2018-10-12T11:37:41.474716862") + assert_transforms(dt, Structure(b"d", 1539344261, 474716862)) + + def test_pandas_nat_local_date_time(self, assert_transforms): + dt = pd.NaT + assert_transforms(dt, None) + + def test_date_time_fixed_offset(self, assert_transforms): dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862, pytz.FixedOffset(60)) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"F", 1539344261, 474716862, 3600) + assert_transforms(dt, Structure(b"F", 1539344261, 474716862, 3600)) - def test_native_date_time(self, hydration_scope): + def test_native_date_time_fixed_offset(self, assert_transforms): dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716, pytz.FixedOffset(60)) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"F", 1539344261, 474716000, 3600) + assert_transforms(dt, Structure(b"F", 1539344261, 474716000, 3600)) - def test_date_time_negative_offset(self, hydration_scope): + def test_pandas_date_time_fixed_offset(self, assert_transforms): + dt = pd.Timestamp("2018-10-12T11:37:41.474716862+0100") + assert_transforms(dt, Structure(b"F", 1539344261, 
474716862, 3600)) + + def test_date_time_fixed_negative_offset(self, assert_transforms): dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862, pytz.FixedOffset(-60)) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"F", 1539344261, 474716862, -3600) + assert_transforms(dt, Structure(b"F", 1539344261, 474716862, -3600)) - def test_native_date_time_negative_offset(self, hydration_scope): + def test_native_date_time_fixed_negative_offset(self, assert_transforms): dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716, pytz.FixedOffset(-60)) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"F", 1539344261, 474716000, -3600) + assert_transforms(dt, Structure(b"F", 1539344261, 474716000, -3600)) + + def test_pandas_date_time_fixed_negative_offset(self, assert_transforms): + dt = pd.Timestamp("2018-10-12T11:37:41.474716862-0100") + assert_transforms(dt, Structure(b"F", 1539344261, 474716862, -3600)) - def test_date_time_zone_id(self, hydration_scope): + def test_date_time_zone_id(self, assert_transforms): dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862, pytz.timezone("Europe/Stockholm")) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"f", 1539344261, 474716862, - "Europe/Stockholm") + assert_transforms( + dt, + Structure(b"f", 1539344261, 474716862, "Europe/Stockholm") + ) - def test_native_date_time_zone_id(self, hydration_scope): + def test_native_date_time_zone_id(self, assert_transforms): dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716, pytz.timezone("Europe/Stockholm")) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"f", 1539344261, 474716000, - "Europe/Stockholm") - - def test_local_date_time(self, hydration_scope): - dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"d", 1539344261, 474716862) + 
assert_transforms( + dt, + Structure(b"f", 1539344261, 474716000, "Europe/Stockholm") + ) - def test_native_local_date_time(self, hydration_scope): - dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"d", 1539344261, 474716000) + def test_pandas_date_time_zone_id(self, assert_transforms): + dt = pd.Timestamp("2018-10-12T11:37:41.474716862+0200", + tz="Europe/Stockholm") + assert_transforms( + dt, + Structure(b"f", 1539344261, 474716862, "Europe/Stockholm") + ) - def test_duration(self, hydration_scope): + def test_duration(self, assert_transforms): duration = Duration(months=1, days=2, seconds=3, nanoseconds=4) - struct = hydration_scope.dehydration_hooks[type(duration)](duration) - assert struct == Structure(b"E", 1, 2, 3, 4) + assert_transforms(duration, Structure(b"E", 1, 2, 3, 4)) - def test_native_duration(self, hydration_scope): + def test_native_duration(self, assert_transforms): duration = datetime.timedelta(days=1, seconds=2, microseconds=3) - struct = hydration_scope.dehydration_hooks[type(duration)](duration) - assert struct == Structure(b"E", 0, 1, 2, 3000) + assert_transforms(duration, Structure(b"E", 0, 1, 2, 3000)) - def test_duration_mixed_sign(self, hydration_scope): + def test_duration_mixed_sign(self, assert_transforms): duration = Duration(months=1, days=-2, seconds=3, nanoseconds=4) - struct = hydration_scope.dehydration_hooks[type(duration)](duration) - assert struct == Structure(b"E", 1, -2, 3, 4) + assert_transforms(duration, Structure(b"E", 1, -2, 3, 4)) - def test_native_duration_mixed_sign(self, hydration_scope): + def test_native_duration_mixed_sign(self, assert_transforms): duration = datetime.timedelta(days=-1, seconds=2, microseconds=3) - struct = hydration_scope.dehydration_hooks[type(duration)](duration) - assert struct == Structure(b"E", 0, -1, 2, 3000) - - -class TestUTCPatchedTimeDehydration(TestTimeDehydration): - @pytest.fixture - 
def hydration_handler(self): - handler = HydrationHandler() - handler.patch_utc() - return handler - - def test_date_time(self, hydration_scope): - from ..v2.test_temporal_dehydration import ( - TestTimeDehydration as TestTimeDehydrationV2, - ) - TestTimeDehydrationV2().test_date_time( - hydration_scope - ) - - def test_native_date_time(self, hydration_scope): - from ..v2.test_temporal_dehydration import ( - TestTimeDehydration as TestTimeDehydrationV2, - ) - TestTimeDehydrationV2().test_native_date_time( - hydration_scope - ) - - def test_date_time_negative_offset(self, hydration_scope): - from ..v2.test_temporal_dehydration import ( - TestTimeDehydration as TestTimeDehydrationV2, - ) - TestTimeDehydrationV2().test_date_time_negative_offset( - hydration_scope - ) - - def test_native_date_time_negative_offset(self, hydration_scope): - from ..v2.test_temporal_dehydration import ( - TestTimeDehydration as TestTimeDehydrationV2, - ) - TestTimeDehydrationV2().test_native_date_time_negative_offset( - hydration_scope - ) - - def test_date_time_zone_id(self, hydration_scope): - from ..v2.test_temporal_dehydration import ( - TestTimeDehydration as TestTimeDehydrationV2, - ) - TestTimeDehydrationV2().test_date_time_zone_id( - hydration_scope - ) - - def test_native_date_time_zone_id(self, hydration_scope): - from ..v2.test_temporal_dehydration import ( - TestTimeDehydration as TestTimeDehydrationV2, + assert_transforms(duration, Structure(b"E", 0, -1, 2, 3000)) + + @pytest.mark.parametrize( + ("value", "expected_fields"), + ( + (np.timedelta64(1, "Y"), (12, 0, 0, 0)), + (np.timedelta64(1, "M"), (1, 0, 0, 0)), + (np.timedelta64(1, "D"), (0, 1, 0, 0)), + (np.timedelta64(1, "h"), (0, 0, 3600, 0)), + (np.timedelta64(1, "m"), (0, 0, 60, 0)), + (np.timedelta64(1, "s"), (0, 0, 1, 0)), + (np.timedelta64(MAX_INT64, "s"), (0, 0, MAX_INT64, 0)), + (np.timedelta64(1, "ms"), (0, 0, 0, 1000000)), + (np.timedelta64(1, "us"), (0, 0, 0, 1000)), + (np.timedelta64(1, "ns"), (0, 0, 0, 1)), + 
(np.timedelta64(NANO_SECONDS, "ns"), (0, 0, 1, 0)), + (np.timedelta64(NANO_SECONDS + 1, "ns"), (0, 0, 1, 1)), + (np.timedelta64(1000, "ps"), (0, 0, 0, 1)), + (np.timedelta64(1, "ps"), (0, 0, 0, 0)), + (np.timedelta64(1000000, "fs"), (0, 0, 0, 1)), + (np.timedelta64(1, "fs"), (0, 0, 0, 0)), + (np.timedelta64(1000000000, "as"), (0, 0, 0, 1)), + (np.timedelta64(1, "as"), (0, 0, 0, 0)), + (np.timedelta64(-1, "Y"), (-12, 0, 0, 0)), + (np.timedelta64(-1, "M"), (-1, 0, 0, 0)), + (np.timedelta64(-1, "D"), (0, -1, 0, 0)), + (np.timedelta64(-1, "h"), (0, 0, -3600, 0)), + (np.timedelta64(-1, "m"), (0, 0, -60, 0)), + (np.timedelta64(-1, "s"), (0, 0, -1, 0)), + # numpy uses MIN_INT64 to encode NaT + (np.timedelta64(MIN_INT64 + 1, "s"), (0, 0, MIN_INT64 + 1, 0)), + (np.timedelta64(-1, "ms"), (0, 0, 0, -1000000)), + (np.timedelta64(-1, "us"), (0, 0, 0, -1000)), + (np.timedelta64(-1, "ns"), (0, 0, 0, -1)), + (np.timedelta64(-NANO_SECONDS, "ns"), (0, 0, -1, 0)), + (np.timedelta64(-NANO_SECONDS - 1, "ns"), (0, 0, -1, -1)), + (np.timedelta64(-1000, "ps"), (0, 0, 0, -1)), + (np.timedelta64(-1, "ps"), (0, 0, 0, -1)), + (np.timedelta64(-1000000, "fs"), (0, 0, 0, -1)), + (np.timedelta64(-1, "fs"), (0, 0, 0, -1)), + (np.timedelta64(-1000000000, "as"), (0, 0, 0, -1)), + (np.timedelta64(-1, "as"), (0, 0, 0, -1)), ) - TestTimeDehydrationV2().test_native_date_time_zone_id( - hydration_scope + ) + def test_numpy_duration(self, value, expected_fields, assert_transforms): + assert_transforms(value, Structure(b"E", *expected_fields)) + + def test_numpy_nat_duration(self, assert_transforms): + duration = np.timedelta64("NaT") + assert_transforms(duration, None) + + @pytest.mark.parametrize(("value", "error"), ( + (np.timedelta64((MAX_INT64 // 60) + 1, "m"), ValueError), + (np.timedelta64((MIN_INT64 // 60), "m"), ValueError), + + )) + def test_numpy_invalid_durations(self, value, error, transformer): + with pytest.raises(error): + transformer(value) + + @pytest.mark.parametrize( + ("value", 
"expected_fields"), + ( + ( + pd.Timedelta(days=1, seconds=2, microseconds=3, nanoseconds=4), + (0, 0, AVERAGE_SECONDS_IN_DAY + 2, 3004) + ), + ( + pd.Timedelta(days=-1, seconds=2, microseconds=3, + nanoseconds=4), + (0, 0, -AVERAGE_SECONDS_IN_DAY + 2 + 1, -NANO_SECONDS + 3004) + ) ) + ) + def test_pandas_duration(self, value, expected_fields, assert_transforms): + assert_transforms(value, Structure(b"E", *expected_fields)) diff --git a/tests/unit/common/codec/hydration/v1/test_temporal_dehydration_utc_patch.py b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration_utc_patch.py new file mode 100644 index 000000000..66ba4b0f7 --- /dev/null +++ b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration_utc_patch.py @@ -0,0 +1,61 @@ +# Copyright (c) "Neo4j" +# Neo4j Sweden AB [https://neo4j.com] +# +# This file is part of Neo4j. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pytest + +from ..v2.test_temporal_dehydration import ( + TestTimeDehydration as _TestTimeDehydrationV2, +) +from .test_temporal_dehydration import ( + HydrationHandler, # testing the same hydration handler +) +from .test_temporal_dehydration import ( + TestTimeDehydration as _TestTimeDehydrationV1, +) + + +class UTCPatchedTimeDehydrationMeta(type): + def __new__(mcs, name, bases, attrs): + for test_func in ( + "test_date_time_fixed_offset", + "test_native_date_time_fixed_offset", + "test_pandas_date_time_fixed_offset", + "test_date_time_fixed_negative_offset", + "test_native_date_time_fixed_negative_offset", + "test_pandas_date_time_fixed_negative_offset", + "test_date_time_zone_id", + "test_native_date_time_zone_id", + "test_pandas_date_time_zone_id", + ): + if not hasattr(_TestTimeDehydrationV2, test_func): + continue + attrs[test_func] = getattr(_TestTimeDehydrationV2, test_func) + + return super(UTCPatchedTimeDehydrationMeta, mcs).__new__( + mcs, name, bases, attrs + ) + + +class TestUTCPatchedTimeDehydration( + _TestTimeDehydrationV1, metaclass=UTCPatchedTimeDehydrationMeta +): + @pytest.fixture + def hydration_handler(self): + handler = HydrationHandler() + handler.patch_utc() + return handler diff --git a/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py b/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py index 97074e3c7..7cad75d57 100644 --- a/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py +++ b/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py @@ -18,6 +18,7 @@ import datetime +import pandas as pd import pytest import pytz @@ -35,42 +36,82 @@ class TestTimeDehydration(_TestTemporalDehydrationV1): def hydration_handler(self): return HydrationHandler() - def test_date_time(self, hydration_scope): + def test_date_time_fixed_offset(self, assert_transforms): dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862, pytz.FixedOffset(60)) - struct = 
hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"I", 1539340661, 474716862, 3600) + assert_transforms( + dt, + Structure(b"I", 1539340661, 474716862, 3600) + ) - def test_native_date_time(self, hydration_scope): + def test_native_date_time_fixed_offset(self, assert_transforms): dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716, pytz.FixedOffset(60)) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"I", 1539340661, 474716000, 3600) + assert_transforms( + dt, + Structure(b"I", 1539340661, 474716000, 3600) + ) - def test_date_time_negative_offset(self, hydration_scope): + def test_pandas_date_time_fixed_offset(self, assert_transforms): + dt = pd.Timestamp("2018-10-12T11:37:41.474716862+0100") + assert_transforms(dt, Structure(b"I", 1539340661, 474716862, 3600)) + + def test_date_time_fixed_negative_offset(self, assert_transforms): dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862, pytz.FixedOffset(-60)) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"I", 1539347861, 474716862, -3600) + assert_transforms( + dt, + Structure(b"I", 1539347861, 474716862, -3600) + ) - def test_native_date_time_negative_offset(self, hydration_scope): + def test_native_date_time_fixed_negative_offset(self, assert_transforms): dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716, pytz.FixedOffset(-60)) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"I", 1539347861, 474716000, -3600) + assert_transforms( + dt, + Structure(b"I", 1539347861, 474716000, -3600) + ) + + def test_pandas_date_time_fixed_negative_offset(self, assert_transforms): + dt = pd.Timestamp("2018-10-12T11:37:41.474716862-0100") + assert_transforms(dt, Structure(b"I", 1539347861, 474716862, -3600)) - def test_date_time_zone_id(self, hydration_scope): + def test_date_time_zone_id(self, assert_transforms): dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862) dt = 
pytz.timezone("Europe/Stockholm").localize(dt) # offset should be UTC+2 (7200 seconds) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"i", 1539337061, 474716862, - "Europe/Stockholm") + assert_transforms( + dt, + Structure(b"i", 1539337061, 474716862, "Europe/Stockholm") + ) - def test_native_date_time_zone_id(self, hydration_scope): + def test_native_date_time_zone_id(self, assert_transforms): dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716) dt = pytz.timezone("Europe/Stockholm").localize(dt) # offset should be UTC+2 (7200 seconds) - struct = hydration_scope.dehydration_hooks[type(dt)](dt) - assert struct == Structure(b"i", 1539337061, 474716000, - "Europe/Stockholm") + assert_transforms( + dt, + Structure(b"i", 1539337061, 474716000, "Europe/Stockholm") + ) + + @pytest.mark.parametrize(("dt", "fields"), ( + ( + pd.Timestamp("2018-10-12T11:37:41.474716862+0200", + tz="Europe/Stockholm"), + (1539337061, 474716862, "Europe/Stockholm"), + ), + ( + # 1972-10-29 02:00:01.001000001+0100 pre DST change + pd.Timestamp((1032 * 24 + 2) * 3600 * 1000000000 + 1001000001, + tz="Europe/London"), + ((1032 * 24 + 2) * 3600 + 1, 1000001, "Europe/London"), + ), + ( + # 1972-10-29 02:00:01.001000001+0000 post DST change + pd.Timestamp((1032 * 24 + 1) * 3600 * 1000000000 + 1001000001, + tz="Europe/London"), + ((1032 * 24 + 1) * 3600 + 1, 1000001, "Europe/London"), + ) + )) + def test_pandas_date_time_zone_id(self, dt, fields, assert_transforms): + assert_transforms(dt, Structure(b"i", *fields)) diff --git a/tests/unit/common/codec/packstream/v1/test_packstream.py b/tests/unit/common/codec/packstream/v1/test_packstream.py index 14f8fcfb5..7e5bd4937 100644 --- a/tests/unit/common/codec/packstream/v1/test_packstream.py +++ b/tests/unit/common/codec/packstream/v1/test_packstream.py @@ -18,9 +18,14 @@ import struct from io import BytesIO -from math import pi +from math import ( + isnan, + pi, +) from uuid import uuid4 +import numpy as np 
+import pandas as pd import pytest from neo4j._codec.packstream import Structure @@ -36,227 +41,453 @@ not_ascii = "♥O◘♦♥O◘♦" -class TestPackStream: - @pytest.fixture - def packer_with_buffer(self): - packable_buffer = Packer.new_packable_buffer() - return Packer(packable_buffer), packable_buffer +@pytest.fixture +def packer_with_buffer(): + packable_buffer = Packer.new_packable_buffer() + return Packer(packable_buffer), packable_buffer - @pytest.fixture - def unpacker_with_buffer(self): - unpackable_buffer = Unpacker.new_unpackable_buffer() - return Unpacker(unpackable_buffer), unpackable_buffer - def test_packable_buffer(self, packer_with_buffer): - packer, packable_buffer = packer_with_buffer - assert isinstance(packable_buffer, PackableBuffer) - assert packable_buffer is packer.stream +@pytest.fixture +def unpacker_with_buffer(): + unpackable_buffer = Unpacker.new_unpackable_buffer() + return Unpacker(unpackable_buffer), unpackable_buffer - def test_unpackable_buffer(self, unpacker_with_buffer): - unpacker, unpackable_buffer = unpacker_with_buffer - assert isinstance(unpackable_buffer, UnpackableBuffer) - assert unpackable_buffer is unpacker.unpackable +def test_packable_buffer(packer_with_buffer): + packer, packable_buffer = packer_with_buffer + assert isinstance(packable_buffer, PackableBuffer) + assert packable_buffer is packer.stream + +def test_unpackable_buffer(unpacker_with_buffer): + unpacker, unpackable_buffer = unpacker_with_buffer + assert isinstance(unpackable_buffer, UnpackableBuffer) + assert unpackable_buffer is unpacker.unpackable + + +@pytest.fixture +def pack(packer_with_buffer): + packer, packable_buffer = packer_with_buffer + + def _pack(*values, dehydration_hooks=None): + for value in values: + packer.pack(value, dehydration_hooks=dehydration_hooks) + data = bytearray(packable_buffer.data) + packable_buffer.clear() + return data - @pytest.fixture - def pack(self, packer_with_buffer): + return _pack + + +_default_out_value = object() + + 
+@pytest.fixture +def assert_packable(packer_with_buffer, unpacker_with_buffer): + def _recursive_nan_equal(a, b): + if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): + return all(_recursive_nan_equal(x, y) for x, y in zip(a, b)) + elif isinstance(a, dict) and isinstance(b, dict): + return all(_recursive_nan_equal(a[k], b[k]) for k in a) + else: + return a == b or (isnan(a) and isnan(b)) + + def _assert(in_value, packed_value, out_value=_default_out_value): + if out_value is _default_out_value: + out_value = in_value + nonlocal packer_with_buffer, unpacker_with_buffer packer, packable_buffer = packer_with_buffer + unpacker, unpackable_buffer = unpacker_with_buffer + packable_buffer.clear() + unpackable_buffer.reset() + + packer.pack(in_value) + packed_data = packable_buffer.data + assert packed_data == packed_value + + unpackable_buffer.data = bytearray(packed_data) + unpackable_buffer.used = len(packed_data) + unpacked_data = unpacker.unpack() + assert _recursive_nan_equal(unpacked_data, out_value) + + return _assert + + +@pytest.fixture(params=(True, False)) +def np_float_overflow_as_error(request): + should_raise = request.param + if should_raise: + old_err = np.seterr(over="raise") + else: + old_err = np.seterr(over="ignore") + yield + np.seterr(**old_err) + + + +@pytest.fixture(params=( + int, + np.int8, np.int16, np.int32, np.int64, np.longlong, + np.uint8, np.uint16, np.uint32, np.uint64, np.ulonglong +)) +def int_type(request): + if issubclass(request.param, np.number): + def _int_type(value): + # this avoids deprecation warning from NEP50 and forces + # c-style wrapping of the value + return np.array(value).astype(request.param).item() + + return _int_type + else: + return request.param + + +@pytest.fixture(params=(float, + np.float16, np.float32, np.float64, np.longdouble)) +def float_type(request, np_float_overflow_as_error): + return request.param + + +@pytest.fixture(params=(bool, np.bool_)) +def bool_type(request): + return 
request.param + + +@pytest.fixture(params=(bytes, bytearray, np.bytes_)) +def bytes_type(request): + return request.param + - def _pack(*values, dehydration_hooks=None): - for value in values: - packer.pack(value, dehydration_hooks=dehydration_hooks) - data = bytearray(packable_buffer.data) - packable_buffer.clear() - return data +@pytest.fixture(params=(str, np.str_)) +def str_type(request): + return request.param - return _pack - @pytest.fixture - def assert_packable(self, packer_with_buffer, unpacker_with_buffer): - def _assert(value, packed_value): - nonlocal packer_with_buffer, unpacker_with_buffer - packer, packable_buffer = packer_with_buffer - unpacker, unpackable_buffer = unpacker_with_buffer - packable_buffer.clear() - unpackable_buffer.reset() +@pytest.fixture(params=(list, tuple, np.array, + pd.Series, pd.array, pd.arrays.SparseArray)) +def sequence_type(request): + if request.param is pd.Series: + def constructor(value): + if not value: + return pd.Series(dtype=object) + return pd.Series(value) - packer.pack(value) - packed_data = packable_buffer.data - assert packed_data == packed_value + return constructor + return request.param - unpackable_buffer.data = bytearray(packed_data) - unpackable_buffer.used = len(packed_data) - unpacked_data = unpacker.unpack() - assert unpacked_data == value - return _assert +class TestPackStream: + @pytest.mark.parametrize("value", (None, pd.NA)) + def test_none(self, value, assert_packable): + assert_packable(value, b"\xC0", None) - def test_none(self, assert_packable): - assert_packable(None, b"\xC0") + def test_boolean(self, bool_type, assert_packable): + assert_packable(bool_type(True), b"\xC3") + assert_packable(bool_type(False), b"\xC2") - def test_boolean(self, assert_packable): - assert_packable(True, b"\xC3") - assert_packable(False, b"\xC2") + @pytest.mark.parametrize("dtype", (bool, pd.BooleanDtype())) + def test_boolean_pandas_series(self, dtype, assert_packable): + value = [True, False] + value_series = 
pd.Series(value, dtype=dtype) + assert_packable(value_series, b"\x92\xC3\xC2", value) - def test_negative_tiny_int(self, assert_packable): + def test_negative_tiny_int(self, int_type, assert_packable): + for z in range(-16, 0): + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable + assert_packable(z_typed, bytes(bytearray([z + 0x100]))) + + @pytest.mark.parametrize("dtype", ( + int, pd.Int8Dtype(), pd.Int16Dtype(), pd.Int32Dtype(), pd.Int64Dtype(), + np.int8, np.int16, np.int32, np.int64, np.longlong, + )) + def test_negative_tiny_int_pandas_series(self, dtype, assert_packable): for z in range(-16, 0): - assert_packable(z, bytes(bytearray([z + 0x100]))) + z_typed = pd.Series(z, dtype=dtype) + assert_packable(z_typed, bytes(bytearray([0x91, z + 0x100])), [z]) - def test_positive_tiny_int(self, assert_packable): + def test_positive_tiny_int(self, int_type, assert_packable): for z in range(0, 128): - assert_packable(z, bytes(bytearray([z]))) + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable + assert_packable(z_typed, bytes(bytearray([z]))) - def test_negative_int8(self, assert_packable): + def test_negative_int8(self, int_type, assert_packable): for z in range(-128, -16): - assert_packable(z, bytes(bytearray([0xC8, z + 0x100]))) + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable + assert_packable(z_typed, bytes(bytearray([0xC8, z + 0x100]))) - def test_positive_int16(self, assert_packable): + def test_positive_int16(self, int_type, assert_packable): for z in range(128, 32768): + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable expected = b"\xC9" + struct.pack(">h", z) - assert_packable(z, expected) + assert_packable(z_typed, expected) - def test_negative_int16(self, assert_packable): + def test_negative_int16(self, int_type, assert_packable): for z in range(-32768, -128): + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable expected 
= b"\xC9" + struct.pack(">h", z) - assert_packable(z, expected) + assert_packable(z_typed, expected) - def test_positive_int32(self, assert_packable): + def test_positive_int32(self, int_type, assert_packable): for e in range(15, 31): z = 2 ** e + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable expected = b"\xCA" + struct.pack(">i", z) - assert_packable(z, expected) + assert_packable(z_typed, expected) - def test_negative_int32(self, assert_packable): + def test_negative_int32(self, int_type, assert_packable): for e in range(15, 31): z = -(2 ** e + 1) + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable expected = b"\xCA" + struct.pack(">i", z) - assert_packable(z, expected) + assert_packable(z_typed, expected) - def test_positive_int64(self, assert_packable): + def test_positive_int64(self, int_type, assert_packable): for e in range(31, 63): z = 2 ** e + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable expected = b"\xCB" + struct.pack(">q", z) - assert_packable(z, expected) + assert_packable(z_typed, expected) + + @pytest.mark.parametrize("dtype", ( + int, pd.Int64Dtype(), pd.UInt64Dtype(), + np.int64, np.longlong, np.uint64, np.ulonglong, + )) + def test_positive_int64_pandas_series(self, dtype, assert_packable): + for e in range(31, 63): + z = 2 ** e + z_typed = pd.Series(z, dtype=dtype) + expected = b"\x91\xCB" + struct.pack(">q", z) + assert_packable(z_typed, expected, [z]) - def test_negative_int64(self, assert_packable): + def test_negative_int64(self, int_type, assert_packable): for e in range(31, 63): z = -(2 ** e + 1) + z_typed = int_type(z) + if z != int(z_typed): + continue # not representable expected = b"\xCB" + struct.pack(">q", z) - assert_packable(z, expected) + assert_packable(z_typed, expected) - def test_integer_positive_overflow(self, pack, assert_packable): - with pytest.raises(OverflowError): - pack(2 ** 63 + 1) + @pytest.mark.parametrize("dtype", ( + int, 
pd.Int64Dtype(), np.int64, np.longlong, + )) + def test_negative_int64_pandas_series(self, dtype, assert_packable): + for e in range(31, 63): + z = -(2 ** e + 1) + z_typed = pd.Series(z, dtype=dtype) + expected = b"\x91\xCB" + struct.pack(">q", z) + assert_packable(z_typed, expected, [z]) - def test_integer_negative_overflow(self, pack, assert_packable): + def test_integer_positive_overflow(self, int_type, pack, assert_packable): with pytest.raises(OverflowError): - pack(-(2 ** 63) - 1) - - def test_zero_float64(self, assert_packable): - zero = 0.0 - expected = b"\xC1" + struct.pack(">d", zero) - assert_packable(zero, expected) - - def test_tau_float64(self, assert_packable): - tau = 2 * pi - expected = b"\xC1" + struct.pack(">d", tau) - assert_packable(tau, expected) - - def test_positive_float64(self, assert_packable): - for e in range(0, 100): - r = float(2 ** e) + 0.5 - expected = b"\xC1" + struct.pack(">d", r) - assert_packable(r, expected) - - def test_negative_float64(self, assert_packable): - for e in range(0, 100): - r = -(float(2 ** e) + 0.5) - expected = b"\xC1" + struct.pack(">d", r) - assert_packable(r, expected) + z = 2 ** 63 + 1 + z_typed = int_type(z) + if z != int(z_typed): + pytest.skip("not representable") + pack(z_typed) - def test_empty_bytes(self, assert_packable): - assert_packable(b"", b"\xCC\x00") - - def test_empty_bytearray(self, assert_packable): - assert_packable(bytearray(), b"\xCC\x00") - - def test_bytes_8(self, assert_packable): - assert_packable(bytearray(b"hello"), b"\xCC\x05hello") - - def test_bytes_16(self, assert_packable): + def test_integer_negative_overflow(self, int_type, pack, assert_packable): + with pytest.raises(OverflowError): + z = -(2 ** 63) - 1 + z_typed = int_type(z) + if z != int(z_typed): + pytest.skip("not representable") + pack(z_typed) + + def test_float(self, float_type, assert_packable): + for z in ( + 0.0, -0.0, pi, 2 * pi, float("inf"), float("-inf"), float("nan"), + *(float(2 ** e) + 0.5 for e in 
range(100)), + *(-float(2 ** e) + 0.5 for e in range(100)), + ): + print(z) + try: + z_typed = float_type(z) + except FloatingPointError: + continue # not representable + expected = b"\xC1" + struct.pack(">d", float(z_typed)) + assert_packable(z_typed, expected) + + @pytest.mark.parametrize("dtype", ( + float, pd.Float32Dtype(), pd.Float64Dtype(), + np.float16, np.float32, np.float64, np.longdouble, + )) + def test_float_pandas_series(self, dtype, np_float_overflow_as_error, + assert_packable): + for z in ( + 0.0, -0.0, pi, 2 * pi, float("inf"), float("-inf"), float("nan"), + *(float(2 ** e) + 0.5 for e in range(100)), + *(-float(2 ** e) + 0.5 for e in range(100)), + ): + try: + z_typed = pd.Series(z, dtype=dtype) + except FloatingPointError: + continue # not representable + if z_typed[0] is pd.NA: + expected_bytes = b"\x91\xC0" # encoded as NULL + expected_value = [None] + else: + expected_bytes = (b"\x91\xC1" + + struct.pack(">d", float(z_typed[0]))) + expected_value = [float(z_typed[0])] + assert_packable(z_typed, expected_bytes, expected_value) + + def test_empty_bytes(self, bytes_type, assert_packable): + b = bytes_type(b"") + assert_packable(b, b"\xCC\x00") + + def test_bytes_8(self, bytes_type, assert_packable): + b = bytes_type(b"hello") + assert_packable(b, b"\xCC\x05hello") + + def test_bytes_16(self, bytes_type, assert_packable): b = bytearray(40000) - assert_packable(b, b"\xCD\x9C\x40" + b) + b_typed = bytes_type(b) + assert_packable(b_typed, b"\xCD\x9C\x40" + b) - def test_bytes_32(self, assert_packable): + def test_bytes_32(self, bytes_type, assert_packable): b = bytearray(80000) - assert_packable(b, b"\xCE\x00\x01\x38\x80" + b) - - def test_bytearray_size_overflow(self, assert_packable): + b_typed = bytes_type(b) + assert_packable(b_typed, b"\xCE\x00\x01\x38\x80" + b) + + def test_bytes_pandas_series(self, assert_packable): + for b, header in ( + (b"", b"\xCC\x00"), + (b"hello", b"\xCC\x05"), + (bytearray(40000), b"\xCD\x9C\x40"), + 
(bytearray(80000), b"\xCE\x00\x01\x38\x80"), + ): + b_typed = pd.Series([b]) + assert_packable(b_typed, b"\x91" + header + b, [b]) + + def test_bytearray_size_overflow(self, bytes_type, assert_packable): stream_out = BytesIO() packer = Packer(stream_out) with pytest.raises(OverflowError): - packer.pack_bytes_header(2 ** 32) + packer._pack_bytes_header(2 ** 32) - def test_empty_string(self, assert_packable): - assert_packable(u"", b"\x80") + def test_empty_string(self, str_type, assert_packable): + assert_packable(str_type(""), b"\x80") - def test_tiny_strings(self, assert_packable): + def test_tiny_strings(self, str_type, assert_packable): for size in range(0x10): - assert_packable(u"A" * size, bytes(bytearray([0x80 + size]) + (b"A" * size))) + s = str_type("A" * size) + assert_packable(s, bytes(bytearray([0x80 + size]) + (b"A" * size))) - def test_string_8(self, assert_packable): - t = u"A" * 40 + def test_string_8(self, str_type, assert_packable): + t = "A" * 40 b = t.encode("utf-8") - assert_packable(t, b"\xD0\x28" + b) + t_typed = str_type(t) + assert_packable(t_typed, b"\xD0\x28" + b) - def test_string_16(self, assert_packable): - t = u"A" * 40000 + def test_string_16(self, str_type, assert_packable): + t = "A" * 40000 b = t.encode("utf-8") - assert_packable(t, b"\xD1\x9C\x40" + b) + t_typed = str_type(t) + assert_packable(t_typed, b"\xD1\x9C\x40" + b) - def test_string_32(self, assert_packable): - t = u"A" * 80000 + def test_string_32(self, str_type, assert_packable): + t = "A" * 80000 b = t.encode("utf-8") - assert_packable(t, b"\xD2\x00\x01\x38\x80" + b) + t_typed = str_type(t) + assert_packable(t_typed, b"\xD2\x00\x01\x38\x80" + b) - def test_unicode_string(self, assert_packable): - t = u"héllö" + def test_unicode_string(self, str_type, assert_packable): + t = "héllö" b = t.encode("utf-8") - assert_packable(t, bytes(bytearray([0x80 + len(b)])) + b) + t_typed = str_type(t) + assert_packable(t_typed, bytes(bytearray([0x80 + len(b)])) + b) + + 
@pytest.mark.parametrize("dtype", ( + str, np.str_, pd.StringDtype("python"), pd.StringDtype("pyarrow"), + )) + def test_string_pandas_series(self, dtype, assert_packable): + values = ( + ("", b"\x80"), + ("A" * 40, b"\xD0\x28"), + ("A" * 40000, b"\xD1\x9C\x40"), + ("A" * 80000, b"\xD2\x00\x01\x38\x80"), + ) + for t, header in values: + t_typed = pd.Series([t], dtype=dtype) + assert_packable(t_typed, b"\x91" + header + t.encode("utf-8"), [t]) + + t_typed = pd.Series([t for t, _ in values], dtype=dtype) + expected = ( + bytes([0x90 + len(values)]) + + b"".join(header + t.encode("utf-8") for t, header in values) + ) + assert_packable(t_typed, expected, [t for t, _ in values]) def test_string_size_overflow(self): stream_out = BytesIO() packer = Packer(stream_out) with pytest.raises(OverflowError): - packer.pack_string_header(2 ** 32) + packer._pack_string_header(2 ** 32) - def test_empty_list(self, assert_packable): - assert_packable([], b"\x90") + def test_empty_list(self, sequence_type, assert_packable): + l = [] + l_typed = sequence_type(l) + assert_packable(l_typed, b"\x90", l) - def test_tiny_lists(self, assert_packable): + def test_tiny_lists(self, sequence_type, assert_packable): for size in range(0x10): + l = [1] * size + l_typed = sequence_type(l) data_out = bytearray([0x90 + size]) + bytearray([1] * size) - assert_packable([1] * size, bytes(data_out)) + assert_packable(l_typed, bytes(data_out), l) - def test_list_8(self, assert_packable): + def test_list_8(self, sequence_type, assert_packable): l = [1] * 40 - assert_packable(l, b"\xD4\x28" + (b"\x01" * 40)) + l_typed = sequence_type(l) + assert_packable(l_typed, b"\xD4\x28" + (b"\x01" * 40), l) - def test_list_16(self, assert_packable): + def test_list_16(self, sequence_type, assert_packable): l = [1] * 40000 - assert_packable(l, b"\xD5\x9C\x40" + (b"\x01" * 40000)) + l_typed = sequence_type(l) + assert_packable(l_typed, b"\xD5\x9C\x40" + (b"\x01" * 40000), l) - def test_list_32(self, assert_packable): + def 
test_list_32(self, sequence_type, assert_packable): l = [1] * 80000 - assert_packable(l, b"\xD6\x00\x01\x38\x80" + (b"\x01" * 80000)) - - def test_nested_lists(self, assert_packable): - assert_packable([[[]]], b"\x91\x91\x90") + l_typed = sequence_type(l) + assert_packable(l_typed, b"\xD6\x00\x01\x38\x80" + (b"\x01" * 80000), l) + + def test_nested_lists(self, sequence_type, assert_packable): + l = [[[]]] + l_typed = sequence_type([sequence_type([sequence_type([])])]) + assert_packable(l_typed, b"\x91\x91\x90", l) + + @pytest.mark.parametrize("as_series", (True, False)) + def test_list_pandas_categorical(self, as_series, pack, assert_packable): + l = ["cat", "dog", "cat", "cat", "dog", "horse"] + l_typed = pd.Categorical(l) + if as_series: + l_typed = pd.Series(l_typed) + b = b"".join([ + b"\x96", + *(pack(e) for e in l) + ]) + assert_packable(l_typed, b, l) def test_list_size_overflow(self): stream_out = BytesIO() packer = Packer(stream_out) with pytest.raises(OverflowError): - packer.pack_list_header(2 ** 32) + packer._pack_list_header(2 ** 32) def test_empty_map(self, assert_packable): assert_packable({}, b"\xA0") @@ -285,14 +516,30 @@ def test_map_32(self, pack, assert_packable): b = b"".join(pack(u"A%s" % i, 1) for i in range(80000)) assert_packable(d, b"\xDA\x00\x01\x38\x80" + b) + def test_empty_dataframe_maps(self, assert_packable): + df = pd.DataFrame() + assert_packable(df, b"\xA0", {}) + + @pytest.mark.parametrize("size", range(0x10)) + def test_tiny_dataframes_maps(self, assert_packable, size): + data_in = dict() + data_out = bytearray([0xA0 + size]) + for el in range(1, size + 1): + data_in[chr(64 + el)] = [el] + data_out += bytearray([0x81, 64 + el, 0x91, el]) + data_in_typed = pd.DataFrame(data_in) + assert_packable(data_in_typed, bytes(data_out), data_in) + def test_map_size_overflow(self): stream_out = BytesIO() packer = Packer(stream_out) with pytest.raises(OverflowError): - packer.pack_map_header(2 ** 32) + packer._pack_map_header(2 ** 32) 
@pytest.mark.parametrize(("map_", "exc_type"), ( ({1: "1"}, TypeError), + (pd.DataFrame({1: ["1"]}), TypeError), + (pd.DataFrame({(1, 2): ["1"]}), TypeError), ({"x": {1: 'eins', 2: 'zwei', 3: 'drei'}}, TypeError), ({"x": {(1, 2): '1+2i', (2, 0): '2'}}, TypeError), )) @@ -300,7 +547,7 @@ def test_map_key_type(self, packer_with_buffer, map_, exc_type): # maps must have string keys packer, packable_buffer = packer_with_buffer with pytest.raises(exc_type, match="strings"): - packer.pack(map_) + packer._pack(map_) def test_illegal_signature(self, assert_packable): with pytest.raises(ValueError):