diff --git a/docs/source/_images/core_type_mappings.png b/docs/source/_images/core_type_mappings.png
index 180e1fb26..e69de29bb 100644
Binary files a/docs/source/_images/core_type_mappings.png and b/docs/source/_images/core_type_mappings.png differ
diff --git a/docs/source/_images/core_type_mappings.svg b/docs/source/_images/core_type_mappings.svg
index 84e06d927..5b29bba3b 100644
--- a/docs/source/_images/core_type_mappings.svg
+++ b/docs/source/_images/core_type_mappings.svg
@@ -1 +1,3484 @@
-
\ No newline at end of file
+
+
diff --git a/docs/source/api.rst b/docs/source/api.rst
index e522425fe..478f54c81 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -1091,7 +1091,7 @@ The core types with their general mappings are listed below:
+------------------------+---------------------------------------------------------------------------------------------------------------------------+
| String | :class:`str` |
+------------------------+---------------------------------------------------------------------------------------------------------------------------+
-| Bytes :sup:`[1]` | :class:`bytearray` |
+| Bytes :sup:`[1]` | :class:`bytes` |
+------------------------+---------------------------------------------------------------------------------------------------------------------------+
| List | :class:`list` |
+------------------------+---------------------------------------------------------------------------------------------------------------------------+
@@ -1110,6 +1110,57 @@ The diagram below illustrates the actual mappings between the various layers, fr
:target: ./_images/core_type_mappings.svg
+Extended Data Types
+===================
+
+The driver supports serializing more types (as input parameters).
+However, they will have to be mapped to the existing Bolt types (see above) when they are sent to the server.
+This means the driver will never return these types in results.
+
+When in doubt, you can test the type conversion like so::
+
+ import neo4j
+
+
+ with neo4j.GraphDatabase.driver(URI, auth=AUTH) as driver:
+ with driver.session() as session:
+ type_in = ("foo", "bar")
+ result = session.run("RETURN $x", x=type_in)
+ type_out = result.single()[0]
+ print(type(type_out))
+ print(type_out)
+
+Which in this case would yield::
+
+    <class 'list'>
+ ['foo', 'bar']
+
+
++-----------------------------------+---------------------------------+---------------------------------------+
+| Parameter Type | Bolt Type | Result Type |
++===================================+=================================+=======================================+
+| :class:`tuple` | List | :class:`list` |
++-----------------------------------+---------------------------------+---------------------------------------+
+| :class:`bytearray` | Bytes | :class:`bytes` |
++-----------------------------------+---------------------------------+---------------------------------------+
+| numpy\ :sup:`[2]` ``ndarray`` | (nested) List | (nested) :class:`list` |
++-----------------------------------+---------------------------------+---------------------------------------+
+| pandas\ :sup:`[3]` ``DataFrame`` | Map[str, List[_]] :sup:`[4]` | :class:`dict` |
++-----------------------------------+---------------------------------+---------------------------------------+
+| pandas ``Series`` | List | :class:`list` |
++-----------------------------------+---------------------------------+---------------------------------------+
+| pandas ``Array`` | List | :class:`list` |
++-----------------------------------+---------------------------------+---------------------------------------+
+
+.. Note::
+
+ 2. ``void`` and ``complexfloating`` typed numpy ``ndarray``\s are not supported.
+ 3. ``Period``, ``Interval``, and ``pyarrow`` pandas types are not supported.
+ 4. A pandas ``DataFrame`` will be serialized as Map with the column names mapping to the column values (as Lists).
+ Just like with ``dict`` objects, the column names need to be :class:`str` objects.
+
+
+
****************
Graph Data Types
****************
diff --git a/pyproject.toml b/pyproject.toml
index f16454683..08387ce8e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,10 +45,17 @@ dynamic = ["version", "readme"]
Homepage = "https://github.com/neo4j/neo4j-python-driver"
[project.optional-dependencies]
-pandas = ["pandas>=1.0.0"]
+numpy = ["numpy >= 1.7.0, < 2.0.0"]
+pandas = [
+ "pandas >= 1.1.0, < 2.0.0",
+ "numpy >= 1.7.0, < 2.0.0",
+]
[build-system]
-requires = ["setuptools~=65.6", "tomlkit~=0.11.6"]
+requires = [
+ "setuptools~=65.6",
+ "tomlkit~=0.11.6",
+]
build-backend = "setuptools.build_meta"
# still in beta
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 04528c610..b070002c1 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -18,7 +18,9 @@ tomlkit~=0.11.6
# needed for running tests
coverage[toml]>=5.5
mock>=4.0.3
+numpy>=1.7.0
pandas>=1.0.0
+pyarrow>=1.0.0
pytest>=6.2.5
pytest-asyncio>=0.16.0
pytest-benchmark>=3.4.1
diff --git a/src/neo4j/_codec/hydration/__init__.py b/src/neo4j/_codec/hydration/__init__.py
index 1df9aaaa0..42e44f98a 100644
--- a/src/neo4j/_codec/hydration/__init__.py
+++ b/src/neo4j/_codec/hydration/__init__.py
@@ -17,6 +17,7 @@
from ._common import (
BrokenHydrationObject,
+ DehydrationHooks,
HydrationScope,
)
from ._interface import HydrationHandlerABC
@@ -24,6 +25,7 @@
__all__ = [
"BrokenHydrationObject",
+ "DehydrationHooks",
"HydrationHandlerABC",
"HydrationScope",
]
diff --git a/src/neo4j/_codec/hydration/_common.py b/src/neo4j/_codec/hydration/_common.py
index 2632f2117..f278dc77f 100644
--- a/src/neo4j/_codec/hydration/_common.py
+++ b/src/neo4j/_codec/hydration/_common.py
@@ -16,12 +16,51 @@
# limitations under the License.
+import typing as t
from copy import copy
+from dataclasses import dataclass
from ...graph import Graph
from ..packstream import Structure
+@dataclass
+class DehydrationHooks:
+ exact_types: t.Dict[t.Type, t.Callable[[t.Any], t.Any]]
+ subtypes: t.Dict[t.Type, t.Callable[[t.Any], t.Any]]
+
+ def update(self, exact_types=None, subtypes=None):
+ exact_types = exact_types or {}
+ subtypes = subtypes or {}
+ self.exact_types.update(exact_types)
+ self.subtypes.update(subtypes)
+
+ def extend(self, exact_types=None, subtypes=None):
+ exact_types = exact_types or {}
+ subtypes = subtypes or {}
+ return DehydrationHooks(
+ exact_types={**self.exact_types, **exact_types},
+ subtypes={**self.subtypes, **subtypes},
+ )
+
+ def get_transformer(self, item):
+ type_ = type(item)
+ transformer = self.exact_types.get(type_)
+ if transformer is not None:
+ return transformer
+ transformer = next(
+ (
+ f
+ for super_type, f in self.subtypes.items()
+ if isinstance(item, super_type)
+ ),
+ None,
+ )
+ if transformer is not None:
+ return transformer
+ return None
+
+
class BrokenHydrationObject:
"""
Represents an object from the server, not understood by the driver.
@@ -68,7 +107,7 @@ def __init__(self, hydration_handler, graph_hydrator):
list: self._hydrate_list,
dict: self._hydrate_dict,
}
- self.dehydration_hooks = hydration_handler.dehydration_functions
+ self.dehydration_hooks = hydration_handler.dehydration_hooks
def _hydrate_structure(self, value):
f = self._struct_hydration_functions.get(value.tag)
diff --git a/src/neo4j/_codec/hydration/_interface/__init__.py b/src/neo4j/_codec/hydration/_interface/__init__.py
index 5092d5e0d..093b98597 100644
--- a/src/neo4j/_codec/hydration/_interface/__init__.py
+++ b/src/neo4j/_codec/hydration/_interface/__init__.py
@@ -18,11 +18,14 @@
import abc
+from .._common import DehydrationHooks
+
class HydrationHandlerABC(abc.ABC):
def __init__(self):
self.struct_hydration_functions = {}
- self.dehydration_functions = {}
+ self.dehydration_hooks = DehydrationHooks(exact_types={},
+ subtypes={})
@abc.abstractmethod
def new_hydration_scope(self):
diff --git a/src/neo4j/_codec/hydration/v1/hydration_handler.py b/src/neo4j/_codec/hydration/v1/hydration_handler.py
index 89839503f..b4bb09658 100644
--- a/src/neo4j/_codec/hydration/v1/hydration_handler.py
+++ b/src/neo4j/_codec/hydration/v1/hydration_handler.py
@@ -23,6 +23,10 @@
timedelta,
)
+from ...._optional_deps import (
+ np,
+ pd,
+)
from ....graph import (
Graph,
Node,
@@ -159,8 +163,7 @@ def __init__(self):
b"d": temporal.hydrate_datetime, # no time zone
b"E": temporal.hydrate_duration,
}
- self.dehydration_functions = {
- **self.dehydration_functions,
+ self.dehydration_hooks.update(exact_types={
Point: spatial.dehydrate_point,
CartesianPoint: spatial.dehydrate_point,
WGS84Point: spatial.dehydrate_point,
@@ -172,7 +175,19 @@ def __init__(self):
datetime: temporal.dehydrate_datetime,
Duration: temporal.dehydrate_duration,
timedelta: temporal.dehydrate_timedelta,
- }
+ })
+ if np is not None:
+ self.dehydration_hooks.update(exact_types={
+ np.datetime64: temporal.dehydrate_np_datetime,
+ np.timedelta64: temporal.dehydrate_np_timedelta,
+ })
+ if pd is not None:
+ self.dehydration_hooks.update(exact_types={
+ pd.Timestamp: temporal.dehydrate_pandas_datetime,
+ pd.Timedelta: temporal.dehydrate_pandas_timedelta,
+ type(pd.NaT): lambda _: None,
+ })
+
def patch_utc(self):
from ..v2 import temporal as temporal_v2
@@ -186,10 +201,18 @@ def patch_utc(self):
b"i": temporal_v2.hydrate_datetime,
})
- self.dehydration_functions.update({
+ self.dehydration_hooks.update(exact_types={
DateTime: temporal_v2.dehydrate_datetime,
datetime: temporal_v2.dehydrate_datetime,
})
+ if np is not None:
+ self.dehydration_hooks.update(exact_types={
+ np.datetime64: temporal_v2.dehydrate_np_datetime,
+ })
+ if pd is not None:
+ self.dehydration_hooks.update(exact_types={
+ pd.Timestamp: temporal_v2.dehydrate_pandas_datetime,
+ })
def new_hydration_scope(self):
self._created_scope = True
diff --git a/src/neo4j/_codec/hydration/v1/temporal.py b/src/neo4j/_codec/hydration/v1/temporal.py
index c36eda5d3..d47967551 100644
--- a/src/neo4j/_codec/hydration/v1/temporal.py
+++ b/src/neo4j/_codec/hydration/v1/temporal.py
@@ -22,10 +22,17 @@
timedelta,
)
+from ...._optional_deps import (
+ np,
+ pd,
+)
from ....time import (
Date,
DateTime,
Duration,
+ MAX_YEAR,
+ MIN_YEAR,
+ NANO_SECONDS,
Time,
)
from ...packstream import Structure
@@ -171,6 +178,50 @@ def seconds_and_nanoseconds(dt):
int(tz.utcoffset(value).total_seconds()))
+if np is not None:
+ def dehydrate_np_datetime(value):
+ """ Dehydrator for `numpy.datetime64` values.
+
+ :param value:
+ :type value: numpy.datetime64
+ :returns:
+ """
+ if np.isnat(value):
+ return None
+ year = value.astype("datetime64[Y]").astype(int) + 1970
+ if not 0 < year <= 9999:
+ # while we could encode years outside the range, they would fail
+ # when retrieved from the database.
+ raise ValueError(f"Year out of range ({MIN_YEAR:d}..{MAX_YEAR:d}) "
+ f"found {year}")
+ seconds = value.astype(np.dtype("datetime64[s]")).astype(int)
+ nanoseconds = (value.astype(np.dtype("datetime64[ns]")).astype(int)
+ % NANO_SECONDS)
+ return Structure(b"d", seconds, nanoseconds)
+
+
+if pd is not None:
+ def dehydrate_pandas_datetime(value):
+ """ Dehydrator for `pandas.Timestamp` values.
+
+ :param value:
+ :type value: pandas.Timestamp
+ :returns:
+ """
+ return dehydrate_datetime(
+ DateTime(
+ value.year,
+ value.month,
+ value.day,
+ value.hour,
+ value.minute,
+ value.second,
+ value.microsecond * 1000 + value.nanosecond,
+ value.tzinfo,
+ )
+ )
+
+
def hydrate_duration(months, days, seconds, nanoseconds):
""" Hydrator for `Duration` values.
@@ -205,3 +256,50 @@ def dehydrate_timedelta(value):
seconds = value.seconds
nanoseconds = 1000 * value.microseconds
return Structure(b"E", months, days, seconds, nanoseconds)
+
+
+if np is not None:
+ _NUMPY_DURATION_UNITS = {
+ "Y": "years",
+ "M": "months",
+ "W": "weeks",
+ "D": "days",
+ "h": "hours",
+ "m": "minutes",
+ "s": "seconds",
+ "ms": "milliseconds",
+ "us": "microseconds",
+ "ns": "nanoseconds",
+ }
+
+ def dehydrate_np_timedelta(value):
+ """ Dehydrator for `numpy.timedelta64` values.
+
+ :param value:
+ :type value: numpy.timedelta64
+ :returns:
+ """
+ if np.isnat(value):
+ return None
+ unit, step_size = np.datetime_data(value)
+ numer = int(value.astype(int))
+ # raise RuntimeError((type(numer), type(step_size)))
+ kwarg = _NUMPY_DURATION_UNITS.get(unit)
+ if kwarg is not None:
+ return dehydrate_duration(Duration(**{kwarg: numer * step_size}))
+ return dehydrate_duration(Duration(
+ nanoseconds=value.astype("timedelta64[ns]").astype(int)
+ ))
+
+
+if pd is not None:
+ def dehydrate_pandas_timedelta(value):
+ """ Dehydrator for `pandas.Timedelta` values.
+
+ :param value:
+ :type value: pandas.Timedelta
+ :returns:
+ """
+ return dehydrate_duration(Duration(
+ nanoseconds=value.value
+ ))
diff --git a/src/neo4j/_codec/hydration/v2/hydration_handler.py b/src/neo4j/_codec/hydration/v2/hydration_handler.py
index 167fab991..83348b3b7 100644
--- a/src/neo4j/_codec/hydration/v2/hydration_handler.py
+++ b/src/neo4j/_codec/hydration/v2/hydration_handler.py
@@ -37,8 +37,7 @@ def __init__(self):
b"d": temporal.hydrate_datetime, # no time zone
b"E": temporal.hydrate_duration,
}
- self.dehydration_functions = {
- **self.dehydration_functions,
+ self.dehydration_hooks.update(exact_types={
Point: spatial.dehydrate_point,
CartesianPoint: spatial.dehydrate_point,
WGS84Point: spatial.dehydrate_point,
@@ -50,7 +49,18 @@ def __init__(self):
datetime: temporal.dehydrate_datetime,
Duration: temporal.dehydrate_duration,
timedelta: temporal.dehydrate_timedelta,
- }
+ })
+ if np is not None:
+ self.dehydration_hooks.update(exact_types={
+ np.datetime64: temporal.dehydrate_np_datetime,
+ np.timedelta64: temporal.dehydrate_np_timedelta,
+ })
+ if pd is not None:
+ self.dehydration_hooks.update(exact_types={
+ pd.Timestamp: temporal.dehydrate_pandas_datetime,
+ pd.Timedelta: temporal.dehydrate_pandas_timedelta,
+ type(pd.NaT): lambda _: None,
+ })
def new_hydration_scope(self):
self._created_scope = True
diff --git a/src/neo4j/_codec/hydration/v2/temporal.py b/src/neo4j/_codec/hydration/v2/temporal.py
index bc3644587..d15b37536 100644
--- a/src/neo4j/_codec/hydration/v2/temporal.py
+++ b/src/neo4j/_codec/hydration/v2/temporal.py
@@ -90,3 +90,49 @@ def seconds_and_nanoseconds(dt):
"UTC offsets.")
offset_seconds = offset.days * 86400 + offset.seconds
return Structure(b"I", seconds, nanoseconds, offset_seconds)
+
+
+if pd is not None:
+ def dehydrate_pandas_datetime(value):
+ """ Dehydrator for `pandas.Timestamp` values.
+
+ :param value:
+ :type value: pandas.Timestamp
+ :returns:
+ """
+ seconds, nanoseconds = divmod(value.value, NANO_SECONDS)
+
+ import pytz
+
+ tz = value.tzinfo
+ if tz is None:
+ # without time zone
+ return Structure(b"d", seconds, nanoseconds)
+ elif hasattr(tz, "zone") and tz.zone and isinstance(tz.zone, str):
+ # with named pytz time zone
+ return Structure(b"i", seconds, nanoseconds, tz.zone)
+ elif hasattr(tz, "key") and tz.key and isinstance(tz.key, str):
+ # with named zoneinfo (Python 3.9+) time zone
+ return Structure(b"i", seconds, nanoseconds, tz.key)
+ else:
+ # with time offset
+ offset = tz.utcoffset(value)
+ if offset.microseconds:
+ raise ValueError("Bolt protocol does not support sub-second "
+ "UTC offsets.")
+ offset_seconds = offset.days * 86400 + offset.seconds
+ return Structure(b"I", seconds, nanoseconds, offset_seconds)
+
+ # simpler but slower alternative
+ # return dehydrate_datetime(
+ # DateTime(
+ # value.year,
+ # value.month,
+ # value.day,
+ # value.hour,
+ # value.minute,
+ # value.second,
+ # value.microsecond * 1000 + value.nanosecond,
+ # value.tzinfo,
+ # )
+ # )
diff --git a/src/neo4j/_codec/packstream/v1/__init__.py b/src/neo4j/_codec/packstream/v1/__init__.py
index d2f9caf4d..360cd25b4 100644
--- a/src/neo4j/_codec/packstream/v1/__init__.py
+++ b/src/neo4j/_codec/packstream/v1/__init__.py
@@ -16,6 +16,7 @@
# limitations under the License.
+import typing as t
from codecs import decode
from contextlib import contextmanager
from struct import (
@@ -23,9 +24,40 @@
unpack as struct_unpack,
)
+from ...._optional_deps import (
+ np,
+ pd,
+)
+from ...hydration import DehydrationHooks
from .._common import Structure
+NONE_VALUES: t.Tuple = (None,)
+TRUE_VALUES: t.Tuple = (True,)
+FALSE_VALUES: t.Tuple = (False,)
+INT_TYPES: t.Tuple[t.Type, ...] = (int,)
+FLOAT_TYPES: t.Tuple[t.Type, ...] = (float,)
+# we can't put tuple here because spatial types subclass tuple,
+# and we don't want to treat them as sequences
+SEQUENCE_TYPES: t.Tuple[t.Type, ...] = (list,)
+MAPPING_TYPES: t.Tuple[t.Type, ...] = (dict,)
+BYTES_TYPES: t.Tuple[t.Type, ...] = (bytes, bytearray)
+
+
+if np is not None:
+ TRUE_VALUES = (*TRUE_VALUES, np.bool_(True))
+ FALSE_VALUES = (*FALSE_VALUES, np.bool_(False))
+ INT_TYPES = (*INT_TYPES, np.integer)
+ FLOAT_TYPES = (*FLOAT_TYPES, np.floating)
+ SEQUENCE_TYPES = (*SEQUENCE_TYPES, np.ndarray)
+
+if pd is not None:
+ NONE_VALUES = (*NONE_VALUES, pd.NA)
+ SEQUENCE_TYPES = (*SEQUENCE_TYPES, pd.Series, pd.Categorical,
+ pd.core.arrays.ExtensionArray)
+ MAPPING_TYPES = (*MAPPING_TYPES, pd.DataFrame)
+
+
PACKED_UINT_8 = [struct_pack(">B", value) for value in range(0x100)]
PACKED_UINT_16 = [struct_pack(">H", value) for value in range(0x10000)]
@@ -42,29 +74,47 @@ def __init__(self, stream):
self.stream = stream
self._write = self.stream.write
- def pack_raw(self, data):
+ def _pack_raw(self, data):
self._write(data)
- def pack(self, value, dehydration_hooks=None):
+ def pack(self, data, dehydration_hooks=None):
+ self._pack(data,
+ dehydration_hooks=self._inject_hooks(dehydration_hooks))
+
+ @classmethod
+ def _inject_hooks(cls, dehydration_hooks=None):
+ if dehydration_hooks is None:
+ return DehydrationHooks(
+ exact_types={tuple: list},
+ subtypes={}
+ )
+ return dehydration_hooks.extend(
+ exact_types={tuple: list},
+ subtypes={}
+ )
+
+
+ def _pack(self, value, dehydration_hooks=None):
write = self._write
# None
- if value is None:
+ if any(value is v for v in NONE_VALUES):
write(b"\xC0") # NULL
# Boolean
- elif value is True:
+ elif any(value is v for v in TRUE_VALUES):
write(b"\xC3")
- elif value is False:
+ elif any(value is v for v in FALSE_VALUES):
write(b"\xC2")
# Float (only double precision is supported)
- elif isinstance(value, float):
+ elif isinstance(value, FLOAT_TYPES):
write(b"\xC1")
write(struct_pack(">d", value))
# Integer
- elif isinstance(value, int):
+ elif isinstance(value, INT_TYPES):
+ value = int(value)
if -0x10 <= value < 0x80:
write(PACKED_UINT_8[value % 0x100])
elif -0x80 <= value < -0x10:
@@ -85,42 +135,46 @@ def pack(self, value, dehydration_hooks=None):
# String
elif isinstance(value, str):
encoded = value.encode("utf-8")
- self.pack_string_header(len(encoded))
- self.pack_raw(encoded)
+ self._pack_string_header(len(encoded))
+ self._pack_raw(encoded)
# Bytes
- elif isinstance(value, (bytes, bytearray)):
- self.pack_bytes_header(len(value))
- self.pack_raw(value)
+ elif isinstance(value, BYTES_TYPES):
+ self._pack_bytes_header(len(value))
+ self._pack_raw(value)
# List
- elif isinstance(value, list):
- self.pack_list_header(len(value))
+ elif isinstance(value, SEQUENCE_TYPES):
+ self._pack_list_header(len(value))
for item in value:
- self.pack(item, dehydration_hooks=dehydration_hooks)
+ self._pack(item, dehydration_hooks)
# Map
- elif isinstance(value, dict):
- self.pack_map_header(len(value))
+ elif isinstance(value, MAPPING_TYPES):
+ self._pack_map_header(len(value.keys()))
for key, item in value.items():
if not isinstance(key, str):
raise TypeError(
"Map keys must be strings, not {}".format(type(key))
)
- self.pack(key, dehydration_hooks=dehydration_hooks)
- self.pack(item, dehydration_hooks=dehydration_hooks)
+ self._pack(key, dehydration_hooks)
+ self._pack(item, dehydration_hooks)
# Structure
elif isinstance(value, Structure):
self.pack_struct(value.tag, value.fields)
- # Other
- elif dehydration_hooks and type(value) in dehydration_hooks:
- self.pack(dehydration_hooks[type(value)](value))
+ # Other if in dehydration hooks
else:
+ if dehydration_hooks:
+ transformer = dehydration_hooks.get_transformer(value)
+ if transformer is not None:
+ self._pack(transformer(value), dehydration_hooks)
+ return
+
raise ValueError("Values of type %s are not supported" % type(value))
- def pack_bytes_header(self, size):
+ def _pack_bytes_header(self, size):
write = self._write
if size < 0x100:
write(b"\xCC")
@@ -134,7 +188,7 @@ def pack_bytes_header(self, size):
else:
raise OverflowError("Bytes header size out of range")
- def pack_string_header(self, size):
+ def _pack_string_header(self, size):
write = self._write
if size <= 0x0F:
write(bytes((0x80 | size,)))
@@ -150,7 +204,7 @@ def pack_string_header(self, size):
else:
raise OverflowError("String header size out of range")
- def pack_list_header(self, size):
+ def _pack_list_header(self, size):
write = self._write
if size <= 0x0F:
write(bytes((0x90 | size,)))
@@ -166,7 +220,7 @@ def pack_list_header(self, size):
else:
raise OverflowError("List header size out of range")
- def pack_map_header(self, size):
+ def _pack_map_header(self, size):
write = self._write
if size <= 0x0F:
write(bytes((0xA0 | size,)))
@@ -183,6 +237,12 @@ def pack_map_header(self, size):
raise OverflowError("Map header size out of range")
def pack_struct(self, signature, fields, dehydration_hooks=None):
+ self._pack_struct(
+ signature, fields,
+ dehydration_hooks=self._inject_hooks(dehydration_hooks)
+ )
+
+ def _pack_struct(self, signature, fields, dehydration_hooks=None):
if len(signature) != 1 or not isinstance(signature, bytes):
raise ValueError("Structure signature must be a single byte value")
write = self._write
@@ -193,7 +253,7 @@ def pack_struct(self, signature, fields, dehydration_hooks=None):
raise OverflowError("Structure size out of range")
write(signature)
for field in fields:
- self.pack(field, dehydration_hooks=dehydration_hooks)
+ self._pack(field, dehydration_hooks)
@staticmethod
def new_packable_buffer():
diff --git a/src/neo4j/_optional_deps/__init__.py b/src/neo4j/_optional_deps/__init__.py
new file mode 100644
index 000000000..17aa1b61d
--- /dev/null
+++ b/src/neo4j/_optional_deps/__init__.py
@@ -0,0 +1,22 @@
+import typing as t
+
+
+np: t.Any = None
+
+try:
+ import numpy as np # type: ignore[no-redef]
+except ImportError:
+ pass
+
+pd: t.Any = None
+
+try:
+ import pandas as pd # type: ignore[no-redef]
+except ImportError:
+ pass
+
+
+__all__ = [
+ "np",
+ "pd",
+]
diff --git a/src/neo4j/time/__init__.py b/src/neo4j/time/__init__.py
index 0ddc84289..faf8061c8 100644
--- a/src/neo4j/time/__init__.py
+++ b/src/neo4j/time/__init__.py
@@ -412,7 +412,7 @@ def __new__(
+ d * AVERAGE_SECONDS_IN_DAY
+ s
- (1 if ns < 0 else 0))
- if avg_total_seconds < MIN_INT64 or avg_total_seconds > MAX_INT64:
+ if not MIN_INT64 <= avg_total_seconds <= MAX_INT64:
raise ValueError("Duration value out of range: %r",
tuple.__repr__((mo, d, s, ns)))
return tuple.__new__(cls, (mo, d, s, ns))
diff --git a/tests/unit/common/codec/hydration/v1/test_hydration_handler.py b/tests/unit/common/codec/hydration/v1/test_hydration_handler.py
index 908678c9d..6c00005d1 100644
--- a/tests/unit/common/codec/hydration/v1/test_hydration_handler.py
+++ b/tests/unit/common/codec/hydration/v1/test_hydration_handler.py
@@ -23,9 +23,14 @@
timedelta,
)
+import numpy as np
+import pandas as pd
import pytest
-from neo4j._codec.hydration import HydrationScope
+from neo4j._codec.hydration import (
+ DehydrationHooks,
+ HydrationScope,
+)
from neo4j._codec.hydration.v1 import HydrationHandler
from neo4j._codec.packstream import Structure
from neo4j.graph import Graph
@@ -64,12 +69,15 @@ def test_scope_hydration_keys(self, hydration_scope):
def test_scope_dehydration_keys(self, hydration_scope):
hooks = hydration_scope.dehydration_hooks
- assert isinstance(hooks, dict)
- assert set(hooks.keys()) == {
+ assert isinstance(hooks, DehydrationHooks)
+ assert set(hooks.exact_types.keys()) == {
date, datetime, time, timedelta,
Date, DateTime, Duration, Time,
- CartesianPoint, Point, WGS84Point
+ CartesianPoint, Point, WGS84Point,
+ np.datetime64, np.timedelta64,
+ pd.Timestamp, pd.Timedelta, type(pd.NaT)
}
+ assert not hooks.subtypes
def test_scope_get_graph(self, hydration_scope):
graph = hydration_scope.get_graph()
diff --git a/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py b/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py
index 6486cea52..05c190457 100644
--- a/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py
+++ b/tests/unit/common/codec/hydration/v1/test_spacial_dehydration.py
@@ -34,40 +34,49 @@ class TestSpatialDehydration(HydrationHandlerTestBase):
def hydration_handler(self):
return HydrationHandler()
- def test_cartesian_2d(self, hydration_scope):
+ @pytest.fixture
+ def transformer(self, hydration_scope):
+ def transformer(value):
+ transformer_ = \
+ hydration_scope.dehydration_hooks.get_transformer(value)
+ assert callable(transformer_)
+ return transformer_(value)
+ return transformer
+
+ def test_cartesian_2d(self, transformer):
point = CartesianPoint((1, 3.1))
- struct = hydration_scope.dehydration_hooks[type(point)](point)
+ struct = transformer(point)
assert struct == Structure(b"X", 7203, 1.0, 3.1)
assert all(isinstance(f, float) for f in struct.fields[1:])
- def test_cartesian_3d(self, hydration_scope):
+ def test_cartesian_3d(self, transformer):
point = CartesianPoint((1, -2, 3.1))
- struct = hydration_scope.dehydration_hooks[type(point)](point)
+ struct = transformer(point)
assert struct == Structure(b"Y", 9157, 1.0, -2.0, 3.1)
assert all(isinstance(f, float) for f in struct.fields[1:])
- def test_wgs84_2d(self, hydration_scope):
+ def test_wgs84_2d(self, transformer):
point = WGS84Point((1, 3.1))
- struct = hydration_scope.dehydration_hooks[type(point)](point)
+ struct = transformer(point)
assert struct == Structure(b"X", 4326, 1.0, 3.1)
assert all(isinstance(f, float) for f in struct.fields[1:])
- def test_wgs84_3d(self, hydration_scope):
+ def test_wgs84_3d(self, transformer):
point = WGS84Point((1, -2, 3.1))
- struct = hydration_scope.dehydration_hooks[type(point)](point)
+ struct = transformer(point)
assert struct == Structure(b"Y", 4979, 1.0, -2.0, 3.1)
assert all(isinstance(f, float) for f in struct.fields[1:])
- def test_custom_point_2d(self, hydration_scope):
+ def test_custom_point_2d(self, transformer):
point = Point((1, 3.1))
point.srid = 12345
- struct = hydration_scope.dehydration_hooks[type(point)](point)
+ struct = transformer(point)
assert struct == Structure(b"X", 12345, 1.0, 3.1)
assert all(isinstance(f, float) for f in struct.fields[1:])
- def test_custom_point_3d(self, hydration_scope):
+ def test_custom_point_3d(self, transformer):
point = Point((1, -2, 3.1))
point.srid = 12345
- struct = hydration_scope.dehydration_hooks[type(point)](point)
+ struct = transformer(point)
assert struct == Structure(b"Y", 12345, 1.0, -2.0, 3.1)
assert all(isinstance(f, float) for f in struct.fields[1:])
diff --git a/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py
index 078fc6e7f..c783cefed 100644
--- a/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py
+++ b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration.py
@@ -18,15 +18,21 @@
import datetime
+import numpy as np
+import pandas as pd
import pytest
import pytz
from neo4j._codec.hydration.v1 import HydrationHandler
from neo4j._codec.packstream import Structure
from neo4j.time import (
+ AVERAGE_SECONDS_IN_DAY,
Date,
DateTime,
Duration,
+ MAX_INT64,
+ MIN_INT64,
+ NANO_SECONDS,
Time,
)
@@ -38,156 +44,220 @@ class TestTimeDehydration(HydrationHandlerTestBase):
def hydration_handler(self):
return HydrationHandler()
- def test_date(self, hydration_scope):
+ @pytest.fixture
+ def transformer(self, hydration_scope):
+ def transformer(value):
+ transformer_ = \
+ hydration_scope.dehydration_hooks.get_transformer(value)
+ assert callable(transformer_)
+ return transformer_(value)
+ return transformer
+
+ @pytest.fixture
+ def assert_transforms(self, transformer):
+ def assert_(value, expected):
+ struct = transformer(value)
+ assert struct == expected
+ return assert_
+
+ def test_date(self, assert_transforms):
date = Date(1991, 8, 24)
- struct = hydration_scope.dehydration_hooks[type(date)](date)
- assert struct == Structure(b"D", 7905)
+ assert_transforms(date, Structure(b"D", 7905))
- def test_native_date(self, hydration_scope):
+ def test_native_date(self, assert_transforms):
date = datetime.date(1991, 8, 24)
- struct = hydration_scope.dehydration_hooks[type(date)](date)
- assert struct == Structure(b"D", 7905)
+ assert_transforms(date, Structure(b"D", 7905))
- def test_time(self, hydration_scope):
+ def test_time(self, assert_transforms):
time = Time(1, 2, 3, 4, pytz.FixedOffset(60))
- struct = hydration_scope.dehydration_hooks[type(time)](time)
- assert struct == Structure(b"T", 3723000000004, 3600)
+ assert_transforms(time, Structure(b"T", 3723000000004, 3600))
- def test_native_time(self, hydration_scope):
+ def test_native_time(self, assert_transforms):
time = datetime.time(1, 2, 3, 4, pytz.FixedOffset(60))
- struct = hydration_scope.dehydration_hooks[type(time)](time)
- assert struct == Structure(b"T", 3723000004000, 3600)
+ assert_transforms(time, Structure(b"T", 3723000004000, 3600))
- def test_local_time(self, hydration_scope):
+ def test_local_time(self, assert_transforms):
time = Time(1, 2, 3, 4)
- struct = hydration_scope.dehydration_hooks[type(time)](time)
- assert struct == Structure(b"t", 3723000000004)
+ assert_transforms(time, Structure(b"t", 3723000000004))
- def test_local_native_time(self, hydration_scope):
+ def test_local_native_time(self, assert_transforms):
time = datetime.time(1, 2, 3, 4)
- struct = hydration_scope.dehydration_hooks[type(time)](time)
- assert struct == Structure(b"t", 3723000004000)
+ assert_transforms(time, Structure(b"t", 3723000004000))
+
+ def test_local_date_time(self, assert_transforms):
+ dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862)
+ assert_transforms(dt, Structure(b"d", 1539344261, 474716862))
+
+ def test_native_local_date_time(self, assert_transforms):
+ dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716)
+ assert_transforms(dt, Structure(b"d", 1539344261, 474716000))
+
+ def test_numpy_local_date_time(self, assert_transforms):
+ dt = np.datetime64("2018-10-12T11:37:41.474716862")
+ assert_transforms(dt, Structure(b"d", 1539344261, 474716862))
+
+ def test_numpy_nat_local_date_time(self, assert_transforms):
+ dt = np.datetime64("NaT")
+ assert_transforms(dt, None)
+
+ @pytest.mark.parametrize(("value", "error"), (
+ (np.datetime64(10000 - 1970, "Y"), ValueError),
+ (np.datetime64("+10000-01-01"), ValueError),
+ (np.datetime64(-1970, "Y"), ValueError),
+ (np.datetime64("0000-12-31"), ValueError),
- def test_date_time(self, hydration_scope):
+ ))
+ def test_numpy_invalid_local_date_time(self, value, error, transformer):
+ with pytest.raises(error):
+ transformer(value)
+
+ def test_pandas_local_date_time(self, assert_transforms):
+ dt = pd.Timestamp("2018-10-12T11:37:41.474716862")
+ assert_transforms(dt, Structure(b"d", 1539344261, 474716862))
+
+ def test_pandas_nat_local_date_time(self, assert_transforms):
+ dt = pd.NaT
+ assert_transforms(dt, None)
+
+ def test_date_time_fixed_offset(self, assert_transforms):
dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862,
pytz.FixedOffset(60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"F", 1539344261, 474716862, 3600)
+ assert_transforms(dt, Structure(b"F", 1539344261, 474716862, 3600))
- def test_native_date_time(self, hydration_scope):
+ def test_native_date_time_fixed_offset(self, assert_transforms):
dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716,
pytz.FixedOffset(60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"F", 1539344261, 474716000, 3600)
+ assert_transforms(dt, Structure(b"F", 1539344261, 474716000, 3600))
- def test_date_time_negative_offset(self, hydration_scope):
+ def test_pandas_date_time_fixed_offset(self, assert_transforms):
+ dt = pd.Timestamp("2018-10-12T11:37:41.474716862+0100")
+ assert_transforms(dt, Structure(b"F", 1539344261, 474716862, 3600))
+
+ def test_date_time_fixed_negative_offset(self, assert_transforms):
dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862,
pytz.FixedOffset(-60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"F", 1539344261, 474716862, -3600)
+ assert_transforms(dt, Structure(b"F", 1539344261, 474716862, -3600))
- def test_native_date_time_negative_offset(self, hydration_scope):
+ def test_native_date_time_fixed_negative_offset(self, assert_transforms):
dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716,
pytz.FixedOffset(-60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"F", 1539344261, 474716000, -3600)
+ assert_transforms(dt, Structure(b"F", 1539344261, 474716000, -3600))
+
+ def test_pandas_date_time_fixed_negative_offset(self, assert_transforms):
+ dt = pd.Timestamp("2018-10-12T11:37:41.474716862-0100")
+ assert_transforms(dt, Structure(b"F", 1539344261, 474716862, -3600))
- def test_date_time_zone_id(self, hydration_scope):
+ def test_date_time_zone_id(self, assert_transforms):
dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862,
pytz.timezone("Europe/Stockholm"))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"f", 1539344261, 474716862,
- "Europe/Stockholm")
+ assert_transforms(
+ dt,
+ Structure(b"f", 1539344261, 474716862, "Europe/Stockholm")
+ )
- def test_native_date_time_zone_id(self, hydration_scope):
+ def test_native_date_time_zone_id(self, assert_transforms):
dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716,
pytz.timezone("Europe/Stockholm"))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"f", 1539344261, 474716000,
- "Europe/Stockholm")
-
- def test_local_date_time(self, hydration_scope):
- dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862)
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"d", 1539344261, 474716862)
+ assert_transforms(
+ dt,
+ Structure(b"f", 1539344261, 474716000, "Europe/Stockholm")
+ )
- def test_native_local_date_time(self, hydration_scope):
- dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716)
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"d", 1539344261, 474716000)
+ def test_pandas_date_time_zone_id(self, assert_transforms):
+ dt = pd.Timestamp("2018-10-12T11:37:41.474716862+0200",
+ tz="Europe/Stockholm")
+ assert_transforms(
+ dt,
+ Structure(b"f", 1539344261, 474716862, "Europe/Stockholm")
+ )
- def test_duration(self, hydration_scope):
+ def test_duration(self, assert_transforms):
duration = Duration(months=1, days=2, seconds=3, nanoseconds=4)
- struct = hydration_scope.dehydration_hooks[type(duration)](duration)
- assert struct == Structure(b"E", 1, 2, 3, 4)
+ assert_transforms(duration, Structure(b"E", 1, 2, 3, 4))
- def test_native_duration(self, hydration_scope):
+ def test_native_duration(self, assert_transforms):
duration = datetime.timedelta(days=1, seconds=2, microseconds=3)
- struct = hydration_scope.dehydration_hooks[type(duration)](duration)
- assert struct == Structure(b"E", 0, 1, 2, 3000)
+ assert_transforms(duration, Structure(b"E", 0, 1, 2, 3000))
- def test_duration_mixed_sign(self, hydration_scope):
+ def test_duration_mixed_sign(self, assert_transforms):
duration = Duration(months=1, days=-2, seconds=3, nanoseconds=4)
- struct = hydration_scope.dehydration_hooks[type(duration)](duration)
- assert struct == Structure(b"E", 1, -2, 3, 4)
+ assert_transforms(duration, Structure(b"E", 1, -2, 3, 4))
- def test_native_duration_mixed_sign(self, hydration_scope):
+ def test_native_duration_mixed_sign(self, assert_transforms):
duration = datetime.timedelta(days=-1, seconds=2, microseconds=3)
- struct = hydration_scope.dehydration_hooks[type(duration)](duration)
- assert struct == Structure(b"E", 0, -1, 2, 3000)
-
-
-class TestUTCPatchedTimeDehydration(TestTimeDehydration):
- @pytest.fixture
- def hydration_handler(self):
- handler = HydrationHandler()
- handler.patch_utc()
- return handler
-
- def test_date_time(self, hydration_scope):
- from ..v2.test_temporal_dehydration import (
- TestTimeDehydration as TestTimeDehydrationV2,
- )
- TestTimeDehydrationV2().test_date_time(
- hydration_scope
- )
-
- def test_native_date_time(self, hydration_scope):
- from ..v2.test_temporal_dehydration import (
- TestTimeDehydration as TestTimeDehydrationV2,
- )
- TestTimeDehydrationV2().test_native_date_time(
- hydration_scope
- )
-
- def test_date_time_negative_offset(self, hydration_scope):
- from ..v2.test_temporal_dehydration import (
- TestTimeDehydration as TestTimeDehydrationV2,
- )
- TestTimeDehydrationV2().test_date_time_negative_offset(
- hydration_scope
- )
-
- def test_native_date_time_negative_offset(self, hydration_scope):
- from ..v2.test_temporal_dehydration import (
- TestTimeDehydration as TestTimeDehydrationV2,
- )
- TestTimeDehydrationV2().test_native_date_time_negative_offset(
- hydration_scope
- )
-
- def test_date_time_zone_id(self, hydration_scope):
- from ..v2.test_temporal_dehydration import (
- TestTimeDehydration as TestTimeDehydrationV2,
- )
- TestTimeDehydrationV2().test_date_time_zone_id(
- hydration_scope
- )
-
- def test_native_date_time_zone_id(self, hydration_scope):
- from ..v2.test_temporal_dehydration import (
- TestTimeDehydration as TestTimeDehydrationV2,
+ assert_transforms(duration, Structure(b"E", 0, -1, 2, 3000))
+
+ @pytest.mark.parametrize(
+ ("value", "expected_fields"),
+ (
+ (np.timedelta64(1, "Y"), (12, 0, 0, 0)),
+ (np.timedelta64(1, "M"), (1, 0, 0, 0)),
+ (np.timedelta64(1, "D"), (0, 1, 0, 0)),
+ (np.timedelta64(1, "h"), (0, 0, 3600, 0)),
+ (np.timedelta64(1, "m"), (0, 0, 60, 0)),
+ (np.timedelta64(1, "s"), (0, 0, 1, 0)),
+ (np.timedelta64(MAX_INT64, "s"), (0, 0, MAX_INT64, 0)),
+ (np.timedelta64(1, "ms"), (0, 0, 0, 1000000)),
+ (np.timedelta64(1, "us"), (0, 0, 0, 1000)),
+ (np.timedelta64(1, "ns"), (0, 0, 0, 1)),
+ (np.timedelta64(NANO_SECONDS, "ns"), (0, 0, 1, 0)),
+ (np.timedelta64(NANO_SECONDS + 1, "ns"), (0, 0, 1, 1)),
+ (np.timedelta64(1000, "ps"), (0, 0, 0, 1)),
+ (np.timedelta64(1, "ps"), (0, 0, 0, 0)),
+ (np.timedelta64(1000000, "fs"), (0, 0, 0, 1)),
+ (np.timedelta64(1, "fs"), (0, 0, 0, 0)),
+ (np.timedelta64(1000000000, "as"), (0, 0, 0, 1)),
+ (np.timedelta64(1, "as"), (0, 0, 0, 0)),
+ (np.timedelta64(-1, "Y"), (-12, 0, 0, 0)),
+ (np.timedelta64(-1, "M"), (-1, 0, 0, 0)),
+ (np.timedelta64(-1, "D"), (0, -1, 0, 0)),
+ (np.timedelta64(-1, "h"), (0, 0, -3600, 0)),
+ (np.timedelta64(-1, "m"), (0, 0, -60, 0)),
+ (np.timedelta64(-1, "s"), (0, 0, -1, 0)),
+ # numpy uses MIN_INT64 to encode NaT
+ (np.timedelta64(MIN_INT64 + 1, "s"), (0, 0, MIN_INT64 + 1, 0)),
+ (np.timedelta64(-1, "ms"), (0, 0, 0, -1000000)),
+ (np.timedelta64(-1, "us"), (0, 0, 0, -1000)),
+ (np.timedelta64(-1, "ns"), (0, 0, 0, -1)),
+ (np.timedelta64(-NANO_SECONDS, "ns"), (0, 0, -1, 0)),
+ (np.timedelta64(-NANO_SECONDS - 1, "ns"), (0, 0, -1, -1)),
+ (np.timedelta64(-1000, "ps"), (0, 0, 0, -1)),
+ (np.timedelta64(-1, "ps"), (0, 0, 0, -1)),
+ (np.timedelta64(-1000000, "fs"), (0, 0, 0, -1)),
+ (np.timedelta64(-1, "fs"), (0, 0, 0, -1)),
+ (np.timedelta64(-1000000000, "as"), (0, 0, 0, -1)),
+ (np.timedelta64(-1, "as"), (0, 0, 0, -1)),
)
- TestTimeDehydrationV2().test_native_date_time_zone_id(
- hydration_scope
+ )
+ def test_numpy_duration(self, value, expected_fields, assert_transforms):
+ assert_transforms(value, Structure(b"E", *expected_fields))
+
+ def test_numpy_nat_duration(self, assert_transforms):
+ duration = np.timedelta64("NaT")
+ assert_transforms(duration, None)
+
+ @pytest.mark.parametrize(("value", "error"), (
+ (np.timedelta64((MAX_INT64 // 60) + 1, "m"), ValueError),
+ (np.timedelta64((MIN_INT64 // 60), "m"), ValueError),
+
+ ))
+ def test_numpy_invalid_durations(self, value, error, transformer):
+ with pytest.raises(error):
+ transformer(value)
+
+ @pytest.mark.parametrize(
+ ("value", "expected_fields"),
+ (
+ (
+ pd.Timedelta(days=1, seconds=2, microseconds=3, nanoseconds=4),
+ (0, 0, AVERAGE_SECONDS_IN_DAY + 2, 3004)
+ ),
+ (
+ pd.Timedelta(days=-1, seconds=2, microseconds=3,
+ nanoseconds=4),
+ (0, 0, -AVERAGE_SECONDS_IN_DAY + 2 + 1, -NANO_SECONDS + 3004)
+ )
)
+ )
+ def test_pandas_duration(self, value, expected_fields, assert_transforms):
+ assert_transforms(value, Structure(b"E", *expected_fields))
diff --git a/tests/unit/common/codec/hydration/v1/test_temporal_dehydration_utc_patch.py b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration_utc_patch.py
new file mode 100644
index 000000000..66ba4b0f7
--- /dev/null
+++ b/tests/unit/common/codec/hydration/v1/test_temporal_dehydration_utc_patch.py
@@ -0,0 +1,61 @@
+# Copyright (c) "Neo4j"
+# Neo4j Sweden AB [https://neo4j.com]
+#
+# This file is part of Neo4j.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import pytest
+
+from ..v2.test_temporal_dehydration import (
+ TestTimeDehydration as _TestTimeDehydrationV2,
+)
+from .test_temporal_dehydration import (
+ HydrationHandler, # testing the same hydration handler
+)
+from .test_temporal_dehydration import (
+ TestTimeDehydration as _TestTimeDehydrationV1,
+)
+
+
+class UTCPatchedTimeDehydrationMeta(type):
+ def __new__(mcs, name, bases, attrs):
+ for test_func in (
+ "test_date_time_fixed_offset",
+ "test_native_date_time_fixed_offset",
+ "test_pandas_date_time_fixed_offset",
+ "test_date_time_fixed_negative_offset",
+ "test_native_date_time_fixed_negative_offset",
+ "test_pandas_date_time_fixed_negative_offset",
+ "test_date_time_zone_id",
+ "test_native_date_time_zone_id",
+ "test_pandas_date_time_zone_id",
+ ):
+ if not hasattr(_TestTimeDehydrationV2, test_func):
+ continue
+ attrs[test_func] = getattr(_TestTimeDehydrationV2, test_func)
+
+ return super(UTCPatchedTimeDehydrationMeta, mcs).__new__(
+ mcs, name, bases, attrs
+ )
+
+
+class TestUTCPatchedTimeDehydration(
+ _TestTimeDehydrationV1, metaclass=UTCPatchedTimeDehydrationMeta
+):
+ @pytest.fixture
+ def hydration_handler(self):
+ handler = HydrationHandler()
+ handler.patch_utc()
+ return handler
diff --git a/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py b/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py
index 97074e3c7..7cad75d57 100644
--- a/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py
+++ b/tests/unit/common/codec/hydration/v2/test_temporal_dehydration.py
@@ -18,6 +18,7 @@
import datetime
+import pandas as pd
import pytest
import pytz
@@ -35,42 +36,82 @@ class TestTimeDehydration(_TestTemporalDehydrationV1):
def hydration_handler(self):
return HydrationHandler()
- def test_date_time(self, hydration_scope):
+ def test_date_time_fixed_offset(self, assert_transforms):
dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862,
pytz.FixedOffset(60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"I", 1539340661, 474716862, 3600)
+ assert_transforms(
+ dt,
+ Structure(b"I", 1539340661, 474716862, 3600)
+ )
- def test_native_date_time(self, hydration_scope):
+ def test_native_date_time_fixed_offset(self, assert_transforms):
dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716,
pytz.FixedOffset(60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"I", 1539340661, 474716000, 3600)
+ assert_transforms(
+ dt,
+ Structure(b"I", 1539340661, 474716000, 3600)
+ )
- def test_date_time_negative_offset(self, hydration_scope):
+ def test_pandas_date_time_fixed_offset(self, assert_transforms):
+ dt = pd.Timestamp("2018-10-12T11:37:41.474716862+0100")
+ assert_transforms(dt, Structure(b"I", 1539340661, 474716862, 3600))
+
+ def test_date_time_fixed_negative_offset(self, assert_transforms):
dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862,
pytz.FixedOffset(-60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"I", 1539347861, 474716862, -3600)
+ assert_transforms(
+ dt,
+ Structure(b"I", 1539347861, 474716862, -3600)
+ )
- def test_native_date_time_negative_offset(self, hydration_scope):
+ def test_native_date_time_fixed_negative_offset(self, assert_transforms):
dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716,
pytz.FixedOffset(-60))
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"I", 1539347861, 474716000, -3600)
+ assert_transforms(
+ dt,
+ Structure(b"I", 1539347861, 474716000, -3600)
+ )
+
+ def test_pandas_date_time_fixed_negative_offset(self, assert_transforms):
+ dt = pd.Timestamp("2018-10-12T11:37:41.474716862-0100")
+ assert_transforms(dt, Structure(b"I", 1539347861, 474716862, -3600))
- def test_date_time_zone_id(self, hydration_scope):
+ def test_date_time_zone_id(self, assert_transforms):
dt = DateTime(2018, 10, 12, 11, 37, 41, 474716862)
dt = pytz.timezone("Europe/Stockholm").localize(dt)
# offset should be UTC+2 (7200 seconds)
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"i", 1539337061, 474716862,
- "Europe/Stockholm")
+ assert_transforms(
+ dt,
+ Structure(b"i", 1539337061, 474716862, "Europe/Stockholm")
+ )
- def test_native_date_time_zone_id(self, hydration_scope):
+ def test_native_date_time_zone_id(self, assert_transforms):
dt = datetime.datetime(2018, 10, 12, 11, 37, 41, 474716)
dt = pytz.timezone("Europe/Stockholm").localize(dt)
# offset should be UTC+2 (7200 seconds)
- struct = hydration_scope.dehydration_hooks[type(dt)](dt)
- assert struct == Structure(b"i", 1539337061, 474716000,
- "Europe/Stockholm")
+ assert_transforms(
+ dt,
+ Structure(b"i", 1539337061, 474716000, "Europe/Stockholm")
+ )
+
+ @pytest.mark.parametrize(("dt", "fields"), (
+ (
+ pd.Timestamp("2018-10-12T11:37:41.474716862+0200",
+ tz="Europe/Stockholm"),
+ (1539337061, 474716862, "Europe/Stockholm"),
+ ),
+ (
+ # 1972-10-29 02:00:01.001000001+0100 pre DST change
+ pd.Timestamp((1032 * 24 + 2) * 3600 * 1000000000 + 1001000001,
+ tz="Europe/London"),
+ ((1032 * 24 + 2) * 3600 + 1, 1000001, "Europe/London"),
+ ),
+ (
+ # 1972-10-29 02:00:01.001000001+0000 post DST change
+ pd.Timestamp((1032 * 24 + 1) * 3600 * 1000000000 + 1001000001,
+ tz="Europe/London"),
+ ((1032 * 24 + 1) * 3600 + 1, 1000001, "Europe/London"),
+ )
+ ))
+ def test_pandas_date_time_zone_id(self, dt, fields, assert_transforms):
+ assert_transforms(dt, Structure(b"i", *fields))
diff --git a/tests/unit/common/codec/packstream/v1/test_packstream.py b/tests/unit/common/codec/packstream/v1/test_packstream.py
index 14f8fcfb5..7e5bd4937 100644
--- a/tests/unit/common/codec/packstream/v1/test_packstream.py
+++ b/tests/unit/common/codec/packstream/v1/test_packstream.py
@@ -18,9 +18,14 @@
import struct
from io import BytesIO
-from math import pi
+from math import (
+ isnan,
+ pi,
+)
from uuid import uuid4
+import numpy as np
+import pandas as pd
import pytest
from neo4j._codec.packstream import Structure
@@ -36,227 +41,453 @@
not_ascii = "♥O◘♦♥O◘♦"
-class TestPackStream:
- @pytest.fixture
- def packer_with_buffer(self):
- packable_buffer = Packer.new_packable_buffer()
- return Packer(packable_buffer), packable_buffer
+@pytest.fixture
+def packer_with_buffer():
+ packable_buffer = Packer.new_packable_buffer()
+ return Packer(packable_buffer), packable_buffer
- @pytest.fixture
- def unpacker_with_buffer(self):
- unpackable_buffer = Unpacker.new_unpackable_buffer()
- return Unpacker(unpackable_buffer), unpackable_buffer
- def test_packable_buffer(self, packer_with_buffer):
- packer, packable_buffer = packer_with_buffer
- assert isinstance(packable_buffer, PackableBuffer)
- assert packable_buffer is packer.stream
+@pytest.fixture
+def unpacker_with_buffer():
+ unpackable_buffer = Unpacker.new_unpackable_buffer()
+ return Unpacker(unpackable_buffer), unpackable_buffer
- def test_unpackable_buffer(self, unpacker_with_buffer):
- unpacker, unpackable_buffer = unpacker_with_buffer
- assert isinstance(unpackable_buffer, UnpackableBuffer)
- assert unpackable_buffer is unpacker.unpackable
+def test_packable_buffer(packer_with_buffer):
+ packer, packable_buffer = packer_with_buffer
+ assert isinstance(packable_buffer, PackableBuffer)
+ assert packable_buffer is packer.stream
+
+def test_unpackable_buffer(unpacker_with_buffer):
+ unpacker, unpackable_buffer = unpacker_with_buffer
+ assert isinstance(unpackable_buffer, UnpackableBuffer)
+ assert unpackable_buffer is unpacker.unpackable
+
+
+@pytest.fixture
+def pack(packer_with_buffer):
+ packer, packable_buffer = packer_with_buffer
+
+ def _pack(*values, dehydration_hooks=None):
+ for value in values:
+ packer.pack(value, dehydration_hooks=dehydration_hooks)
+ data = bytearray(packable_buffer.data)
+ packable_buffer.clear()
+ return data
- @pytest.fixture
- def pack(self, packer_with_buffer):
+ return _pack
+
+
+_default_out_value = object()
+
+
+@pytest.fixture
+def assert_packable(packer_with_buffer, unpacker_with_buffer):
+ def _recursive_nan_equal(a, b):
+ if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)):
+ return all(_recursive_nan_equal(x, y) for x, y in zip(a, b))
+ elif isinstance(a, dict) and isinstance(b, dict):
+ return all(_recursive_nan_equal(a[k], b[k]) for k in a)
+ else:
+ return a == b or (isnan(a) and isnan(b))
+
+ def _assert(in_value, packed_value, out_value=_default_out_value):
+ if out_value is _default_out_value:
+ out_value = in_value
+ nonlocal packer_with_buffer, unpacker_with_buffer
packer, packable_buffer = packer_with_buffer
+ unpacker, unpackable_buffer = unpacker_with_buffer
+ packable_buffer.clear()
+ unpackable_buffer.reset()
+
+ packer.pack(in_value)
+ packed_data = packable_buffer.data
+ assert packed_data == packed_value
+
+ unpackable_buffer.data = bytearray(packed_data)
+ unpackable_buffer.used = len(packed_data)
+ unpacked_data = unpacker.unpack()
+ assert _recursive_nan_equal(unpacked_data, out_value)
+
+ return _assert
+
+
+@pytest.fixture(params=(True, False))
+def np_float_overflow_as_error(request):
+ should_raise = request.param
+ if should_raise:
+ old_err = np.seterr(over="raise")
+ else:
+ old_err = np.seterr(over="ignore")
+ yield
+ np.seterr(**old_err)
+
+
+
+@pytest.fixture(params=(
+ int,
+ np.int8, np.int16, np.int32, np.int64, np.longlong,
+ np.uint8, np.uint16, np.uint32, np.uint64, np.ulonglong
+))
+def int_type(request):
+ if issubclass(request.param, np.number):
+ def _int_type(value):
+ # this avoids deprecation warning from NEP50 and forces
+ # c-style wrapping of the value
+ return np.array(value).astype(request.param).item()
+
+ return _int_type
+ else:
+ return request.param
+
+
+@pytest.fixture(params=(float,
+ np.float16, np.float32, np.float64, np.longdouble))
+def float_type(request, np_float_overflow_as_error):
+ return request.param
+
+
+@pytest.fixture(params=(bool, np.bool_))
+def bool_type(request):
+ return request.param
+
+
+@pytest.fixture(params=(bytes, bytearray, np.bytes_))
+def bytes_type(request):
+ return request.param
+
- def _pack(*values, dehydration_hooks=None):
- for value in values:
- packer.pack(value, dehydration_hooks=dehydration_hooks)
- data = bytearray(packable_buffer.data)
- packable_buffer.clear()
- return data
+@pytest.fixture(params=(str, np.str_))
+def str_type(request):
+ return request.param
- return _pack
- @pytest.fixture
- def assert_packable(self, packer_with_buffer, unpacker_with_buffer):
- def _assert(value, packed_value):
- nonlocal packer_with_buffer, unpacker_with_buffer
- packer, packable_buffer = packer_with_buffer
- unpacker, unpackable_buffer = unpacker_with_buffer
- packable_buffer.clear()
- unpackable_buffer.reset()
+@pytest.fixture(params=(list, tuple, np.array,
+ pd.Series, pd.array, pd.arrays.SparseArray))
+def sequence_type(request):
+ if request.param is pd.Series:
+ def constructor(value):
+ if not value:
+ return pd.Series(dtype=object)
+ return pd.Series(value)
- packer.pack(value)
- packed_data = packable_buffer.data
- assert packed_data == packed_value
+ return constructor
+ return request.param
- unpackable_buffer.data = bytearray(packed_data)
- unpackable_buffer.used = len(packed_data)
- unpacked_data = unpacker.unpack()
- assert unpacked_data == value
- return _assert
+class TestPackStream:
+ @pytest.mark.parametrize("value", (None, pd.NA))
+ def test_none(self, value, assert_packable):
+ assert_packable(value, b"\xC0", None)
- def test_none(self, assert_packable):
- assert_packable(None, b"\xC0")
+ def test_boolean(self, bool_type, assert_packable):
+ assert_packable(bool_type(True), b"\xC3")
+ assert_packable(bool_type(False), b"\xC2")
- def test_boolean(self, assert_packable):
- assert_packable(True, b"\xC3")
- assert_packable(False, b"\xC2")
+ @pytest.mark.parametrize("dtype", (bool, pd.BooleanDtype()))
+ def test_boolean_pandas_series(self, dtype, assert_packable):
+ value = [True, False]
+ value_series = pd.Series(value, dtype=dtype)
+ assert_packable(value_series, b"\x92\xC3\xC2", value)
- def test_negative_tiny_int(self, assert_packable):
+ def test_negative_tiny_int(self, int_type, assert_packable):
+ for z in range(-16, 0):
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
+ assert_packable(z_typed, bytes(bytearray([z + 0x100])))
+
+ @pytest.mark.parametrize("dtype", (
+ int, pd.Int8Dtype(), pd.Int16Dtype(), pd.Int32Dtype(), pd.Int64Dtype(),
+ np.int8, np.int16, np.int32, np.int64, np.longlong,
+ ))
+ def test_negative_tiny_int_pandas_series(self, dtype, assert_packable):
for z in range(-16, 0):
- assert_packable(z, bytes(bytearray([z + 0x100])))
+ z_typed = pd.Series(z, dtype=dtype)
+ assert_packable(z_typed, bytes(bytearray([0x91, z + 0x100])), [z])
- def test_positive_tiny_int(self, assert_packable):
+ def test_positive_tiny_int(self, int_type, assert_packable):
for z in range(0, 128):
- assert_packable(z, bytes(bytearray([z])))
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
+ assert_packable(z_typed, bytes(bytearray([z])))
- def test_negative_int8(self, assert_packable):
+ def test_negative_int8(self, int_type, assert_packable):
for z in range(-128, -16):
- assert_packable(z, bytes(bytearray([0xC8, z + 0x100])))
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
+ assert_packable(z_typed, bytes(bytearray([0xC8, z + 0x100])))
- def test_positive_int16(self, assert_packable):
+ def test_positive_int16(self, int_type, assert_packable):
for z in range(128, 32768):
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
expected = b"\xC9" + struct.pack(">h", z)
- assert_packable(z, expected)
+ assert_packable(z_typed, expected)
- def test_negative_int16(self, assert_packable):
+ def test_negative_int16(self, int_type, assert_packable):
for z in range(-32768, -128):
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
expected = b"\xC9" + struct.pack(">h", z)
- assert_packable(z, expected)
+ assert_packable(z_typed, expected)
- def test_positive_int32(self, assert_packable):
+ def test_positive_int32(self, int_type, assert_packable):
for e in range(15, 31):
z = 2 ** e
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
expected = b"\xCA" + struct.pack(">i", z)
- assert_packable(z, expected)
+ assert_packable(z_typed, expected)
- def test_negative_int32(self, assert_packable):
+ def test_negative_int32(self, int_type, assert_packable):
for e in range(15, 31):
z = -(2 ** e + 1)
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
expected = b"\xCA" + struct.pack(">i", z)
- assert_packable(z, expected)
+ assert_packable(z_typed, expected)
- def test_positive_int64(self, assert_packable):
+ def test_positive_int64(self, int_type, assert_packable):
for e in range(31, 63):
z = 2 ** e
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
expected = b"\xCB" + struct.pack(">q", z)
- assert_packable(z, expected)
+ assert_packable(z_typed, expected)
+
+ @pytest.mark.parametrize("dtype", (
+ int, pd.Int64Dtype(), pd.UInt64Dtype(),
+ np.int64, np.longlong, np.uint64, np.ulonglong,
+ ))
+ def test_positive_int64_pandas_series(self, dtype, assert_packable):
+ for e in range(31, 63):
+ z = 2 ** e
+ z_typed = pd.Series(z, dtype=dtype)
+ expected = b"\x91\xCB" + struct.pack(">q", z)
+ assert_packable(z_typed, expected, [z])
- def test_negative_int64(self, assert_packable):
+ def test_negative_int64(self, int_type, assert_packable):
for e in range(31, 63):
z = -(2 ** e + 1)
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ continue # not representable
expected = b"\xCB" + struct.pack(">q", z)
- assert_packable(z, expected)
+ assert_packable(z_typed, expected)
- def test_integer_positive_overflow(self, pack, assert_packable):
- with pytest.raises(OverflowError):
- pack(2 ** 63 + 1)
+ @pytest.mark.parametrize("dtype", (
+ int, pd.Int64Dtype(), np.int64, np.longlong,
+ ))
+ def test_negative_int64_pandas_series(self, dtype, assert_packable):
+ for e in range(31, 63):
+ z = -(2 ** e + 1)
+ z_typed = pd.Series(z, dtype=dtype)
+ expected = b"\x91\xCB" + struct.pack(">q", z)
+ assert_packable(z_typed, expected, [z])
- def test_integer_negative_overflow(self, pack, assert_packable):
+ def test_integer_positive_overflow(self, int_type, pack, assert_packable):
with pytest.raises(OverflowError):
- pack(-(2 ** 63) - 1)
-
- def test_zero_float64(self, assert_packable):
- zero = 0.0
- expected = b"\xC1" + struct.pack(">d", zero)
- assert_packable(zero, expected)
-
- def test_tau_float64(self, assert_packable):
- tau = 2 * pi
- expected = b"\xC1" + struct.pack(">d", tau)
- assert_packable(tau, expected)
-
- def test_positive_float64(self, assert_packable):
- for e in range(0, 100):
- r = float(2 ** e) + 0.5
- expected = b"\xC1" + struct.pack(">d", r)
- assert_packable(r, expected)
-
- def test_negative_float64(self, assert_packable):
- for e in range(0, 100):
- r = -(float(2 ** e) + 0.5)
- expected = b"\xC1" + struct.pack(">d", r)
- assert_packable(r, expected)
+ z = 2 ** 63 + 1
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ pytest.skip("not representable")
+ pack(z_typed)
- def test_empty_bytes(self, assert_packable):
- assert_packable(b"", b"\xCC\x00")
-
- def test_empty_bytearray(self, assert_packable):
- assert_packable(bytearray(), b"\xCC\x00")
-
- def test_bytes_8(self, assert_packable):
- assert_packable(bytearray(b"hello"), b"\xCC\x05hello")
-
- def test_bytes_16(self, assert_packable):
+ def test_integer_negative_overflow(self, int_type, pack, assert_packable):
+ with pytest.raises(OverflowError):
+ z = -(2 ** 63) - 1
+ z_typed = int_type(z)
+ if z != int(z_typed):
+ pytest.skip("not representable")
+ pack(z_typed)
+
+ def test_float(self, float_type, assert_packable):
+ for z in (
+ 0.0, -0.0, pi, 2 * pi, float("inf"), float("-inf"), float("nan"),
+ *(float(2 ** e) + 0.5 for e in range(100)),
+ *(-float(2 ** e) + 0.5 for e in range(100)),
+ ):
+ print(z)
+ try:
+ z_typed = float_type(z)
+ except FloatingPointError:
+ continue # not representable
+ expected = b"\xC1" + struct.pack(">d", float(z_typed))
+ assert_packable(z_typed, expected)
+
+ @pytest.mark.parametrize("dtype", (
+ float, pd.Float32Dtype(), pd.Float64Dtype(),
+ np.float16, np.float32, np.float64, np.longdouble,
+ ))
+ def test_float_pandas_series(self, dtype, np_float_overflow_as_error,
+ assert_packable):
+ for z in (
+ 0.0, -0.0, pi, 2 * pi, float("inf"), float("-inf"), float("nan"),
+ *(float(2 ** e) + 0.5 for e in range(100)),
+ *(-float(2 ** e) + 0.5 for e in range(100)),
+ ):
+ try:
+ z_typed = pd.Series(z, dtype=dtype)
+ except FloatingPointError:
+ continue # not representable
+ if z_typed[0] is pd.NA:
+ expected_bytes = b"\x91\xC0" # encoded as NULL
+ expected_value = [None]
+ else:
+ expected_bytes = (b"\x91\xC1"
+ + struct.pack(">d", float(z_typed[0])))
+ expected_value = [float(z_typed[0])]
+ assert_packable(z_typed, expected_bytes, expected_value)
+
+ def test_empty_bytes(self, bytes_type, assert_packable):
+ b = bytes_type(b"")
+ assert_packable(b, b"\xCC\x00")
+
+ def test_bytes_8(self, bytes_type, assert_packable):
+ b = bytes_type(b"hello")
+ assert_packable(b, b"\xCC\x05hello")
+
+ def test_bytes_16(self, bytes_type, assert_packable):
b = bytearray(40000)
- assert_packable(b, b"\xCD\x9C\x40" + b)
+ b_typed = bytes_type(b)
+ assert_packable(b_typed, b"\xCD\x9C\x40" + b)
- def test_bytes_32(self, assert_packable):
+ def test_bytes_32(self, bytes_type, assert_packable):
b = bytearray(80000)
- assert_packable(b, b"\xCE\x00\x01\x38\x80" + b)
-
- def test_bytearray_size_overflow(self, assert_packable):
+ b_typed = bytes_type(b)
+ assert_packable(b_typed, b"\xCE\x00\x01\x38\x80" + b)
+
+ def test_bytes_pandas_series(self, assert_packable):
+ for b, header in (
+ (b"", b"\xCC\x00"),
+ (b"hello", b"\xCC\x05"),
+ (bytearray(40000), b"\xCD\x9C\x40"),
+ (bytearray(80000), b"\xCE\x00\x01\x38\x80"),
+ ):
+ b_typed = pd.Series([b])
+ assert_packable(b_typed, b"\x91" + header + b, [b])
+
+ def test_bytearray_size_overflow(self, bytes_type, assert_packable):
stream_out = BytesIO()
packer = Packer(stream_out)
with pytest.raises(OverflowError):
- packer.pack_bytes_header(2 ** 32)
+ packer._pack_bytes_header(2 ** 32)
- def test_empty_string(self, assert_packable):
- assert_packable(u"", b"\x80")
+ def test_empty_string(self, str_type, assert_packable):
+ assert_packable(str_type(""), b"\x80")
- def test_tiny_strings(self, assert_packable):
+ def test_tiny_strings(self, str_type, assert_packable):
for size in range(0x10):
- assert_packable(u"A" * size, bytes(bytearray([0x80 + size]) + (b"A" * size)))
+ s = str_type("A" * size)
+ assert_packable(s, bytes(bytearray([0x80 + size]) + (b"A" * size)))
- def test_string_8(self, assert_packable):
- t = u"A" * 40
+ def test_string_8(self, str_type, assert_packable):
+ t = "A" * 40
b = t.encode("utf-8")
- assert_packable(t, b"\xD0\x28" + b)
+ t_typed = str_type(t)
+ assert_packable(t_typed, b"\xD0\x28" + b)
- def test_string_16(self, assert_packable):
- t = u"A" * 40000
+ def test_string_16(self, str_type, assert_packable):
+ t = "A" * 40000
b = t.encode("utf-8")
- assert_packable(t, b"\xD1\x9C\x40" + b)
+ t_typed = str_type(t)
+ assert_packable(t_typed, b"\xD1\x9C\x40" + b)
- def test_string_32(self, assert_packable):
- t = u"A" * 80000
+ def test_string_32(self, str_type, assert_packable):
+ t = "A" * 80000
b = t.encode("utf-8")
- assert_packable(t, b"\xD2\x00\x01\x38\x80" + b)
+ t_typed = str_type(t)
+ assert_packable(t_typed, b"\xD2\x00\x01\x38\x80" + b)
- def test_unicode_string(self, assert_packable):
- t = u"héllö"
+ def test_unicode_string(self, str_type, assert_packable):
+ t = "héllö"
b = t.encode("utf-8")
- assert_packable(t, bytes(bytearray([0x80 + len(b)])) + b)
+ t_typed = str_type(t)
+ assert_packable(t_typed, bytes(bytearray([0x80 + len(b)])) + b)
+
+ @pytest.mark.parametrize("dtype", (
+ str, np.str_, pd.StringDtype("python"), pd.StringDtype("pyarrow"),
+ ))
+ def test_string_pandas_series(self, dtype, assert_packable):
+ values = (
+ ("", b"\x80"),
+ ("A" * 40, b"\xD0\x28"),
+ ("A" * 40000, b"\xD1\x9C\x40"),
+ ("A" * 80000, b"\xD2\x00\x01\x38\x80"),
+ )
+ for t, header in values:
+ t_typed = pd.Series([t], dtype=dtype)
+ assert_packable(t_typed, b"\x91" + header + t.encode("utf-8"), [t])
+
+ t_typed = pd.Series([t for t, _ in values], dtype=dtype)
+ expected = (
+ bytes([0x90 + len(values)])
+ + b"".join(header + t.encode("utf-8") for t, header in values)
+ )
+ assert_packable(t_typed, expected, [t for t, _ in values])
def test_string_size_overflow(self):
stream_out = BytesIO()
packer = Packer(stream_out)
with pytest.raises(OverflowError):
- packer.pack_string_header(2 ** 32)
+ packer._pack_string_header(2 ** 32)
- def test_empty_list(self, assert_packable):
- assert_packable([], b"\x90")
+ def test_empty_list(self, sequence_type, assert_packable):
+ l = []
+ l_typed = sequence_type(l)
+ assert_packable(l_typed, b"\x90", l)
- def test_tiny_lists(self, assert_packable):
+ def test_tiny_lists(self, sequence_type, assert_packable):
for size in range(0x10):
+ l = [1] * size
+ l_typed = sequence_type(l)
data_out = bytearray([0x90 + size]) + bytearray([1] * size)
- assert_packable([1] * size, bytes(data_out))
+ assert_packable(l_typed, bytes(data_out), l)
- def test_list_8(self, assert_packable):
+ def test_list_8(self, sequence_type, assert_packable):
l = [1] * 40
- assert_packable(l, b"\xD4\x28" + (b"\x01" * 40))
+ l_typed = sequence_type(l)
+ assert_packable(l_typed, b"\xD4\x28" + (b"\x01" * 40), l)
- def test_list_16(self, assert_packable):
+ def test_list_16(self, sequence_type, assert_packable):
l = [1] * 40000
- assert_packable(l, b"\xD5\x9C\x40" + (b"\x01" * 40000))
+ l_typed = sequence_type(l)
+ assert_packable(l_typed, b"\xD5\x9C\x40" + (b"\x01" * 40000), l)
- def test_list_32(self, assert_packable):
+ def test_list_32(self, sequence_type, assert_packable):
l = [1] * 80000
- assert_packable(l, b"\xD6\x00\x01\x38\x80" + (b"\x01" * 80000))
-
- def test_nested_lists(self, assert_packable):
- assert_packable([[[]]], b"\x91\x91\x90")
+ l_typed = sequence_type(l)
+ assert_packable(l_typed, b"\xD6\x00\x01\x38\x80" + (b"\x01" * 80000), l)
+
+ def test_nested_lists(self, sequence_type, assert_packable):
+ l = [[[]]]
+ l_typed = sequence_type([sequence_type([sequence_type([])])])
+ assert_packable(l_typed, b"\x91\x91\x90", l)
+
+ @pytest.mark.parametrize("as_series", (True, False))
+ def test_list_pandas_categorical(self, as_series, pack, assert_packable):
+ l = ["cat", "dog", "cat", "cat", "dog", "horse"]
+ l_typed = pd.Categorical(l)
+ if as_series:
+ l_typed = pd.Series(l_typed)
+ b = b"".join([
+ b"\x96",
+ *(pack(e) for e in l)
+ ])
+ assert_packable(l_typed, b, l)
def test_list_size_overflow(self):
stream_out = BytesIO()
packer = Packer(stream_out)
with pytest.raises(OverflowError):
- packer.pack_list_header(2 ** 32)
+ packer._pack_list_header(2 ** 32)
def test_empty_map(self, assert_packable):
assert_packable({}, b"\xA0")
@@ -285,14 +516,30 @@ def test_map_32(self, pack, assert_packable):
b = b"".join(pack(u"A%s" % i, 1) for i in range(80000))
assert_packable(d, b"\xDA\x00\x01\x38\x80" + b)
+ def test_empty_dataframe_maps(self, assert_packable):
+ df = pd.DataFrame()
+ assert_packable(df, b"\xA0", {})
+
+ @pytest.mark.parametrize("size", range(0x10))
+ def test_tiny_dataframes_maps(self, assert_packable, size):
+ data_in = dict()
+ data_out = bytearray([0xA0 + size])
+ for el in range(1, size + 1):
+ data_in[chr(64 + el)] = [el]
+ data_out += bytearray([0x81, 64 + el, 0x91, el])
+ data_in_typed = pd.DataFrame(data_in)
+ assert_packable(data_in_typed, bytes(data_out), data_in)
+
def test_map_size_overflow(self):
stream_out = BytesIO()
packer = Packer(stream_out)
with pytest.raises(OverflowError):
- packer.pack_map_header(2 ** 32)
+ packer._pack_map_header(2 ** 32)
@pytest.mark.parametrize(("map_", "exc_type"), (
({1: "1"}, TypeError),
+ (pd.DataFrame({1: ["1"]}), TypeError),
+ (pd.DataFrame({(1, 2): ["1"]}), TypeError),
({"x": {1: 'eins', 2: 'zwei', 3: 'drei'}}, TypeError),
({"x": {(1, 2): '1+2i', (2, 0): '2'}}, TypeError),
))
@@ -300,7 +547,7 @@ def test_map_key_type(self, packer_with_buffer, map_, exc_type):
# maps must have string keys
packer, packable_buffer = packer_with_buffer
with pytest.raises(exc_type, match="strings"):
- packer.pack(map_)
+ packer._pack(map_)
def test_illegal_signature(self, assert_packable):
with pytest.raises(ValueError):