Skip to content

Commit fa8f92b

Browse files
authored
Use buffer interface for deserialization (#135)
* Initial tests + interface changes to work with memoryviews as well as bytes objects when deserialising — this enables deserialising from mmap'ed files without copying
* Test readable and writable memoryviews (bytes and bytearray respectively)
* Update assert_is_not to restore original state so bitmaps can be reused
1 parent 369daeb commit fa8f92b

File tree

2 files changed

+62
-10
lines changed

2 files changed

+62
-10
lines changed

pyroaring/abstract_bitmap.pxi

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,16 @@ try:
1212
except NameError: # python 3
1313
pass
1414

15-
cdef croaring.roaring_bitmap_t *deserialize_ptr(bytes buff):
15+
16+
cdef croaring.roaring_bitmap_t *deserialize_ptr(const unsigned char[:] buff):
1617
cdef croaring.roaring_bitmap_t *ptr
1718
cdef const char *reason_failure = NULL
19+
20+
cdef char* buffer_ptr = <char*>&buff[0]
21+
1822
buff_size = len(buff)
19-
ptr = croaring.roaring_bitmap_portable_deserialize_safe(buff, buff_size)
23+
ptr = croaring.roaring_bitmap_portable_deserialize_safe(buffer_ptr, buff_size)
24+
2025
if ptr == NULL:
2126
raise ValueError("Could not deserialize bitmap")
2227
# Validate the bitmap
@@ -26,11 +31,14 @@ cdef croaring.roaring_bitmap_t *deserialize_ptr(bytes buff):
2631
raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}")
2732
return ptr
2833

29-
cdef croaring.roaring64_bitmap_t *deserialize64_ptr(bytes buff):
34+
cdef croaring.roaring64_bitmap_t *deserialize64_ptr(const unsigned char[:] buff):
3035
cdef croaring.roaring64_bitmap_t *ptr
3136
cdef const char *reason_failure = NULL
37+
38+
cdef char* buffer_ptr = <char*>&buff[0]
39+
3240
buff_size = len(buff)
33-
ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buff, buff_size)
41+
ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buffer_ptr, buff_size)
3442
if ptr == NULL:
3543
raise ValueError("Could not deserialize bitmap")
3644
# Validate the bitmap
@@ -760,7 +768,7 @@ cdef class AbstractBitMap:
760768

761769

762770
@classmethod
763-
def deserialize(cls, bytes buff):
771+
def deserialize(cls, const unsigned char[:] buff):
764772
"""
765773
Generate a bitmap from the given serialization. See AbstractBitMap.serialize for the reverse operation.
766774
@@ -1221,7 +1229,7 @@ cdef class AbstractBitMap64:
12211229

12221230

12231231
@classmethod
1224-
def deserialize(cls, bytes buff):
1232+
def deserialize(cls, const unsigned char[:] buff):
12251233
"""
12261234
Generate a bitmap from the given serialization. See AbstractBitMap64.serialize for the reverse operation.
12271235

test.py

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,21 +143,37 @@ def bitmap_sample(bitmap: AbstractBitMap, size: int) -> list[int]:
143143
return [bitmap[i] for i in indices]
144144

145145
def assert_is_not(self, bitmap1: AbstractBitMap, bitmap2: AbstractBitMap) -> None:
146+
add1 = remove1 = add2 = remove2 = -1
146147
if isinstance(bitmap1, BitMap):
147148
if bitmap1:
148-
bitmap1.remove(bitmap1[0])
149+
remove1 = bitmap1[0]
150+
bitmap1.remove(remove1)
149151
else:
150-
bitmap1.add(27)
152+
add1 = 27
153+
bitmap1.add(add1)
151154
elif isinstance(bitmap2, BitMap):
152155
if bitmap2:
153-
bitmap2.remove(bitmap1[0])
156+
remove2 = bitmap2[0]
157+
bitmap2.remove(remove2)
154158
else:
155-
bitmap2.add(27)
159+
add2 = 27
160+
bitmap2.add(add2)
156161
else: # The two are non-mutable, cannot do anything...
157162
return
158163
if bitmap1 == bitmap2:
159164
pytest.fail(
160165
'The two bitmaps are identical (modifying one also modifies the other).')
166+
# Restore the bitmaps to their original point
167+
else:
168+
if add1 >= 0:
169+
bitmap1.remove(add1)
170+
if remove1 >= 0:
171+
bitmap1.add(remove1)
172+
if add2 >= 0:
173+
bitmap2.remove(add2)
174+
if remove2 >= 0:
175+
bitmap2.add(remove2)
176+
161177

162178

163179
class TestBasic(Util):
@@ -874,6 +890,34 @@ def test_serialization(
874890
assert isinstance(new_bm, cls2)
875891
self.assert_is_not(old_bm, new_bm)
876892

893+
@given(bitmap_cls, bitmap_cls, hyp_many_collections)
894+
def test_deserialization_from_memoryview(
895+
self,
896+
cls1: type[EitherBitMap],
897+
cls2: type[EitherBitMap],
898+
values: list[HypCollection]
899+
) -> None:
900+
old_bms = [cls1(vals) for vals in values]
901+
902+
# Create a memoryview with all of the items concatenated into a single bytes
903+
# object.
904+
serialized = [bm.serialize() for bm in old_bms]
905+
sizes = [len(ser) for ser in serialized]
906+
starts = [0]
907+
for s in sizes:
908+
starts.append(s + starts[-1])
909+
910+
combined = b''.join(serialized)
911+
mutable_combined = bytearray(combined)
912+
913+
for source in (combined, mutable_combined):
914+
with memoryview(source) as mv:
915+
new_bms = [cls2.deserialize(mv[start: start + size])for start, size in zip(starts, sizes)]
916+
for old_bm, new_bm in zip(old_bms, new_bms):
917+
assert old_bm == new_bm
918+
assert isinstance(new_bm, cls2)
919+
self.assert_is_not(old_bm, new_bm)
920+
877921
@given(bitmap_cls, hyp_collection, st.integers(min_value=2, max_value=pickle.HIGHEST_PROTOCOL))
878922
def test_pickle_protocol(
879923
self,

0 commit comments

Comments
 (0)