Skip to content

Commit 65e3871

Browse files
committed
Fix read of query returning only empty var-length strings for attribute
If a query range returns only empty strings for a var-length attribute, the data buffer has zero length and the offsets alone denote the empty cells, so an empty data buffer is a valid result. Remove the zero-length check on the data buffer. See: https://github.com/TileDB-Inc/TileDB/blob/dev/test/src/unit-empty-var-length.cc
1 parent 4110334 commit 65e3871

File tree

3 files changed

+28
-4
lines changed

3 files changed

+28
-4
lines changed

tiledb/core.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -863,8 +863,6 @@ class PyQuery {
863863

864864
auto start = std::chrono::high_resolution_clock::now();
865865

866-
if (buf.size() < 1)
867-
TPY_ERROR_LOC(std::string("Unexpected empty buffer array ('") + name + "')");
868866
if (off.size() < 1)
869867
TPY_ERROR_LOC(std::string("Unexpected empty offsets array ('") + name + "')");
870868

tiledb/libtiledb.pyx

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5402,12 +5402,11 @@ cdef class SparseArrayImpl(Array):
54025402
if name == '__attr':
54035403
final_name = ''
54045404
if self.schema._needs_var_buffer(name):
5405-
if len(results[name][1]) > 0:
5405+
if len(results[name][1]) > 0: # note: len(offsets) > 0
54065406
arr = q.unpack_buffer(name, results[name][0], results[name][1])
54075407
else:
54085408
arr = results[name][0]
54095409
final_dtype = self.schema.attr_or_dim_dtype(name)
5410-
print((arr, len(arr), final_dtype))
54115410
if (len(arr) < 1 and
54125411
(np.issubdtype(final_dtype, np.bytes_) or
54135412
np.issubdtype(final_dtype, np.unicode_))):

tiledb/tests/test_libtiledb.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1467,6 +1467,33 @@ def test_varlen_write_bytes(self):
14671467

14681468
assert_array_equal(A, T.multi_index[1:len(A)][''])
14691469

1470+
def test_varlen_sparse_all_empty_strings(self):
1471+
# this test covers a fix needed for reads on a
1472+
# large existing array, see
1473+
# https://github.com/TileDB-Inc/TileDB-Py/pull/475
1474+
# we currently have to write a placeholder at the end to
1475+
# avoid a zero-length cell error
1476+
# TODO: follow-up with improved testing for empty var-length/strings
1477+
A = np.array(['','','','','','\x00'], dtype=object)
1478+
dim_len = len(A)
1479+
uri = self.path("varlen_all_empty_strings")
1480+
1481+
ctx = tiledb.Ctx()
1482+
1483+
dom = tiledb.Domain(tiledb.Dim(ctx=ctx, domain=(1, dim_len), tile=dim_len), ctx=ctx)
1484+
att = tiledb.Attr(name="a1", dtype=np.str_, var=True, ctx=ctx)
1485+
1486+
schema = tiledb.ArraySchema(dom, (att,), sparse=True, ctx=ctx)
1487+
1488+
tiledb.Array.create(uri, schema)
1489+
1490+
with tiledb.open(uri, mode='w', ctx=ctx) as T:
1491+
T[np.arange(1,dim_len+1)] = {'a1' : A}
1492+
1493+
with tiledb.open(uri, mode='r', ctx=ctx) as T:
1494+
# check interior range
1495+
assert_array_equal(A[1:-1], T[2:-1]['a1'])
1496+
assert_array_equal(A[1:-1], T.multi_index[2:dim_len-1]['a1'])
14701497

14711498
def test_varlen_write_unicode(self):
14721499
A = np.array(['aa','bbb',

0 commit comments

Comments
 (0)