Skip to content

Commit 3b0d427

Browse files
authored
Merge pull request #11 from ngoldbaum/add-asciidtype
Fix bugs around null termination bytes
2 parents 30d7137 + 3abea62 commit 3b0d427

File tree

4 files changed, with 73 additions and 50 deletions.

asciidtype/asciidtype/src/casts.c

Lines changed: 6 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ ascii_to_ascii_resolve_descriptors(PyObject *NPY_UNUSED(self),
2525
}
2626
else {
2727
Py_INCREF(given_descrs[1]);
28-
loop_descrs[1] = given_descrs[0];
28+
loop_descrs[1] = given_descrs[1];
2929
}
3030

3131
if (((ASCIIDTypeObject *)loop_descrs[0])->size ==
@@ -38,35 +38,9 @@ ascii_to_ascii_resolve_descriptors(PyObject *NPY_UNUSED(self),
3838
}
3939

4040
static int
41-
ascii_to_ascii_contiguous(PyArrayMethod_Context *context, char *const data[],
42-
npy_intp const dimensions[],
43-
npy_intp const NPY_UNUSED(strides[]),
44-
NpyAuxData *NPY_UNUSED(auxdata))
45-
{
46-
PyArray_Descr **descrs = context->descriptors;
47-
// for contiguous assignment the sizes of the two dtypes should be
48-
// the same, consider adding an assert to check?
49-
long size = ((ASCIIDTypeObject *)descrs[0])->size;
50-
51-
npy_intp N = dimensions[0] * size;
52-
char *in = data[0];
53-
char *out = data[1];
54-
55-
while (N--) {
56-
*out = *in;
57-
out++;
58-
in++;
59-
}
60-
61-
return 0;
62-
}
63-
64-
static int
65-
ascii_to_ascii_strided_or_unaligned(PyArrayMethod_Context *context,
66-
char *const data[],
67-
npy_intp const dimensions[],
68-
npy_intp const strides[],
69-
NpyAuxData *NPY_UNUSED(auxdata))
41+
ascii_to_ascii(PyArrayMethod_Context *context, char *const data[],
42+
npy_intp const dimensions[], npy_intp const strides[],
43+
NpyAuxData *NPY_UNUSED(auxdata))
7044
{
7145
PyArray_Descr **descrs = context->descriptors;
7246
long in_size = ((ASCIIDTypeObject *)descrs[0])->size;
@@ -87,7 +61,7 @@ ascii_to_ascii_strided_or_unaligned(PyArrayMethod_Context *context,
8761
npy_intp out_stride = strides[1];
8862

8963
while (N--) {
90-
memcpy(out, in, out_size * sizeof(char)); // NOLINT
64+
memcpy(out, in, copy_size * sizeof(char)); // NOLINT
9165
for (int i = copy_size; i < out_size; i++) {
9266
*(out + i) = '\0';
9367
}
@@ -106,20 +80,7 @@ ascii_to_ascii_get_loop(PyArrayMethod_Context *context, int aligned,
10680
NpyAuxData **NPY_UNUSED(out_transferdata),
10781
NPY_ARRAYMETHOD_FLAGS *flags)
10882
{
109-
PyArray_Descr **descrs = context->descriptors;
110-
111-
int contig = (strides[0] == ((ASCIIDTypeObject *)descrs[0])->size *
112-
sizeof(char) &&
113-
strides[1] == ((ASCIIDTypeObject *)descrs[1])->size *
114-
sizeof(char));
115-
116-
if (aligned && contig) {
117-
*out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_contiguous;
118-
}
119-
else {
120-
*out_loop = (PyArrayMethod_StridedLoop
121-
*)&ascii_to_ascii_strided_or_unaligned;
122-
}
83+
*out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii;
12384

12485
*flags = 0;
12586
return 0;

asciidtype/asciidtype/src/dtype.c

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ asciidtype_setitem(ASCIIDTypeObject *descr, PyObject *obj, char *dataptr)
113113

114114
Py_ssize_t len = PyBytes_Size(value);
115115

116-
size_t copysize;
116+
long copysize;
117117

118118
if (len > descr->size) {
119119
copysize = descr->size;
@@ -138,7 +138,13 @@ asciidtype_setitem(ASCIIDTypeObject *descr, PyObject *obj, char *dataptr)
138138
static PyObject *
139139
asciidtype_getitem(ASCIIDTypeObject *descr, char *dataptr)
140140
{
141-
PyObject *val_obj = PyUnicode_FromString(dataptr);
141+
char scalar_buffer[descr->size + 1];
142+
143+
memcpy(scalar_buffer, dataptr, descr->size * sizeof(char));
144+
145+
scalar_buffer[descr->size] = '\0';
146+
147+
PyObject *val_obj = PyUnicode_FromString(scalar_buffer);
142148
if (val_obj == NULL) {
143149
return NULL;
144150
}
@@ -205,7 +211,7 @@ asciidtype_repr(ASCIIDTypeObject *self)
205211
}
206212

207213
static PyMemberDef ASCIIDType_members[] = {
208-
{"size", T_OBJECT_EX, offsetof(ASCIIDTypeObject, size), READONLY,
214+
{"size", T_LONG, offsetof(ASCIIDTypeObject, size), READONLY,
209215
"The number of characters per array element"},
210216
{NULL},
211217
};

asciidtype/pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,9 @@ requires = [
88
]
99
build-backend = "mesonpy"
1010

11+
[tool.black]
12+
line-length = 79
13+
1114
[project]
1215
name = "asciidtype"
1316
description = "A dtype for ASCII data"

asciidtype/tests/test_asciidtype.py

Lines changed: 55 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,64 @@ def test_dtype_creation():
1010

1111
def test_scalar_creation():
1212
dtype = ASCIIDType(7)
13-
ASCIIScalar('string', dtype)
13+
ASCIIScalar("string", dtype)
1414

1515

1616
def test_creation_with_explicit_dtype():
1717
dtype = ASCIIDType(7)
1818
arr = np.array(["hello", "this", "is", "an", "array"], dtype=dtype)
1919
assert repr(arr) == (
20-
"array(['hello', 'this', 'is', 'an', 'array'], dtype=ASCIIDType(7))")
20+
"array(['hello', 'this', 'is', 'an', 'array'], dtype=ASCIIDType(7))"
21+
)
22+
23+
24+
def test_creation_truncation():
25+
inp = ["hello", "this", "is", "an", "array"]
26+
27+
dtype = ASCIIDType(5)
28+
arr = np.array(inp, dtype=dtype)
29+
assert repr(arr) == (
30+
"array(['hello', 'this', 'is', 'an', 'array'], dtype=ASCIIDType(5))"
31+
)
32+
33+
dtype = ASCIIDType(4)
34+
arr = np.array(inp, dtype=dtype)
35+
assert repr(arr) == (
36+
"array(['hell', 'this', 'is', 'an', 'arra'], dtype=ASCIIDType(4))"
37+
)
38+
39+
dtype = ASCIIDType(1)
40+
arr = np.array(inp, dtype=dtype)
41+
assert repr(arr) == (
42+
"array(['h', 't', 'i', 'a', 'a'], dtype=ASCIIDType(1))"
43+
)
44+
assert arr.tobytes() == b"htiaa"
45+
46+
# dtype = ASCIIDType()
47+
# arr = np.array(["hello", "this", "is", "an", "array"], dtype=dtype)
48+
# assert repr(arr) == ("array(['', '', '', '', ''], dtype=ASCIIDType(0))")
49+
# assert arr.tobytes() == b""
50+
51+
52+
def test_casting_to_asciidtype():
53+
arr = np.array(["hello", "this", "is", "an", "array"], dtype=ASCIIDType(5))
54+
55+
assert repr(arr.astype(ASCIIDType(7))) == (
56+
"array(['hello', 'this', 'is', 'an', 'array'], dtype=ASCIIDType(7))"
57+
)
58+
59+
assert repr(arr.astype(ASCIIDType(5))) == (
60+
"array(['hello', 'this', 'is', 'an', 'array'], dtype=ASCIIDType(5))"
61+
)
62+
63+
assert repr(arr.astype(ASCIIDType(4))) == (
64+
"array(['hell', 'this', 'is', 'an', 'arra'], dtype=ASCIIDType(4))"
65+
)
66+
67+
assert repr(arr.astype(ASCIIDType(1))) == (
68+
"array(['h', 't', 'i', 'a', 'a'], dtype=ASCIIDType(1))"
69+
)
70+
71+
# assert repr(arr.astype(ASCIIDType())) == (
72+
# "array(['', '', '', '', ''], dtype=ASCIIDType(0))"
73+
# )

0 commit comments

Comments
 (0)