From 64be6c49080f0d087f2c62c86de59f07cb69f8b1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 28 Jan 2025 17:47:14 +0100 Subject: [PATCH 1/3] gh-89188: Implement PyUnicode_KIND() as a function Implement PyUnicode_KIND() and PyUnicode_DATA() as functions, in addition to the macros with the same names. --- Include/cpython/unicodeobject.h | 8 ++++++-- Objects/unicodeobject.c | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 287de52b96202c..cea69dd1280999 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -240,6 +240,8 @@ enum PyUnicode_Kind { PyUnicode_4BYTE_KIND = 4 }; +PyAPI_FUNC(int) PyUnicode_KIND(PyObject *op); + // PyUnicode_KIND(): Return one of the PyUnicode_*_KIND values defined above. // // gh-89653: Converting this macro to a static inline function would introduce @@ -264,13 +266,15 @@ static inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) { return data; } -static inline void* PyUnicode_DATA(PyObject *op) { +PyAPI_FUNC(void*) PyUnicode_DATA(PyObject *op); + +static inline void* _PyUnicode_DATA(PyObject *op) { if (PyUnicode_IS_COMPACT(op)) { return _PyUnicode_COMPACT_DATA(op); } return _PyUnicode_NONCOMPACT_DATA(op); } -#define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op)) +#define PyUnicode_DATA(op) _PyUnicode_DATA(_PyObject_CAST(op)) /* Return pointers to the canonical representation cast to unsigned char, Py_UCS2, or Py_UCS4 for direct character access. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d9952f764bb178..c6f13f60ad741f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16486,3 +16486,24 @@ PyInit__string(void) { return PyModuleDef_Init(&_string_module); } + + +#undef PyUnicode_KIND +int PyUnicode_KIND(PyObject *op) +{ + if (!PyUnicode_Check(op)) { + PyErr_Format(PyExc_TypeError, "expect str, got %T", op); + return -1; + } + return _PyASCIIObject_CAST(op)->state.kind; +} + +#undef PyUnicode_DATA +void* PyUnicode_DATA(PyObject *op) +{ + if (!PyUnicode_Check(op)) { + PyErr_Format(PyExc_TypeError, "expect str, got %T", op); + return NULL; + } + return _PyUnicode_DATA(op); +} From e9073e1b71fb2c994b9af4fce147dc933d6e6d79 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 29 Jan 2025 11:58:40 +0100 Subject: [PATCH 2/3] Add NEWS entry --- .../next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst diff --git a/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst b/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst new file mode 100644 index 00000000000000..28cffc1898b63a --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst @@ -0,0 +1,3 @@ +Implement :c:func:`PyUnicode_KIND` and :c:func:`PyUnicode_DATA` as function, +in addition to the macros with the same names. The macros rely on C bit +fields which is an undefined behavior. Patch by Victor Stinner. 
From 99efe136a445ef837ae8807cd89fe0d0f72ff88b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 30 Jan 2025 12:04:08 +0100 Subject: [PATCH 3/3] Update Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst Co-authored-by: Petr Viktorin --- .../next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst b/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst index 28cffc1898b63a..7ff225a7dc60c7 100644 --- a/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst +++ b/Misc/NEWS.d/next/C_API/2025-01-29-11-58-38.gh-issue-89188.BsfLr3.rst @@ -1,3 +1,3 @@ Implement :c:func:`PyUnicode_KIND` and :c:func:`PyUnicode_DATA` as function, in addition to the macros with the same names. The macros rely on C bit -fields which is an undefined behavior. Patch by Victor Stinner. +fields which have compiler-specific layout. Patch by Victor Stinner.