From 40bcdea78dae1b9defa8b9de4e0cdd9d20f42999 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 13:37:03 +0100 Subject: [PATCH 1/5] Commit --- Include/internal/pycore_unicodeobject.h | 2 +- Lib/test/test_capi/test_unicode.py | 89 ++++++++ ...-09-07-13-36-15.gh-issue-103997.jIPHCc.rst | 1 + Modules/_testinternalcapi.c | 14 +- Objects/unicodeobject.c | 196 ++++++++---------- 5 files changed, 185 insertions(+), 117 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-09-07-13-36-15.gh-issue-103997.jIPHCc.rst diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 8dfcaedd5ef2e8..57293171aca41b 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -261,7 +261,7 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( Behaviour is expected to be an exact match of `textwrap.dedent`. Return a new reference on success, NULL with exception set on error. */ -extern PyObject* _PyUnicode_Dedent(PyObject *unicode); +PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode); /* --- Misc functions ----------------------------------------------------- */ diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 6a9c60f3a6d75e..5ddd26bc465852 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1074,6 +1074,95 @@ def test_transform_decimal_and_space(self): self.assertRaises(SystemError, transform_decimal, []) # CRASHES transform_decimal(NULL) + @support.cpython_only + @unittest.skipIf(_testinternalcapi is None,'need _testinternalcapi module') + def test_dedent(self): + from _testinternalcapi import _PyUnicode_Dedent as dedent + self.assertEqual('hello\nworld', dedent(' hello\n world')) + self.assertEqual('hello\nmy\n friend', dedent(' hello\n my\n friend')) + + # Only spaces. + text = " " + expect = "" + self.assertEqual(expect, dedent(text)) + + # Only tabs. + text = "\t\t\t\t" + expect = "" + self.assertEqual(expect, dedent(text)) + + # A mixture. + text = " \t \t\t \t " + expect = "" + self.assertEqual(expect, dedent(text)) + + # ASCII whitespace. + text = "\f\n\r\t\v " + expect = "\n" + self.assertEqual(expect, dedent(text)) + + # One newline. + text = "\n" + expect = "\n" + self.assertEqual(expect, dedent(text)) + + # Windows-style newlines. + text = "\r\n" * 5 + expect = "\n" * 5 + self.assertEqual(expect, dedent(text)) + + # Whitespace mixture. + text = " \n\t\n \n\t\t\n\n\n " + expect = "\n\n\n\n\n\n" + self.assertEqual(expect, dedent(text)) + + # Lines consisting only of whitespace are always normalised + text = "a\n \n\t\n" + expect = "a\n\n\n" + self.assertEqual(expect, dedent(text)) + + # Whitespace characters on non-empty lines are retained + text = "a\r\n\r\n\r\n" + expect = "a\r\n\n\n" + self.assertEqual(expect, dedent(text)) + + # Uneven indentation with declining indent level. + text = " Foo\n Bar\n" # 5 spaces, then 4 + expect = " Foo\nBar\n" + self.assertEqual(expect, dedent(text)) + + # Declining indent level with blank line. + text = " Foo\n\n Bar\n" # 5 spaces, blank, then 4 + expect = " Foo\n\nBar\n" + self.assertEqual(expect, dedent(text)) + + # Declining indent level with whitespace only line. + text = " Foo\n \n Bar\n" # 5 spaces, then 4, then 4 + expect = " Foo\n\nBar\n" + self.assertEqual(expect, dedent(text)) + + text = " hello\tthere\n how are\tyou?" + expect = "hello\tthere\nhow are\tyou?" + self.assertEqual(expect, dedent(text)) + + # dedent() only removes whitespace that can be uniformly removed! + text = "\thello there\n\thow are you?" + expect = "hello there\nhow are you?" + self.assertEqual(expect, dedent(text)) + + text = '''\ + def foo(): + while 1: + return foo + ''' + expect = '''\ +def foo(): + while 1: + return foo +''' + self.assertEqual(expect, dedent(text)) + + @support.cpython_only @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') def test_concat(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-07-13-36-15.gh-issue-103997.jIPHCc.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-07-13-36-15.gh-issue-103997.jIPHCc.rst new file mode 100644 index 00000000000000..d28137bcc9dd69 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-07-13-36-15.gh-issue-103997.jIPHCc.rst @@ -0,0 +1 @@ +:option:`-c` now dedents like :func:`textwrap.dedent` diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 243c7346576fc6..0776b088b4a1f2 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -34,7 +34,7 @@ #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pylifecycle.h" // _PyInterpreterConfig_InitFromDict() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII() +#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII() / _PyUnicode_Dedent() #include "clinic/_testinternalcapi.c.h" @@ -1416,6 +1416,17 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); } +/* Test _PyUnicode_Dedent() */ +static PyObject * +unicode_dedent(PyObject *self, PyObject *arg) +{ + if (arg == Py_None) { + arg = NULL; + } + return _PyUnicode_Dedent(arg); +} + + static PyObject * test_pyobject_is_freed(const char *test_name, PyObject *op) { @@ -2422,6 +2433,7 @@ static PyMethodDef module_functions[] = { {"_PyTraceMalloc_GetTraceback", tracemalloc_get_traceback, METH_VARARGS}, {"test_tstate_capi", test_tstate_capi, METH_NOARGS, NULL}, {"_PyUnicode_TransformDecimalAndSpaceToASCII", unicode_transformdecimalandspacetoascii, METH_O}, + {"_PyUnicode_Dedent", unicode_dedent, METH_O}, {"check_pyobject_forbidden_bytes_is_freed", check_pyobject_forbidden_bytes_is_freed, METH_NOARGS}, {"check_pyobject_freed_is_freed", check_pyobject_freed_is_freed, METH_NOARGS}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4c88e4c1fdca2e..ee51cdcaa3d4cf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14309,83 +14309,65 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) } /* -This function searchs the longest common leading whitespace -of all lines in the [src, end). -It returns the length of the common leading whitespace and sets `output` to -point to the beginning of the common leading whitespace if length > 0. + Find the longest common leading whitespace among a list of lines. + Whitespace-only lines are ignored. + Returns the margin length (>= 0). */ static Py_ssize_t -search_longest_common_leading_whitespace( - const char *const src, - const char *const end, - const char **output) -{ - // [_start, _start + _len) - // describes the current longest common leading whitespace - const char *_start = NULL; - Py_ssize_t _len = 0; - - for (const char *iter = src; iter < end; ++iter) { - const char *line_start = iter; - const char *leading_whitespace_end = NULL; - - // scan the whole line - while (iter < end && *iter != '\n') { - if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') { - /* `iter` points to the first non-whitespace character - in this line */ - if (iter == line_start) { - // some line has no indent, fast exit! - return 0; - } - leading_whitespace_end = iter; - } - ++iter; - } +search_longest_common_leading_whitespace(PyObject *lines, Py_ssize_t nlines) +{ + PyObject *smallest = NULL, *largest = NULL; + for (Py_ssize_t i = 0; i < nlines; i++) { + PyObject *line = PyList_GET_ITEM(lines, i); + Py_ssize_t linelen = PyUnicode_GET_LENGTH(line); - // if this line has all white space, skip it - if (!leading_whitespace_end) { + if (linelen == 0) { continue; } - if (!_start) { - // update the first leading whitespace - _start = line_start; - _len = leading_whitespace_end - line_start; - assert(_len > 0); + int kind = PyUnicode_KIND(line); + void *data = PyUnicode_DATA(line); + int all_ws = 1; + for (Py_ssize_t j = 0; j < linelen; j++) { + if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))) { + all_ws = 0; + break; + } + } + if (all_ws) { + continue; } - else { - /* We then compare with the current longest leading whitespace. - [line_start, leading_whitespace_end) is the leading - whitespace of this line, + if (smallest == NULL || PyObject_RichCompareBool(line, smallest, Py_LT)) { + smallest = line; + } + if (largest == NULL || PyObject_RichCompareBool(line, largest, Py_GT)) { + largest = line; + } + } - [_start, _start + _len) is the leading whitespace of the - current longest leading whitespace. */ - Py_ssize_t new_len = 0; - const char *_iter = _start, *line_iter = line_start; + if (smallest == NULL || largest == NULL) { + return 0; + } - while (_iter < _start + _len && line_iter < leading_whitespace_end - && *_iter == *line_iter) - { - ++_iter; - ++line_iter; - ++new_len; - } + Py_ssize_t margin = 0; + Py_ssize_t minlen = Py_MIN(PyUnicode_GET_LENGTH(smallest), + PyUnicode_GET_LENGTH(largest)); + int skind = PyUnicode_KIND(smallest); + int lkind = PyUnicode_KIND(largest); + const void *sdata = PyUnicode_DATA(smallest); + const void *ldata = PyUnicode_DATA(largest); - _len = new_len; - if (_len == 0) { - // No common things now, fast exit! - return 0; - } + while (margin < minlen) { + Py_UCS4 c1 = PyUnicode_READ(skind, sdata, margin); + Py_UCS4 c2 = PyUnicode_READ(lkind, ldata, margin); + if (c1 != c2 || !(c1 == ' ' || c1 == '\t')) { + break; } + margin++; } - assert(_len >= 0); - if (_len > 0) { - *output = _start; - } - return _len; + return margin; } /* Dedent a string. @@ -14395,74 +14377,58 @@ search_longest_common_leading_whitespace( PyObject * _PyUnicode_Dedent(PyObject *unicode) { - Py_ssize_t src_len = 0; - const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len); - if (!src) { + PyObject *sep = PyUnicode_FromString("\n"); + if (sep == NULL) { return NULL; } - assert(src_len >= 0); - if (src_len == 0) { - return Py_NewRef(unicode); - } - - const char *const end = src + src_len; - - // [whitespace_start, whitespace_start + whitespace_len) - // describes the current longest common leading whitespace - const char *whitespace_start = NULL; - Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( - src, end, &whitespace_start); - - if (whitespace_len == 0) { - return Py_NewRef(unicode); + PyObject *lines = PyUnicode_Split(unicode, sep, -1); + Py_DECREF(sep); + if (lines == NULL) { + return NULL; } + Py_ssize_t nlines = PyList_GET_SIZE(lines); + Py_ssize_t margin = search_longest_common_leading_whitespace(lines, nlines); - // now we should trigger a dedent - char *dest = PyMem_Malloc(src_len); - if (!dest) { - PyErr_NoMemory(); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + Py_DECREF(lines); return NULL; } - char *dest_iter = dest; - for (const char *iter = src; iter < end; ++iter) { - const char *line_start = iter; - bool in_leading_space = true; + for (Py_ssize_t i = 0; i < nlines; i++) { + PyObject *line = PyList_GET_ITEM(lines, i); + Py_ssize_t linelen = PyUnicode_GET_LENGTH(line); - // iterate over a line to find the end of a line - while (iter < end && *iter != '\n') { - if (in_leading_space && *iter != ' ' && *iter != '\t') { - in_leading_space = false; + int all_ws = 1; + int kind = PyUnicode_KIND(line); + void *data = PyUnicode_DATA(line); + for (Py_ssize_t j = 0; j < linelen; j++) { + if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))) { + all_ws = 0; + break; } - ++iter; } - // invariant: *iter == '\n' or iter == end - bool append_newline = iter < end; - - // if this line has all white space, write '\n' and continue - if (in_leading_space && append_newline) { - *dest_iter++ = '\n'; - continue; + if (!all_ws) { + Py_ssize_t start = Py_MIN(margin, linelen); + if (PyUnicodeWriter_WriteSubstring(writer, line, start, linelen) < 0) { + PyUnicodeWriter_Discard(writer); + Py_DECREF(lines); + return NULL; + } } - /* copy [new_line_start + whitespace_len, iter) to buffer, then - conditionally append '\n' */ - - Py_ssize_t new_line_len = iter - line_start - whitespace_len; - assert(new_line_len >= 0); - memcpy(dest_iter, line_start + whitespace_len, new_line_len); - - dest_iter += new_line_len; - - if (append_newline) { - *dest_iter++ = '\n'; + if (i < nlines - 1) { + if (PyUnicodeWriter_WriteChar(writer, '\n') < 0) { + PyUnicodeWriter_Discard(writer); + Py_DECREF(lines); + return NULL; + } } } - PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest); - PyMem_Free(dest); - return res; + Py_DECREF(lines); + return PyUnicodeWriter_Finish(writer); } static PyMethodDef unicode_methods[] = { From f6ace9d2b62352adb63f6fa9a452d13bb6b02089 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 16:06:00 +0100 Subject: [PATCH 2/5] Add comment & un-refactor --- Include/internal/pycore_unicodeobject.h | 4 +- Objects/unicodeobject.c | 193 ++++++++++++++---------- 2 files changed, 114 insertions(+), 83 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 57293171aca41b..c53bc084de072e 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -259,7 +259,9 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( /* Dedent a string. Behaviour is expected to be an exact match of `textwrap.dedent`. - Return a new reference on success, NULL with exception set on error. + Return a new reference on success, NULL with an exception set on error. + + Export for test_capi.test_unicode */ PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ee51cdcaa3d4cf..67898b56711c61 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14309,126 +14309,155 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) } /* - Find the longest common leading whitespace among a list of lines. - Whitespace-only lines are ignored. - Returns the margin length (>= 0). +This function searches the longest common leading whitespace +of all lines in the [src, end). +It returns the length of the common leading whitespace and sets *output* to +point to the beginning of the common leading whitespace if length > 0. */ static Py_ssize_t -search_longest_common_leading_whitespace(PyObject *lines, Py_ssize_t nlines) -{ - PyObject *smallest = NULL, *largest = NULL; - for (Py_ssize_t i = 0; i < nlines; i++) { - PyObject *line = PyList_GET_ITEM(lines, i); - Py_ssize_t linelen = PyUnicode_GET_LENGTH(line); - - if (linelen == 0) { - continue; - } - - int kind = PyUnicode_KIND(line); - void *data = PyUnicode_DATA(line); - int all_ws = 1; - for (Py_ssize_t j = 0; j < linelen; j++) { - if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))) { - all_ws = 0; - break; +search_longest_common_leading_whitespace( + const char *const src, + const char *const end, + const char **output) +{ + // [_start, _start + _len) + // describes the current longest common leading whitespace + const char *_start = NULL; + Py_ssize_t _len = 0; + + for (const char *iter = src; iter < end; ++iter) { + const char *line_start = iter; + const char *leading_whitespace_end = NULL; + + // scan the whole line + while (iter < end && *iter != '\n') { + if (!leading_whitespace_end && !Py_ISSPACE(Py_CHARMASK(*iter))) { + if (iter == line_start) { + // some line has no indent, fast exit! + return 0; + } + leading_whitespace_end = iter; } + ++iter; } - if (all_ws) { + + // if this line has all white space, skip it + if (!leading_whitespace_end) { continue; } - if (smallest == NULL || PyObject_RichCompareBool(line, smallest, Py_LT)) { - smallest = line; + if (!_start) { + // update the first leading whitespace + _start = line_start; + _len = leading_whitespace_end - line_start; + assert(_len > 0); } - if (largest == NULL || PyObject_RichCompareBool(line, largest, Py_GT)) { - largest = line; - } - } + else { + /* We then compare with the current longest leading whitespace. - if (smallest == NULL || largest == NULL) { - return 0; - } + [line_start, leading_whitespace_end) is the leading + whitespace of this line, - Py_ssize_t margin = 0; - Py_ssize_t minlen = Py_MIN(PyUnicode_GET_LENGTH(smallest), - PyUnicode_GET_LENGTH(largest)); - int skind = PyUnicode_KIND(smallest); - int lkind = PyUnicode_KIND(largest); - const void *sdata = PyUnicode_DATA(smallest); - const void *ldata = PyUnicode_DATA(largest); + [_start, _start + _len) is the leading whitespace of the + current longest leading whitespace. */ + Py_ssize_t new_len = 0; + const char *_iter = _start, *line_iter = line_start; - while (margin < minlen) { - Py_UCS4 c1 = PyUnicode_READ(skind, sdata, margin); - Py_UCS4 c2 = PyUnicode_READ(lkind, ldata, margin); - if (c1 != c2 || !(c1 == ' ' || c1 == '\t')) { - break; + while (_iter < _start + _len && line_iter < leading_whitespace_end + && *_iter == *line_iter) + { + ++_iter; + ++line_iter; + ++new_len; + } + + _len = new_len; + if (_len == 0) { + // No common things now, fast exit! + return 0; + } } - margin++; } - return margin; + assert(_len >= 0); + if (_len > 0) { + *output = _start; + } + return _len; } /* Dedent a string. - Behaviour is expected to be an exact match of `textwrap.dedent`. - Return a new reference on success, NULL with exception set on error. + Behaviour is expected to be an exact match of textwrap.dedent. + Return a new reference on success, NULL with an exception set on error. */ PyObject * _PyUnicode_Dedent(PyObject *unicode) { - PyObject *sep = PyUnicode_FromString("\n"); - if (sep == NULL) { + Py_ssize_t src_len = 0; + const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len); + if (!src) { return NULL; } - PyObject *lines = PyUnicode_Split(unicode, sep, -1); - Py_DECREF(sep); - if (lines == NULL) { - return NULL; + assert(src_len >= 0); + if (src_len == 0) { + return Py_NewRef(unicode); } - Py_ssize_t nlines = PyList_GET_SIZE(lines); - Py_ssize_t margin = search_longest_common_leading_whitespace(lines, nlines); - PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); - if (writer == NULL) { - Py_DECREF(lines); + const char *const end = src + src_len; + + // [whitespace_start, whitespace_start + whitespace_len) + // describes the current longest common leading whitespace + const char *whitespace_start = NULL; + const Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( + src, end, &whitespace_start); + + // now we should trigger a dedent + char *dest = PyMem_Malloc(src_len); + if (!dest) { + PyErr_NoMemory(); return NULL; } + char *dest_iter = dest; - for (Py_ssize_t i = 0; i < nlines; i++) { - PyObject *line = PyList_GET_ITEM(lines, i); - Py_ssize_t linelen = PyUnicode_GET_LENGTH(line); + for (const char *iter = src; iter < end; ++iter) { + const char *line_start = iter; + bool in_leading_space = true; - int all_ws = 1; - int kind = PyUnicode_KIND(line); - void *data = PyUnicode_DATA(line); - for (Py_ssize_t j = 0; j < linelen; j++) { - if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))) { - all_ws = 0; - break; + // iterate over a line to find the end of a line + while (iter < end && *iter != '\n') { + if (in_leading_space && !Py_ISSPACE(Py_CHARMASK(*iter))) { + in_leading_space = false; } + ++iter; } - if (!all_ws) { - Py_ssize_t start = Py_MIN(margin, linelen); - if (PyUnicodeWriter_WriteSubstring(writer, line, start, linelen) < 0) { - PyUnicodeWriter_Discard(writer); - Py_DECREF(lines); - return NULL; + // invariant: *iter == '\n' or iter == end + const bool append_newline = iter < end; + + // if this line has all white space, write '\n' and continue + if (in_leading_space) { + if (append_newline) { + *dest_iter++ = '\n'; } + continue; } - if (i < nlines - 1) { - if (PyUnicodeWriter_WriteChar(writer, '\n') < 0) { - PyUnicodeWriter_Discard(writer); - Py_DECREF(lines); - return NULL; - } + /* copy [new_line_start + whitespace_len, iter) to buffer, then + conditionally append '\n' */ + const Py_ssize_t new_line_len = iter - line_start - whitespace_len; + assert(new_line_len >= 0); + memcpy(dest_iter, line_start + whitespace_len, new_line_len); + + dest_iter += new_line_len; + + if (append_newline) { + *dest_iter++ = '\n'; } } - Py_DECREF(lines); - return PyUnicodeWriter_Finish(writer); + PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest); + PyMem_Free(dest); + return res; } static PyMethodDef unicode_methods[] = { From 3d3d957ca45772a2224f9fb940b83651e7981d9a Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 16:13:33 +0100 Subject: [PATCH 3/5] Revert some more --- Objects/unicodeobject.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 67898b56711c61..2feac651a8fcdf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14332,6 +14332,8 @@ search_longest_common_leading_whitespace( // scan the whole line while (iter < end && *iter != '\n') { if (!leading_whitespace_end && !Py_ISSPACE(Py_CHARMASK(*iter))) { + /* `iter` points to the first non-whitespace character + in this line */ if (iter == line_start) { // some line has no indent, fast exit! return 0; @@ -14408,7 +14410,7 @@ _PyUnicode_Dedent(PyObject *unicode) // [whitespace_start, whitespace_start + whitespace_len) // describes the current longest common leading whitespace const char *whitespace_start = NULL; - const Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( + Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( src, end, &whitespace_start); // now we should trigger a dedent @@ -14432,7 +14434,7 @@ _PyUnicode_Dedent(PyObject *unicode) } // invariant: *iter == '\n' or iter == end - const bool append_newline = iter < end; + bool append_newline = iter < end; // if this line has all white space, write '\n' and continue if (in_leading_space) { @@ -14444,7 +14446,7 @@ _PyUnicode_Dedent(PyObject *unicode) /* copy [new_line_start + whitespace_len, iter) to buffer, then conditionally append '\n' */ - const Py_ssize_t new_line_len = iter - line_start - whitespace_len; + Py_ssize_t new_line_len = iter - line_start - whitespace_len; assert(new_line_len >= 0); memcpy(dest_iter, line_start + whitespace_len, new_line_len); From 4403936d7f278cd7dc48da0b23b93a45448ef696 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 16:22:30 +0100 Subject: [PATCH 4/5] Revert odd line removal --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2feac651a8fcdf..9e9a0d826c46bb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14446,6 +14446,7 @@ _PyUnicode_Dedent(PyObject *unicode) /* copy [new_line_start + whitespace_len, iter) to buffer, then conditionally append '\n' */ + Py_ssize_t new_line_len = iter - line_start - whitespace_len; assert(new_line_len >= 0); memcpy(dest_iter, line_start + whitespace_len, new_line_len); From 0cf91f242a96ad7c5fb042a9b930440092cd69a8 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 17:00:54 +0100 Subject: [PATCH 5/5] Changes --- Include/internal/pycore_unicodeobject.h | 2 +- Lib/test/test_capi/test_unicode.py | 1 - Modules/_testinternalcapi.c | 3 ++- Objects/unicodeobject.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index c53bc084de072e..68ac71b747eedc 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -261,7 +261,7 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( Behaviour is expected to be an exact match of `textwrap.dedent`. Return a new reference on success, NULL with an exception set on error. - Export for test_capi.test_unicode + Export for '_testinternalcapi' shared extension. */ PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode); diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 5ddd26bc465852..f18377927cdbc2 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1162,7 +1162,6 @@ def foo(): ''' self.assertEqual(expect, dedent(text)) - @support.cpython_only @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') def test_concat(self): diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 0776b088b4a1f2..af67a4d2b488ba 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -34,7 +34,7 @@ #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pylifecycle.h" // _PyInterpreterConfig_InitFromDict() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII() / _PyUnicode_Dedent() +#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII(), _PyUnicode_Dedent() #include "clinic/_testinternalcapi.c.h" @@ -1416,6 +1416,7 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); } + /* Test _PyUnicode_Dedent() */ static PyObject * unicode_dedent(PyObject *self, PyObject *arg) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9e9a0d826c46bb..b767b964d68822 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14311,7 +14311,7 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) /* This function searches the longest common leading whitespace of all lines in the [src, end). -It returns the length of the common leading whitespace and sets *output* to +It returns the length of the common leading whitespace and sets `output` to point to the beginning of the common leading whitespace if length > 0. */ static Py_ssize_t @@ -14389,7 +14389,7 @@ search_longest_common_leading_whitespace( } /* Dedent a string. - Behaviour is expected to be an exact match of textwrap.dedent. + Behaviour is expected to be an exact match of `textwrap.dedent`. Return a new reference on success, NULL with an exception set on error. */ PyObject *