Skip to content

Commit a6c5843

Browse files
committed
gh-130567: fix strxfrm memory allocation
The POSIX specification does not require wcsxfrm to return the needed buffer size; it only says: "If the value returned is n or more, the contents of the array pointed to by ws1 are unspecified." Therefore, double the allocation when the initial call fails, and repeat until it succeeds.
1 parent fda056e commit a6c5843

File tree

2 files changed

+43
-23
lines changed

2 files changed

+43
-23
lines changed

Lib/test/test_locale.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,17 @@ def test_strcoll_with_diacritic(self):
371371
def test_strxfrm_with_diacritic(self):
372372
self.assertLess(locale.strxfrm('à'), locale.strxfrm('b'))
373373

374+
@unittest.skipIf(sys.platform.startswith('aix'),
375+
'bpo-29972: broken test on AIX')
376+
@unittest.skipIf(
377+
is_emscripten or is_wasi,
378+
"musl libc issue on Emscripten/WASI, bpo-46390"
379+
)
380+
@unittest.skipIf(sys.platform.startswith("netbsd"),
381+
"gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE")
382+
def test_strxfrm_non_latin_1(self):
383+
self.assertLess(locale.strxfrm('s'), locale.strxfrm('š'))
384+
374385

375386
class NormalizeTest(unittest.TestCase):
376387
def check(self, localename, expected):

Modules/_localemodule.c

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -394,49 +394,58 @@ static PyObject *
394394
_locale_strxfrm_impl(PyObject *module, PyObject *str)
395395
/*[clinic end generated code: output=3081866ebffc01af input=1378bbe6a88b4780]*/
396396
{
397-
Py_ssize_t n1;
397+
Py_ssize_t buf_len;
398398
wchar_t *s = NULL, *buf = NULL;
399-
size_t n2;
399+
size_t xfrm_result;
400400
PyObject *result = NULL;
401401

402-
s = PyUnicode_AsWideCharString(str, &n1);
402+
s = PyUnicode_AsWideCharString(str, &buf_len);
403403
if (s == NULL)
404404
goto exit;
405-
if (wcslen(s) != (size_t)n1) {
405+
if (wcslen(s) != (size_t)buf_len) {
406406
PyErr_SetString(PyExc_ValueError,
407407
"embedded null character");
408408
goto exit;
409409
}
410410

411411
/* assume no change in size, first */
412-
n1 = n1 + 1;
413-
buf = PyMem_New(wchar_t, n1);
412+
buf_len = buf_len + 1;
413+
buf = PyMem_New(wchar_t, buf_len);
414414
if (!buf) {
415415
PyErr_NoMemory();
416416
goto exit;
417417
}
418-
errno = 0;
419-
n2 = wcsxfrm(buf, s, n1);
420-
if (errno && errno != ERANGE) {
421-
PyErr_SetFromErrno(PyExc_OSError);
422-
goto exit;
423-
}
424-
if (n2 >= (size_t)n1) {
425-
/* more space needed */
426-
wchar_t * new_buf = PyMem_Realloc(buf, (n2+1)*sizeof(wchar_t));
418+
for (;;) {
419+
errno = 0;
420+
xfrm_result = wcsxfrm(buf, s, buf_len);
421+
if (errno && errno != ERANGE) {
422+
PyErr_SetFromErrno(PyExc_OSError);
423+
break;
424+
}
425+
426+
if (xfrm_result < (size_t)buf_len) {
427+
// wcsxfrm succeeded, return result
428+
result = PyUnicode_FromWideChar(buf, xfrm_result);
429+
break;
430+
}
431+
432+
if (xfrm_result > buf_len) {
433+
// Assume this is the desired buffer size
434+
new_buf_len = xfrm_result + 1;
435+
} else {
436+
// Some platforms, such as macOS 15, don't return the desired buffer
437+
// size so it is up to the caller to figure out needed buffer size
438+
// (gh-130567).
439+
new_buf_len = new_buf_len * 2;
440+
}
441+
442+
wchar_t * new_buf = PyMem_Realloc(buf, new_buf_len * sizeof(wchar_t));
427443
if (!new_buf) {
428444
PyErr_NoMemory();
429-
goto exit;
445+
break;
430446
}
431447
buf = new_buf;
432-
errno = 0;
433-
n2 = wcsxfrm(buf, s, n2+1);
434-
if (errno) {
435-
PyErr_SetFromErrno(PyExc_OSError);
436-
goto exit;
437-
}
438448
}
439-
result = PyUnicode_FromWideChar(buf, n2);
440449
exit:
441450
PyMem_Free(buf);
442451
PyMem_Free(s);

0 commit comments

Comments
 (0)