From cb60c6f9200e12ff5f4e2c9bb1c5e111193e3a91 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Thu, 27 Mar 2025 06:55:07 +0300 Subject: [PATCH 01/11] gh-74756: support precision field for integer formatting types ```pycon >>> f"{-12:.8b}" '11110100' >>> f"{200:.8b}" Traceback (most recent call last): File "", line 1, in f"{200:.8b}" ^^^^^^^^^ OverflowError: Expected integer in range [-2**7, 2**7) >>> f"{123:.8d}" '00000123' >>> f"{-12:.8d}" '-00000012' ``` --- Doc/library/string.rst | 14 ++- Lib/test/test_long.py | 15 ++- Lib/test/test_types.py | 2 - ...5-03-31-07-25-18.gh-issue-74756.7aCPNT.rst | 3 + Python/formatter_unicode.c | 116 ++++++++++++++++-- 5 files changed, 135 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 721c5c8d334674..848fd9ff0f58b3 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -460,8 +460,18 @@ displayed after the decimal point for presentation types ``'f'`` and ``'F'``, or before and after the decimal point for presentation types ``'g'`` or ``'G'``. For string presentation types the field indicates the maximum field size - in other words, how many characters will be -used from the field content. The *precision* is not allowed for integer -presentation types. +used from the field content. + +For integer presentation types, the precision gives the minimal number of +digits to appear, expanded with an appropriate number of leading zeros. Note +that for non-decimal presentation types --- two's complements are used to +represent signed integers, accepting values in range ``[-m,m)``, where +``m=2**(k*precision-1)`` and ``k=1,3,4`` for ``'b'``, ``'o'`` and +``'x'``/``'X'`` types, respectively. A precision of ``0`` is treated as +equivalent to a precision of ``1`` here. + +.. versionchanged:: next + Precision specification allowed for integer presentation types. 
The ``'_'`` or ``','`` option after *precision* means the use of an underscore or a comma for a thousands separator of the fractional part for floating-point diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index f336d49fa4f008..140a2cb8086786 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -675,6 +675,7 @@ def test__format__(self): self.assertEqual(format(123456789, 'd'), '123456789') self.assertEqual(format(123456789, ','), '123,456,789') self.assertEqual(format(123456789, '_'), '123_456_789') + self.assertEqual(format(3, '1.3'), '003') # sign and aligning are interdependent self.assertEqual(format(1, "-"), '1') @@ -706,6 +707,9 @@ def test__format__(self): self.assertRaises(ValueError, format, 1234567890, ',x') self.assertEqual(format(1234567890, '_x'), '4996_02d2') self.assertEqual(format(1234567890, '_X'), '4996_02D2') + self.assertEqual(format(8086, '#.8x'), '0x00001f96') + self.assertRaises(OverflowError, format, 2048, '.3x') + self.assertRaises(OverflowError, format, -2049, '.3x') # octal self.assertEqual(format(3, "o"), "3") @@ -720,6 +724,9 @@ def test__format__(self): self.assertEqual(format(-1234, "+o"), "-2322") self.assertRaises(ValueError, format, 1234567890, ',o') self.assertEqual(format(1234567890, '_o'), '111_4540_1322') + self.assertEqual(format(18, '#.3o'), '0o022') + self.assertRaises(OverflowError, format, 256, '.3o') + self.assertRaises(OverflowError, format, -257, '.3o') # binary self.assertEqual(format(3, "b"), "11") @@ -734,9 +741,15 @@ def test__format__(self): self.assertEqual(format(-1234, "+b"), "-10011010010") self.assertRaises(ValueError, format, 1234567890, ',b') self.assertEqual(format(12345, '_b'), '11_0000_0011_1001') + self.assertEqual(format(-12, '.8b'), '11110100') + self.assertEqual(format(73, '.8b'), '01001001') + self.assertEqual(format(73, '#.8b'), '0b01001001') + self.assertRaises(OverflowError, format, 300, '.8b') + self.assertRaises(OverflowError, format, -200, '.8b') + 
self.assertRaises(OverflowError, format, 128, '.8b') + self.assertRaises(OverflowError, format, -129, '.8b') # make sure these are errors - self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed self.assertRaises(ValueError, format, 3, "_c") # underscore, self.assertRaises(ValueError, format, 3, ",c") # comma, and self.assertRaises(ValueError, format, 3, "+c") # sign not allowed diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index f014f7e9ee08c9..2d152508a37ff8 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -357,8 +357,6 @@ def test(i, format_spec, result): # make sure these are errors - # precision disallowed - self.assertRaises(ValueError, 3 .__format__, "1.3") # sign not allowed with 'c' self.assertRaises(ValueError, 3 .__format__, "+c") # format spec must be string diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst new file mode 100644 index 00000000000000..8312b438780a18 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst @@ -0,0 +1,3 @@ +Support precision field for integer formatting types. For binary, octal and +hexadecimal formatting types --- twos complements are used to represent +signed values. Patch by Sergey B Kirpichev. 
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 30807f428c7d71..52e2543a21cd16 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -358,7 +358,8 @@ parse_internal_render_format_spec(PyObject *obj, } } - if (format->type == 'n' + if ((format->type == 'n' || format->type == 'd' || format->type == 'b' + || format->type == 'o' || format->type == 'x' || format->type == 'X') && format->frac_thousands_separator != LT_NO_LOCALE) { invalid_thousands_separator_type(format->frac_thousands_separator, @@ -979,12 +980,6 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, from a hard-code pseudo-locale */ LocaleInfo locale = LocaleInfo_STATIC_INIT; - /* no precision allowed on integers */ - if (format->precision != -1) { - PyErr_SetString(PyExc_ValueError, - "Precision not allowed in integer format specifier"); - goto done; - } /* no negative zero coercion on integers */ if (format->no_neg_0) { PyErr_SetString(PyExc_ValueError, @@ -1063,6 +1058,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, if (format->sign != '+' && format->sign != ' ' && format->width == -1 + && format->precision == -1 && format->type != 'X' && format->type != 'n' && !format->thousands_separators && PyLong_CheckExact(value)) @@ -1077,9 +1073,109 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, n_prefix = leading_chars_to_skip; /* Do the hard part, converting to a string in a given base */ - tmp = _PyLong_Format(value, base); - if (tmp == NULL) - goto done; + if (format->precision != -1) { + /* Use two's complement for 'b', 'o' and 'x' formatting types */ + if (format->type == 'b' || format->type == 'x' + || format->type == 'o' || format->type == 'X') + { + int64_t shift = Py_MAX(1, format->precision); + + if (format->type == 'x' || format->type == 'X') { + shift *= 4; + } + else if (format->type == 'o') { + shift *= 3; + } + + PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), 
shift); + PyObject *mod2 = _PyLong_Rshift(mod, 1); + PyObject *value2 = value; + + if (mod == NULL || mod2 == NULL) { + Py_XDECREF(mod); + Py_XDECREF(mod2); + goto done; + } + if (PyLong_IsNegative(value)) { + value2 = PyNumber_Negative(mod2); + if (value2 == NULL) { + Py_DECREF(mod2); + goto done; + } + Py_SETREF(mod2, value2); + if (PyObject_RichCompareBool(value, mod2, Py_LT)) { + Py_DECREF(mod2); + PyErr_Format(PyExc_OverflowError, + "Expected integer in range [-2**%ld, 2**%ld)", + shift - 1, shift - 1); + goto done; + } + Py_DECREF(mod2); + value2 = PyNumber_Add(value, mod); + Py_DECREF(mod); + if (value2 == NULL) { + goto done; + } + } + else { + if (PyObject_RichCompareBool(value2, mod2, Py_GE)) { + Py_DECREF(mod); + Py_DECREF(mod2); + PyErr_Format(PyExc_OverflowError, + "Expected integer in range [-2**%ld, 2**%ld)", + shift - 1, shift - 1); + goto done; + } + Py_DECREF(mod); + Py_DECREF(mod2); + Py_INCREF(value2); + } + tmp = _PyLong_Format(value2, base); + Py_DECREF(value2); + } + else { + tmp = _PyLong_Format(value, base); + } + + /* Prepend enough leading zeros (after the sign) */ + + int sign = PyUnicode_READ_CHAR(tmp, leading_chars_to_skip) == '-'; + Py_ssize_t tmp2_len = format->precision + leading_chars_to_skip + sign; + Py_ssize_t tmp_len = PyUnicode_GET_LENGTH(tmp); + Py_ssize_t gap = tmp2_len - tmp_len; + + if (gap > 0) { + PyObject *tmp2 = PyUnicode_New(tmp2_len, 127); + + if (PyUnicode_CopyCharacters(tmp2, leading_chars_to_skip + gap + sign, + tmp, leading_chars_to_skip + sign, + tmp2_len - leading_chars_to_skip - sign) == -1) { + Py_DECREF(tmp2); + goto done; + } + if (PyUnicode_Fill(tmp2, leading_chars_to_skip + sign, gap, '0') == -1) { + Py_DECREF(tmp2); + goto done; + } + if (sign && PyUnicode_WriteChar(tmp2, leading_chars_to_skip, '-') == -1) { + Py_DECREF(tmp2); + goto done; + } + if (leading_chars_to_skip + && PyUnicode_CopyCharacters(tmp2, 0, tmp, 0, + leading_chars_to_skip) == -1) { + Py_DECREF(tmp2); + goto done; + } + Py_SETREF(tmp, 
tmp2); + } + } + else { + tmp = _PyLong_Format(value, base); + if (tmp == NULL) { + goto done; + } + } inumeric_chars = 0; n_digits = PyUnicode_GET_LENGTH(tmp); From b6bb4847e225bff2765d82d53b046ba66f738dc3 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 1 Apr 2025 03:26:31 +0300 Subject: [PATCH 02/11] address review: exceptions --- Lib/test/test_long.py | 16 ++++++++-------- Python/formatter_unicode.c | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 140a2cb8086786..fb0934825a3b54 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -708,8 +708,8 @@ def test__format__(self): self.assertEqual(format(1234567890, '_x'), '4996_02d2') self.assertEqual(format(1234567890, '_X'), '4996_02D2') self.assertEqual(format(8086, '#.8x'), '0x00001f96') - self.assertRaises(OverflowError, format, 2048, '.3x') - self.assertRaises(OverflowError, format, -2049, '.3x') + self.assertRaises(ValueError, format, 2048, '.3x') + self.assertRaises(ValueError, format, -2049, '.3x') # octal self.assertEqual(format(3, "o"), "3") @@ -725,8 +725,8 @@ def test__format__(self): self.assertRaises(ValueError, format, 1234567890, ',o') self.assertEqual(format(1234567890, '_o'), '111_4540_1322') self.assertEqual(format(18, '#.3o'), '0o022') - self.assertRaises(OverflowError, format, 256, '.3o') - self.assertRaises(OverflowError, format, -257, '.3o') + self.assertRaises(ValueError, format, 256, '.3o') + self.assertRaises(ValueError, format, -257, '.3o') # binary self.assertEqual(format(3, "b"), "11") @@ -744,10 +744,10 @@ def test__format__(self): self.assertEqual(format(-12, '.8b'), '11110100') self.assertEqual(format(73, '.8b'), '01001001') self.assertEqual(format(73, '#.8b'), '0b01001001') - self.assertRaises(OverflowError, format, 300, '.8b') - self.assertRaises(OverflowError, format, -200, '.8b') - self.assertRaises(OverflowError, format, 128, '.8b') - self.assertRaises(OverflowError, format, 
-129, '.8b') + self.assertRaises(ValueError, format, 300, '.8b') + self.assertRaises(ValueError, format, -200, '.8b') + self.assertRaises(ValueError, format, 128, '.8b') + self.assertRaises(ValueError, format, -129, '.8b') # make sure these are errors self.assertRaises(ValueError, format, 3, "_c") # underscore, diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 52e2543a21cd16..a99209357c287b 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1105,8 +1105,8 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, Py_SETREF(mod2, value2); if (PyObject_RichCompareBool(value, mod2, Py_LT)) { Py_DECREF(mod2); - PyErr_Format(PyExc_OverflowError, - "Expected integer in range [-2**%ld, 2**%ld)", + PyErr_Format(PyExc_ValueError, + "Expected integer in range(-2**%ld, 2**%ld)", shift - 1, shift - 1); goto done; } @@ -1121,8 +1121,8 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, if (PyObject_RichCompareBool(value2, mod2, Py_GE)) { Py_DECREF(mod); Py_DECREF(mod2); - PyErr_Format(PyExc_OverflowError, - "Expected integer in range [-2**%ld, 2**%ld)", + PyErr_Format(PyExc_ValueError, + "Expected integer in range(-2**%ld, 2**%ld)", shift - 1, shift - 1); goto done; } From ad33bc0f606c12b3096cc592d968f98f4e8dc5c6 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 1 Apr 2025 11:10:10 +0300 Subject: [PATCH 03/11] + cleanup --- Python/formatter_unicode.c | 42 +++++++++++++++----------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index a99209357c287b..71506fd366f2bc 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1086,56 +1086,48 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, else if (format->type == 'o') { shift *= 3; } + shift--; /* expected value in range(-2**shift, 2**shift) */ PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), shift); - 
PyObject *mod2 = _PyLong_Rshift(mod, 1); - PyObject *value2 = value; - if (mod == NULL || mod2 == NULL) { - Py_XDECREF(mod); - Py_XDECREF(mod2); + if (mod == NULL) { goto done; } if (PyLong_IsNegative(value)) { - value2 = PyNumber_Negative(mod2); - if (value2 == NULL) { - Py_DECREF(mod2); + Py_SETREF(mod, PyNumber_Negative(mod)); + if (mod == NULL) { goto done; } - Py_SETREF(mod2, value2); - if (PyObject_RichCompareBool(value, mod2, Py_LT)) { - Py_DECREF(mod2); - PyErr_Format(PyExc_ValueError, - "Expected integer in range(-2**%ld, 2**%ld)", - shift - 1, shift - 1); - goto done; + if (PyObject_RichCompareBool(value, mod, Py_LT)) { + goto range; } - Py_DECREF(mod2); - value2 = PyNumber_Add(value, mod); + Py_SETREF(mod, _PyLong_Lshift(mod, 1)); + tmp = PyNumber_Subtract(value, mod); Py_DECREF(mod); - if (value2 == NULL) { + if (tmp == NULL) { goto done; } + Py_SETREF(tmp, _PyLong_Format(tmp, base)); } else { - if (PyObject_RichCompareBool(value2, mod2, Py_GE)) { + if (PyObject_RichCompareBool(value, mod, Py_GE)) { +range: Py_DECREF(mod); - Py_DECREF(mod2); PyErr_Format(PyExc_ValueError, "Expected integer in range(-2**%ld, 2**%ld)", - shift - 1, shift - 1); + shift, shift); goto done; } Py_DECREF(mod); - Py_DECREF(mod2); - Py_INCREF(value2); + tmp = _PyLong_Format(value, base); } - tmp = _PyLong_Format(value2, base); - Py_DECREF(value2); } else { tmp = _PyLong_Format(value, base); } + if (tmp == NULL) { + goto done; + } /* Prepend enough leading zeros (after the sign) */ From d6aa05ffedd3dcb639ad83de4d255fe713601a87 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Wed, 2 Apr 2025 09:49:59 +0300 Subject: [PATCH 04/11] address review: disallow precision for 'c' type --- Lib/test/test_long.py | 1 + Python/formatter_unicode.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index fb0934825a3b54..b8155319b6871a 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -750,6 +750,7 @@ def 
test__format__(self): self.assertRaises(ValueError, format, -129, '.8b') # make sure these are errors + self.assertRaises(ValueError, format, 3, "1.3c") # precision disallowed with 'c', self.assertRaises(ValueError, format, 3, "_c") # underscore, self.assertRaises(ValueError, format, 3, ",c") # comma, and self.assertRaises(ValueError, format, 3, "+c") # sign not allowed diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 71506fd366f2bc..da605415ad9cff 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -980,6 +980,13 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, from a hard-code pseudo-locale */ LocaleInfo locale = LocaleInfo_STATIC_INIT; + /* no precision allowed on 'c' integer representation type */ + if (format->precision != -1 && format->type == 'c') { + PyErr_SetString(PyExc_ValueError, + "Precision not allowed with 'c' integer format specifier"); + goto done; + } + /* no negative zero coercion on integers */ if (format->no_neg_0) { PyErr_SetString(PyExc_ValueError, From f03f6bbec686d60f3bf6c4659e10658ccc50e0e7 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Wed, 2 Apr 2025 09:51:31 +0300 Subject: [PATCH 05/11] +1 // docs --- Doc/library/string.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 848fd9ff0f58b3..52723eefb6b6e5 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -462,12 +462,12 @@ types ``'g'`` or ``'G'``. For string presentation types the field indicates the maximum field size - in other words, how many characters will be used from the field content. -For integer presentation types, the precision gives the minimal number of -digits to appear, expanded with an appropriate number of leading zeros. 
Note -that for non-decimal presentation types --- two's complements are used to -represent signed integers, accepting values in range ``[-m,m)``, where -``m=2**(k*precision-1)`` and ``k=1,3,4`` for ``'b'``, ``'o'`` and -``'x'``/``'X'`` types, respectively. A precision of ``0`` is treated as +For integer presentation types (excluding ``'c'``), the precision gives the +minimal number of digits to appear, expanded with an appropriate number of +leading zeros. Note that for non-decimal presentation types --- two's +complements are used to represent signed integers, accepting values in range +``[-m,m)``, where ``m=2**(k*precision-1)`` and ``k=1,3,4`` for ``'b'``, ``'o'`` +and ``'x'``/``'X'`` types, respectively. A precision of ``0`` is treated as equivalent to a precision of ``1`` here. .. versionchanged:: next From 837ebbf7db4c935a5157fa6e1da4b0e4a5168670 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 4 Apr 2025 06:58:09 +0300 Subject: [PATCH 06/11] address review: don't raise ValueError (precision specifies the *minimal* range) --- Doc/library/string.rst | 9 ++++----- Lib/test/test_long.py | 16 ++++++++-------- Python/formatter_unicode.c | 24 +++++++++++++----------- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 52723eefb6b6e5..9943786e781a79 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -464,11 +464,10 @@ used from the field content. For integer presentation types (excluding ``'c'``), the precision gives the minimal number of digits to appear, expanded with an appropriate number of -leading zeros. Note that for non-decimal presentation types --- two's -complements are used to represent signed integers, accepting values in range -``[-m,m)``, where ``m=2**(k*precision-1)`` and ``k=1,3,4`` for ``'b'``, ``'o'`` -and ``'x'``/``'X'`` types, respectively. A precision of ``0`` is treated as -equivalent to a precision of ``1`` here. +leading zeros. 
Note that for non-decimal presentation types --- integer value +interpreted as ``max(k*precision, number.bit_length())``-bit two's complement, +where ``k=1,3,4`` for ``'b'``, ``'o'`` and ``'x'``/``'X'`` types, respectively. +A precision of ``0`` is treated as equivalent to a precision of ``1`` here. .. versionchanged:: next Precision specification allowed for integer presentation types. diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index b8155319b6871a..f15b780dd814a9 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -708,8 +708,8 @@ def test__format__(self): self.assertEqual(format(1234567890, '_x'), '4996_02d2') self.assertEqual(format(1234567890, '_X'), '4996_02D2') self.assertEqual(format(8086, '#.8x'), '0x00001f96') - self.assertRaises(ValueError, format, 2048, '.3x') - self.assertRaises(ValueError, format, -2049, '.3x') + self.assertEqual(format(2048, '.3x'), '0800') + self.assertEqual(format(-2049, '.3x'), '17ff') # octal self.assertEqual(format(3, "o"), "3") @@ -725,8 +725,8 @@ def test__format__(self): self.assertRaises(ValueError, format, 1234567890, ',o') self.assertEqual(format(1234567890, '_o'), '111_4540_1322') self.assertEqual(format(18, '#.3o'), '0o022') - self.assertRaises(ValueError, format, 256, '.3o') - self.assertRaises(ValueError, format, -257, '.3o') + self.assertEqual(format(256, '.3o'), '0400') + self.assertEqual(format(-257, '.3o'), '1377') # binary self.assertEqual(format(3, "b"), "11") @@ -744,10 +744,10 @@ def test__format__(self): self.assertEqual(format(-12, '.8b'), '11110100') self.assertEqual(format(73, '.8b'), '01001001') self.assertEqual(format(73, '#.8b'), '0b01001001') - self.assertRaises(ValueError, format, 300, '.8b') - self.assertRaises(ValueError, format, -200, '.8b') - self.assertRaises(ValueError, format, 128, '.8b') - self.assertRaises(ValueError, format, -129, '.8b') + self.assertEqual(format(300, '.8b'), '100101100') + self.assertEqual(format(-200, '.8b'), '100111000') + 
self.assertEqual(format(128, '.8b'), '010000000') + self.assertEqual(format(-129, '.8b'), '101111111') # make sure these are errors self.assertRaises(ValueError, format, 3, "1.3c") # precision disallowed with 'c', diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index da605415ad9cff..e663a0c33df3b6 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1081,11 +1081,14 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Do the hard part, converting to a string in a given base */ if (format->precision != -1) { + int64_t precision = Py_MAX(1, format->precision); + /* Use two's complement for 'b', 'o' and 'x' formatting types */ if (format->type == 'b' || format->type == 'x' || format->type == 'o' || format->type == 'X') { - int64_t shift = Py_MAX(1, format->precision); + int64_t shift = precision; + int incr = 1; if (format->type == 'x' || format->type == 'X') { shift *= 4; @@ -1093,8 +1096,11 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, else if (format->type == 'o') { shift *= 3; } - shift--; /* expected value in range(-2**shift, 2**shift) */ + shift = Py_MAX(shift, _PyLong_NumBits(value)); + shift--; + /* expected value in range(-2**n, 2**n), where n=shift + or n=shift+1 */ PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), shift); if (mod == NULL) { @@ -1106,9 +1112,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, goto done; } if (PyObject_RichCompareBool(value, mod, Py_LT)) { - goto range; + incr++; } - Py_SETREF(mod, _PyLong_Lshift(mod, 1)); + Py_SETREF(mod, _PyLong_Lshift(mod, incr)); tmp = PyNumber_Subtract(value, mod); Py_DECREF(mod); if (tmp == NULL) { @@ -1118,16 +1124,12 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, } else { if (PyObject_RichCompareBool(value, mod, Py_GE)) { -range: - Py_DECREF(mod); - PyErr_Format(PyExc_ValueError, - "Expected integer in range(-2**%ld, 2**%ld)", - shift, shift); - goto 
done; + incr++; } Py_DECREF(mod); tmp = _PyLong_Format(value, base); } + precision += (incr - 1); } else { tmp = _PyLong_Format(value, base); @@ -1139,7 +1141,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Prepend enough leading zeros (after the sign) */ int sign = PyUnicode_READ_CHAR(tmp, leading_chars_to_skip) == '-'; - Py_ssize_t tmp2_len = format->precision + leading_chars_to_skip + sign; + Py_ssize_t tmp2_len = precision + leading_chars_to_skip + sign; Py_ssize_t tmp_len = PyUnicode_GET_LENGTH(tmp); Py_ssize_t gap = tmp2_len - tmp_len; From 8790325556af899404c65501a2cf3f6d7aa8efd8 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 4 Apr 2025 08:04:48 +0300 Subject: [PATCH 07/11] address review: use z option for twos complement interpretation --- Lib/test/test_format.py | 3 ++- Lib/test/test_long.py | 26 +++++++++++++------------- Python/formatter_unicode.c | 13 ++++++++----- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index c7cc32e09490b2..10f2fb8c0c603e 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -620,9 +620,10 @@ def test_specifier_z_error(self): with self.assertRaisesRegex(ValueError, error_msg): f"{0:fz}" # wrong position - error_msg = re.escape("Negative zero coercion (z) not allowed") + error_msg = re.escape("'z' option not allowed with 'c', 'd' and 'n'") with self.assertRaisesRegex(ValueError, error_msg): f"{0:zd}" # can't apply to int presentation type + error_msg = re.escape("Negative zero coercion (z) not allowed") with self.assertRaisesRegex(ValueError, error_msg): f"{'x':zs}" # can't apply to string diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index f15b780dd814a9..215ebe53a5f61a 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -707,9 +707,9 @@ def test__format__(self): self.assertRaises(ValueError, format, 1234567890, ',x') self.assertEqual(format(1234567890, '_x'), '4996_02d2') 
self.assertEqual(format(1234567890, '_X'), '4996_02D2') - self.assertEqual(format(8086, '#.8x'), '0x00001f96') - self.assertEqual(format(2048, '.3x'), '0800') - self.assertEqual(format(-2049, '.3x'), '17ff') + self.assertEqual(format(8086, 'z#.8x'), '0x00001f96') + self.assertEqual(format(2048, 'z.3x'), '0800') + self.assertEqual(format(-2049, 'z.3x'), '17ff') # octal self.assertEqual(format(3, "o"), "3") @@ -724,9 +724,9 @@ def test__format__(self): self.assertEqual(format(-1234, "+o"), "-2322") self.assertRaises(ValueError, format, 1234567890, ',o') self.assertEqual(format(1234567890, '_o'), '111_4540_1322') - self.assertEqual(format(18, '#.3o'), '0o022') - self.assertEqual(format(256, '.3o'), '0400') - self.assertEqual(format(-257, '.3o'), '1377') + self.assertEqual(format(18, 'z#.3o'), '0o022') + self.assertEqual(format(256, 'z.3o'), '0400') + self.assertEqual(format(-257, 'z.3o'), '1377') # binary self.assertEqual(format(3, "b"), "11") @@ -741,13 +741,13 @@ def test__format__(self): self.assertEqual(format(-1234, "+b"), "-10011010010") self.assertRaises(ValueError, format, 1234567890, ',b') self.assertEqual(format(12345, '_b'), '11_0000_0011_1001') - self.assertEqual(format(-12, '.8b'), '11110100') - self.assertEqual(format(73, '.8b'), '01001001') - self.assertEqual(format(73, '#.8b'), '0b01001001') - self.assertEqual(format(300, '.8b'), '100101100') - self.assertEqual(format(-200, '.8b'), '100111000') - self.assertEqual(format(128, '.8b'), '010000000') - self.assertEqual(format(-129, '.8b'), '101111111') + self.assertEqual(format(-12, 'z.8b'), '11110100') + self.assertEqual(format(73, 'z.8b'), '01001001') + self.assertEqual(format(73, 'z#.8b'), '0b01001001') + self.assertEqual(format(300, 'z.8b'), '100101100') + self.assertEqual(format(-200, 'z.8b'), '100111000') + self.assertEqual(format(128, 'z.8b'), '010000000') + self.assertEqual(format(-129, 'z.8b'), '101111111') # make sure these are errors self.assertRaises(ValueError, format, 3, "1.3c") # precision 
disallowed with 'c', diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index e663a0c33df3b6..696bb52f5a3f0b 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -988,10 +988,12 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, } /* no negative zero coercion on integers */ - if (format->no_neg_0) { + if (format->no_neg_0 && format->type != 'b' && format->type != 'o' + && format->type != 'x' && format->type != 'X') + { PyErr_SetString(PyExc_ValueError, - "Negative zero coercion (z) not allowed in integer" - " format specifier"); + "'z' option not allowed with 'c', 'd' and 'n' " + "integer format specifier"); goto done; } @@ -1084,8 +1086,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, int64_t precision = Py_MAX(1, format->precision); /* Use two's complement for 'b', 'o' and 'x' formatting types */ - if (format->type == 'b' || format->type == 'x' - || format->type == 'o' || format->type == 'X') + if (format->no_neg_0 && (format->type == 'b' || format->type == 'x' + || format->type == 'o' + || format->type == 'X')) { int64_t shift = precision; int incr = 1; From 9e3cc9668741e621ceb05fc98fe947ea31ec7fe6 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 4 Apr 2025 09:12:13 +0300 Subject: [PATCH 08/11] + cleanup and docs --- Doc/library/string.rst | 17 ++++++++++------- Lib/test/test_long.py | 2 ++ Python/formatter_unicode.c | 20 +++++++++++--------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 9943786e781a79..db289e80879324 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -398,9 +398,11 @@ following: .. index:: single: z; in string formatting -The ``'z'`` option coerces negative zero floating-point values to positive -zero after rounding to the format precision. This option is only valid for -floating-point presentation types. 
+For floating-point presentation types the ``'z'`` option coerces negative zero +floating-point values to positive zero after rounding to the format precision. +For integer types ``'b'``, ``'o'``, ``'x'`` and ``'X'`` it can be used to +interpret integer value as two's complement. This option is invalid for other +presentation types. .. versionchanged:: 3.11 Added the ``'z'`` option (see also :pep:`682`). @@ -464,10 +466,11 @@ used from the field content. For integer presentation types (excluding ``'c'``), the precision gives the minimal number of digits to appear, expanded with an appropriate number of -leading zeros. Note that for non-decimal presentation types --- integer value -interpreted as ``max(k*precision, number.bit_length())``-bit two's complement, -where ``k=1,3,4`` for ``'b'``, ``'o'`` and ``'x'``/``'X'`` types, respectively. -A precision of ``0`` is treated as equivalent to a precision of ``1`` here. +leading zeros. If ``'z'`` option specified for non-decimal presentation types +--- integer value interpreted as ``max(k*precision, number.bit_length())``-bit +two's complement, where ``k=1,3,4`` for ``'b'``, ``'o'`` and ``'x'``/``'X'`` +types, respectively. A precision of ``0`` is treated as equivalent to a +precision of ``1`` here. .. versionchanged:: next Precision specification allowed for integer presentation types. 
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 215ebe53a5f61a..0c4ecc70ce9433 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -745,6 +745,8 @@ def test__format__(self): self.assertEqual(format(73, 'z.8b'), '01001001') self.assertEqual(format(73, 'z#.8b'), '0b01001001') self.assertEqual(format(300, 'z.8b'), '100101100') + self.assertEqual(format(200, '.8b'), '11001000') + self.assertEqual(format(200, 'z.8b'), '011001000') self.assertEqual(format(-200, 'z.8b'), '100111000') self.assertEqual(format(128, 'z.8b'), '010000000') self.assertEqual(format(-129, 'z.8b'), '101111111') diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 696bb52f5a3f0b..a42f5d9c649039 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1141,33 +1141,35 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, goto done; } - /* Prepend enough leading zeros (after the sign) */ + /* Prepend enough leading zeros (after sign and prefix) */ - int sign = PyUnicode_READ_CHAR(tmp, leading_chars_to_skip) == '-'; + int sign = PyUnicode_READ_CHAR(tmp, 0) == '-'; Py_ssize_t tmp2_len = precision + leading_chars_to_skip + sign; Py_ssize_t tmp_len = PyUnicode_GET_LENGTH(tmp); Py_ssize_t gap = tmp2_len - tmp_len; if (gap > 0) { PyObject *tmp2 = PyUnicode_New(tmp2_len, 127); + Py_ssize_t value_start = leading_chars_to_skip + sign; - if (PyUnicode_CopyCharacters(tmp2, leading_chars_to_skip + gap + sign, - tmp, leading_chars_to_skip + sign, - tmp2_len - leading_chars_to_skip - sign) == -1) { + if (PyUnicode_CopyCharacters(tmp2, value_start + gap, + tmp, value_start, + precision) == -1) { Py_DECREF(tmp2); goto done; } - if (PyUnicode_Fill(tmp2, leading_chars_to_skip + sign, gap, '0') == -1) { + if (PyUnicode_Fill(tmp2, value_start, gap, '0') == -1) { Py_DECREF(tmp2); goto done; } - if (sign && PyUnicode_WriteChar(tmp2, leading_chars_to_skip, '-') == -1) { + if (sign && PyUnicode_WriteChar(tmp2, 0, '-') == -1) { 
Py_DECREF(tmp2); goto done; } if (leading_chars_to_skip - && PyUnicode_CopyCharacters(tmp2, 0, tmp, 0, - leading_chars_to_skip) == -1) { + && PyUnicode_CopyCharacters(tmp2, sign, tmp, sign, + leading_chars_to_skip) == -1) + { Py_DECREF(tmp2); goto done; } From 5e0d519db56b2d6dcb2d9f7973503f5b3a8ce576 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 4 Apr 2025 14:32:58 +0300 Subject: [PATCH 09/11] address review: correct precision in two's complement case --- Lib/test/test_long.py | 3 ++- Python/formatter_unicode.c | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 0c4ecc70ce9433..387e4d2afe9383 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -744,12 +744,13 @@ def test__format__(self): self.assertEqual(format(-12, 'z.8b'), '11110100') self.assertEqual(format(73, 'z.8b'), '01001001') self.assertEqual(format(73, 'z#.8b'), '0b01001001') - self.assertEqual(format(300, 'z.8b'), '100101100') + self.assertEqual(format(300, 'z.8b'), '0100101100') self.assertEqual(format(200, '.8b'), '11001000') self.assertEqual(format(200, 'z.8b'), '011001000') self.assertEqual(format(-200, 'z.8b'), '100111000') self.assertEqual(format(128, 'z.8b'), '010000000') self.assertEqual(format(-129, 'z.8b'), '101111111') + self.assertEqual(format(256, 'z.8b'), '0100000000') # make sure these are errors self.assertRaises(ValueError, format, 3, "1.3c") # precision disallowed with 'c', diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index a42f5d9c649039..426a5481fc08be 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1091,15 +1091,15 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, || format->type == 'X')) { int64_t shift = precision; - int incr = 1; + int incr = 1, k = 1; if (format->type == 'x' || format->type == 'X') { - shift *= 4; + k = 4; } else if (format->type == 'o') { - shift *= 3; + k = 3; } - shift = 
Py_MAX(shift, _PyLong_NumBits(value)); + shift = Py_MAX(shift*k, _PyLong_NumBits(value)); shift--; /* expected value in range(-2**n, 2**n), where n=shift @@ -1132,7 +1132,8 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, Py_DECREF(mod); tmp = _PyLong_Format(value, base); } - precision += (incr - 1); + shift += incr; + precision = (shift + k - 1)/k; } else { tmp = _PyLong_Format(value, base); From 304bfc5cd150a5762f46df538ae074982701a000 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sun, 6 Apr 2025 08:54:43 +0300 Subject: [PATCH 10/11] address review: ensure leading bit is 1 for negatives now we choose minimal two's complement size as being >= k*precision AND with leading bit set to 1 for negatives Increasing precision will add 0's or (base-1)'s as needed: >>> f"{-129:z#.2x}" '0xf7f' >>> f"{-129:z#.3x}" '0xf7f' >>> f"{-129:z#.4x}" '0xff7f' >>> f"{383 :z#.2x}" '0x17f' >>> f"{383 :z#.3x}" '0x17f' >>> f"{383 :z#.4x}" '0x017f' --- Doc/library/string.rst | 8 ++++---- Lib/test/test_long.py | 4 ++-- Python/formatter_unicode.c | 22 +++++++++++++--------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index dd8f84a18cfeb9..f4c91345de7720 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -444,10 +444,10 @@ used from the field content. For integer presentation types (excluding ``'c'``), the precision gives the minimal number of digits to appear, expanded with an appropriate number of leading zeros. If ``'z'`` option specified for non-decimal presentation types ---- integer value interpreted as ``max(k*precision, number.bit_length())``-bit -two's complement, where ``k=1,3,4`` for ``'b'``, ``'o'`` and ``'x'``/``'X'`` -types, respectively. A precision of ``0`` is treated as equivalent to a -precision of ``1`` here. 
+--- integer value interpreted as two's complement, the precision gives its +minimum size ``precision*k`` in bits, where ``k=1,3,4`` for ``'b'``, ``'o'`` +and ``'x'``/``'X'`` types, respectively. A precision of ``0`` is treated as +equivalent to a precision of ``1`` here. .. versionchanged:: next Precision specification allowed for integer presentation types. diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 387e4d2afe9383..9c700ded8b1598 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -709,7 +709,7 @@ def test__format__(self): self.assertEqual(format(1234567890, '_X'), '4996_02D2') self.assertEqual(format(8086, 'z#.8x'), '0x00001f96') self.assertEqual(format(2048, 'z.3x'), '0800') - self.assertEqual(format(-2049, 'z.3x'), '17ff') + self.assertEqual(format(-2049, 'z.3x'), 'f7ff') # octal self.assertEqual(format(3, "o"), "3") @@ -726,7 +726,7 @@ def test__format__(self): self.assertEqual(format(1234567890, '_o'), '111_4540_1322') self.assertEqual(format(18, 'z#.3o'), '0o022') self.assertEqual(format(256, 'z.3o'), '0400') - self.assertEqual(format(-257, 'z.3o'), '1377') + self.assertEqual(format(-257, 'z.3o'), '7377') # binary self.assertEqual(format(3, "b"), "11") diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 426a5481fc08be..4a2e3df5eb8095 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1090,21 +1090,25 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, || format->type == 'o' || format->type == 'X')) { - int64_t shift = precision; - int incr = 1, k = 1; + int dbits = 1; if (format->type == 'x' || format->type == 'X') { - k = 4; + dbits = 4; } else if (format->type == 'o') { - k = 3; + dbits = 3; } - shift = Py_MAX(shift*k, _PyLong_NumBits(value)); + + int64_t nbits = _PyLong_NumBits(value); + int64_t shift = Py_MAX(precision, (nbits + dbits - 1)/dbits); + + shift *= dbits; shift--; /* expected value in range(-2**n, 2**n), where n=shift - or n=shift+1 
*/ + or n=shift+dbits */ PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), shift); + int incr = 1; if (mod == NULL) { goto done; @@ -1115,7 +1119,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, goto done; } if (PyObject_RichCompareBool(value, mod, Py_LT)) { - incr++; + incr += dbits; } Py_SETREF(mod, _PyLong_Lshift(mod, incr)); tmp = PyNumber_Subtract(value, mod); @@ -1127,13 +1131,13 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, } else { if (PyObject_RichCompareBool(value, mod, Py_GE)) { - incr++; + incr += dbits; } Py_DECREF(mod); tmp = _PyLong_Format(value, base); } shift += incr; - precision = (shift + k - 1)/k; + precision = shift/dbits; } else { tmp = _PyLong_Format(value, base); From 65a5e7084efd762532694c70ef133d981e93bb33 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 8 Apr 2025 04:54:03 +0300 Subject: [PATCH 11/11] + check if nbits is too big (fix warnings) --- Python/formatter_unicode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 4a2e3df5eb8095..26a54c394cdbe2 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1100,7 +1100,15 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, } int64_t nbits = _PyLong_NumBits(value); - int64_t shift = Py_MAX(precision, (nbits + dbits - 1)/dbits); + + if (nbits > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "int too large to format"); + goto done; + } + + Py_ssize_t shift = Py_MAX(precision, + ((Py_ssize_t)nbits + dbits - 1)/dbits); shift *= dbits; shift--;