From 81d7ea241a39b86c111380767d28d7fb408e3bb6 Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Sat, 8 Feb 2025 09:56:08 +0000 Subject: [PATCH 01/10] Accept bytes in bytes.fromhex()/bytearray.fromhex() Fixes #129349 --- Lib/test/test_bytes.py | 8 ++- ...-02-08-09-55-33.gh-issue-129349.PkcG-l.rst | 1 + Objects/bytearrayobject.c | 6 +-- Objects/bytesobject.c | 54 +++++++++++-------- Objects/clinic/bytearrayobject.c.h | 22 +------- Objects/clinic/bytesobject.c.h | 22 +------- 6 files changed, 46 insertions(+), 67 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index f6ffe83c5d69e8..d1ecd2db77f963 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -455,8 +455,14 @@ def test_fromhex(self): for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028": self.assertRaises(ValueError, self.type2test.fromhex, c) + # Check that we can parse bytes and bytearray + self.assertEqual(self.type2test.fromhex(b' 012abc'), b'\x01\x2a\xbc') + self.assertEqual( + self.type2test.fromhex(bytearray(b' 012abc')), + b'\x01\x2a\xbc', + ) + self.assertEqual(self.type2test.fromhex('0000'), b'\0\0') - self.assertRaises(TypeError, self.type2test.fromhex, b'1B') self.assertRaises(ValueError, self.type2test.fromhex, 'a') self.assertRaises(ValueError, self.type2test.fromhex, 'rt') self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd') diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst new file mode 100644 index 00000000000000..48b28d30dc1e20 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst @@ -0,0 +1 @@ +``bytes.fromhex()``/``bytearray.fromhex()`` now accept ASCII ``bytes``. 
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 34f43eb8c31315..f1c76664198abc 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2533,7 +2533,7 @@ bytearray_splitlines_impl(PyByteArrayObject *self, int keepends) @classmethod bytearray.fromhex - string: unicode + string: object / Create a bytearray object from a string of hexadecimal numbers. @@ -2543,8 +2543,8 @@ Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef') [clinic start generated code]*/ static PyObject * -bytearray_fromhex_impl(PyTypeObject *type, PyObject *string) -/*[clinic end generated code: output=8f0f0b6d30fb3ba0 input=f033a16d1fb21f48]*/ +bytearray_fromhex(PyTypeObject *type, PyObject *string) +/*[clinic end generated code: output=da84dc708e9c4b36 input=7e314e5b2d7ab484]*/ { PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type); if (type != &PyByteArray_Type && result != NULL) { diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ba642d3788fc78..740d89f8725caa 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2484,7 +2484,7 @@ bytes_splitlines_impl(PyBytesObject *self, int keepends) @classmethod bytes.fromhex - string: unicode + string: object / Create a bytes object from a string of hexadecimal numbers. @@ -2494,8 +2494,8 @@ Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'. 
[clinic start generated code]*/ static PyObject * -bytes_fromhex_impl(PyTypeObject *type, PyObject *string) -/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/ +bytes_fromhex(PyTypeObject *type, PyObject *string) +/*[clinic end generated code: output=d458ec88195da6b3 input=f37d98ed51088a21]*/ { PyObject *result = _PyBytes_FromHex(string, 0); if (type != &PyBytes_Type && result != NULL) { @@ -2510,31 +2510,43 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) char *buf; Py_ssize_t hexlen, invalid_char; unsigned int top, bot; - const Py_UCS1 *str, *end; + const Py_UCS1 *str, *start, *end; _PyBytesWriter writer; _PyBytesWriter_Init(&writer); writer.use_bytearray = use_bytearray; - assert(PyUnicode_Check(string)); - hexlen = PyUnicode_GET_LENGTH(string); + if (PyUnicode_Check(string)) { + hexlen = PyUnicode_GET_LENGTH(string); - if (!PyUnicode_IS_ASCII(string)) { - const void *data = PyUnicode_DATA(string); - int kind = PyUnicode_KIND(string); - Py_ssize_t i; + if (!PyUnicode_IS_ASCII(string)) { + const void *data = PyUnicode_DATA(string); + int kind = PyUnicode_KIND(string); + Py_ssize_t i; - /* search for the first non-ASCII character */ - for (i = 0; i < hexlen; i++) { - if (PyUnicode_READ(kind, data, i) >= 128) - break; + /* search for the first non-ASCII character */ + for (i = 0; i < hexlen; i++) { + if (PyUnicode_READ(kind, data, i) >= 128) + break; + } + invalid_char = i; + goto error; } - invalid_char = i; - goto error; - } - assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); - str = PyUnicode_1BYTE_DATA(string); + assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); + str = start = PyUnicode_1BYTE_DATA(string); + } else if (PyBytes_Check(string)) { + hexlen = PyBytes_GET_SIZE(string); + str = start = (Py_UCS1 *) PyBytes_AS_STRING(string); + } else if (PyByteArray_Check(string)) { + hexlen = PyByteArray_GET_SIZE(string); + str = start = (Py_UCS1 *) PyByteArray_AS_STRING(string); + } else { + 
PyErr_Format(PyExc_TypeError, + "fromhex() argument must be str or bytes, not %s", + Py_TYPE(string)->tp_name); + return NULL; + } /* This overestimates if there are spaces */ buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); @@ -2554,7 +2566,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) top = _PyLong_DigitValue[*str]; if (top >= 16) { - invalid_char = str - PyUnicode_1BYTE_DATA(string); + invalid_char = str - start; goto error; } str++; @@ -2565,7 +2577,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) if (str >= end){ invalid_char = -1; } else { - invalid_char = str - PyUnicode_1BYTE_DATA(string); + invalid_char = str - start; } goto error; } diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index fa105f74c58512..8ed10d8ab5a8b2 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -1601,26 +1601,6 @@ PyDoc_STRVAR(bytearray_fromhex__doc__, #define BYTEARRAY_FROMHEX_METHODDEF \ {"fromhex", (PyCFunction)bytearray_fromhex, METH_O|METH_CLASS, bytearray_fromhex__doc__}, -static PyObject * -bytearray_fromhex_impl(PyTypeObject *type, PyObject *string); - -static PyObject * -bytearray_fromhex(PyTypeObject *type, PyObject *arg) -{ - PyObject *return_value = NULL; - PyObject *string; - - if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("fromhex", "argument", "str", arg); - goto exit; - } - string = arg; - return_value = bytearray_fromhex_impl(type, string); - -exit: - return return_value; -} - PyDoc_STRVAR(bytearray_hex__doc__, "hex($self, /, sep=, bytes_per_sep=1)\n" "--\n" @@ -1789,4 +1769,4 @@ bytearray_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl((PyByteArrayObject *)self); } -/*[clinic end generated code: output=7c924a56e0a8bfe6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=13a4231325b7d3c1 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index 
11cb81a9c5c9d7..c0f61f1368ba2b 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -1204,26 +1204,6 @@ PyDoc_STRVAR(bytes_fromhex__doc__, #define BYTES_FROMHEX_METHODDEF \ {"fromhex", (PyCFunction)bytes_fromhex, METH_O|METH_CLASS, bytes_fromhex__doc__}, -static PyObject * -bytes_fromhex_impl(PyTypeObject *type, PyObject *string); - -static PyObject * -bytes_fromhex(PyTypeObject *type, PyObject *arg) -{ - PyObject *return_value = NULL; - PyObject *string; - - if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("fromhex", "argument", "str", arg); - goto exit; - } - string = arg; - return_value = bytes_fromhex_impl(type, string); - -exit: - return return_value; -} - PyDoc_STRVAR(bytes_hex__doc__, "hex($self, /, sep=, bytes_per_sep=1)\n" "--\n" @@ -1404,4 +1384,4 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=61cb2cf6506df4c6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=967aae4b46423586 input=a9049054013a1b77]*/ From ea025420c17a5a9e8bd54f180ce3736c70485f2e Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Sat, 8 Feb 2025 10:29:58 +0000 Subject: [PATCH 02/10] Update documentation --- Doc/library/stdtypes.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index a6260ecd77f520..de00a064ad4c03 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2744,6 +2744,11 @@ data and are closely related to string objects in a variety of other ways. :meth:`bytes.fromhex` now skips all ASCII whitespace in the string, not just spaces. + .. versionchanged:: 3.14 + :meth:`bytes.fromhex` now accepts an ASCII :class:`bytes` object as + input. + + A reverse conversion function exists to transform a bytes object into its hexadecimal representation. @@ -2829,6 +2834,10 @@ objects. :meth:`bytearray.fromhex` now skips all ASCII whitespace in the string, not just spaces. + .. 
versionchanged:: 3.14 + :meth:`bytearray.fromhex` now accepts an ASCII :class:`bytes` object as + input. + A reverse conversion function exists to transform a bytearray object into its hexadecimal representation. From 76d28ce24bf68ebbcac722e367559e71efe3dc61 Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Sun, 9 Feb 2025 16:19:27 +0000 Subject: [PATCH 03/10] Apply suggestions from @picnixz in #129844 --- Doc/library/stdtypes.rst | 4 ++-- Doc/whatsnew/3.14.rst | 4 ++++ Lib/test/test_bytes.py | 5 +++++ ...25-02-08-09-55-33.gh-issue-129349.PkcG-l.rst | 3 ++- Objects/bytesobject.c | 17 ++++++++++------- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index de00a064ad4c03..2e6d0af72fa350 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2744,7 +2744,7 @@ data and are closely related to string objects in a variety of other ways. :meth:`bytes.fromhex` now skips all ASCII whitespace in the string, not just spaces. - .. versionchanged:: 3.14 + .. versionchanged:: next :meth:`bytes.fromhex` now accepts an ASCII :class:`bytes` object as input. @@ -2834,7 +2834,7 @@ objects. :meth:`bytearray.fromhex` now skips all ASCII whitespace in the string, not just spaces. - .. versionchanged:: 3.14 + .. versionchanged:: next :meth:`bytearray.fromhex` now accepts an ASCII :class:`bytes` object as input. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 6539b23380b6e0..a33b6dd9c06842 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -354,6 +354,10 @@ Other language changes (with :func:`format` or :ref:`f-strings`). (Contrubuted by Sergey B Kirpichev in :gh:`87790`.) +* The :func:`bytes.fromhex` and :func:`bytearray.fromhex` methods now accept + ASCII :class:`bytes` and :class:`bytearray` objects. + (Contributed by Daniel Pope in :gh:`129349`.) + * ``\B`` in :mod:`regular expression ` now matches empty input string. Now it is always the opposite of ``\b``. 
(Contributed by Serhiy Storchaka in :gh:`124130`.) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index d1ecd2db77f963..618bb8abe3d7b0 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -450,6 +450,7 @@ def test_fromhex(self): # check that ASCII whitespace is ignored self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b) + self.assertEqual(self.type2test.fromhex(b' 1A\n2B\t30\v'), b) for c in "\x09\x0A\x0B\x0C\x0D\x20": self.assertEqual(self.type2test.fromhex(c), self.type2test()) for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028": @@ -461,6 +462,10 @@ def test_fromhex(self): self.type2test.fromhex(bytearray(b' 012abc')), b'\x01\x2a\xbc', ) + # Invalid bytes are rejected + for u8 in b"\0\x1C\x1D\x1E\x1F\x85\xa0": + b = bytes([30, 31, u8]) + self.assertRaises(ValueError, self.type2test.fromhex, b) self.assertEqual(self.type2test.fromhex('0000'), b'\0\0') self.assertRaises(ValueError, self.type2test.fromhex, 'a') diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst index 48b28d30dc1e20..78572600ca47e9 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst @@ -1 +1,2 @@ -``bytes.fromhex()``/``bytearray.fromhex()`` now accept ASCII ``bytes``. +:meth:`bytes.fromhex` and :meth:`bytearray.fromhex()` now accepts ASCII +:class:`bytes` objects. 
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 740d89f8725caa..b94b33959a8857 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2535,16 +2535,19 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); str = start = PyUnicode_1BYTE_DATA(string); - } else if (PyBytes_Check(string)) { + } + else if (PyBytes_Check(string)) { hexlen = PyBytes_GET_SIZE(string); - str = start = (Py_UCS1 *) PyBytes_AS_STRING(string); - } else if (PyByteArray_Check(string)) { + str = start = (Py_UCS1 *)PyBytes_AS_STRING(string); + } + else if (PyByteArray_Check(string)) { hexlen = PyByteArray_GET_SIZE(string); - str = start = (Py_UCS1 *) PyByteArray_AS_STRING(string); - } else { + str = start = (Py_UCS1 *)PyByteArray_AS_STRING(string); + } + else { PyErr_Format(PyExc_TypeError, - "fromhex() argument must be str or bytes, not %s", - Py_TYPE(string)->tp_name); + "fromhex() argument must be str or bytes, not %T", + string); return NULL; } From cff091c6e6b8d9cec2b1a3f6a4fa5c1ceb9793bc Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Fri, 7 Mar 2025 10:04:42 +0000 Subject: [PATCH 04/10] Fix sphinx-lint --- .../2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst index 78572600ca47e9..8c36a2cb10e45d 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst @@ -1,2 +1,2 @@ -:meth:`bytes.fromhex` and :meth:`bytearray.fromhex()` now accepts ASCII +:meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` objects. 
From be921887d5f106bd96db6f4e80a59ffc1b928d2d Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Fri, 7 Mar 2025 10:09:06 +0000 Subject: [PATCH 05/10] Mention bytearray in docs and blurb --- Doc/library/stdtypes.rst | 9 +++++---- .../2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 2e6d0af72fa350..9fed973ad482a2 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2745,8 +2745,9 @@ data and are closely related to string objects in a variety of other ways. not just spaces. .. versionchanged:: next - :meth:`bytes.fromhex` now accepts an ASCII :class:`bytes` object as - input. + :meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and + :class:`bytearray` objects as input. + A reverse conversion function exists to transform a bytes object into its @@ -2835,8 +2836,8 @@ objects. not just spaces. .. versionchanged:: next - :meth:`bytearray.fromhex` now accepts an ASCII :class:`bytes` object as - input. + :meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` and + :class:`bytearray` objects as input. A reverse conversion function exists to transform a bytearray object into its hexadecimal representation. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst index 8c36a2cb10e45d..0881bfb6b1410d 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst @@ -1,2 +1,2 @@ :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accepts ASCII -:class:`bytes` objects. +:class:`bytes`/:class:`bytearray` objects. 
From b0e3eb557a1a7c68ae38c5017290ed024016fbe9 Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Fri, 7 Mar 2025 10:51:27 +0000 Subject: [PATCH 06/10] Use buffer protocol to support all byte-like objects --- Doc/library/stdtypes.rst | 4 +-- Doc/whatsnew/3.14.rst | 2 +- Lib/test/test_bytes.py | 15 +++++++---- ...-02-08-09-55-33.gh-issue-129349.PkcG-l.rst | 2 +- Objects/bytesobject.c | 27 ++++++++++++------- 5 files changed, 32 insertions(+), 18 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 9fed973ad482a2..558281f0afc8d5 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2746,7 +2746,7 @@ data and are closely related to string objects in a variety of other ways. .. versionchanged:: next :meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and - :class:`bytearray` objects as input. + :term:`bytes-like objects <bytes-like object>` as input. @@ -2837,7 +2837,7 @@ objects. .. versionchanged:: next :meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` and - :class:`bytearray` objects as input. + :term:`bytes-like objects <bytes-like object>` as input. A reverse conversion function exists to transform a bytearray object into its hexadecimal representation. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index a33b6dd9c06842..6898b50b2c932a 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -355,7 +355,7 @@ Other language changes (Contrubuted by Sergey B Kirpichev in :gh:`87790`.) * The :func:`bytes.fromhex` and :func:`bytearray.fromhex` methods now accept - ASCII :class:`bytes` and :class:`bytearray` objects. + ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`. (Contributed by Daniel Pope in :gh:`129349`.) * ``\B`` in :mod:`regular expression <re>` now matches empty input string.
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 618bb8abe3d7b0..fe8baafcd61c82 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -457,11 +457,16 @@ def test_fromhex(self): self.assertRaises(ValueError, self.type2test.fromhex, c) # Check that we can parse bytes and bytearray - self.assertEqual(self.type2test.fromhex(b' 012abc'), b'\x01\x2a\xbc') - self.assertEqual( - self.type2test.fromhex(bytearray(b' 012abc')), - b'\x01\x2a\xbc', - ) + tests = [ + ("bytes", bytes), + ("bytearray", bytearray), + ("memoryview", memoryview), + ("array.array", lambda bs: array.array('B', bs)), + ] + for name, factory in tests: + with self.subTest(name=name): + self.assertEqual(self.type2test.fromhex(factory(b' 1A 2B 30 ')), b) + # Invalid bytes are rejected for u8 in b"\0\x1C\x1D\x1E\x1F\x85\xa0": b = bytes([30, 31, u8]) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst index 0881bfb6b1410d..db2af780bd5cbb 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-08-09-55-33.gh-issue-129349.PkcG-l.rst @@ -1,2 +1,2 @@ :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accepts ASCII -:class:`bytes`/:class:`bytearray` objects. +:class:`bytes` and :term:`bytes-like objects <bytes-like object>`.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b94b33959a8857..fc888ec91b9b13 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2512,6 +2512,8 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) unsigned int top, bot; const Py_UCS1 *str, *start, *end; _PyBytesWriter writer; + Py_buffer view; + view.obj = NULL; _PyBytesWriter_Init(&writer); writer.use_bytearray = use_bytearray; @@ -2534,15 +2536,13 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) } assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND); - str = start = PyUnicode_1BYTE_DATA(string); - } - else if (PyBytes_Check(string)) { - hexlen = PyBytes_GET_SIZE(string); - str = start = (Py_UCS1 *)PyBytes_AS_STRING(string); + str = PyUnicode_1BYTE_DATA(string); } - else if (PyByteArray_Check(string)) { - hexlen = PyByteArray_GET_SIZE(string); - str = start = (Py_UCS1 *)PyByteArray_AS_STRING(string); + else if (PyObject_CheckBuffer(string)) { + if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) + return NULL; + hexlen = view.len; + str = view.buf; } else { PyErr_Format(PyExc_TypeError, @@ -2554,8 +2554,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) /* This overestimates if there are spaces */ buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); if (buf == NULL) - return NULL; + goto release_buffer; + start = str; end = str + hexlen; while (str < end) { /* skip over spaces in the input */ @@ -2589,6 +2590,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) *buf++ = (unsigned char)((top << 4) + bot); } + if (view.obj != NULL) { + PyBuffer_Release(&view); + } return _PyBytesWriter_Finish(&writer, buf); error: @@ -2601,6 +2605,11 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) "fromhex() arg at position %zd", invalid_char); } _PyBytesWriter_Dealloc(&writer); + + release_buffer: + if (view.obj != NULL) { + PyBuffer_Release(&view); + } return NULL; } From ef92b4eee117c15a55115370753750ac4ecc3a00 Mon Sep 17 00:00:00 2001 From: Daniel Pope 
Date: Fri, 7 Mar 2025 18:39:36 +0000 Subject: [PATCH 07/10] Insert missing braces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Objects/bytesobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index fc888ec91b9b13..ba99ad0a356a6b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2539,8 +2539,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) str = PyUnicode_1BYTE_DATA(string); } else if (PyObject_CheckBuffer(string)) { - if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) + if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) { return NULL; + } hexlen = view.len; str = view.buf; } From 685f5831051b2f0cfe3f6781861b91cd2c4d37ce Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 8 Mar 2025 12:38:14 +0100 Subject: [PATCH 08/10] Update Doc/library/stdtypes.rst --- Doc/library/stdtypes.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 558281f0afc8d5..7b3fa218317fcc 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2748,8 +2748,6 @@ data and are closely related to string objects in a variety of other ways. :meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>` as input. - - A reverse conversion function exists to transform a bytes object into its hexadecimal representation.
From f859c3c28d8583310f149794c630de86788fd7df Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Sun, 9 Mar 2025 13:09:57 +0000 Subject: [PATCH 09/10] Add test for invalid type in bytes.fromhex() --- Lib/test/test_bytes.py | 5 +++++ Objects/bytesobject.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index fe8baafcd61c82..d5490a20a0525d 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -473,6 +473,11 @@ def test_fromhex(self): self.assertRaises(ValueError, self.type2test.fromhex, b) self.assertEqual(self.type2test.fromhex('0000'), b'\0\0') + with self.assertRaisesRegex( + TypeError, + r'fromhex\(\) argument must be str or bytes-like, not tuple', + ): + self.type2test.fromhex(()) self.assertRaises(ValueError, self.type2test.fromhex, 'a') self.assertRaises(ValueError, self.type2test.fromhex, 'rt') self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd') diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ba99ad0a356a6b..5d057353aca95b 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2547,7 +2547,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) } else { PyErr_Format(PyExc_TypeError, - "fromhex() argument must be str or bytes, not %T", + "fromhex() argument must be str or bytes-like, not %T", string); return NULL; } From 31b9ab008fe44c7e29ad11ec90d55a75a0422451 Mon Sep 17 00:00:00 2001 From: Daniel Pope Date: Mon, 10 Mar 2025 10:22:04 +0000 Subject: [PATCH 10/10] PEP-7 --- Objects/bytesobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 5d057353aca95b..ada0d0024827dc 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2554,8 +2554,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) /* This overestimates if there are spaces */ buf = _PyBytesWriter_Alloc(&writer, hexlen / 2); - if (buf == NULL) + if (buf == NULL) { goto release_buffer; + } 
start = str; end = str + hexlen;