Skip to content

Commit cad8ca2

Browse files
committed
Merge in the main branch, switch to _Py_ALIGNED_DEF
2 parents 8db665a + 49d7236 commit cad8ca2

File tree

11 files changed

+133
-99
lines changed

11 files changed

+133
-99
lines changed

Include/Python.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,6 @@
5959
# include <intrin.h> // __readgsqword()
6060
#endif
6161

62-
// Suppress known warnings in Python header files.
63-
#if defined(_MSC_VER)
64-
// Warning that alignas behaviour has changed. Doesn't affect us, because we
65-
// never relied on the old behaviour.
66-
#pragma warning(push)
67-
#pragma warning(disable: 5274)
68-
#endif
69-
7062
// Include Python header files
7163
#include "pyport.h"
7264
#include "pymacro.h"
@@ -146,9 +138,4 @@
146138
#include "cpython/pyfpe.h"
147139
#include "cpython/tracemalloc.h"
148140

149-
// Restore warning filter
150-
#ifdef _MSC_VER
151-
#pragma warning(pop)
152-
#endif
153-
154141
#endif /* !Py_PYTHON_H */

Include/cpython/unicodeobject.h

Lines changed: 59 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) {
4747

4848
/* --- Unicode Type ------------------------------------------------------- */
4949

50+
struct _PyUnicodeObject_state {
51+
/* If interned is non-zero, the two references from the
52+
dictionary to this object are *not* counted in ob_refcnt.
53+
The possible values here are:
54+
0: Not Interned
55+
1: Interned
56+
2: Interned and Immortal
57+
3: Interned, Immortal, and Static
58+
This categorization allows the runtime to determine the right
59+
cleanup mechanism at runtime shutdown. */
60+
#ifdef Py_GIL_DISABLED
61+
// Needs to be accessed atomically, so can't be a bit field.
62+
unsigned char interned;
63+
#else
64+
unsigned int interned:2;
65+
#endif
66+
/* Character size:
67+
68+
- PyUnicode_1BYTE_KIND (1):
69+
70+
* character type = Py_UCS1 (8 bits, unsigned)
71+
* all characters are in the range U+0000-U+00FF (latin1)
72+
* if ascii is set, all characters are in the range U+0000-U+007F
73+
(ASCII), otherwise at least one character is in the range
74+
U+0080-U+00FF
75+
76+
- PyUnicode_2BYTE_KIND (2):
77+
78+
* character type = Py_UCS2 (16 bits, unsigned)
79+
* all characters are in the range U+0000-U+FFFF (BMP)
80+
* at least one character is in the range U+0100-U+FFFF
81+
82+
- PyUnicode_4BYTE_KIND (4):
83+
84+
* character type = Py_UCS4 (32 bits, unsigned)
85+
* all characters are in the range U+0000-U+10FFFF
86+
* at least one character is in the range U+10000-U+10FFFF
87+
*/
88+
unsigned int kind:3;
89+
/* Compact is with respect to the allocation scheme. Compact unicode
90+
objects only require one memory block while non-compact objects use
91+
one block for the PyUnicodeObject struct and another for its data
92+
buffer. */
93+
unsigned int compact:1;
94+
/* The string only contains characters in the range U+0000-U+007F (ASCII)
95+
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
96+
set, use the PyASCIIObject structure. */
97+
unsigned int ascii:1;
98+
/* The object is statically allocated. */
99+
unsigned int statically_allocated:1;
100+
#ifndef Py_GIL_DISABLED
101+
/* Historical: padding to ensure that PyUnicode_DATA() is always aligned to
102+
4 bytes (see issue gh-63736 on m68k) */
103+
unsigned int :24;
104+
#endif
105+
};
106+
50107
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
51108
structure. state.ascii and state.compact are set, and the data
52109
immediately follow the structure. utf8_length can be found
@@ -99,67 +156,8 @@ typedef struct {
99156
PyObject_HEAD
100157
Py_ssize_t length; /* Number of code points in the string */
101158
Py_hash_t hash; /* Hash value; -1 if not set */
102-
#ifdef Py_GIL_DISABLED
103-
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
104-
In the non-free-threaded build, we'll use explicit padding instead */
105-
_Py_ALIGN_AS(4)
106-
#endif
107-
struct {
108-
/* If interned is non-zero, the two references from the
109-
dictionary to this object are *not* counted in ob_refcnt.
110-
The possible values here are:
111-
0: Not Interned
112-
1: Interned
113-
2: Interned and Immortal
114-
3: Interned, Immortal, and Static
115-
This categorization allows the runtime to determine the right
116-
cleanup mechanism at runtime shutdown. */
117-
#ifdef Py_GIL_DISABLED
118-
// Needs to be accessed atomically, so can't be a bit field.
119-
unsigned char interned;
120-
#else
121-
unsigned int interned:2;
122-
#endif
123-
/* Character size:
124-
125-
- PyUnicode_1BYTE_KIND (1):
126-
127-
* character type = Py_UCS1 (8 bits, unsigned)
128-
* all characters are in the range U+0000-U+00FF (latin1)
129-
* if ascii is set, all characters are in the range U+0000-U+007F
130-
(ASCII), otherwise at least one character is in the range
131-
U+0080-U+00FF
132-
133-
- PyUnicode_2BYTE_KIND (2):
134-
135-
* character type = Py_UCS2 (16 bits, unsigned)
136-
* all characters are in the range U+0000-U+FFFF (BMP)
137-
* at least one character is in the range U+0100-U+FFFF
138-
139-
- PyUnicode_4BYTE_KIND (4):
140-
141-
* character type = Py_UCS4 (32 bits, unsigned)
142-
* all characters are in the range U+0000-U+10FFFF
143-
* at least one character is in the range U+10000-U+10FFFF
144-
*/
145-
unsigned int kind:3;
146-
/* Compact is with respect to the allocation scheme. Compact unicode
147-
objects only require one memory block while non-compact objects use
148-
one block for the PyUnicodeObject struct and another for its data
149-
buffer. */
150-
unsigned int compact:1;
151-
/* The string only contains characters in the range U+0000-U+007F (ASCII)
152-
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
153-
set, use the PyASCIIObject structure. */
154-
unsigned int ascii:1;
155-
/* The object is statically allocated. */
156-
unsigned int statically_allocated:1;
157-
#ifndef Py_GIL_DISABLED
158-
/* Padding to ensure that PyUnicode_DATA() is always aligned to
159-
4 bytes (see issue gh-63736 on m68k) */
160-
unsigned int :24;
161-
#endif
162-
} state;
159+
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */
160+
_Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state;
163161
} PyASCIIObject;
164162

165163
/* Non-ASCII strings allocated through PyUnicode_New use the

Include/internal/pycore_interp_structs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,11 @@ struct atexit_state {
159159
typedef struct {
160160
// Tagged pointer to next object in the list.
161161
// 0 means the object is not tracked
162-
uintptr_t _gc_next;
162+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next;
163163

164164
// Tagged pointer to previous object in the list.
165165
// Lowest two bits are used for flags documented later.
166+
// Those bits are made available by the struct's minimum alignment.
166167
uintptr_t _gc_prev;
167168
} PyGC_Head;
168169

Include/object.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ whose size is determined when the object is allocated.
101101
#define PyObject_VAR_HEAD PyVarObject ob_base;
102102
#define Py_INVALID_SIZE (Py_ssize_t)-1
103103

104+
/* PyObjects are given a minimum alignment so that the least significant bits
105+
* of an object pointer become available for other purposes.
106+
* This must be an integer literal with the value (1 << _PyGC_PREV_SHIFT), number of bytes.
107+
*/
108+
#define _PyObject_MIN_ALIGNMENT 4
109+
104110
/* Nothing is actually declared to be a PyObject, but every pointer to
105111
* a Python object can be cast to a PyObject*. This is inheritance built
106112
* by hand. Similarly every pointer to a variable-size Python object can,
@@ -136,6 +142,7 @@ struct _object {
136142
#else
137143
Py_ssize_t ob_refcnt;
138144
#endif
145+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner;
139146
};
140147
#ifdef _MSC_VER
141148
__pragma(warning(pop))
@@ -153,7 +160,7 @@ struct _object {
153160
// ob_tid stores the thread id (or zero). It is also used by the GC and the
154161
// trashcan mechanism as a linked list pointer and by the GC to store the
155162
// computed "gc_refs" refcount.
156-
uintptr_t ob_tid;
163+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid;
157164
uint16_t ob_flags;
158165
PyMutex ob_mutex; // per-object lock
159166
uint8_t ob_gc_bits; // gc-related state

Include/pymacro.h

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,41 +24,65 @@
2424
#endif
2525

2626

27-
// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword,
28-
// additional compat checking would be great since we added it to the default
29-
// build.
30-
// Standards/compiler support:
27+
// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment
28+
//
29+
// `N`: the desired minimum alignment, an integer literal, number of bytes
30+
// `T`: the type of the defined variable
31+
// (or a type with at least the defined variable's alignment)
32+
//
33+
// May not be used on a struct definition.
34+
//
35+
// Standards/compiler support for `alignas` alternatives:
3136
// - `alignas` is a keyword in C23 and C++11.
3237
// - `_Alignas` is a keyword in C11
3338
// - GCC & clang has __attribute__((aligned))
3439
// (use that for older standards in pedantic mode)
3540
// - MSVC has __declspec(align)
3641
// - `_Alignas` is common C compiler extension
37-
// Older compilers may name it differently; to allow compilation on such
38-
// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
42+
// Older compilers may name `alignas` differently; to allow compilation on such
43+
// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already
3944
// defined. Note that defining it wrong (including defining it to nothing) will
4045
// cause ABI incompatibilities.
41-
#ifndef _Py_ALIGN_AS
46+
//
47+
// Behavior of `alignas` alternatives:
48+
// - `alignas` & `_Alignas`:
49+
// - Can be used multiple times; the greatest alignment applies.
50+
// - It is an *error* if the combined effect of all `alignas` modifiers would
51+
// decrease the alignment.
52+
// - Takes types or numbers.
53+
// - May not be used on a struct definition, unless also defining a variable.
54+
// - `__declspec(align)`:
55+
// - Has no effect if it would decrease alignment.
56+
// - Only takes an integer literal.
57+
// - May be used on struct or variable definitions.
58+
// However, when defining both the struct and the variable at once,
59+
// `declspec(aligned)` causes compiler warning 5274 and possible ABI
60+
// incompatibility.
61+
// - ` __attribute__((aligned))`:
62+
// - Has no effect if it would decrease alignment.
63+
// - Takes types or numbers
64+
// - May be used on struct or variable definitions.
65+
#ifndef _Py_ALIGNED_DEF
4266
# ifdef __cplusplus
4367
# if __cplusplus >= 201103L
44-
# define _Py_ALIGN_AS(V) alignas(V)
68+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
4569
# elif defined(__GNUC__) || defined(__clang__)
46-
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
70+
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
4771
# elif defined(_MSC_VER)
48-
# define _Py_ALIGN_AS(V) __declspec(align(V))
72+
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
4973
# else
50-
# define _Py_ALIGN_AS(V) alignas(V)
74+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
5175
# endif
5276
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
53-
# define _Py_ALIGN_AS(V) alignas(V)
77+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
5478
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
55-
# define _Py_ALIGN_AS(V) _Alignas(V)
79+
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
5680
# elif (defined(__GNUC__) || defined(__clang__))
57-
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
81+
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
5882
# elif defined(_MSC_VER)
59-
# define _Py_ALIGN_AS(V) __declspec(align(V))
83+
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
6084
# else
61-
# define _Py_ALIGN_AS(V) _Alignas(V)
85+
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
6286
# endif
6387
#endif
6488

Lib/test/pickletester.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,11 @@ def test_large_32b_binunicode8(self):
11001100
self.check_unpickling_error((pickle.UnpicklingError, OverflowError),
11011101
dumped)
11021102

1103+
def test_large_binstring(self):
1104+
errmsg = 'BINSTRING pickle has negative byte count'
1105+
with self.assertRaisesRegex(pickle.UnpicklingError, errmsg):
1106+
self.loads(b'T\0\0\0\x80')
1107+
11031108
def test_get(self):
11041109
pickled = b'((lp100000\ng100000\nt.'
11051110
unpickled = self.loads(pickled)

Lib/test/test_random.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@
1414
from fractions import Fraction
1515
from collections import abc, Counter
1616

17+
18+
class MyIndex:
19+
def __init__(self, value):
20+
self.value = value
21+
22+
def __index__(self):
23+
return self.value
24+
25+
1726
class TestBasicOps:
1827
# Superclass with tests common to all generators.
1928
# Subclasses must arrange for self.gen to retrieve the Random instance
@@ -809,6 +818,9 @@ def test_getrandbits(self):
809818
self.gen.seed(1234567)
810819
self.assertEqual(self.gen.getrandbits(100),
811820
97904845777343510404718956115)
821+
self.gen.seed(1234567)
822+
self.assertEqual(self.gen.getrandbits(MyIndex(100)),
823+
97904845777343510404718956115)
812824

813825
def test_getrandbits_2G_bits(self):
814826
size = 2**31
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix crash when building on Linux/m68k.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Raise a correct exception for values greater than 0x7fffffff for the ``BINSTRING`` opcode in the C implementation of :mod:`pickle`.

Modules/_pickle.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5543,17 +5543,16 @@ static int
55435543
load_counted_binstring(PickleState *st, UnpicklerObject *self, int nbytes)
55445544
{
55455545
PyObject *obj;
5546-
Py_ssize_t size;
5546+
long size;
55475547
char *s;
55485548

55495549
if (_Unpickler_Read(self, st, &s, nbytes) < 0)
55505550
return -1;
55515551

5552-
size = calc_binsize(s, nbytes);
5552+
size = calc_binint(s, nbytes);
55535553
if (size < 0) {
5554-
PyErr_Format(st->UnpicklingError,
5555-
"BINSTRING exceeds system's maximum size of %zd bytes",
5556-
PY_SSIZE_T_MAX);
5554+
PyErr_SetString(st->UnpicklingError,
5555+
"BINSTRING pickle has negative byte count");
55575556
return -1;
55585557
}
55595558

0 commit comments

Comments
 (0)