 #include <stdatomic.h>
 #endif
 
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#include <immintrin.h>
+#endif
+
 /* This is modeled after the atomics interface from C1x, according to
  * the draft at
  * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
@@ -87,8 +93,9 @@ typedef struct _Py_atomic_int {
             || (ORDER) == __ATOMIC_CONSUME), \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
 
-#else
-
+/* Only support GCC (for expression statements) and x86 (for simple
+ * atomic semantics) and MSVC x86/x64/ARM */
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
 typedef enum _Py_memory_order {
     _Py_memory_order_relaxed,
     _Py_memory_order_acquire,
@@ -105,9 +112,6 @@ typedef struct _Py_atomic_int {
     int _value;
 } _Py_atomic_int;
 
-/* Only support GCC (for expression statements) and x86 (for simple
- * atomic semantics) for now */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
 
 static __inline__ void
 _Py_atomic_signal_fence(_Py_memory_order order)
@@ -127,7 +131,7 @@ _Py_atomic_thread_fence(_Py_memory_order order)
 static __inline__ void
 _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
 {
-    (void)address;              /* shut up -Wunused-parameter */
+    (void)address;              /* shut up -Wunused-parameter */
     switch (order) {
     case _Py_memory_order_release:
     case _Py_memory_order_acq_rel:
@@ -219,7 +223,291 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
         result; \
     })
 
-#else /* !gcc x86 */
+#elif defined(_MSC_VER)
+/*  _Interlocked* functions provide a full memory barrier and are therefore
+    enough for acq_rel and seq_cst. If the HLE variants aren't available
+    in hardware they will fall back to a full memory barrier as well.
+
+    This might affect performance, but likely only in some very specific and
+    hard to measure scenarios.
+*/
+#if defined(_M_IX86) || defined(_M_X64)
+typedef enum _Py_memory_order {
+    _Py_memory_order_relaxed,
+    _Py_memory_order_acquire,
+    _Py_memory_order_release,
+    _Py_memory_order_acq_rel,
+    _Py_memory_order_seq_cst
+} _Py_memory_order;
+
+typedef struct _Py_atomic_address {
+    volatile uintptr_t _value;
+} _Py_atomic_address;
+
+typedef struct _Py_atomic_int {
+    volatile int _value;
+} _Py_atomic_int;
+
+
+#if defined(_M_X64)
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+      _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+      break; \
+    case _Py_memory_order_release: \
+      _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+      break; \
+    default: \
+      _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+      break; \
+  }
+#else
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
+#endif
+
+#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+      _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+      break; \
+    case _Py_memory_order_release: \
+      _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+      break; \
+    default: \
+      _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+      break; \
+  }
281+
+#if defined(_M_X64)
+/* This has to be an intptr_t for now.
+   gil_created() uses -1 as a sentinel value; if this returned
+   a uintptr_t it would do an unsigned compare and crash.
+*/
+inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
+    uintptr_t old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange64_HLEAcquire(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_release:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange64_HLERelease(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_relaxed:
+      old = *value;
+      break;
+    default:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange64(value, old, old) != old);
+      break;
+    }
+    }
+    return old;
+}
+
+#else
+#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
+#endif
+
+inline int _Py_atomic_load_32bit(volatile int* value, int order) {
+    int old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange_HLEAcquire(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_release:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange_HLERelease(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_relaxed:
+      old = *value;
+      break;
+    default:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange(value, old, old) != old);
+      break;
+    }
+    }
+    return old;
+}
+
+#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
+  if (sizeof(*ATOMIC_VAL._value) == 8) { \
+    _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+    _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
+
+#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
+  ( \
+    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
+    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
+  )
+#elif defined(_M_ARM) || defined(_M_ARM64)
+typedef enum _Py_memory_order {
+    _Py_memory_order_relaxed,
+    _Py_memory_order_acquire,
+    _Py_memory_order_release,
+    _Py_memory_order_acq_rel,
+    _Py_memory_order_seq_cst
+} _Py_memory_order;
+
+typedef struct _Py_atomic_address {
+    volatile uintptr_t _value;
+} _Py_atomic_address;
+
+typedef struct _Py_atomic_int {
+    volatile int _value;
+} _Py_atomic_int;
+
+
+#if defined(_M_ARM64)
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+      _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+      break; \
+    case _Py_memory_order_release: \
+      _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+      break; \
+    default: \
+      _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
+      break; \
+  }
+#else
+#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
+#endif
+
+#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
+    switch (ORDER) { \
+    case _Py_memory_order_acquire: \
+      _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+      break; \
+    case _Py_memory_order_release: \
+      _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+      break; \
+    default: \
+      _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
+      break; \
+  }
+
+#if defined(_M_ARM64)
+/* This has to be an intptr_t for now.
+   gil_created() uses -1 as a sentinel value; if this returned
+   a uintptr_t it would do an unsigned compare and crash.
+*/
+inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
+    uintptr_t old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_release:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_relaxed:
+      old = *value;
+      break;
+    default:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange64(value, old, old) != old);
+      break;
+    }
+    }
+    return old;
+}
+
+#else
+#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
+#endif
+
+inline int _Py_atomic_load_32bit(volatile int* value, int order) {
+    int old;
+    switch (order) {
+    case _Py_memory_order_acquire:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_release:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
+      break;
+    }
+    case _Py_memory_order_relaxed:
+      old = *value;
+      break;
+    default:
+    {
+      do {
+        old = *value;
+      } while(_InterlockedCompareExchange(value, old, old) != old);
+      break;
+    }
+    }
+    return old;
+}
+
+#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
+  if (sizeof(*ATOMIC_VAL._value) == 8) { \
+    _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
+    _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
+
+#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
+  ( \
+    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
+    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
+    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
+  )
+#endif
+#else /* !gcc x86 !_msc_ver */
+typedef enum _Py_memory_order {
+    _Py_memory_order_relaxed,
+    _Py_memory_order_acquire,
+    _Py_memory_order_release,
+    _Py_memory_order_acq_rel,
+    _Py_memory_order_seq_cst
+} _Py_memory_order;
+
+typedef struct _Py_atomic_address {
+    uintptr_t _value;
+} _Py_atomic_address;
+
+typedef struct _Py_atomic_int {
+    int _value;
+} _Py_atomic_int;
 /* Fall back to other compilers and processors by assuming that simple
    volatile accesses are atomic. This is false, so people should port
    this. */
@@ -229,8 +517,6 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
     ((ATOMIC_VAL)->_value = NEW_VAL)
 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
     ((ATOMIC_VAL)->_value)
-
-#endif /* !gcc x86 */
 #endif
 
 /* Standardized shortcuts. */
@@ -245,6 +531,5 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
     _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
 #define _Py_atomic_load_relaxed(ATOMIC_VAL) \
     _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
-
 #endif /* Py_BUILD_CORE */
 #endif /* Py_ATOMIC_H */
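
Note: the memory orders used throughout this patch correspond to the standard C11 ones that the GCC/clang branch of this header wraps via <stdatomic.h>. Below is a minimal standalone C11 sketch of the release/acquire/relaxed call shapes; it uses plain C11 atomics, and the variable names are illustrative only, not part of CPython.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Illustration only, single-threaded: shows the orders that
       _Py_atomic_store_explicit / _Py_atomic_load_explicit request. */
    atomic_uintptr_t flag;
    atomic_init(&flag, 0);

    /* Release store: writes made before it become visible to a thread
       that later performs an acquire load of the same object. */
    atomic_store_explicit(&flag, (uintptr_t)42, memory_order_release);

    /* Acquire load: pairs with the release store above. */
    uintptr_t seen = atomic_load_explicit(&flag, memory_order_acquire);

    /* Relaxed load: atomic, but with no ordering guarantees; this is
       what _Py_atomic_load_relaxed asks for. */
    uintptr_t relaxed = atomic_load_explicit(&flag, memory_order_relaxed);

    printf("%ju %ju\n", (uintmax_t)seen, (uintmax_t)relaxed);
    return 0;
}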