Skip to content

Commit 4357302

Browse files
gh-116738: Make _json module thread-safe in the free-threading (#119438)
Co-authored-by: Kumar Aditya <[email protected]>
1 parent c7a097c commit 4357302

File tree

3 files changed

+214
-41
lines changed

3 files changed

+214
-41
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from threading import Barrier, Thread
2+
from test.test_json import CTest
3+
from test.support import threading_helper
4+
5+
6+
def encode_json_helper(
7+
json, worker, data, number_of_threads=12, number_of_json_encodings=100
8+
):
9+
worker_threads = []
10+
barrier = Barrier(number_of_threads)
11+
for index in range(number_of_threads):
12+
worker_threads.append(
13+
Thread(target=worker, args=[barrier, data, index])
14+
)
15+
for t in worker_threads:
16+
t.start()
17+
for ii in range(number_of_json_encodings):
18+
json.dumps(data)
19+
data.clear()
20+
for t in worker_threads:
21+
t.join()
22+
23+
24+
class MyMapping(dict):
25+
def __init__(self):
26+
self.mapping = []
27+
28+
def items(self):
29+
return self.mapping
30+
31+
32+
@threading_helper.reap_threads
33+
@threading_helper.requires_working_threading()
34+
class TestJsonEncoding(CTest):
35+
# Test encoding json with concurrent threads modifying the data cannot
36+
# corrupt the interpreter
37+
38+
def test_json_mutating_list(self):
39+
def worker(barrier, data, index):
40+
barrier.wait()
41+
while data:
42+
for d in data:
43+
if len(d) > 5:
44+
d.clear()
45+
else:
46+
d.append(index)
47+
48+
data = [[], []]
49+
encode_json_helper(self.json, worker, data)
50+
51+
def test_json_mutating_exact_dict(self):
52+
def worker(barrier, data, index):
53+
barrier.wait()
54+
while data:
55+
for d in data:
56+
if len(d) > 5:
57+
try:
58+
key = list(d)[0]
59+
d.pop()
60+
except (KeyError, IndexError):
61+
pass
62+
else:
63+
d[index] = index
64+
65+
data = [{}, {}]
66+
encode_json_helper(self.json, worker, data)
67+
68+
def test_json_mutating_mapping(self):
69+
def worker(barrier, data, index):
70+
barrier.wait()
71+
while data:
72+
for d in data:
73+
if len(d.mapping) > 3:
74+
d.mapping.clear()
75+
else:
76+
d.mapping.append((index, index))
77+
78+
data = [MyMapping(), MyMapping()]
79+
encode_json_helper(self.json, worker, data)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Make the module :mod:`json` safe to use under the free-threading build.

Modules/_json.c

Lines changed: 134 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "Python.h"
1212
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
13+
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST()
1314
#include "pycore_global_strings.h" // _Py_ID()
1415
#include "pycore_pyerrors.h" // _PyErr_FormatNote
1516
#include "pycore_runtime.h" // _PyRuntime
@@ -1456,7 +1457,7 @@ write_newline_indent(PyUnicodeWriter *writer,
14561457
static PyObject *
14571458
encoder_call(PyObject *op, PyObject *args, PyObject *kwds)
14581459
{
1459-
/* Python callable interface to encode_listencode_obj */
1460+
/* Python callable interface to encoder_listencode_obj */
14601461
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
14611462
PyObject *obj;
14621463
Py_ssize_t indent_level;
@@ -1743,15 +1744,84 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
17431744
return 0;
17441745
}
17451746

1747+
static inline int
1748+
_encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
1749+
bool *first, PyObject *dct, PyObject *items,
1750+
Py_ssize_t indent_level, PyObject *indent_cache,
1751+
PyObject *separator)
1752+
{
1753+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(items);
1754+
PyObject *key, *value;
1755+
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
1756+
PyObject *item = PyList_GET_ITEM(items, i);
1757+
#ifdef Py_GIL_DISABLED
1758+
// gh-119438: in the free-threading build the critical section on items can get suspended
1759+
Py_INCREF(item);
1760+
#endif
1761+
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1762+
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1763+
#ifdef Py_GIL_DISABLED
1764+
Py_DECREF(item);
1765+
#endif
1766+
return -1;
1767+
}
1768+
1769+
key = PyTuple_GET_ITEM(item, 0);
1770+
value = PyTuple_GET_ITEM(item, 1);
1771+
if (encoder_encode_key_value(s, writer, first, dct, key, value,
1772+
indent_level, indent_cache,
1773+
separator) < 0) {
1774+
#ifdef Py_GIL_DISABLED
1775+
Py_DECREF(item);
1776+
#endif
1777+
return -1;
1778+
}
1779+
#ifdef Py_GIL_DISABLED
1780+
Py_DECREF(item);
1781+
#endif
1782+
}
1783+
1784+
return 0;
1785+
}
1786+
1787+
static inline int
1788+
_encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
1789+
bool *first, PyObject *dct, Py_ssize_t indent_level,
1790+
PyObject *indent_cache, PyObject *separator)
1791+
{
1792+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(dct);
1793+
PyObject *key, *value;
1794+
Py_ssize_t pos = 0;
1795+
while (PyDict_Next(dct, &pos, &key, &value)) {
1796+
#ifdef Py_GIL_DISABLED
1797+
// gh-119438: in the free-threading build the critical section on dct can get suspended
1798+
Py_INCREF(key);
1799+
Py_INCREF(value);
1800+
#endif
1801+
if (encoder_encode_key_value(s, writer, first, dct, key, value,
1802+
indent_level, indent_cache,
1803+
separator) < 0) {
1804+
#ifdef Py_GIL_DISABLED
1805+
Py_DECREF(key);
1806+
Py_DECREF(value);
1807+
#endif
1808+
return -1;
1809+
}
1810+
#ifdef Py_GIL_DISABLED
1811+
Py_DECREF(key);
1812+
Py_DECREF(value);
1813+
#endif
1814+
}
1815+
return 0;
1816+
}
1817+
17461818
static int
17471819
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
1748-
PyObject *dct,
1820+
PyObject *dct,
17491821
Py_ssize_t indent_level, PyObject *indent_cache)
17501822
{
17511823
/* Encode Python dict dct a JSON term */
17521824
PyObject *ident = NULL;
1753-
PyObject *items = NULL;
1754-
PyObject *key, *value;
17551825
bool first = true;
17561826

17571827
if (PyDict_GET_SIZE(dct) == 0) {
@@ -1788,34 +1858,30 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
17881858
}
17891859

17901860
if (s->sort_keys || !PyDict_CheckExact(dct)) {
1791-
items = PyMapping_Items(dct);
1792-
if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
1861+
PyObject *items = PyMapping_Items(dct);
1862+
if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) {
1863+
Py_XDECREF(items);
17931864
goto bail;
1865+
}
17941866

1795-
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
1796-
PyObject *item = PyList_GET_ITEM(items, i);
1797-
1798-
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1799-
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1800-
goto bail;
1801-
}
1802-
1803-
key = PyTuple_GET_ITEM(item, 0);
1804-
value = PyTuple_GET_ITEM(item, 1);
1805-
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
1806-
indent_level, indent_cache,
1807-
separator) < 0)
1808-
goto bail;
1867+
int result;
1868+
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(items);
1869+
result = _encoder_iterate_mapping_lock_held(s, writer, &first, dct,
1870+
items, indent_level, indent_cache, separator);
1871+
Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
1872+
Py_DECREF(items);
1873+
if (result < 0) {
1874+
goto bail;
18091875
}
1810-
Py_CLEAR(items);
18111876

18121877
} else {
1813-
Py_ssize_t pos = 0;
1814-
while (PyDict_Next(dct, &pos, &key, &value)) {
1815-
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
1816-
indent_level, indent_cache,
1817-
separator) < 0)
1818-
goto bail;
1878+
int result;
1879+
Py_BEGIN_CRITICAL_SECTION(dct);
1880+
result = _encoder_iterate_dict_lock_held(s, writer, &first, dct,
1881+
indent_level, indent_cache, separator);
1882+
Py_END_CRITICAL_SECTION();
1883+
if (result < 0) {
1884+
goto bail;
18191885
}
18201886
}
18211887

@@ -1837,22 +1903,52 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
18371903
return 0;
18381904

18391905
bail:
1840-
Py_XDECREF(items);
18411906
Py_XDECREF(ident);
18421907
return -1;
18431908
}
18441909

1910+
static inline int
1911+
_encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
1912+
PyObject *seq, PyObject *s_fast,
1913+
Py_ssize_t indent_level, PyObject *indent_cache, PyObject *separator)
1914+
{
1915+
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1916+
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1917+
#ifdef Py_GIL_DISABLED
1918+
// gh-119438: in the free-threading build the critical section on s_fast can get suspended
1919+
Py_INCREF(obj);
1920+
#endif
1921+
if (i) {
1922+
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
1923+
#ifdef Py_GIL_DISABLED
1924+
Py_DECREF(obj);
1925+
#endif
1926+
return -1;
1927+
}
1928+
}
1929+
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
1930+
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
1931+
#ifdef Py_GIL_DISABLED
1932+
Py_DECREF(obj);
1933+
#endif
1934+
return -1;
1935+
}
1936+
#ifdef Py_GIL_DISABLED
1937+
Py_DECREF(obj);
1938+
#endif
1939+
}
1940+
return 0;
1941+
}
1942+
18451943
static int
18461944
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
18471945
PyObject *seq,
18481946
Py_ssize_t indent_level, PyObject *indent_cache)
18491947
{
18501948
PyObject *ident = NULL;
18511949
PyObject *s_fast = NULL;
1852-
Py_ssize_t i;
18531950

1854-
ident = NULL;
1855-
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1951+
s_fast = PySequence_Fast(seq, "encoder_listencode_list needs a sequence");
18561952
if (s_fast == NULL)
18571953
return -1;
18581954
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
@@ -1890,16 +1986,13 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
18901986
goto bail;
18911987
}
18921988
}
1893-
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1894-
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1895-
if (i) {
1896-
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
1897-
goto bail;
1898-
}
1899-
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
1900-
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
1901-
goto bail;
1902-
}
1989+
int result;
1990+
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq);
1991+
result = _encoder_iterate_fast_seq_lock_held(s, writer, seq, s_fast,
1992+
indent_level, indent_cache, separator);
1993+
Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
1994+
if (result < 0) {
1995+
goto bail;
19031996
}
19041997
if (ident != NULL) {
19051998
if (PyDict_DelItem(s->markers, ident))

0 commit comments

Comments
 (0)