Skip to content

Commit 2063943

Browse files
jorisvandenbossche, khemkaran10, and Khemkaran
authored
[backport 2.3.x] BUG: fix to_json() with JSON Table Schema to work correctly with string dtype (#61900) (#61965)
Co-authored-by: Khemkaran Sevta <[email protected]>
Co-authored-by: Khemkaran <[email protected]>
1 parent 3e2a430 commit 2063943

File tree

4 files changed

+15
-14
lines changed

4 files changed

+15
-14
lines changed

doc/source/whatsnew/v2.3.2.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ become the default string dtype in pandas 3.0. See
2222

2323
Bug fixes
2424
^^^^^^^^^
25-
-
25+
- Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
26+
"string" type in the JSON Table Schema for :class:`StringDtype` columns
27+
(:issue:`61889`)
28+
2629

2730
.. ---------------------------------------------------------------------------
2831
.. _whatsnew_232.contributors:

pandas/io/json/_table_schema.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ def as_json_table_type(x: DtypeObj) -> str:
9090
return "datetime"
9191
elif lib.is_np_dtype(x, "m"):
9292
return "duration"
93-
elif isinstance(x, ExtensionDtype):
94-
return "any"
9593
elif is_string_dtype(x):
9694
return "string"
9795
else:
@@ -197,7 +195,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
197195
"""
198196
typ = field["type"]
199197
if typ == "string":
200-
return "object"
198+
return field.get("extDtype", None)
201199
elif typ == "integer":
202200
return field.get("extDtype", "int64")
203201
elif typ == "number":

pandas/tests/io/json/test_json_table_schema.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def test_build_table_schema(self, df_schema, using_infer_string):
6969
"primaryKey": ["idx"],
7070
}
7171
if using_infer_string:
72-
expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "str"}
72+
expected["fields"][2] = {"name": "B", "type": "string", "extDtype": "str"}
7373
assert result == expected
7474
result = build_table_schema(df_schema)
7575
assert "pandas_version" in result
@@ -119,10 +119,10 @@ def test_multiindex(self, df_schema, using_infer_string):
119119
if using_infer_string:
120120
expected["fields"][0] = {
121121
"name": "level_0",
122-
"type": "any",
122+
"type": "string",
123123
"extDtype": "str",
124124
}
125-
expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"}
125+
expected["fields"][3] = {"name": "B", "type": "string", "extDtype": "str"}
126126
assert result == expected
127127

128128
df.index.names = ["idx0", None]
@@ -305,7 +305,7 @@ def test_to_json(self, df_table, using_infer_string):
305305
]
306306

307307
if using_infer_string:
308-
fields[2] = {"name": "B", "type": "any", "extDtype": "str"}
308+
fields[2] = {"name": "B", "type": "string", "extDtype": "str"}
309309

310310
schema = {"fields": fields, "primaryKey": ["idx"]}
311311
data = [
@@ -544,7 +544,7 @@ def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
544544
},
545545
CategoricalDtype(categories=["a", "b", "c"], ordered=True),
546546
),
547-
({"type": "string"}, "object"),
547+
({"type": "string"}, None),
548548
],
549549
)
550550
def test_convert_json_field_to_pandas_type(self, inp, exp):

pandas/tests/io/json/test_json_table_schema_ext_dtype.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def test_build_table_schema(self):
5050
{"name": "index", "type": "integer"},
5151
{"name": "A", "type": "any", "extDtype": "DateDtype"},
5252
{"name": "B", "type": "number", "extDtype": "decimal"},
53-
{"name": "C", "type": "any", "extDtype": "string"},
53+
{"name": "C", "type": "string", "extDtype": "string"},
5454
{"name": "D", "type": "integer", "extDtype": "Int64"},
5555
],
5656
"primaryKey": ["index"],
@@ -96,10 +96,10 @@ def test_as_json_table_type_ext_decimal_dtype(self):
9696
],
9797
)
9898
def test_as_json_table_type_ext_string_array_dtype(self, string_data):
99-
assert as_json_table_type(string_data.dtype) == "any"
99+
assert as_json_table_type(string_data.dtype) == "string"
100100

101101
def test_as_json_table_type_ext_string_dtype(self):
102-
assert as_json_table_type(StringDtype()) == "any"
102+
assert as_json_table_type(StringDtype()) == "string"
103103

104104
@pytest.mark.parametrize(
105105
"integer_data",
@@ -204,7 +204,7 @@ def test_build_string_series(self, sa):
204204

205205
fields = [
206206
{"name": "id", "type": "integer"},
207-
{"name": "a", "type": "any", "extDtype": "string"},
207+
{"name": "a", "type": "string", "extDtype": "string"},
208208
]
209209

210210
schema = {"fields": fields, "primaryKey": ["id"]}
@@ -256,7 +256,7 @@ def test_to_json(self, df):
256256
OrderedDict({"name": "idx", "type": "integer"}),
257257
OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}),
258258
OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}),
259-
OrderedDict({"name": "C", "type": "any", "extDtype": "string"}),
259+
OrderedDict({"name": "C", "type": "string", "extDtype": "string"}),
260260
OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}),
261261
]
262262

0 commit comments

Comments
 (0)