Skip to content

Commit a067fff

Browse files
khemkaran10, Khemkaran, and jorisvandenbossche
authored
BUG: fix to_json() with JSON Table Schema to work correctly with string dtype (#61900)
Co-authored-by: Khemkaran <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 14caf55 commit a067fff

File tree

4 files changed

+15
-14
lines changed

4 files changed

+15
-14
lines changed

doc/source/whatsnew/v2.3.2.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ become the default string dtype in pandas 3.0. See
2222

2323
Bug fixes
2424
^^^^^^^^^
25-
-
25+
- Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
26+
"string" type in the JSON Table Schema for :class:`StringDtype` columns
27+
(:issue:`61889`)
28+
2629

2730
.. ---------------------------------------------------------------------------
2831
.. _whatsnew_232.contributors:

pandas/io/json/_table_schema.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ def as_json_table_type(x: DtypeObj) -> str:
9090
return "datetime"
9191
elif lib.is_np_dtype(x, "m"):
9292
return "duration"
93-
elif isinstance(x, ExtensionDtype):
94-
return "any"
9593
elif is_string_dtype(x):
9694
return "string"
9795
else:
@@ -197,7 +195,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
197195
"""
198196
typ = field["type"]
199197
if typ == "string":
200-
return "object"
198+
return field.get("extDtype", None)
201199
elif typ == "integer":
202200
return field.get("extDtype", "int64")
203201
elif typ == "number":

pandas/tests/io/json/test_json_table_schema.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def test_build_table_schema(self, df_schema, using_infer_string):
7070
"primaryKey": ["idx"],
7171
}
7272
if using_infer_string:
73-
expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "str"}
73+
expected["fields"][2] = {"name": "B", "type": "string", "extDtype": "str"}
7474
assert result == expected
7575
result = build_table_schema(df_schema)
7676
assert "pandas_version" in result
@@ -120,10 +120,10 @@ def test_multiindex(self, df_schema, using_infer_string):
120120
if using_infer_string:
121121
expected["fields"][0] = {
122122
"name": "level_0",
123-
"type": "any",
123+
"type": "string",
124124
"extDtype": "str",
125125
}
126-
expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"}
126+
expected["fields"][3] = {"name": "B", "type": "string", "extDtype": "str"}
127127
assert result == expected
128128

129129
df.index.names = ["idx0", None]
@@ -303,7 +303,7 @@ def test_to_json(self, df_table, using_infer_string):
303303
]
304304

305305
if using_infer_string:
306-
fields[2] = {"name": "B", "type": "any", "extDtype": "str"}
306+
fields[2] = {"name": "B", "type": "string", "extDtype": "str"}
307307

308308
schema = {"fields": fields, "primaryKey": ["idx"]}
309309
data = [
@@ -547,7 +547,7 @@ def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
547547
},
548548
CategoricalDtype(categories=["a", "b", "c"], ordered=True),
549549
),
550-
({"type": "string"}, "object"),
550+
({"type": "string"}, None),
551551
],
552552
)
553553
def test_convert_json_field_to_pandas_type(self, inp, exp):

pandas/tests/io/json/test_json_table_schema_ext_dtype.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def test_build_table_schema(self):
5050
{"name": "index", "type": "integer"},
5151
{"name": "A", "type": "any", "extDtype": "DateDtype"},
5252
{"name": "B", "type": "number", "extDtype": "decimal"},
53-
{"name": "C", "type": "any", "extDtype": "string"},
53+
{"name": "C", "type": "string", "extDtype": "string"},
5454
{"name": "D", "type": "integer", "extDtype": "Int64"},
5555
],
5656
"primaryKey": ["index"],
@@ -80,10 +80,10 @@ def test_as_json_table_type_ext_decimal_dtype(self):
8080
@pytest.mark.parametrize("box", [lambda x: x, Series])
8181
def test_as_json_table_type_ext_string_array_dtype(self, box):
8282
string_data = box(array(["pandas"], dtype="string"))
83-
assert as_json_table_type(string_data.dtype) == "any"
83+
assert as_json_table_type(string_data.dtype) == "string"
8484

8585
def test_as_json_table_type_ext_string_dtype(self):
86-
assert as_json_table_type(StringDtype()) == "any"
86+
assert as_json_table_type(StringDtype()) == "string"
8787

8888
@pytest.mark.parametrize("box", [lambda x: x, Series])
8989
def test_as_json_table_type_ext_integer_array_dtype(self, box):
@@ -176,7 +176,7 @@ def test_build_string_series(self, sa):
176176

177177
fields = [
178178
{"name": "id", "type": "integer"},
179-
{"name": "a", "type": "any", "extDtype": "string"},
179+
{"name": "a", "type": "string", "extDtype": "string"},
180180
]
181181

182182
schema = {"fields": fields, "primaryKey": ["id"]}
@@ -235,7 +235,7 @@ def test_to_json(self, da, dc, sa, ia):
235235
OrderedDict({"name": "idx", "type": "integer"}),
236236
OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}),
237237
OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}),
238-
OrderedDict({"name": "C", "type": "any", "extDtype": "string"}),
238+
OrderedDict({"name": "C", "type": "string", "extDtype": "string"}),
239239
OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}),
240240
]
241241

0 commit comments

Comments
 (0)