Skip to content

Commit c4fa611

Browse files
[backport 2.3.x] Output formatting: preserve quoting for string categories (#61891) (#61966)
1 parent: 2063943 · commit: c4fa611

File tree

5 files changed: 32 additions (+32) and 52 deletions (-52).

5 files changed

+32
-52
lines changed

pandas/core/arrays/categorical.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2215,8 +2215,16 @@ def _repr_categories(self) -> list[str]:
22152215
)
22162216
from pandas.io.formats import format as fmt
22172217

2218+
formatter = None
2219+
if self.categories.dtype == "str":
2220+
# the extension array formatter defaults to boxed=True in format_array
2221+
# override here to boxed=False to be consistent with QUOTE_NONNUMERIC
2222+
formatter = cast(ExtensionArray, self.categories._values)._formatter(
2223+
boxed=False
2224+
)
2225+
22182226
format_array = partial(
2219-
fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC
2227+
fmt.format_array, formatter=formatter, quoting=QUOTE_NONNUMERIC
22202228
)
22212229
if len(self.categories) > max_categories:
22222230
num = max_categories // 2

pandas/tests/arrays/categorical/test_repr.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,11 @@
1919
class TestCategoricalReprWithFactor:
2020
def test_print(self, using_infer_string):
2121
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
22-
if using_infer_string:
23-
expected = [
24-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
25-
"Categories (3, str): [a < b < c]",
26-
]
27-
else:
28-
expected = [
29-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
30-
"Categories (3, object): ['a' < 'b' < 'c']",
31-
]
22+
dtype = "str" if using_infer_string else "object"
23+
expected = [
24+
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
25+
f"Categories (3, {dtype}): ['a' < 'b' < 'c']",
26+
]
3227
expected = "\n".join(expected)
3328
actual = repr(factor)
3429
assert actual == expected

pandas/tests/indexes/categorical/test_category.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas._libs import index as libindex
75
from pandas._libs.arrays import NDArrayBacked
86

@@ -196,7 +194,6 @@ def test_unique(self, data, categories, expected_data, ordered):
196194
expected = CategoricalIndex(expected_data, dtype=dtype)
197195
tm.assert_index_equal(idx.unique(), expected)
198196

199-
@pytest.mark.xfail(using_string_dtype(), reason="repr doesn't roundtrip")
200197
def test_repr_roundtrip(self):
201198
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
202199
str(ci)

pandas/tests/series/test_formats.py

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -318,38 +318,27 @@ def test_categorical_repr(self, using_infer_string):
318318
assert exp == a.__str__()
319319

320320
a = Series(Categorical(["a", "b"] * 25))
321+
exp = (
322+
"0 a\n1 b\n"
323+
" ..\n"
324+
"48 a\n49 b\n"
325+
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
326+
)
321327
if using_infer_string:
322-
exp = (
323-
"0 a\n1 b\n"
324-
" ..\n"
325-
"48 a\n49 b\n"
326-
"Length: 50, dtype: category\nCategories (2, str): [a, b]"
327-
)
328-
else:
329-
exp = (
330-
"0 a\n1 b\n"
331-
" ..\n"
332-
"48 a\n49 b\n"
333-
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
334-
)
328+
exp = exp.replace("object", "str")
335329
with option_context("display.max_rows", 5):
336330
assert exp == repr(a)
337331

338332
levs = list("abcdefghijklmnopqrstuvwxyz")
339333
a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
334+
exp = (
335+
"0 a\n1 b\n"
336+
"dtype: category\n"
337+
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
338+
"'w' < 'x' < 'y' < 'z']"
339+
)
340340
if using_infer_string:
341-
exp = (
342-
"0 a\n1 b\n"
343-
"dtype: category\n"
344-
"Categories (26, str): [a < b < c < d ... w < x < y < z]"
345-
)
346-
else:
347-
exp = (
348-
"0 a\n1 b\n"
349-
"dtype: category\n"
350-
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
351-
"'w' < 'x' < 'y' < 'z']"
352-
)
341+
exp = exp.replace("object", "str")
353342
assert exp == a.__str__()
354343

355344
def test_categorical_series_repr(self):

pandas/tests/util/test_assert_series_equal.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -215,24 +215,15 @@ def test_series_equal_numeric_values_mismatch(rtol):
215215

216216

217217
def test_series_equal_categorical_values_mismatch(rtol, using_infer_string):
218-
if using_infer_string:
219-
msg = """Series are different
220-
221-
Series values are different \\(66\\.66667 %\\)
222-
\\[index\\]: \\[0, 1, 2\\]
223-
\\[left\\]: \\['a', 'b', 'c'\\]
224-
Categories \\(3, str\\): \\[a, b, c\\]
225-
\\[right\\]: \\['a', 'c', 'b'\\]
226-
Categories \\(3, str\\): \\[a, b, c\\]"""
227-
else:
228-
msg = """Series are different
218+
dtype = "str" if using_infer_string else "object"
219+
msg = f"""Series are different
229220
230221
Series values are different \\(66\\.66667 %\\)
231222
\\[index\\]: \\[0, 1, 2\\]
232223
\\[left\\]: \\['a', 'b', 'c'\\]
233-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]
224+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]
234225
\\[right\\]: \\['a', 'c', 'b'\\]
235-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]"""
226+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]"""
236227

237228
s1 = Series(Categorical(["a", "b", "c"]))
238229
s2 = Series(Categorical(["a", "c", "b"]))

0 commit comments

Comments (0)