Skip to content

Commit e4a03b6

Browse files
CI: enable doctest errors again + fixup categorical examples (#61947)
1 parent a067fff commit e4a03b6

File tree

12 files changed, with 77 additions and 77 deletions.

ci/code_checks.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
5858

5959
MSG='Python and Cython Doctests' ; echo "$MSG"
6060
python -c 'import pandas as pd; pd.test(run_doctests=True)'
61-
# TEMP don't let doctests fail the build until all string dtype changes are fixed
62-
# RET=$(($RET + $?)) ; echo "$MSG" "DONE"
63-
echo "$MSG" "DONE"
61+
RET=$(($RET + $?)) ; echo "$MSG" "DONE"
6462

6563
fi
6664

pandas/core/algorithms.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,11 +391,11 @@ def unique(values):
391391
392392
>>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
393393
['b', 'a', 'c']
394-
Categories (3, object): ['a', 'b', 'c']
394+
Categories (3, str): ['a', 'b', 'c']
395395
396396
>>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
397397
['b', 'a', 'c']
398-
Categories (3, object): ['a', 'b', 'c']
398+
Categories (3, str): ['a', 'b', 'c']
399399
400400
An ordered Categorical preserves the category ordering.
401401
@@ -405,7 +405,7 @@ def unique(values):
405405
... )
406406
... )
407407
['b', 'a', 'c']
408-
Categories (3, object): ['a' < 'b' < 'c']
408+
Categories (3, str): ['a' < 'b' < 'c']
409409
410410
An array of tuples
411411
@@ -751,7 +751,7 @@ def factorize(
751751
array([0, 0, 1])
752752
>>> uniques
753753
['a', 'c']
754-
Categories (3, str): [a, b, c]
754+
Categories (3, str): ['a', 'b', 'c']
755755
756756
Notice that ``'b'`` is in ``uniques.categories``, despite not being
757757
present in ``cat.values``.

pandas/core/arrays/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,13 +1688,13 @@ def factorize(
16881688
>>> cat = pd.Categorical(['a', 'b', 'c'])
16891689
>>> cat
16901690
['a', 'b', 'c']
1691-
Categories (3, object): ['a', 'b', 'c']
1691+
Categories (3, str): ['a', 'b', 'c']
16921692
>>> cat.repeat(2)
16931693
['a', 'a', 'b', 'b', 'c', 'c']
1694-
Categories (3, object): ['a', 'b', 'c']
1694+
Categories (3, str): ['a', 'b', 'c']
16951695
>>> cat.repeat([1, 2, 3])
16961696
['a', 'b', 'b', 'c', 'c', 'c']
1697-
Categories (3, object): ['a', 'b', 'c']
1697+
Categories (3, str): ['a', 'b', 'c']
16981698
"""
16991699

17001700
@Substitution(klass="ExtensionArray")

pandas/core/arrays/categorical.py

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
332332
333333
>>> pd.Categorical(["a", "b", "c", "a", "b", "c"])
334334
['a', 'b', 'c', 'a', 'b', 'c']
335-
Categories (3, object): ['a', 'b', 'c']
335+
Categories (3, str): ['a', 'b', 'c']
336336
337337
Missing values are not included as a category.
338338
@@ -355,7 +355,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
355355
... )
356356
>>> c
357357
['a', 'b', 'c', 'a', 'b', 'c']
358-
Categories (3, object): ['c' < 'b' < 'a']
358+
Categories (3, str): ['c' < 'b' < 'a']
359359
>>> c.min()
360360
'c'
361361
"""
@@ -510,9 +510,9 @@ def dtype(self) -> CategoricalDtype:
510510
>>> cat = pd.Categorical(["a", "b"], ordered=True)
511511
>>> cat
512512
['a', 'b']
513-
Categories (2, object): ['a' < 'b']
513+
Categories (2, str): ['a' < 'b']
514514
>>> cat.dtype
515-
CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object)
515+
CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=str)
516516
"""
517517
return self._dtype
518518

@@ -740,7 +740,7 @@ def from_codes(
740740
>>> dtype = pd.CategoricalDtype(["a", "b"], ordered=True)
741741
>>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype)
742742
['a', 'b', 'a', 'b']
743-
Categories (2, object): ['a' < 'b']
743+
Categories (2, str): ['a' < 'b']
744744
"""
745745
dtype = CategoricalDtype._from_values_or_dtype(
746746
categories=categories, ordered=ordered, dtype=dtype
@@ -922,12 +922,12 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
922922
>>> c = pd.Categorical(["a", "b"])
923923
>>> c
924924
['a', 'b']
925-
Categories (2, object): ['a', 'b']
925+
Categories (2, str): ['a', 'b']
926926
927927
>>> c._set_categories(pd.Index(["a", "c"]))
928928
>>> c
929929
['a', 'c']
930-
Categories (2, object): ['a', 'c']
930+
Categories (2, str): ['a', 'c']
931931
"""
932932
if fastpath:
933933
new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered)
@@ -1111,15 +1111,15 @@ def set_categories(
11111111
2 c
11121112
3 NaN
11131113
dtype: category
1114-
Categories (3, object): ['a' < 'b' < 'c']
1114+
Categories (3, str): ['a' < 'b' < 'c']
11151115
11161116
>>> ser.cat.set_categories(["A", "B", "C"], rename=True)
11171117
0 A
11181118
1 B
11191119
2 C
11201120
3 NaN
11211121
dtype: category
1122-
Categories (3, object): ['A' < 'B' < 'C']
1122+
Categories (3, str): ['A' < 'B' < 'C']
11231123
11241124
For :class:`pandas.CategoricalIndex`:
11251125
@@ -1215,13 +1215,13 @@ def rename_categories(self, new_categories) -> Self:
12151215
12161216
>>> c.rename_categories({"a": "A", "c": "C"})
12171217
['A', 'A', 'b']
1218-
Categories (2, object): ['A', 'b']
1218+
Categories (2, str): ['A', 'b']
12191219
12201220
You may also provide a callable to create the new categories
12211221
12221222
>>> c.rename_categories(lambda x: x.upper())
12231223
['A', 'A', 'B']
1224-
Categories (2, object): ['A', 'B']
1224+
Categories (2, str): ['A', 'B']
12251225
"""
12261226

12271227
if is_dict_like(new_categories):
@@ -1281,15 +1281,15 @@ def reorder_categories(self, new_categories, ordered=None) -> Self:
12811281
2 c
12821282
3 a
12831283
dtype: category
1284-
Categories (3, object): ['c' < 'b' < 'a']
1284+
Categories (3, str): ['c' < 'b' < 'a']
12851285
12861286
>>> ser.sort_values()
12871287
2 c
12881288
1 b
12891289
0 a
12901290
3 a
12911291
dtype: category
1292-
Categories (3, object): ['c' < 'b' < 'a']
1292+
Categories (3, str): ['c' < 'b' < 'a']
12931293
12941294
For :class:`pandas.CategoricalIndex`:
12951295
@@ -1346,11 +1346,11 @@ def add_categories(self, new_categories) -> Self:
13461346
>>> c = pd.Categorical(["c", "b", "c"])
13471347
>>> c
13481348
['c', 'b', 'c']
1349-
Categories (2, object): ['b', 'c']
1349+
Categories (2, str): ['b', 'c']
13501350
13511351
>>> c.add_categories(["d", "a"])
13521352
['c', 'b', 'c']
1353-
Categories (4, object): ['b', 'c', 'd', 'a']
1353+
Categories (4, str): ['b', 'c', 'd', 'a']
13541354
"""
13551355

13561356
if not is_list_like(new_categories):
@@ -1414,11 +1414,11 @@ def remove_categories(self, removals) -> Self:
14141414
>>> c = pd.Categorical(["a", "c", "b", "c", "d"])
14151415
>>> c
14161416
['a', 'c', 'b', 'c', 'd']
1417-
Categories (4, object): ['a', 'b', 'c', 'd']
1417+
Categories (4, str): ['a', 'b', 'c', 'd']
14181418
14191419
>>> c.remove_categories(["d", "a"])
14201420
[NaN, 'c', 'b', 'c', NaN]
1421-
Categories (2, object): ['b', 'c']
1421+
Categories (2, str): ['b', 'c']
14221422
"""
14231423
from pandas import Index
14241424

@@ -1465,17 +1465,17 @@ def remove_unused_categories(self) -> Self:
14651465
>>> c = pd.Categorical(["a", "c", "b", "c", "d"])
14661466
>>> c
14671467
['a', 'c', 'b', 'c', 'd']
1468-
Categories (4, object): ['a', 'b', 'c', 'd']
1468+
Categories (4, str): ['a', 'b', 'c', 'd']
14691469
14701470
>>> c[2] = "a"
14711471
>>> c[4] = "c"
14721472
>>> c
14731473
['a', 'c', 'a', 'c', 'c']
1474-
Categories (4, object): ['a', 'b', 'c', 'd']
1474+
Categories (4, str): ['a', 'b', 'c', 'd']
14751475
14761476
>>> c.remove_unused_categories()
14771477
['a', 'c', 'a', 'c', 'c']
1478-
Categories (2, object): ['a', 'c']
1478+
Categories (2, str): ['a', 'c']
14791479
"""
14801480
idx, inv = np.unique(self._codes, return_inverse=True)
14811481

@@ -1540,35 +1540,35 @@ def map(
15401540
>>> cat = pd.Categorical(["a", "b", "c"])
15411541
>>> cat
15421542
['a', 'b', 'c']
1543-
Categories (3, object): ['a', 'b', 'c']
1543+
Categories (3, str): ['a', 'b', 'c']
15441544
>>> cat.map(lambda x: x.upper(), na_action=None)
15451545
['A', 'B', 'C']
1546-
Categories (3, object): ['A', 'B', 'C']
1546+
Categories (3, str): ['A', 'B', 'C']
15471547
>>> cat.map({"a": "first", "b": "second", "c": "third"}, na_action=None)
15481548
['first', 'second', 'third']
1549-
Categories (3, object): ['first', 'second', 'third']
1549+
Categories (3, str): ['first', 'second', 'third']
15501550
15511551
If the mapping is one-to-one the ordering of the categories is
15521552
preserved:
15531553
15541554
>>> cat = pd.Categorical(["a", "b", "c"], ordered=True)
15551555
>>> cat
15561556
['a', 'b', 'c']
1557-
Categories (3, object): ['a' < 'b' < 'c']
1557+
Categories (3, str): ['a' < 'b' < 'c']
15581558
>>> cat.map({"a": 3, "b": 2, "c": 1}, na_action=None)
15591559
[3, 2, 1]
15601560
Categories (3, int64): [3 < 2 < 1]
15611561
15621562
If the mapping is not one-to-one an :class:`~pandas.Index` is returned:
15631563
15641564
>>> cat.map({"a": "first", "b": "second", "c": "first"}, na_action=None)
1565-
Index(['first', 'second', 'first'], dtype='object')
1565+
Index(['first', 'second', 'first'], dtype='str')
15661566
15671567
If a `dict` is used, all unmapped categories are mapped to `NaN` and
15681568
the result is an :class:`~pandas.Index`:
15691569
15701570
>>> cat.map({"a": "first", "b": "second"}, na_action=None)
1571-
Index(['first', 'second', nan], dtype='object')
1571+
Index(['first', 'second', nan], dtype='str')
15721572
"""
15731573
assert callable(mapper) or is_dict_like(mapper)
15741574

@@ -2383,9 +2383,9 @@ def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]:
23832383
>>> c = pd.Categorical(list("aabca"))
23842384
>>> c
23852385
['a', 'a', 'b', 'c', 'a']
2386-
Categories (3, object): ['a', 'b', 'c']
2386+
Categories (3, str): ['a', 'b', 'c']
23872387
>>> c.categories
2388-
Index(['a', 'b', 'c'], dtype='object')
2388+
Index(['a', 'b', 'c'], dtype='str')
23892389
>>> c.codes
23902390
array([0, 0, 1, 2, 0], dtype=int8)
23912391
>>> c._reverse_indexer()
@@ -2517,10 +2517,10 @@ def unique(self) -> Self:
25172517
--------
25182518
>>> pd.Categorical(list("baabc")).unique()
25192519
['b', 'a', 'c']
2520-
Categories (3, object): ['a', 'b', 'c']
2520+
Categories (3, str): ['a', 'b', 'c']
25212521
>>> pd.Categorical(list("baab"), categories=list("abc"), ordered=True).unique()
25222522
['b', 'a']
2523-
Categories (3, object): ['a' < 'b' < 'c']
2523+
Categories (3, str): ['a' < 'b' < 'c']
25242524
"""
25252525
return super().unique()
25262526

@@ -2845,10 +2845,10 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
28452845
4 c
28462846
5 c
28472847
dtype: category
2848-
Categories (3, object): ['a', 'b', 'c']
2848+
Categories (3, str): ['a', 'b', 'c']
28492849
28502850
>>> s.cat.categories
2851-
Index(['a', 'b', 'c'], dtype='object')
2851+
Index(['a', 'b', 'c'], dtype='str')
28522852
28532853
>>> s.cat.rename_categories(list("cba"))
28542854
0 c
@@ -2858,7 +2858,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
28582858
4 a
28592859
5 a
28602860
dtype: category
2861-
Categories (3, object): ['c', 'b', 'a']
2861+
Categories (3, str): ['c', 'b', 'a']
28622862
28632863
>>> s.cat.reorder_categories(list("cba"))
28642864
0 a
@@ -2868,7 +2868,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
28682868
4 c
28692869
5 c
28702870
dtype: category
2871-
Categories (3, object): ['c', 'b', 'a']
2871+
Categories (3, str): ['c', 'b', 'a']
28722872
28732873
>>> s.cat.add_categories(["d", "e"])
28742874
0 a
@@ -2878,7 +2878,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
28782878
4 c
28792879
5 c
28802880
dtype: category
2881-
Categories (5, object): ['a', 'b', 'c', 'd', 'e']
2881+
Categories (5, str): ['a', 'b', 'c', 'd', 'e']
28822882
28832883
>>> s.cat.remove_categories(["a", "c"])
28842884
0 NaN
@@ -2888,7 +2888,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
28882888
4 NaN
28892889
5 NaN
28902890
dtype: category
2891-
Categories (1, object): ['b']
2891+
Categories (1, str): ['b']
28922892
28932893
>>> s1 = s.cat.add_categories(["d", "e"])
28942894
>>> s1.cat.remove_unused_categories()
@@ -2899,7 +2899,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
28992899
4 c
29002900
5 c
29012901
dtype: category
2902-
Categories (3, object): ['a', 'b', 'c']
2902+
Categories (3, str): ['a', 'b', 'c']
29032903
29042904
>>> s.cat.set_categories(list("abcde"))
29052905
0 a
@@ -2909,7 +2909,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
29092909
4 c
29102910
5 c
29112911
dtype: category
2912-
Categories (5, object): ['a', 'b', 'c', 'd', 'e']
2912+
Categories (5, str): ['a', 'b', 'c', 'd', 'e']
29132913
29142914
>>> s.cat.as_ordered()
29152915
0 a
@@ -2919,7 +2919,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
29192919
4 c
29202920
5 c
29212921
dtype: category
2922-
Categories (3, object): ['a' < 'b' < 'c']
2922+
Categories (3, str): ['a' < 'b' < 'c']
29232923
29242924
>>> s.cat.as_unordered()
29252925
0 a
@@ -2929,7 +2929,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
29292929
4 c
29302930
5 c
29312931
dtype: category
2932-
Categories (3, object): ['a', 'b', 'c']
2932+
Categories (3, str): ['a', 'b', 'c']
29332933
"""
29342934

29352935
def __init__(self, data) -> None:

pandas/core/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ def array(self) -> ExtensionArray:
567567
>>> ser = pd.Series(pd.Categorical(["a", "b", "a"]))
568568
>>> ser.array
569569
['a', 'b', 'a']
570-
Categories (2, str): [a, b]
570+
Categories (2, str): ['a', 'b']
571571
"""
572572
raise AbstractMethodError(self)
573573

@@ -1386,7 +1386,7 @@ def factorize(
13861386
... )
13871387
>>> ser
13881388
['apple', 'bread', 'bread', 'cheese', 'milk']
1389-
Categories (4, str): [apple < bread < cheese < milk]
1389+
Categories (4, str): ['apple' < 'bread' < 'cheese' < 'milk']
13901390
13911391
>>> ser.searchsorted('bread')
13921392
np.int64(1)

0 commit comments