Skip to content

Commit c4590a3

Browse files
author
MomIsBestFriend
committed
Fixed examples in pandas/core/groupby/
1 parent b5092d8 commit c4590a3

File tree

4 files changed

+62
-22
lines changed

4 files changed

+62
-22
lines changed

ci/code_checks.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -274,8 +274,8 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
274274
pytest -q --doctest-modules pandas/core/series.py
275275
RET=$(($RET + $?)) ; echo $MSG "DONE"
276276

277-
MSG='Doctests groupby.py' ; echo $MSG
278-
pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
277+
MSG='Doctests groupby' ; echo $MSG
278+
pytest -q --doctest-modules pandas/core/groupby/
279279
RET=$(($RET + $?)) ; echo $MSG "DONE"
280280

281281
MSG='Doctests tools' ; echo $MSG

pandas/core/groupby/generic.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -833,10 +833,13 @@ class DataFrameGroupBy(GroupBy):
833833
"""
834834
Examples
835835
--------
836-
837-
>>> df = pd.DataFrame({'A': [1, 1, 2, 2],
838-
... 'B': [1, 2, 3, 4],
839-
... 'C': np.random.randn(4)})
836+
>>> df = pd.DataFrame(
837+
... {
838+
... "A": [1, 1, 2, 2],
839+
... "B": [1, 2, 3, 4],
840+
... "C": [0.362838, 0.227877, 1.267767, -0.562860],
841+
... }
842+
... )
840843
841844
>>> df
842845
A B C
@@ -876,7 +879,7 @@ class DataFrameGroupBy(GroupBy):
876879
B C
877880
min max sum
878881
A
879-
1 1 2 0.590716
882+
1 1 2 0.590715
880883
2 3 4 0.704907
881884
882885
To control the output names with different aggregations per column,
@@ -887,8 +890,9 @@ class DataFrameGroupBy(GroupBy):
887890
... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
888891
b_min c_sum
889892
A
890-
1 1 -1.956929
891-
2 3 -0.322183
893+
1 1 0.590715
894+
2 3 0.704907
895+
892896
893897
- The keywords are the *output* column names
894898
- The values are tuples whose first element is the column to select

pandas/core/groupby/groupby.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -198,14 +198,14 @@ class providing the base-class of operations.
198198
functions that expect Series, DataFrames, GroupBy or Resampler objects.
199199
Instead of writing
200200
201-
>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)
201+
>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP
202202
203203
You can write
204204
205205
>>> (df.groupby('group')
206206
... .pipe(f)
207207
... .pipe(g, arg1=a)
208-
... .pipe(h, arg2=b, arg3=c))
208+
... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP
209209
210210
which is much more readable.
211211
@@ -2005,7 +2005,7 @@ def cumcount(self, ascending: bool = True):
20052005
20062006
Essentially this is equivalent to
20072007
2008-
>>> self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))
2008+
``self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))``
20092009
20102010
Parameters
20112011
----------

pandas/core/groupby/grouper.py

Lines changed: 46 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -79,16 +79,52 @@ class Grouper:
7979
--------
8080
Syntactic sugar for ``df.groupby('Animal')``
8181
82-
>>> df.groupby(Grouper(key='A'))
83-
84-
Specify a resample operation on the column 'date'
85-
86-
>>> df.groupby(Grouper(key='date', freq='60s'))
87-
88-
Specify a resample operation on the level 'date' on the columns axis
89-
with a frequency of 60s
90-
91-
>>> df.groupby(Grouper(level='date', freq='60s', axis=1))
82+
>>> df = pd.DataFrame(
83+
... {
84+
... "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"],
85+
... "Speed": [100, 5, 200, 300, 15],
86+
... }
87+
... )
88+
>>> df
89+
Animal Speed
90+
0 Falcon 100
91+
1 Parrot 5
92+
2 Falcon 200
93+
3 Falcon 300
94+
4 Parrot 15
95+
>>> df.groupby(pd.Grouper(key="Animal")).mean()
96+
Speed
97+
Animal
98+
Falcon 200
99+
Parrot 10
100+
101+
102+
Specify a resample operation on the column 'Publish date'
103+
104+
>>> df = pd.DataFrame(
105+
... {
106+
... "Publish date": [
107+
... pd.Timestamp("2000-01-02"),
108+
... pd.Timestamp("2000-01-02"),
109+
... pd.Timestamp("2000-01-09"),
110+
... pd.Timestamp("2000-01-16")
111+
... ],
112+
... "ID": [0, 1, 2, 3],
113+
... "Price": [10, 20, 30, 40]
114+
... }
115+
... )
116+
>>> df
117+
Publish date ID Price
118+
0 2000-01-02 0 10
119+
1 2000-01-02 1 20
120+
2 2000-01-09 2 30
121+
3 2000-01-16 3 40
122+
>>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean()
123+
ID Price
124+
Publish date
125+
2000-01-02 0.5 15.0
126+
2000-01-09 2.0 30.0
127+
2000-01-16 3.0 40.0
92128
"""
93129

94130
_attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort")

0 commit comments

Comments
 (0)