Commit d8518cf

Updated Python tests.
1 parent 2727789 commit d8518cf

4 files changed: 21 additions & 20 deletions


python/pyspark/sql/dataframe.py

Lines changed: 5 additions & 5 deletions
@@ -802,11 +802,11 @@ def groupBy(self, *cols):
         Each element should be a column name (string) or an expression (:class:`Column`).
 
         >>> df.groupBy().avg().collect()
-        [Row(AVG(age)=3.5)]
+        [Row(avg(age)=3.5)]
         >>> df.groupBy('name').agg({'age': 'mean'}).collect()
-        [Row(name=u'Alice', AVG(age)=2.0), Row(name=u'Bob', AVG(age)=5.0)]
+        [Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]
         >>> df.groupBy(df.name).avg().collect()
-        [Row(name=u'Alice', AVG(age)=2.0), Row(name=u'Bob', AVG(age)=5.0)]
+        [Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]
         >>> df.groupBy(['name', df.age]).count().collect()
         [Row(name=u'Bob', age=5, count=1), Row(name=u'Alice', age=2, count=1)]
         """
@@ -864,10 +864,10 @@ def agg(self, *exprs):
         (shorthand for ``df.groupBy.agg()``).
 
         >>> df.agg({"age": "max"}).collect()
-        [Row(MAX(age)=5)]
+        [Row(max(age)=5)]
         >>> from pyspark.sql import functions as F
         >>> df.agg(F.min(df.age)).collect()
-        [Row(MIN(age)=2)]
+        [Row(min(age)=2)]
         """
         return self.groupBy().agg(*exprs)
 

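These doctest updates track a real behavior change, not just cosmetics: the aggregate column in each result Row is now keyed by a lowercase name. A minimal sketch of how the new names surface, assuming the modern SparkSession entry point (which postdates this commit) and the same toy data as the doctests:

# Sketch only: this commit's era used SQLContext; SparkSession came later.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

# The generated column is labelled avg(age) rather than AVG(age), so
# lookups in the collected Row must use the lowercase key.
row = df.groupBy().avg("age").collect()[0]
assert row["avg(age)"] == 3.5
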
python/pyspark/sql/functions.py

Lines changed: 2 additions & 2 deletions
@@ -266,7 +266,7 @@ def coalesce(*cols):
 
     >>> cDf.select(coalesce(cDf["a"], cDf["b"])).show()
     +-------------+
-    |Coalesce(a,b)|
+    |coalesce(a,b)|
     +-------------+
     |         null|
     |            1|
@@ -275,7 +275,7 @@ def coalesce(*cols):
 
     >>> cDf.select('*', coalesce(cDf["a"], lit(0.0))).show()
     +----+----+---------------+
-    |   a|   b|Coalesce(a,0.0)|
+    |   a|   b|coalesce(a,0.0)|
     +----+----+---------------+
     |null|null|            0.0|
     |   1|null|            1.0|

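For coalesce the rename is the same story: the generated column is now coalesce(a,b). A sketch of rebuilding the cDf fixture and pinning the output name with an alias (the alias name is an illustration, not part of the commit):

# Sketch of the coalesce doctest fixture; alias() avoids depending on the
# generated column name at all, which is the robust pattern in user code.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()
cDf = spark.createDataFrame([(None, None), (1, None), (None, 2)], ["a", "b"])

# Generated name is lowercase coalesce(a,b) after this change...
cDf.select(F.coalesce(cDf["a"], cDf["b"])).show()
# ...but an explicit alias sidesteps renames like this one entirely.
cDf.select(F.coalesce(cDf["a"], cDf["b"]).alias("first_non_null")).show()
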
python/pyspark/sql/group.py

Lines changed: 12 additions & 12 deletions
@@ -75,11 +75,11 @@ def agg(self, *exprs):
 
         >>> gdf = df.groupBy(df.name)
         >>> gdf.agg({"*": "count"}).collect()
-        [Row(name=u'Alice', COUNT(1)=1), Row(name=u'Bob', COUNT(1)=1)]
+        [Row(name=u'Alice', count(1)=1), Row(name=u'Bob', count(1)=1)]
 
         >>> from pyspark.sql import functions as F
         >>> gdf.agg(F.min(df.age)).collect()
-        [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)]
+        [Row(name=u'Alice', min(age)=2), Row(name=u'Bob', min(age)=5)]
         """
         assert exprs, "exprs should not be empty"
         if len(exprs) == 1 and isinstance(exprs[0], dict):
@@ -110,9 +110,9 @@ def mean(self, *cols):
         :param cols: list of column names (string). Non-numeric columns are ignored.
 
         >>> df.groupBy().mean('age').collect()
-        [Row(AVG(age)=3.5)]
+        [Row(avg(age)=3.5)]
         >>> df3.groupBy().mean('age', 'height').collect()
-        [Row(AVG(age)=3.5, AVG(height)=82.5)]
+        [Row(avg(age)=3.5, avg(height)=82.5)]
         """
 
     @df_varargs_api
@@ -125,9 +125,9 @@ def avg(self, *cols):
         :param cols: list of column names (string). Non-numeric columns are ignored.
 
         >>> df.groupBy().avg('age').collect()
-        [Row(AVG(age)=3.5)]
+        [Row(avg(age)=3.5)]
         >>> df3.groupBy().avg('age', 'height').collect()
-        [Row(AVG(age)=3.5, AVG(height)=82.5)]
+        [Row(avg(age)=3.5, avg(height)=82.5)]
         """
 
     @df_varargs_api
@@ -136,9 +136,9 @@ def max(self, *cols):
         """Computes the max value for each numeric columns for each group.
 
         >>> df.groupBy().max('age').collect()
-        [Row(MAX(age)=5)]
+        [Row(max(age)=5)]
         >>> df3.groupBy().max('age', 'height').collect()
-        [Row(MAX(age)=5, MAX(height)=85)]
+        [Row(max(age)=5, max(height)=85)]
         """
 
     @df_varargs_api
@@ -149,9 +149,9 @@ def min(self, *cols):
         :param cols: list of column names (string). Non-numeric columns are ignored.
 
         >>> df.groupBy().min('age').collect()
-        [Row(MIN(age)=2)]
+        [Row(min(age)=2)]
         >>> df3.groupBy().min('age', 'height').collect()
-        [Row(MIN(age)=2, MIN(height)=80)]
+        [Row(min(age)=2, min(height)=80)]
         """
 
     @df_varargs_api
@@ -162,9 +162,9 @@ def sum(self, *cols):
         :param cols: list of column names (string). Non-numeric columns are ignored.
 
         >>> df.groupBy().sum('age').collect()
-        [Row(SUM(age)=7)]
+        [Row(sum(age)=7)]
         >>> df3.groupBy().sum('age', 'height').collect()
-        [Row(SUM(age)=7, SUM(height)=165)]
+        [Row(sum(age)=7, sum(height)=165)]
         """
 

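The same lowercase keys apply to every GroupedData shorthand above. A sketch over a stand-in for the doctest's df3 fixture, assumed here to hold the age and height values the doctests show:

# Sketch: df3 mirrors the doctest fixture with two numeric columns.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df3 = spark.createDataFrame([(2, 80), (5, 85)], ["age", "height"])

row = df3.groupBy().sum("age", "height").collect()[0]
# Result columns follow the lowercase convention: sum(age), sum(height).
assert (row["sum(age)"], row["sum(height)"]) == (7, 165)
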
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala

Lines changed: 2 additions & 1 deletion
@@ -94,7 +94,6 @@ case class Min(child: Expression) extends PartialAggregate with trees.UnaryNode[
 
   override def nullable: Boolean = true
   override def dataType: DataType = child.dataType
-  override def toString: String = s"MIN($child)"
 
   override def asPartial: SplitEvaluation = {
     val partialMin = Alias(Min(child), "PartialMin")()
@@ -388,6 +387,8 @@ case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05)
 
 case class Average(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] {
 
+  override def prettyName: String = "avg"
+
   override def nullable: Boolean = true
 
   override def dataType: DataType = child.dataType match {

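On the Catalyst side, dropping Min's uppercase toString and giving Average a prettyName of "avg" is where the Python-visible names come from: the generated column name is derived from the aggregate expression's pretty form. A quick way to observe this from PySpark (a sketch, not part of the commit):

# Sketch: the schema field name reflects the Catalyst expression's pretty
# name, which this commit lowercases for aggregates such as Min and Average.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

print(df.agg(F.min(df.age)).schema.fieldNames())  # ['min(age)']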