Skip to content

Commit 9df8683

Browse files
ueshin authored and rxin committed
[SPARK-1938] [SQL] ApproxCountDistinctMergeFunction should return Int value.
`ApproxCountDistinctMergeFunction` should return `Int` value because the `dataType` of `ApproxCountDistinct` is `IntegerType`.

Author: Takuya UESHIN <[email protected]>

Closes apache#893 from ueshin/issues/SPARK-1938 and squashes the following commits:

3970e88 [Takuya UESHIN] Remove a superfluous line.
5ad7ec1 [Takuya UESHIN] Make dataType for each of CountDistinct, ApproxCountDistinctMerge and ApproxCountDistinct LongType.
cbe7c71 [Takuya UESHIN] Revert a change.
fc3ac0f [Takuya UESHIN] Fix evaluated value type of ApproxCountDistinctMergeFunction to Int.
1 parent 0682567 commit 9df8683

File tree

1 file changed

+4
-5
lines changed
  • sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions

1 file changed

+4
-5
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@ abstract class AggregateFunction
8282
override def dataType = base.dataType
8383

8484
def update(input: Row): Unit
85-
override def eval(input: Row): Any
8685

8786
// Do we really need this?
8887
override def newInstance() = makeCopy(productIterator.map { case a: AnyRef => a }.toArray)
@@ -166,7 +165,7 @@ case class CountDistinct(expressions: Seq[Expression]) extends AggregateExpressi
166165
override def children = expressions
167166
override def references = expressions.flatMap(_.references).toSet
168167
override def nullable = false
169-
override def dataType = IntegerType
168+
override def dataType = LongType
170169
override def toString = s"COUNT(DISTINCT ${expressions.mkString(",")})"
171170
override def newInstance() = new CountDistinctFunction(expressions, this)
172171
}
@@ -184,7 +183,7 @@ case class ApproxCountDistinctMerge(child: Expression, relativeSD: Double)
184183
extends AggregateExpression with trees.UnaryNode[Expression] {
185184
override def references = child.references
186185
override def nullable = false
187-
override def dataType = IntegerType
186+
override def dataType = LongType
188187
override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"
189188
override def newInstance() = new ApproxCountDistinctMergeFunction(child, this, relativeSD)
190189
}
@@ -193,7 +192,7 @@ case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05)
193192
extends PartialAggregate with trees.UnaryNode[Expression] {
194193
override def references = child.references
195194
override def nullable = false
196-
override def dataType = IntegerType
195+
override def dataType = LongType
197196
override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"
198197

199198
override def asPartial: SplitEvaluation = {
@@ -394,7 +393,7 @@ case class CountDistinctFunction(expr: Seq[Expression], base: AggregateExpressio
394393
}
395394
}
396395

397-
override def eval(input: Row): Any = seen.size
396+
override def eval(input: Row): Any = seen.size.toLong
398397
}
399398

400399
case class FirstFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction {

0 commit comments

Comments
 (0)