From 00451281622226dc09d02f6ea454b29e6a7b5999 Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Mon, 22 Aug 2016 18:29:21 +0800
Subject: [PATCH] Mark Collect as non-deterministic

---
 .../spark/sql/catalyst/expressions/aggregate/collect.scala | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
index ac2cefaddcf5..896ff61b2309 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala
@@ -54,6 +54,10 @@ abstract class Collect extends ImperativeAggregate {
 
   override def inputAggBufferAttributes: Seq[AttributeReference] = Nil
 
+  // Both `CollectList` and `CollectSet` are non-deterministic since their results depend on the
+  // actual order of input rows.
+  override def deterministic: Boolean = false
+
   protected[this] val buffer: Growable[Any] with Iterable[Any]
 
   override def initialize(b: MutableRow): Unit = {