Skip to content

Commit dcdc864

Browse files
dilipbiswal authored and yhuai committed
[SPARK-12558][SQL] AnalysisException when multiple functions applied in GROUP BY clause
cloud-fan Can you please take a look ? In this case, we are failing during check analysis while validating the aggregation expression. I have added a semanticEquals for HiveGenericUDF to fix this. Please let me know if this is the right way to address this issue. Author: Dilip Biswal <[email protected]> Closes #10520 from dilipbiswal/spark-12558. (cherry picked from commit dc7b387) Signed-off-by: Yin Huai <[email protected]> Conflicts: sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
1 parent f71e5cc commit dcdc864

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@ import scala.reflect.ClassTag
2828

2929
import com.esotericsoftware.kryo.Kryo
3030
import com.esotericsoftware.kryo.io.{Input, Output}
31+
import com.google.common.base.Objects
3132

3233
import org.apache.hadoop.conf.Configuration
3334
import org.apache.hadoop.fs.Path
3435
import org.apache.hadoop.hive.ql.exec.{UDF, Utilities}
3536
import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc}
37+
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro
3638
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils
3739
import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils}
3840
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector
@@ -47,6 +49,7 @@ private[hive] object HiveShim {
4749
// scale Hive 0.13 infers for BigDecimals from sources that don't specify them (e.g. UDFs)
4850
val UNLIMITED_DECIMAL_PRECISION = 38
4951
val UNLIMITED_DECIMAL_SCALE = 18
52+
val HIVE_GENERIC_UDF_MACRO_CLS = "org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro"
5053

5154
/*
5255
* This function in hive-0.13 became private, but we have to do this to work around a hive bug
@@ -125,6 +128,26 @@ private[hive] object HiveShim {
125128
// for Serialization
126129
def this() = this(null)
127130

131+
/**
 * Hash code consistent with [[equals]]: two wrappers for the same function
 * class must hash equally, and for `GenericUDFMacro` the macro body is mixed
 * in because distinct macros all share the same class name (SPARK-12558).
 */
override def hashCode(): Int = {
  if (functionClassName == HIVE_GENERIC_UDF_MACRO_CLS) {
    // Include the macro body so semantically different macros hash differently.
    Objects.hashCode(functionClassName, instance.asInstanceOf[GenericUDFMacro].getBody())
  } else if (functionClassName == null) {
    // functionClassName is null for instances created by the no-arg
    // serialization constructor (`this(null)`); equals treats two such
    // wrappers as equal, so return a stable value instead of throwing NPE.
    0
  } else {
    functionClassName.hashCode()
  }
}
138+
139+
/**
 * Two wrappers are equal when they reference the same function class. For
 * `GenericUDFMacro` the class name alone is not discriminating — every macro
 * uses it — so the underlying macro bodies are compared as well (SPARK-12558).
 */
override def equals(other: Any): Boolean = other match {
  case that: HiveFunctionWrapper =>
    if (functionClassName != that.functionClassName) {
      false
    } else if (functionClassName == HIVE_GENERIC_UDF_MACRO_CLS) {
      // Same macro class: require that both wrappers point to the same
      // underlying UDF by comparing the macro bodies.
      val thisBody = instance.asInstanceOf[GenericUDFMacro].getBody()
      val thatBody = that.instance.asInstanceOf[GenericUDFMacro].getBody()
      thisBody == thatBody
    } else {
      true
    }
  case _ => false
}
150+
128151
@transient
129152
def deserializeObjectByKryo[T: ClassTag](
130153
kryo: Kryo,

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,13 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
350350
sqlContext.dropTempTable("testUDF")
351351
}
352352

353+
test("Hive UDF in group by") {
  // Regression test for SPARK-12558: using the same Hive UDF expression in
  // both the select list and the GROUP BY clause used to fail analysis
  // because the expressions did not compare as semantically equal.
  Seq(Tuple1(1451400761)).toDF("test_date").registerTempTable("tab1")
  try {
    val count = sql("select date(cast(test_date as timestamp))" +
      " from tab1 group by date(cast(test_date as timestamp))").count()
    assert(count == 1)
  } finally {
    // Clean up the temp table even on failure, matching the cleanup done by
    // the other tests in this suite (see sqlContext.dropTempTable above).
    sqlContext.dropTempTable("tab1")
  }
}
359+
353360
test("SPARK-11522 select input_file_name from non-parquet table"){
354361

355362
withTempDir { tempDir =>

0 commit comments

Comments
 (0)