@@ -140,25 +140,58 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate {
140140 require(list != null , " list should not be null" )
141141 override def checkInputDataTypes (): TypeCheckResult = {
142142 list match {
143- case ListQuery (sub, _ , _) :: Nil =>
143+ case (l @ ListQuery (sub, children , _) ) :: Nil =>
144144 val valExprs = value match {
145145 case cns : CreateNamedStruct => cns.valExprs
146146 case expr => Seq (expr)
147147 }
148- if (valExprs.length != sub.output.length) {
149- TypeCheckResult .TypeCheckFailure (
148+
149+ // SPARK-21759:
150+ // It is possible that the subquery plan has more output than value expressions, because
151+ // the condition expressions in `ListQuery` might use part of subquery plan's output.
152+ // For example, in the following query plan, the condition of `ListQuery` uses value#207
153+ // from the subquery. Here the subquery output has size 2, while the value expression
154+ // has size 1.
155+ //
156+ // Filter key#201 IN (list#200 [(value#207 = min(value)#204)])
157+ // : +- Project [key#206, value#207]
158+ // : +- Filter (value#207 > val_9)
159+
160+ // Take the subset of the subquery output that is not going to be matched with value
161+ // expressions and is also not used in condition expressions, if any.
162+ val subqueryOutputNotInCondition = sub.output.drop(valExprs.length).filter { attr =>
163+ l.children.forall { c =>
164+ ! c.references.contains(attr)
165+ }
166+ }
167+
168+ val basicErrorMessage =
169+ s """
170+ |The number of columns in the left hand side of an IN subquery does not match the
171+ |number of columns in the output of subquery.
172+ |#columns in left hand side: ${valExprs.length}.
173+ |#columns in right hand side: ${sub.output.length}.
174+ |Left side columns:
175+ |[ ${valExprs.map(_.sql).mkString(" , " )}].
176+ |Right side columns:
177+ |[ ${sub.output.map(_.sql).mkString(" , " )}].
178+ """ .stripMargin
179+
180+ if (valExprs.length > sub.output.length) {
181+ TypeCheckResult .TypeCheckFailure (basicErrorMessage)
182+ } else if (subqueryOutputNotInCondition.nonEmpty) {
183+ val finalErrorMessage = basicErrorMessage +
150184 s """
151- |The number of columns in the left hand side of an IN subquery does not match the
152- |number of columns in the output of subquery.
153- |#columns in left hand side: ${valExprs.length}.
154- |#columns in right hand side: ${sub.output.length}.
155- |Left side columns:
156- |[ ${valExprs.map(_.sql).mkString(" , " )}].
157- |Right side columns:
158- |[ ${sub.output.map(_.sql).mkString(" , " )}].
159- """ .stripMargin)
185+ | The additional output in subquery aren't used in the condition of subquery.
186+ | Additional output:
187+ | [ ${subqueryOutputNotInCondition.map(_.sql).mkString(" , " )}].
188+ | Condition:
189+ | [ ${children.map(_.sql).mkString(" , " )}].
190+ | ${l.references}
191+ """ .stripMargin
192+ TypeCheckResult .TypeCheckFailure (finalErrorMessage)
160193 } else {
161- val mismatchedColumns = valExprs.zip(sub.output).flatMap {
194+ val mismatchedColumns = valExprs.zip(sub.output.take(valExprs.length) ).flatMap {
162195 case (l, r) if l.dataType != r.dataType =>
163196 s " ( ${l.sql}: ${l.dataType.catalogString}, ${r.sql}: ${r.dataType.catalogString}) "
164197 case _ => None
0 commit comments