Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,9 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
elem("decimal", _.isInstanceOf[lexical.FloatLit]) ^^ (_.chars)

protected lazy val baseExpression: PackratParser[Expression] =
expression ~ "[" ~ expression ~ "]" ~ expression ^^ {
case base ~ _ ~ ordinal ~ _ ~ field => GetField(GetItem(base, ordinal), field.toString)
} |
expression ~ "[" ~ expression <~ "]" ^^ {
case base ~ _ ~ ordinal => GetItem(base, ordinal)
} |
Expand Down Expand Up @@ -373,8 +376,13 @@ class SqlLexical(val keywords: Seq[String]) extends StdLexical {
)

override lazy val token: Parser[Token] = (
identChar ~ rep( identChar | digit ) ^^
{ case first ~ rest => processIdent(first :: rest mkString "") }
identChar ~ rep( identChar | digit ) ^^
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indent too much

{
case first ~ rest => first match {
case '.' => StringLit(rest mkString "")
case _ => processIdent(first :: rest mkString "")
}
}
| rep1(digit) ~ opt('.' ~> rep(digit)) ^^ {
case i ~ None => NumericLit(i mkString "")
case i ~ Some(d) => FloatLit(i.mkString("") + "." + d.mkString(""))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,41 @@ case class GetField(child: Expression, fieldName: String) extends UnaryExpressio

override def toString = s"$child.$fieldName"
}

/**
* Returns an array containing the value of fieldName
* for each element in the input array of type struct
*/
case class GetArrayField(child: Expression, fieldName: String) extends UnaryExpression {
type EvaluatedType = Any

def dataType = field.dataType
override def nullable = child.nullable || field.nullable
override def foldable = child.foldable

protected def arrayType = child.dataType match {
case ArrayType(s: StructType, _) => s
case otherType => sys.error(s"GetArrayField is not valid on fields of type $otherType")
}

lazy val field = if (arrayType.isInstanceOf[StructType]) {
arrayType.fields
.find(_.name == fieldName)
.getOrElse(sys.error(s"No such field $fieldName in ${child.dataType}"))
} else null


lazy val ordinal = arrayType.fields.indexOf(field)

override lazy val resolved = childrenResolved && child.dataType.isInstanceOf[ArrayType]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should also check that the element type of the ArrayType is StructType and that the requested field name can be found in that struct.


override def eval(input: Row): Any = {
val value : Seq[Row] = child.eval(input).asInstanceOf[Seq[Row]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove the space after value.

val v = value.map{ t =>
if (t == null) null else t(ordinal)
}
v
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can just use

value.map{ t =>
  if (t == null) null else t(ordinal)
}

as the last line of this eval function.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

= =

}

override def toString = s"$child.$fieldName"
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.types.StructType
import org.apache.spark.sql.catalyst.types.{ArrayType, StructType}
import org.apache.spark.sql.catalyst.trees

abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
Expand Down Expand Up @@ -108,6 +108,10 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
a.dataType match {
case StructType(fields) =>
Some(Alias(nestedFields.foldLeft(a: Expression)(GetField), nestedFields.last)())
case ArrayType(fields, _) => nestedFields.length match {
case 1 => Some(Alias(GetArrayField(a, nestedFields.head), nestedFields.last)())
case _ => None // can't resolve arrayOfStruct.field1._
}
case _ => None // Don't know how to resolve these field references
}
case Seq() => None // No matches.
Expand Down
23 changes: 14 additions & 9 deletions sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -292,24 +292,29 @@ class JsonSuite extends QueryTest {
sql("select structWithArrayFields.field1[1], structWithArrayFields.field2[3] from jsonTable"),
(5, null) :: Nil
)
}

ignore("Complex field and type inferring (Ignored)") {
val jsonSchemaRDD = jsonRDD(complexFieldAndType)
jsonSchemaRDD.registerTempTable("jsonTable")
checkAnswer(
sql("select arrayOfStruct.field1, arrayOfStruct.field2 from jsonTable"),
(Seq(true, false, null), Seq("str1", null, null)) :: Nil
)

// Right now, "field1" and "field2" are treated as aliases. We should fix it.
checkAnswer(
sql("select arrayOfStruct[0].field1, arrayOfStruct[0].field2 from jsonTable"),
(true, "str1") :: Nil
)

// Right now, the analyzer cannot resolve arrayOfStruct.field1 and arrayOfStruct.field2.
// Getting all values of a specific field from an array of structs.
}

ignore("Complex field and type inferring (Ignored)") {
val jsonSchemaRDD = jsonRDD(complexFieldAndType)
jsonSchemaRDD.registerTempTable("jsonTable")

// still need add filter??? I am not sure whether this function is necessary. quite complex
checkAnswer(
sql("select arrayOfStruct.field1, arrayOfStruct.field2 from jsonTable"),
(Seq(true, false), Seq("str1", null)) :: Nil
sql("select arrayOfStruct.field1 from jsonTable where arrayOfStruct.field1 = true"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you changing the test case since it still cannot work?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wang pangzi, someone add field3 in testData arrayOfStruct. So it requires another null.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I add sql("select arrayOfStruct.field1 from jsonTable where arrayOfStruct.field1 = true") this test case in ignored part. It does not work because I came up with it but did not solve it. Or it makes no sense to solve it.

(Seq(true)) :: Nil
)

}

test("Type conflict in primitive field values") {
Expand Down