-
Notifications
You must be signed in to change notification settings - Fork 28.9k
SPARK-2096 [SQL]: Correctly parse dot notations for accessing an array of structs #2082
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -101,3 +101,41 @@ case class GetField(child: Expression, fieldName: String) extends UnaryExpressio | |
|
|
||
| override def toString = s"$child.$fieldName" | ||
| } | ||
|
|
||
| /** | ||
| * Returns an array containing the value of fieldName | ||
| * for each element in the input array of type struct | ||
| */ | ||
| case class GetArrayField(child: Expression, fieldName: String) extends UnaryExpression { | ||
| type EvaluatedType = Any | ||
|
|
||
| def dataType = field.dataType | ||
| override def nullable = child.nullable || field.nullable | ||
| override def foldable = child.foldable | ||
|
|
||
| protected def arrayType = child.dataType match { | ||
| case ArrayType(s: StructType, _) => s | ||
| case otherType => sys.error(s"GetArrayField is not valid on fields of type $otherType") | ||
| } | ||
|
|
||
| // The StructField named fieldName inside the array's element struct. | ||
| // arrayType either yields the element StructType or fails with sys.error, | ||
| // so no extra type test or null fallback is needed here. | ||
| lazy val field = arrayType.fields | ||
| .find(_.name == fieldName) | ||
| .getOrElse(sys.error(s"No such field $fieldName in ${child.dataType}")) | ||
|
|
||
|
|
||
| lazy val ordinal = arrayType.fields.indexOf(field) | ||
|
|
||
| // Resolved only when the child is an array whose elements are structs | ||
| // and that struct actually contains a field named fieldName. | ||
| override lazy val resolved = childrenResolved && (child.dataType match { | ||
| case ArrayType(s: StructType, _) => s.fields.exists(_.name == fieldName) | ||
| case _ => false | ||
| }) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should also check that the element type of the ArrayType is StructType and that the requested field name can be found in that struct. |
||
|
|
||
| override def eval(input: Row): Any = { | ||
| val value : Seq[Row] = child.eval(input).asInstanceOf[Seq[Row]] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove the space after `value`. |
||
| val v = value.map{ t => | ||
| if (t == null) null else t(ordinal) | ||
| } | ||
| v | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can just use the `value.map { ... }` expression as the last line of this method.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. = = |
||
| } | ||
|
|
||
| override def toString = s"$child.$fieldName" | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -292,24 +292,29 @@ class JsonSuite extends QueryTest { | |
| sql("select structWithArrayFields.field1[1], structWithArrayFields.field2[3] from jsonTable"), | ||
| (5, null) :: Nil | ||
| ) | ||
| } | ||
|
|
||
| ignore("Complex field and type inferring (Ignored)") { | ||
| val jsonSchemaRDD = jsonRDD(complexFieldAndType) | ||
| jsonSchemaRDD.registerTempTable("jsonTable") | ||
| checkAnswer( | ||
| sql("select arrayOfStruct.field1, arrayOfStruct.field2 from jsonTable"), | ||
| (Seq(true, false, null), Seq("str1", null, null)) :: Nil | ||
| ) | ||
|
|
||
| // Right now, "field1" and "field2" are treated as aliases. We should fix it. | ||
| checkAnswer( | ||
| sql("select arrayOfStruct[0].field1, arrayOfStruct[0].field2 from jsonTable"), | ||
| (true, "str1") :: Nil | ||
| ) | ||
|
|
||
| // Right now, the analyzer cannot resolve arrayOfStruct.field1 and arrayOfStruct.field2. | ||
| // Getting all values of a specific field from an array of structs. | ||
| } | ||
|
|
||
| ignore("Complex field and type inferring (Ignored)") { | ||
| val jsonSchemaRDD = jsonRDD(complexFieldAndType) | ||
| jsonSchemaRDD.registerTempTable("jsonTable") | ||
|
|
||
| // still need add filter??? I am not sure whether this function is necessary. quite complex | ||
| checkAnswer( | ||
| sql("select arrayOfStruct.field1, arrayOfStruct.field2 from jsonTable"), | ||
| (Seq(true, false), Seq("str1", null)) :: Nil | ||
| sql("select arrayOfStruct.field1 from jsonTable where arrayOfStruct.field1 = true"), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are you changing the test case, since it still cannot work?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. wang pangzi, someone added field3 to arrayOfStruct in the test data, so it requires another null.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added the test case sql("select arrayOfStruct.field1 from jsonTable where arrayOfStruct.field1 = true") to the ignored part. It does not work because I came up with it but have not solved it. Or perhaps it makes no sense to solve it. |
||
| (Seq(true)) :: Nil | ||
| ) | ||
|
|
||
| } | ||
|
|
||
| test("Type conflict in primitive field values") { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Too much indentation.