From 5cae64b0da57a3f45b54bcc39c18463d3945a934 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sun, 31 Dec 2017 19:27:00 +0900
Subject: [PATCH] Support array values as options in SQL

---
 .../spark/sql/catalyst/parser/SqlBase.g4      |  9 +++++++++
 .../spark/sql/execution/SparkSqlParser.scala  | 25 +++++++++++++++++++++++--
 .../apache/spark/sql/sources/interfaces.scala | 28 ++++++++++++++++++++++++++--
 .../execution/command/DDLParserSuite.scala    | 18 ++++++++++++------
 4 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 6fe995f650d55..3bb4e45d88ea5 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -295,12 +295,21 @@ tablePropertyKey
     ;
 
 tablePropertyValue
+    : propertyValue
+    | propertyArrayValue
+    ;
+
+propertyValue
     : INTEGER_VALUE
     | DECIMAL_VALUE
     | booleanValue
     | STRING
     ;
 
+propertyArrayValue
+    : '[' propertyValue (',' propertyValue)* ']'
+    ;
+
 constantList
     : '(' constant (',' constant)* ')'
     ;
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 29b584b55972c..f4912772335d4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -23,6 +23,8 @@ import scala.collection.JavaConverters._
 
 import org.antlr.v4.runtime.{ParserRuleContext, Token}
 import org.antlr.v4.runtime.tree.TerminalNode
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods.{compact, render}
 
 import org.apache.spark.sql.SaveMode
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
@@ -589,10 +591,29 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
   }
 
   /**
-   * A table property value can be String, Integer, Boolean or Decimal. This function extracts
-   * the property value based on whether its a string, integer, boolean or decimal literal.
+   * A table property value can be String, Integer, Boolean or Decimal. An array of such values
+   * can also be set. This function extracts the property value using [[visitPropertyValue]].
    */
   override def visitTablePropertyValue(value: TablePropertyValueContext): String = {
+    if (value == null) {
+      null
+    } else if (value.propertyValue != null) {
+      visitPropertyValue(value.propertyValue)
+    } else if (value.propertyArrayValue != null) {
+      val values = value.propertyArrayValue.propertyValue.asScala.map { v =>
+        visitPropertyValue(v)
+      }
+      compact(render(values))
+    } else {
+      value.getText
+    }
+  }
+
+  /**
+   * This function extracts an individual value based on whether it's a string, integer, boolean
+   * or decimal literal.
+   */
+  override def visitPropertyValue(value: PropertyValueContext): String = {
     if (value == null) {
       null
     } else if (value.STRING != null) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index 6057a795c8bf5..7fc475d69b5b4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -71,7 +71,8 @@ trait RelationProvider {
    * Returns a new base relation with the given parameters.
    *
    * @note The parameters' keywords are case insensitive and this insensitivity is enforced
-   * by the Map that is passed to the function.
+   * by the Map that is passed to the function. Also, a value in the Map can be a JSON
+   * array string if users set an array of values via the option APIs.
    */
   def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation
 }
@@ -102,7 +103,8 @@ trait SchemaRelationProvider {
    * Returns a new base relation with the given parameters and user defined schema.
    *
    * @note The parameters' keywords are case insensitive and this insensitivity is enforced
-   * by the Map that is passed to the function.
+   * by the Map that is passed to the function. Also, a value in the Map can be a JSON
+   * array string if users set an array of values via the option APIs.
    */
   def createRelation(
       sqlContext: SQLContext,
@@ -122,7 +124,12 @@ trait StreamSourceProvider {
 
   /**
    * Returns the name and schema of the source that can be used to continually read data.
+   *
    * @since 2.0.0
+   *
+   * @note The parameters' keywords are case insensitive and this insensitivity is enforced
+   * by the Map that is passed to the function. Also, a value in the Map can be a JSON
+   * array string if users set an array of values via the option APIs.
    */
   def sourceSchema(
       sqlContext: SQLContext,
@@ -131,7 +138,13 @@
       parameters: Map[String, String]): (String, StructType)
 
   /**
+   * Returns a source that can be used to continually read data.
+   *
    * @since 2.0.0
+   *
+   * @note The parameters' keywords are case insensitive and this insensitivity is enforced
+   * by the Map that is passed to the function. Also, a value in the Map can be a JSON
+   * array string if users set an array of values via the option APIs.
    */
   def createSource(
       sqlContext: SQLContext,
@@ -150,6 +163,13 @@
 @Experimental
 @InterfaceStability.Unstable
 trait StreamSinkProvider {
+  /**
+   * Returns a sink that can be used to continually write data.
+   *
+   * @note The parameters' keywords are case insensitive and this insensitivity is enforced
+   * by the Map that is passed to the function. Also, a value in the Map can be a JSON
+   * array string if users set an array of values via the option APIs.
+   */
   def createSink(
       sqlContext: SQLContext,
       parameters: Map[String, String],
@@ -172,6 +192,10 @@ trait CreatableRelationProvider {
    * @return Relation with a known schema
    *
    * @since 1.3.0
+   *
+   * @note The parameters' keywords are case insensitive and this insensitivity is enforced
+   * by the Map that is passed to the function. Also, a value in the Map can be a JSON
+   * array string if users set an array of values via the option APIs.
    */
   def createRelation(
       sqlContext: SQLContext,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index eb7c33590b602..97760b5522baa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -22,6 +22,9 @@ import java.util.Locale
 
 import scala.reflect.{classTag, ClassTag}
 
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods.{compact, render}
+
 import org.apache.spark.sql.{AnalysisException, SaveMode}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
@@ -1099,15 +1102,16 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
       """
         |CREATE DATABASE database_name
         |LOCATION '/home/user/db'
-        |WITH DBPROPERTIES ('a'=1, 'b'=0.1, 'c'=TRUE)
+        |WITH DBPROPERTIES ('a'=1, 'b'=0.1, 'c'=TRUE, 'd'=[1, 0.1, TRUE, 'e'])
       """.stripMargin
     val parsed = parser.parsePlan(sql)
+    val dValue = compact(render(Seq(1.toString, 0.1.toString, true.toString, "e")))
     val expected = CreateDatabaseCommand(
       "database_name",
       ifNotExists = false,
       Some("/home/user/db"),
       None,
-      Map("a" -> "1", "b" -> "0.1", "c" -> "true"))
+      Map("a" -> "1", "b" -> "0.1", "c" -> "true", "d" -> dValue))
 
     comparePlans(parsed, expected)
   }
@@ -1116,12 +1120,13 @@
     val sql =
       """
         |ALTER TABLE table_name
-        |SET TBLPROPERTIES ('a' = 1, 'b' = 0.1, 'c' = TRUE)
+        |SET TBLPROPERTIES ('a' = 1, 'b' = 0.1, 'c' = TRUE, 'd' = [1, 0.1, TRUE, 'e'])
       """.stripMargin
     val parsed = parser.parsePlan(sql)
+    val dValue = compact(render(Seq(1.toString, 0.1.toString, true.toString, "e")))
     val expected = AlterTableSetPropertiesCommand(
       TableIdentifier("table_name"),
-      Map("a" -> "1", "b" -> "0.1", "c" -> "true"),
+      Map("a" -> "1", "b" -> "0.1", "c" -> "true", "d" -> dValue),
       isView = false)
 
     comparePlans(parsed, expected)
@@ -1131,14 +1136,15 @@
     val sql =
       """
         |CREATE TABLE table_name USING json
-        |OPTIONS (a 1, b 0.1, c TRUE)
+        |OPTIONS (a 1, b 0.1, c TRUE, d [1, 0.1, TRUE, 'e'])
      """.stripMargin
+    val dValue = compact(render(Seq(1.toString, 0.1.toString, true.toString, "e")))
     val expectedTableDesc = CatalogTable(
       identifier = TableIdentifier("table_name"),
       tableType = CatalogTableType.MANAGED,
       storage = CatalogStorageFormat.empty.copy(
-        properties = Map("a" -> "1", "b" -> "0.1", "c" -> "true")
+        properties = Map("a" -> "1", "b" -> "0.1", "c" -> "true", "d" -> dValue)
       ),
       schema = new StructType,
       provider = Some("json")
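
Reviewer note: below is a minimal sketch of how a data source could consume the
JSON-array-encoded option values on the receiving side of these APIs. The
PathsRelationProvider class, the "paths" option key, and the empty-schema relation
are hypothetical illustrations, not part of this patch. An option set with the new
array syntax, e.g. OPTIONS (paths ['a.json', 'b.json']), arrives in the parameters
Map as the JSON string ["a.json","b.json"] and can be decoded with json4s, which
Spark already depends on:

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider}
import org.apache.spark.sql.types.StructType
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse

// Hypothetical provider decoding a JSON array option produced by this patch, e.g.
//   CREATE TABLE t USING PathsRelationProvider OPTIONS (paths ['a.json', 'b.json'])
class PathsRelationProvider extends RelationProvider {
  override def createRelation(
      ctx: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    implicit val formats: DefaultFormats.type = DefaultFormats
    // "paths" arrives as the string ["a.json","b.json"] when set via the array syntax.
    val paths: Seq[String] = parameters.get("paths")
      .map(json => parse(json).extract[Seq[String]])
      .getOrElse(Seq.empty)
    // A real implementation would build a relation over `paths`; an empty schema
    // keeps this sketch self-contained.
    new BaseRelation {
      override val sqlContext: SQLContext = ctx
      override val schema: StructType = StructType(Nil)
    }
  }
}

Scalar option values are unaffected by this change: visitPropertyValue still returns
their literal text, so only options written with the new '[...]' syntax arrive
JSON-encoded.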