From 585548999c81192c3936c9b96bcaeab68cf94c44 Mon Sep 17 00:00:00 2001
From: Sameer Abhyankar
Date: Mon, 14 Sep 2015 09:00:39 -0400
Subject: [PATCH 1/4] Add support for NOT NULL modifier in column definition

---
 .../sql/execution/datasources/DDLParser.scala | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
index f7a88b98c0b4..0f5f0a4acb40 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
@@ -59,6 +59,7 @@ class DDLParser(parseQuery: String => LogicalPlan)
   protected val AS = Keyword("AS")
   protected val COMMENT = Keyword("COMMENT")
   protected val REFRESH = Keyword("REFRESH")
+  protected val NULL = Keyword("NULL")
 
   protected lazy val ddl: Parser[LogicalPlan] = createTable | describeTable | refreshTable
 
@@ -173,13 +174,15 @@ class DDLParser(parseQuery: String => LogicalPlan)
     optionName ~ stringLit ^^ { case k ~ v => (k, v) }
 
   protected lazy val column: Parser[StructField] =
-    ident ~ dataType ~ (COMMENT ~> stringLit).? ^^ { case columnName ~ typ ~ cm =>
-      val meta = cm match {
-        case Some(comment) =>
-          new MetadataBuilder().putString(COMMENT.str.toLowerCase, comment).build()
-        case None => Metadata.empty
-      }
-
-      StructField(columnName, typ, nullable = true, meta)
+    ident ~ dataType ~ (NOT ~ NULL).? ~ (COMMENT ~> stringLit).? ^^ {
+      case columnName ~ typ ~ setNotNullable ~ cm =>
+        val meta = cm match {
+          case Some(comment) =>
+            new MetadataBuilder().putString(COMMENT.str.toLowerCase, comment).build()
+          case None => Metadata.empty
+        }
+
+        val isNullable = !setNotNullable.isDefined
+        StructField(columnName, typ, nullable = isNullable, meta)
    }
 }
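The grammar change above hinges on (NOT ~ NULL).? producing an Option: Some when both keywords were parsed, None otherwise, so an unannotated column stays nullable. The same combinator pattern can be tried standalone. The sketch below is illustrative only, not Spark's DDLParser: the object and case-class names are hypothetical, it assumes the scala-parser-combinators library is available, and it accepts uppercase keywords only where Spark's Keyword is case-insensitive.

import scala.util.parsing.combinator.JavaTokenParsers

// Hypothetical, self-contained illustration -- not Spark's DDLParser.
object ColumnSketch extends JavaTokenParsers {
  case class Column(name: String, dataType: String, nullable: Boolean)

  // Both keywords must appear, in order; ^^^ collapses the pair to one token.
  private val notNull: Parser[String] = "NOT" ~ "NULL" ^^^ "NOT NULL"

  // opt(...) yields Some only when the clause parsed; None keeps the column
  // nullable, mirroring the isNullable logic in the patch above.
  val column: Parser[Column] =
    ident ~ ident ~ opt(notNull) ^^ {
      case name ~ dataType ~ maybeNotNull =>
        Column(name, dataType, nullable = maybeNotNull.isEmpty)
    }

  def main(args: Array[String]): Unit = {
    println(parseAll(column, "tCol1 INT NOT NULL").get) // Column(tCol1,INT,false)
    println(parseAll(column, "tCol2 STRING").get)       // Column(tCol2,STRING,true)
  }
}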
From 108b5ea5c361d3f52f106ad604a443ed546072ba Mon Sep 17 00:00:00 2001
From: Sameer Abhyankar
Date: Fri, 2 Oct 2015 16:06:50 -0400
Subject: [PATCH 2/4] Add test case for support of NOT NULL modifier in create table statement

---
 .../spark/sql/sources/DDLTestSuite.scala | 29 ++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index 5f8514e1a241..234896cf4cc7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -17,12 +17,16 @@
 
 package org.apache.spark.sql.sources
 
+import java.io.File
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.util.Utils
+import org.scalatest.BeforeAndAfter
 
 class DDLScanSource extends RelationProvider {
   override def createRelation(
@@ -69,8 +73,9 @@ case class SimpleDDLScan(from: Int, to: Int, table: String)(@transient val sqlCo
   }
 }
 
-class DDLTestSuite extends DataSourceTest with SharedSQLContext {
+class DDLTestSuite extends DataSourceTest with SharedSQLContext with BeforeAndAfter {
   protected override lazy val sql = caseInsensitiveContext.sql _
+  private var path: File = null
 
   override def beforeAll(): Unit = {
     super.beforeAll()
@@ -84,6 +89,12 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext {
         | Table 'test1'
         |)
       """.stripMargin)
+
+    path = Utils.createTempDir()
+  }
+
+  after {
+    Utils.deleteRecursively(path)
+  }
 
   sqlTest(
@@ -113,4 +124,20 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext {
     assert(attributes.map(_.name) === Seq("col_name", "data_type", "comment"))
     assert(attributes.map(_.dataType).toSet === Set(StringType))
   }
+
+  test("SPARK-7012 Create table statement should support NOT NULL modifier for columns") {
+    sql(
+      s"""
+        |CREATE TEMPORARY TABLE tempTableDDL
+        |( tCol1 INT NOT NULL,
+        |  tCol2 STRING
+        |)
+        |USING parquet
+        |OPTIONS (
+        |  path '${path.toString}'
+        |)
+      """.stripMargin
+    )
+    caseInsensitiveContext.dropTempTable("tempTableDDL")
+  }
 }
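One quirk of the setup above: the temporary directory is created once in beforeAll, but the after block runs following every test, so any later test in the suite would find the directory already deleted. The next patch sidesteps this by switching to withTempPath, which scopes the directory to a single test body. For reference, a per-test pairing of creation and cleanup looks like the sketch below; this is a hypothetical suite, not part of the patch, and it only compiles inside Spark's own test tree because org.apache.spark.util.Utils is private[spark].

import java.io.File

import org.scalatest.{BeforeAndAfter, FunSuite}

import org.apache.spark.util.Utils

// Hypothetical suite with a per-test lifecycle: each test gets a fresh
// directory and cleans it up afterwards, so no test sees another's leftovers.
class TempDirSuiteSketch extends FunSuite with BeforeAndAfter {
  private var path: File = null

  before { path = Utils.createTempDir() }
  after { Utils.deleteRecursively(path) }

  test("temp dir is present while the test runs") {
    assert(path.isDirectory)
  }
}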
From 4fb680270d412ea120293e109cb25123cefe4667 Mon Sep 17 00:00:00 2001
From: Sameer Abhyankar
Date: Sat, 3 Oct 2015 16:08:58 -0400
Subject: [PATCH 3/4] Modify test case to use withTempPath and change variable name to notNull

---
 .../sql/execution/datasources/DDLParser.scala |  4 +-
 .../spark/sql/sources/DDLTestSuite.scala      | 39 ++++++++-----------
 2 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
index 0f5f0a4acb40..eb6822c00752 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
@@ -175,14 +175,14 @@ class DDLParser(parseQuery: String => LogicalPlan)
 
   protected lazy val column: Parser[StructField] =
     ident ~ dataType ~ (NOT ~ NULL).? ~ (COMMENT ~> stringLit).? ^^ {
-      case columnName ~ typ ~ setNotNullable ~ cm =>
+      case columnName ~ typ ~ notNull ~ cm =>
         val meta = cm match {
           case Some(comment) =>
             new MetadataBuilder().putString(COMMENT.str.toLowerCase, comment).build()
           case None => Metadata.empty
         }
 
-        val isNullable = !setNotNullable.isDefined
+        val isNullable = !notNull.isDefined
         StructField(columnName, typ, nullable = isNullable, meta)
     }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index 234896cf4cc7..ca29d4f3fb73 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.sources
 
-import java.io.File
 
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
@@ -25,8 +24,6 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
-import org.apache.spark.util.Utils
-import org.scalatest.BeforeAndAfter
 
 class DDLScanSource extends RelationProvider {
   override def createRelation(
@@ -73,9 +70,8 @@ case class SimpleDDLScan(from: Int, to: Int, table: String)(@transient val sqlCo
   }
 }
 
-class DDLTestSuite extends DataSourceTest with SharedSQLContext with BeforeAndAfter {
+class DDLTestSuite extends DataSourceTest with SharedSQLContext {
   protected override lazy val sql = caseInsensitiveContext.sql _
-  private var path: File = null
 
   override def beforeAll(): Unit = {
     super.beforeAll()
@@ -89,12 +85,6 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext with BeforeAndAf
         | Table 'test1'
         |)
       """.stripMargin)
-
-    path = Utils.createTempDir()
-  }
-
-  after {
-    Utils.deleteRecursively(path)
-  }
 
   sqlTest(
@@ -126,18 +116,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext with BeforeAndAf
   }
 
   test("SPARK-7012 Create table statement should support NOT NULL modifier for columns") {
-    sql(
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+      sql(
       s"""
-        |CREATE TEMPORARY TABLE tempTableDDL
-        |( tCol1 INT NOT NULL,
-        |  tCol2 STRING
-        |)
-        |USING parquet
-        |OPTIONS (
-        |  path '${path.toString}'
-        |)
-      """.stripMargin
-    )
-    caseInsensitiveContext.dropTempTable("tempTableDDL")
+        |CREATE TEMPORARY TABLE tempTableDDL
+        |( tCol1 INT NOT NULL,
+        |  tCol2 STRING
+        |)
+        |USING parquet
+        |OPTIONS (
+        |  path '$path'
+        |)
+      """.stripMargin
+      )
+      caseInsensitiveContext.dropTempTable("tempTableDDL")
+    }
   }
 }
From f68a053ca5b2ba705ca490a1cf794894b6ffcaae Mon Sep 17 00:00:00 2001
From: Sameer Abhyankar
Date: Sat, 3 Oct 2015 17:11:39 -0400
Subject: [PATCH 4/4] Additional cosmetic changes

---
 .../org/apache/spark/sql/execution/datasources/DDLParser.scala | 2 +-
 .../test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
index eb6822c00752..7c2cc5475eb9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
@@ -182,7 +182,7 @@ class DDLParser(parseQuery: String => LogicalPlan)
           case None => Metadata.empty
         }
 
-        val isNullable = !notNull.isDefined
+        val isNullable = notNull.isEmpty
         StructField(columnName, typ, nullable = isNullable, meta)
     }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index ca29d4f3fb73..213654c7cd2d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.sources
 
-
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.InternalRow
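With patch 4, notNull.isEmpty states directly what !notNull.isDefined spelled negatively: a column is nullable exactly when no NOT NULL clause was parsed. The SPARK-7012 test only proves the statement parses and the table registers; the recorded flag itself could be checked with a couple of extra assertions placed before dropTempTable, sketched below (same suite context, not part of the patches). Note the parser only records nullability in the StructField; nothing here adds runtime enforcement.

// Possible extra assertions inside the SPARK-7012 test, before dropTempTable.
// StructType.apply(name) returns the StructField with that name.
val schema = caseInsensitiveContext.table("tempTableDDL").schema
assert(!schema("tCol1").nullable)  // NOT NULL was recorded in the schema
assert(schema("tCol2").nullable)   // unannotated columns stay nullable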