From 50f74d26ec1bc37c5f5bea054da60a1910778e46 Mon Sep 17 00:00:00 2001
From: Xiao Li <gatorsmile@gmail.com>
Date: Sun, 5 Mar 2017 20:18:20 -0800
Subject: [PATCH 1/4] add parseTableSchema API

---
 .../sql/catalyst/parser/AstBuilder.scala      |  2 +-
 .../sql/catalyst/parser/ParseDriver.scala     |  7 +-
 .../sql/catalyst/parser/ParserInterface.scala |  4 +
 .../parser/TableSchemaParserSuite.scala       | 85 +++++++++++++++++++
 4 files changed, 96 insertions(+), 2 deletions(-)
 create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index d2e091f4dda6..1c46cca7fdbf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1476,7 +1476,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   /**
    * Create top level table schema.
    */
-  protected def createSchema(ctx: ColTypeListContext): StructType = {
+  def createSchema(ctx: ColTypeListContext): StructType = {
     StructType(Option(ctx).toSeq.flatMap(visitColTypeList))
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d687a85c18b6..8fd2ac5c1316 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.types.{DataType, StructType}
 
 /**
  * Base SQL parsing infrastructure.
@@ -49,6 +49,11 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
     astBuilder.visitSingleTableIdentifier(parser.singleTableIdentifier())
   }
 
+  /** Creates StructType for a given SQL string. */
+  override def parseTableSchema(sqlText: String): StructType = parse(sqlText) { parser =>
+    astBuilder.createSchema(parser.colTypeList())
+  }
+
   /** Creates LogicalPlan for a given SQL string. */
   override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
     astBuilder.visitSingleStatement(parser.singleStatement()) match {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
index 7f35d650b957..b2662d7617fe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.parser
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.types.StructType
 
 /**
  * Interface for a parser.
@@ -33,4 +34,7 @@ trait ParserInterface {
 
   /** Creates TableIdentifier for a given SQL string. */
   def parseTableIdentifier(sqlText: String): TableIdentifier
+
+  /** Creates StructType for a given SQL string. */
+  def parseTableSchema(sqlText: String): StructType
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
new file mode 100644
index 000000000000..1e86796f2dc2
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
@@ -0,0 +1,85 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.spark.sql.catalyst.parser
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.types._
+
+class TableSchemaParserSuite extends SparkFunSuite {
+
+  def parse(sql: String): DataType = CatalystSqlParser.parseTableSchema(sql)
+
+  def checkTableSchema(tableSchemaString: String, expectedDataType: DataType): Unit = {
+    test(s"parse ${tableSchemaString.replace("\n", "")}") {
+      assert(parse(tableSchemaString) === expectedDataType)
+    }
+  }
+
+  checkTableSchema("a int", (new StructType).add("a", "int"))
+  checkTableSchema("A int", (new StructType).add("A", "int"))
+  checkTableSchema("a INT", (new StructType).add("a", "int"))
+  checkTableSchema("`!@#$%.^&*()` string", (new StructType).add("!@#$%.^&*()", "string"))
+  checkTableSchema("a int, b long", (new StructType).add("a", "int").add("b", "long"))
+  checkTableSchema("a STRUCT<intType: int, ts:timestamp>",
+    StructType(
+      StructField("a", StructType(
+        StructField("intType", IntegerType) ::
+        StructField("ts", TimestampType) :: Nil)) :: Nil))
+
+  checkTableSchema(
+    "a int comment 'test'",
+    (new StructType).add("a", "int", nullable = true, "test"))
+
+  test("complex hive type") {
+    val tableSchemaString =
+      """
+        |complexStructCol struct<
+        |struct:struct<deciMal:DECimal, anotherDecimal:decimAL(5,2)>,
+        |MAP:Map<timestamp, varchar(10)>,
+        |arrAy:Array<double>,
+        |anotherArray:Array<char(9)>>
+      """.stripMargin.replace("\n", "")
+
+    val builder = new MetadataBuilder
+    builder.putString(HIVE_TYPE_STRING,
+      "struct<struct:struct<deciMal:decimal(10,0),anotherDecimal:decimal(5,2)>," +
+        "MAP:map<timestamp,varchar(10)>,arrAy:array<double>,anotherArray:array<char(9)>>")
+
+    val expectedDataType =
+      StructType(
+        StructField("complexStructCol", StructType(
+          StructField("struct",
+            StructType(
+              StructField("deciMal", DecimalType.USER_DEFAULT) ::
+                StructField("anotherDecimal", DecimalType(5, 2)) :: Nil)) ::
+            StructField("MAP", MapType(TimestampType, StringType)) ::
+            StructField("arrAy", ArrayType(DoubleType)) ::
+            StructField("anotherArray", ArrayType(StringType)) :: Nil),
+          nullable = true,
+          builder.build()) :: Nil)
+
+    assert(parse(tableSchemaString) === expectedDataType)
+  }
+
+  test("illegal col types") {
+    val e = intercept[ParseException] {
+      CatalystSqlParser.parseTableSchema("a INT b long")
+    }.getMessage
+    assert(e.contains("mismatched input 'b' expecting {<EOF>, '(', ',', 'COMMENT'}"))
+  }
+}

From 3ec8483d8d7e8ba4be88041ebba73a2bda922186 Mon Sep 17 00:00:00 2001
From: Xiao Li <gatorsmile@gmail.com>
Date: Mon, 6 Mar 2017 15:37:19 -0800
Subject: [PATCH 2/4] address comments.

---
 .../sql/catalyst/parser/ParseDriver.scala     |  5 +++-
 .../sql/catalyst/parser/ParserInterface.scala |  5 +++-
 .../parser/TableSchemaParserSuite.scala       | 26 +++++++++----------
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index 8fd2ac5c1316..d9c0221784cf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -49,7 +49,10 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
     astBuilder.visitSingleTableIdentifier(parser.singleTableIdentifier())
   }
 
-  /** Creates StructType for a given SQL string. */
+  /**
+   * Creates StructType for a given SQL string, which is a comma separated list of field
+   * definitions which will preserve the correct Hive metadata.
+   */
   override def parseTableSchema(sqlText: String): StructType = parse(sqlText) { parser =>
     astBuilder.createSchema(parser.colTypeList())
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
index b2662d7617fe..6edbe253970e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
@@ -35,6 +35,9 @@ trait ParserInterface {
   /** Creates TableIdentifier for a given SQL string. */
   def parseTableIdentifier(sqlText: String): TableIdentifier
 
-  /** Creates StructType for a given SQL string. */
+  /**
+   * Creates StructType for a given SQL string, which is a comma separated list of field
+   * definitions which will preserve the correct Hive metadata.
+   */
   def parseTableSchema(sqlText: String): StructType
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
index 1e86796f2dc2..418519f0dbbb 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
@@ -30,20 +30,22 @@ class TableSchemaParserSuite extends SparkFunSuite {
     }
   }
 
-  checkTableSchema("a int", (new StructType).add("a", "int"))
-  checkTableSchema("A int", (new StructType).add("A", "int"))
-  checkTableSchema("a INT", (new StructType).add("a", "int"))
-  checkTableSchema("`!@#$%.^&*()` string", (new StructType).add("!@#$%.^&*()", "string"))
-  checkTableSchema("a int, b long", (new StructType).add("a", "int").add("b", "long"))
+  def intercept(sql: String): Unit =
+    intercept[ParseException](CatalystSqlParser.parseTableSchema(sql))
+
+  checkTableSchema("a int", new StructType().add("a", "int"))
+  checkTableSchema("A int", new StructType().add("A", "int"))
+  checkTableSchema("a INT", new StructType().add("a", "int"))
+  checkTableSchema("`!@#$%.^&*()` string", new StructType().add("!@#$%.^&*()", "string"))
+  checkTableSchema("a int, b long", new StructType().add("a", "int").add("b", "long"))
   checkTableSchema("a STRUCT<intType: int, ts:timestamp>",
     StructType(
       StructField("a", StructType(
         StructField("intType", IntegerType) ::
         StructField("ts", TimestampType) :: Nil)) :: Nil))
-
   checkTableSchema(
     "a int comment 'test'",
-    (new StructType).add("a", "int", nullable = true, "test"))
+    new StructType().add("a", "int", nullable = true, "test"))
 
   test("complex hive type") {
     val tableSchemaString =
@@ -76,10 +78,8 @@ class TableSchemaParserSuite extends SparkFunSuite {
     assert(parse(tableSchemaString) === expectedDataType)
   }
 
-  test("illegal col types") {
-    val e = intercept[ParseException] {
-      CatalystSqlParser.parseTableSchema("a INT b long")
-    }.getMessage
-    assert(e.contains("mismatched input 'b' expecting {<EOF>, '(', ',', 'COMMENT'}"))
-  }
+  // Negative cases
+  intercept("a INT b long")
+  intercept("a INT,, b long")
+  intercept("a INT, b long,,")
 }

From 22b7db8bc013d5dcd23c3ef0f45483c47ea66b98 Mon Sep 17 00:00:00 2001
From: Xiao Li <gatorsmile@gmail.com>
Date: Mon, 6 Mar 2017 15:55:56 -0800
Subject: [PATCH 3/4] address comments.

---
 .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala    | 2 +-
 .../org/apache/spark/sql/catalyst/parser/ParseDriver.scala   | 2 +-
 .../spark/sql/catalyst/parser/TableSchemaParserSuite.scala   | 5 ++++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 1c46cca7fdbf..d2e091f4dda6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1476,7 +1476,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   /**
    * Create top level table schema.
    */
-  def createSchema(ctx: ColTypeListContext): StructType = {
+  protected def createSchema(ctx: ColTypeListContext): StructType = {
     StructType(Option(ctx).toSeq.flatMap(visitColTypeList))
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d9c0221784cf..f704b0998cad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -54,7 +54,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
    * definitions which will preserve the correct Hive metadata.
    */
   override def parseTableSchema(sqlText: String): StructType = parse(sqlText) { parser =>
-    astBuilder.createSchema(parser.colTypeList())
+    StructType(astBuilder.visitColTypeList(parser.colTypeList()))
   }
 
   /** Creates LogicalPlan for a given SQL string. */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
index 418519f0dbbb..4a7c3a3e822f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.types._
 
 class TableSchemaParserSuite extends SparkFunSuite {
 
-  def parse(sql: String): DataType = CatalystSqlParser.parseTableSchema(sql)
+  def parse(sql: String): StructType = CatalystSqlParser.parseTableSchema(sql)
 
   def checkTableSchema(tableSchemaString: String, expectedDataType: DataType): Unit = {
     test(s"parse ${tableSchemaString.replace("\n", "")}") {
@@ -79,7 +79,10 @@ class TableSchemaParserSuite extends SparkFunSuite {
   }
 
   // Negative cases
+  intercept("")
+  intercept("a")
   intercept("a INT b long")
   intercept("a INT,, b long")
   intercept("a INT, b long,,")
+  intercept("a INT, b long, c int,")
 }

From b18ae84c1f0485d929e58d217c1881d037721881 Mon Sep 17 00:00:00 2001
From: Xiao Li <gatorsmile@gmail.com>
Date: Wed, 15 Mar 2017 18:49:17 -0700
Subject: [PATCH 4/4] address comments.

---
 .../catalyst/parser/TableSchemaParserSuite.scala | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
index 4a7c3a3e822f..da1041d61708 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala
@@ -25,12 +25,12 @@ class TableSchemaParserSuite extends SparkFunSuite {
   def parse(sql: String): StructType = CatalystSqlParser.parseTableSchema(sql)
 
   def checkTableSchema(tableSchemaString: String, expectedDataType: DataType): Unit = {
-    test(s"parse ${tableSchemaString.replace("\n", "")}") {
+    test(s"parse $tableSchemaString") {
       assert(parse(tableSchemaString) === expectedDataType)
     }
   }
 
-  def intercept(sql: String): Unit =
+  def assertError(sql: String): Unit =
     intercept[ParseException](CatalystSqlParser.parseTableSchema(sql))
 
   checkTableSchema("a int", new StructType().add("a", "int"))
@@ -79,10 +79,10 @@ class TableSchemaParserSuite extends SparkFunSuite {
   }
 
   // Negative cases
-  intercept("")
-  intercept("a")
-  intercept("a INT b long")
-  intercept("a INT,, b long")
-  intercept("a INT, b long,,")
-  intercept("a INT, b long, c int,")
+  assertError("")
+  assertError("a")
+  assertError("a INT b long")
+  assertError("a INT,, b long")
+  assertError("a INT, b long,,")
+  assertError("a INT, b long, c int,")
 }