Commit ba26cd1

Created separate parser for HQL. It pre-parses commands like
CACHE, UNCACHE, ADD JAR, etc., and then parses the remaining query with HiveQl.
1 parent dab1b0a commit ba26cd1

File tree: 6 files changed (+153, −6 lines)

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 5 additions & 2 deletions
@@ -75,6 +75,9 @@ class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
  */
 class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   self =>
+
+  @transient
+  protected[sql] val hiveParser = new HiveSqlParser

   // Change the default SQL dialect to HiveQL
   override private[spark] def dialect: String = getConf(SQLConf.DIALECT, "hiveql")

@@ -95,15 +98,15 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     if (dialect == "sql") {
       super.sql(sqlText)
     } else if (dialect == "hiveql") {
-      new SchemaRDD(this, HiveQl.parseSql(sqlText))
+      new SchemaRDD(this, hiveParser(sqlText))
     } else {
       sys.error(s"Unsupported SQL dialect: $dialect. Try 'sql' or 'hiveql'")
     }
   }

   @deprecated("hiveql() is deprecated as the sql function now parses using HiveQL by default. " +
     s"The SQL dialect for parsing can be set using ${SQLConf.DIALECT}", "1.1")
-  def hiveql(hqlQuery: String): SchemaRDD = new SchemaRDD(this, HiveQl.parseSql(hqlQuery))
+  def hiveql(hqlQuery: String): SchemaRDD = new SchemaRDD(this, hiveParser(hqlQuery))

   @deprecated("hql() is deprecated as the sql function now parses using HiveQL by default. " +
     s"The SQL dialect for parsing can be set using ${SQLConf.DIALECT}", "1.1")

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSqlParser.scala (new file)

Lines changed: 138 additions & 0 deletions

@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import scala.language.implicitConversions
+import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+import scala.util.parsing.combinator.PackratParsers
+import scala.util.parsing.input.CharArrayReader.EofCh
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.SqlLexical
+import scala.util.parsing.combinator.lexical.StdLexical
+
+/**
+ * A simple Hive SQL pre-parser. It parses commands like CACHE and UNCACHE itself;
+ * the remaining query is parsed by HiveQl.parseSql.
+ */
+class HiveSqlParser extends StandardTokenParsers with PackratParsers {
+
+  def apply(input: String): LogicalPlan = {
+    // Special-case out set commands since the value fields can be
+    // complex to handle without RegexParsers. Also this approach
+    // is clearer for the several possible cases of set commands.
+    if (input.trim.toLowerCase.startsWith("set")) {
+      input.trim.drop(3).split("=", 2).map(_.trim) match {
+        case Array("") => // "set"
+          SetCommand(None, None)
+        case Array(key) => // "set key"
+          SetCommand(Some(key), None)
+        case Array(key, value) => // "set key=value"
+          SetCommand(Some(key), Some(value))
+      }
+    } else if (input.trim.startsWith("!")) {
+      ShellCommand(input.drop(1))
+    } else {
+      phrase(query)(new lexical.Scanner(input)) match {
+        case Success(r, x) => r
+        case x => sys.error(x.toString)
+      }
+    }
+  }
+
+  protected case class Keyword(str: String)
+
+  protected val CACHE = Keyword("CACHE")
+  protected val SET = Keyword("SET")
+  protected val ADD = Keyword("ADD")
+  protected val JAR = Keyword("JAR")
+  protected val TABLE = Keyword("TABLE")
+  protected val AS = Keyword("AS")
+  protected val UNCACHE = Keyword("UNCACHE")
+  protected val FILE = Keyword("FILE")
+  protected val DFS = Keyword("DFS")
+  protected val SOURCE = Keyword("SOURCE")
+
+  protected implicit def asParser(k: Keyword): Parser[String] =
+    lexical.allCaseVersions(k.str).map(x => x: Parser[String]).reduce(_ | _)
+
+  protected def allCaseConverse(k: String): Parser[String] =
+    lexical.allCaseVersions(k).map(x => x: Parser[String]).reduce(_ | _)
+
+  protected val reservedWords =
+    this.getClass
+      .getMethods
+      .filter(_.getReturnType == classOf[Keyword])
+      .map(_.invoke(this).asInstanceOf[Keyword].str)
+
+  override val lexical = new SqlLexical(reservedWords)
+
+  protected lazy val query: Parser[LogicalPlan] = (
+    cache | unCache | addJar | addFile | dfs | source | hiveQl
+  )
+
+  protected lazy val hiveQl: Parser[LogicalPlan] =
+    remainingQuery ^^ {
+      case r => HiveQl.parseSql(r.trim())
+    }
+
+  /** Returns the unparsed remainder of the input. */
+  protected lazy val remainingQuery: Parser[String] = new Parser[String] {
+    def apply(in: Input) = Success(in.source.subSequence(in.offset, in.source.length).toString,
+      in.drop(in.source.length()))
+  }
+
+  /** Returns the entire input, including any already-consumed prefix. */
+  protected lazy val allQuery: Parser[String] = new Parser[String] {
+    def apply(in: Input) = Success(in.source.toString,
+      in.drop(in.source.length()))
+  }
+
+  protected lazy val cache: Parser[LogicalPlan] =
+    CACHE ~ TABLE ~> ident ~ opt(AS ~> hiveQl) ^^ {
+      case tableName ~ None => CacheCommand(tableName, true)
+      case tableName ~ Some(plan) =>
+        CacheTableAsSelectCommand(tableName, plan)
+    }
+
+  protected lazy val unCache: Parser[LogicalPlan] =
+    UNCACHE ~ TABLE ~> ident ^^ {
+      case tableName => CacheCommand(tableName, false)
+    }
+
+  protected lazy val addJar: Parser[LogicalPlan] =
+    ADD ~ JAR ~> remainingQuery ^^ {
+      case rq => AddJar(rq.trim())
+    }
+
+  protected lazy val addFile: Parser[LogicalPlan] =
+    ADD ~ FILE ~> remainingQuery ^^ {
+      case rq => AddFile(rq.trim())
+    }
+
+  protected lazy val dfs: Parser[LogicalPlan] =
+    DFS ~> allQuery ^^ {
+      case aq => NativeCommand(aq.trim())
+    }
+
+  protected lazy val source: Parser[LogicalPlan] =
+    SOURCE ~> remainingQuery ^^ {
+      case rq => SourceCommand(rq.trim())
+    }
+}
+

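The pivot of this parser is remainingQuery: a hand-rolled Parser[String] that reads straight from the character source (in.source) instead of from tokens, so whatever follows a recognized prefix such as ADD JAR or SOURCE is handed to the next stage verbatim. A self-contained sketch of the same technique, assuming only the Scala standard parser combinators; every name in it is illustrative, not from the commit:

    import scala.util.parsing.combinator.syntactical.StandardTokenParsers

    // Illustrative pre-parser mirroring HiveSqlParser's structure:
    // recognize a GREET prefix ourselves, hand everything else to a fallback.
    object PrefixParser extends StandardTokenParsers {
      lexical.reserved += "GREET"

      // Succeeds immediately, returning the rest of the raw input.
      val rest: Parser[String] = new Parser[String] {
        def apply(in: Input) =
          Success(in.source.subSequence(in.offset, in.source.length).toString,
            in.drop(in.source.length()))
      }

      val command: Parser[String] =
        ("GREET" ~> rest ^^ (r => "greeting: " + r.trim)) |
        (rest ^^ (r => "fallback: " + r.trim))

      def parse(s: String): String =
        phrase(command)(new lexical.Scanner(s)) match {
          case Success(result, _) => result
          case failure => sys.error(failure.toString)
        }
    }

    // PrefixParser.parse("GREET hello world")  // "greeting: hello world"
    // PrefixParser.parse("SELECT 1")           // "fallback: SELECT 1"

Because rest never inspects tokens, the remainder does not have to lex cleanly against the pre-parser's grammar, which is what lets the real parser forward arbitrary HiveQL to HiveQl.parseSql.
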
sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala

Lines changed: 1 addition & 1 deletion
@@ -140,7 +140,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
   val describedTable = "DESCRIBE (\\w+)".r

   protected[hive] class HiveQLQueryExecution(hql: String) extends this.QueryExecution {
-    lazy val logical = HiveQl.parseSql(hql)
+    lazy val logical = hiveParser(hql)
     def hiveExec() = runSqlHive(hql)
     override def toString = hql + "\n" + super.toString
   }

sql/hive/src/main/scala/org/apache/spark/sql/hive/api/java/JavaHiveContext.scala

Lines changed: 2 additions & 2 deletions
@@ -34,7 +34,7 @@ class JavaHiveContext(sparkContext: JavaSparkContext) extends JavaSQLContext(spa
     if (sqlContext.dialect == "sql") {
       super.sql(sqlText)
     } else if (sqlContext.dialect == "hiveql") {
-      new JavaSchemaRDD(sqlContext, HiveQl.parseSql(sqlText))
+      new JavaSchemaRDD(sqlContext, sqlContext.hiveParser(sqlText))
     } else {
       sys.error(s"Unsupported SQL dialect: ${sqlContext.dialect}. Try 'sql' or 'hiveql'")
     }

@@ -45,5 +45,5 @@ class JavaHiveContext(sparkContext: JavaSparkContext) extends JavaSQLContext(spa
    */
   @Deprecated
   def hql(hqlQuery: String): JavaSchemaRDD =
-    new JavaSchemaRDD(sqlContext, HiveQl.parseSql(hqlQuery))
+    new JavaSchemaRDD(sqlContext, sqlContext.hiveParser(hqlQuery))
 }

sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala

Lines changed: 6 additions & 0 deletions
@@ -88,4 +88,10 @@ class CachedTableSuite extends HiveComparisonTest {
     }
     assert(!TestHive.isCached("src"), "Table 'src' should not be cached")
   }
+
+  test("'CACHE TABLE tableName AS SELECT ..'") {
+    TestHive.sql("CACHE TABLE testCacheTable AS SELECT * FROM src")
+    assert(TestHive.isCached("testCacheTable"), "Table 'testCacheTable' should be cached")
+    TestHive.uncacheTable("testCacheTable")
+  }
 }
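
Since the `cache` production maps the AS SELECT form to CacheTableAsSelectCommand, the parse result can also be checked directly, in the same style as the StatisticsSuite change below. A hedged sketch, not part of the commit:

    val parsed = new HiveSqlParser().apply("CACHE TABLE testCacheTable AS SELECT * FROM src")
    // Per the `cache` production, the AS SELECT form should yield this plan node.
    assert(parsed.isInstanceOf[CacheTableAsSelectCommand],
      "CACHE TABLE .. AS SELECT should parse to CacheTableAsSelectCommand")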

sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {

   test("parse analyze commands") {
     def assertAnalyzeCommand(analyzeCommand: String, c: Class[_]) {
-      val parsed = HiveQl.parseSql(analyzeCommand)
+      val parsed = new HiveSqlParser().apply(analyzeCommand)
       val operators = parsed.collect {
         case a: AnalyzeTable => a
         case o => o
