
Commit 1505af4

fix according to comments and move orc to hive sub project
1 parent 655b23f commit 1505af4

File tree

14 files changed: +274 -469 lines changed

examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala

Lines changed: 0 additions & 11 deletions
@@ -59,27 +59,16 @@ object RDDRelation {
     // Write out an RDD as a parquet file.
     rdd.saveAsParquetFile("pair.parquet")

-    // Write out an RDD as a orc file.
-    rdd.saveAsOrcFile("pair.orc")
-
-
     // Read in parquet file. Parquet files are self-describing so the schmema is preserved.
     val parquetFile = sqlContext.parquetFile("pair.parquet")

-    // Read in orc file. orc files are self-describing so the schmema is preserved.
-    val orcFile = sqlContext.orcFile("pair.orc")
-
-
     // Queries can be run using the DSL on parequet files just like the original RDD.
     parquetFile.where('key === 1).select('value as 'a).collect().foreach(println)

     // These files can also be registered as tables.
     parquetFile.registerTempTable("parquetFile")
     sql("SELECT * FROM parquetFile").collect().foreach(println)

-    orcFile.registerTempTable("orcFile")
-    sql("SELECT * FROM orcFile").collect().foreach(println)
-
     sc.stop()
   }
 }

examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala

Lines changed: 10 additions & 0 deletions
@@ -62,6 +62,16 @@ object HiveFromSpark {
     println("Result of SELECT *:")
     sql("SELECT * FROM records r JOIN src s ON r.key = s.key").collect().foreach(println)

+    // Write out an RDD as a orc file.
+    rdd.saveAsOrcFile("pair.orc")
+
+    // Read in orc file. Orc files are self-describing so the schmema is preserved.
+    val orcFile = hiveContext.orcFile("pair.orc")
+
+    // These files can also be registered as tables.
+    orcFile.registerTempTable("orcFile")
+    sql("SELECT * FROM records r JOIN orcFile s ON r.key = s.key").collect().foreach(println)
+
     sc.stop()
   }
 }

sql/core/pom.xml

Lines changed: 0 additions & 15 deletions
@@ -53,21 +53,6 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.spark-project.hive</groupId>
-      <artifactId>hive-exec</artifactId>
-      <version>${hive.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.slf4j</groupId>
-          <artifactId>slf4j-api</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
     <dependency>
       <groupId>com.twitter</groupId>
       <artifactId>parquet-column</artifactId>

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 0 additions & 7 deletions
@@ -35,7 +35,6 @@ private[spark] object SQLConf {
   val PARQUET_BINARY_AS_STRING = "spark.sql.parquet.binaryAsString"
   val PARQUET_CACHE_METADATA = "spark.sql.parquet.cacheMetadata"
   val PARQUET_COMPRESSION = "spark.sql.parquet.compression.codec"
-  val ORC_COMPRESSION = "spark.sql.orc.compression.codec"

   // This is only used for the thriftserver
   val THRIFTSERVER_POOL = "spark.sql.thriftserver.scheduler.pool"
@@ -84,12 +83,6 @@ private[sql] trait SQLConf {
   /** The compression codec for writing to a Parquetfile */
   private[spark] def parquetCompressionCodec: String = getConf(PARQUET_COMPRESSION, "snappy")

-  /** The compression codec for writing to a Orcfile
-   * Note: only support zlib now since we use ```OrcOutputFormat.getRecordWriter``` ,which is not
-   * allowed to configure thr compression kind
-   */
-  private[spark] def orcCompressionCodec: String = getConf(ORC_COMPRESSION, "zlib")
-
   /** The number of rows that will be */
   private[spark] def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE, "1000").toInt
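
With ORC_COMPRESSION and orcCompressionCodec gone, the Parquet codec is the only compression setting left in this trait. A minimal sketch of setting and reading that key through the conf API of this era, assuming a local SQLContext (the app name and master are illustrative):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object ParquetCodecConfSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative local setup; any SparkContext works here.
    val sc = new SparkContext(new SparkConf().setAppName("ParquetCodecConfSketch").setMaster("local"))
    val sqlContext = new SQLContext(sc)

    // Overrides the "snappy" default that parquetCompressionCodec falls back to above.
    sqlContext.setConf("spark.sql.parquet.compression.codec", "gzip")

    // Reads the value back through the same SQLConf map.
    println(sqlContext.getConf("spark.sql.parquet.compression.codec"))

    sc.stop()
  }
}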

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 1 addition & 45 deletions
@@ -36,8 +36,7 @@ import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.SparkStrategies
 import org.apache.spark.sql.json._
 import org.apache.spark.sql.parquet.ParquetRelation
-import org.apache.spark.{Logging, SparkContext}
-import org.apache.spark.sql.orc.OrcRelation
+import org.apache.spark.SparkContext

 /**
  * :: AlphaComponent ::
@@ -148,14 +147,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
   def parquetFile(path: String): SchemaRDD =
     new SchemaRDD(this, parquet.ParquetRelation(path, Some(sparkContext.hadoopConfiguration), this))

-  /**
-   * Loads a Orc file, returning the result as a [[SchemaRDD]].
-   *
-   * @group userf
-   */
-  def orcFile(path: String): SchemaRDD =
-    new SchemaRDD(this, orc.OrcRelation(path, Some(sparkContext.hadoopConfiguration), this))
-
   /**
    * Loads a JSON file (one object per line), returning the result as a [[SchemaRDD]].
    * It goes through the entire dataset once to determine the schema.
@@ -255,40 +246,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
       path, ScalaReflection.attributesFor[A], allowExisting, conf, this))
   }

-  /**
-   * :: Experimental ::
-   * Creates an empty parquet file with the schema of class `A`, which can be registered as a table.
-   * This registered table can be used as the target of future `insertInto` operations.
-   *
-   * {{{
-   *   val sqlContext = new SQLContext(...)
-   *   import sqlContext._
-   *
-   *   case class Person(name: String, age: Int)
-   *   createOrcFile[Person]("path/to/file.orc").registerTempTable("people")
-   *   sql("INSERT INTO people SELECT 'michael', 29")
-   * }}}
-   *
-   * @tparam A A case class type that describes the desired schema of the parquet file to be
-   *           created.
-   * @param path The path where the directory containing parquet metadata should be created.
-   *             Data inserted into this table will also be stored at this location.
-   * @param allowExisting When false, an exception will be thrown if this directory already exists.
-   * @param conf A Hadoop configuration object that can be used to specify options to the parquet
-   *             output format.
-   *
-   * @group userf
-   */
-  @Experimental
-  def createOrcFile[A <: Product : TypeTag](
-      path: String,
-      allowExisting: Boolean = true,
-      conf: Configuration = new Configuration()): SchemaRDD = {
-    new SchemaRDD(
-      this,
-      OrcRelation.createEmpty(path, ScalaReflection.attributesFor[A], allowExisting, conf, this))
-  }
-
   /**
    * Registers the given RDD as a temporary table in the catalog. Temporary tables exist only
    * during the lifetime of this instance of SQLContext.
@@ -334,7 +291,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
       HashJoin ::
       InMemoryScans ::
       ParquetOperations ::
-      OrcOperations::
       BasicOperators ::
       CartesianProduct ::
       BroadcastNestedLoopJoin :: Nil
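
The deleted createOrcFile mirrored the createParquetFile method that stays in SQLContext (its tail is the context at the top of the third hunk). A minimal sketch of that remaining Parquet variant, following the usage pattern from the removed scaladoc; the Person class and path are illustrative:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

// Illustrative schema for the empty parquet table.
case class Person(name: String, age: Int)

object CreateParquetFileSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("CreateParquetFileSketch").setMaster("local"))
    val sqlContext = new SQLContext(sc)
    import sqlContext._

    // Create an empty, schema-bearing parquet directory and register it as a table,
    // just as the removed createOrcFile example did for ORC.
    createParquetFile[Person]("people.parquet").registerTempTable("people")
    sql("INSERT INTO people SELECT 'michael', 29")

    sc.stop()
  }
}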

sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala

Lines changed: 2 additions & 2 deletions
@@ -18,9 +18,8 @@
 package org.apache.spark.sql

 import org.apache.spark.annotation.{DeveloperApi, Experimental}
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.execution.SparkLogicalPlan
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.execution.LogicalRDD

 /**
@@ -81,6 +80,7 @@ private[sql] trait SchemaRDDLike {
   * Saves the contents of this `SchemaRDD` as a orc file, preserving the schema. Files that
   * are written out using this method can be read back in as a SchemaRDD using the `orcFile`
   * function.
+  * Note: you can only use it in HiveContext
   *
   * @group schema
   */
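
Since the added note restricts saveAsOrcFile to a HiveContext, here is a minimal sketch of the intended round trip, modeled on the HiveFromSpark change above; the Record class, path, and master are illustrative:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Illustrative schema for the ORC round trip.
case class Record(key: Int, value: String)

object OrcRoundTripSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("OrcRoundTripSketch").setMaster("local"))
    val hiveContext = new HiveContext(sc)
    import hiveContext._

    // saveAsOrcFile is defined on SchemaRDDLike, but per the note it only works with a HiveContext.
    val rdd = sc.parallelize(1 to 10).map(i => Record(i, s"val_$i"))
    rdd.saveAsOrcFile("pair.orc")

    // ORC files carry their schema, so they can be read back and registered directly.
    val orcFile = hiveContext.orcFile("pair.orc")
    orcFile.registerTempTable("orcFile")
    sql("SELECT * FROM orcFile WHERE key = 1").collect().foreach(println)

    sc.stop()
  }
}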

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 0 additions & 22 deletions
@@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.columnar.{InMemoryRelation, InMemoryColumnarTableScan}
 import org.apache.spark.sql.parquet._
-import org.apache.spark.sql.orc.{OrcTableScan, InsertIntoOrcTable, OrcRelation}

 private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
   self: SQLContext#SparkPlanner =>
@@ -238,27 +237,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     }
   }

-  object OrcOperations extends Strategy {
-    def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
-      case logical.WriteToOrcFile(path, child) =>
-        val relation =
-          OrcRelation.create(path, child, sparkContext.hadoopConfiguration, sqlContext)
-        InsertIntoOrcTable(relation, planLater(child), overwrite=true) :: Nil
-      case logical.InsertIntoOrcTable(table: OrcRelation, partition, child, overwrite) =>
-        InsertIntoOrcTable(table, planLater(child), overwrite) :: Nil
-      case PhysicalOperation(projectList, filters, relation: OrcRelation) =>
-        // TODO: need to implement predict push down.
-        val prunePushedDownFilters = identity[Seq[Expression]] _
-        pruneFilterProject(
-          projectList,
-          filters,
-          prunePushedDownFilters,
-          OrcTableScan(_, relation, None)) :: Nil
-
-      case _ => Nil
-    }
-  }
-
   object InMemoryScans extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case PhysicalOperation(projectList, filters, mem: InMemoryRelation) =>
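
The removed OrcOperations follows the common SparkStrategies shape: pattern-match the logical plan, emit physical operators, and return Nil so the planner can fall through to the next strategy. A toy, self-contained sketch of that shape with stand-in types (not the real catalyst classes):

object StrategySketch {
  // Stand-in plan nodes; the real code uses catalyst LogicalPlan and SparkPlan.
  sealed trait LogicalPlan
  case class WriteToFile(path: String, child: LogicalPlan) extends LogicalPlan
  case class Scan(table: String) extends LogicalPlan

  sealed trait PhysicalPlan
  case class InsertIntoTable(path: String, child: PhysicalPlan) extends PhysicalPlan
  case class TableScan(table: String) extends PhysicalPlan

  trait Strategy { def apply(plan: LogicalPlan): Seq[PhysicalPlan] }

  object FileOperations extends Strategy {
    def apply(plan: LogicalPlan): Seq[PhysicalPlan] = plan match {
      case WriteToFile(path, Scan(table)) => InsertIntoTable(path, TableScan(table)) :: Nil
      case Scan(table)                    => TableScan(table) :: Nil
      case _                              => Nil // let another strategy handle it
    }
  }

  def main(args: Array[String]): Unit = {
    val strategies: Seq[Strategy] = FileOperations :: Nil
    val plan: LogicalPlan = WriteToFile("pair.orc", Scan("records"))
    // The planner tries each strategy in order and takes the first non-empty result.
    val physical = strategies.view.map(_(plan)).find(_.nonEmpty).getOrElse(Nil)
    println(physical)
  }
}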

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala

Lines changed: 31 additions & 9 deletions
@@ -283,7 +283,7 @@ case class InsertIntoParquetTable(
       1
     } else {
       FileSystemHelper
-        .findMaxTaskId(NewFileOutputFormat.getOutputPath(job).toString, job.getConfiguration) + 1
+        .findMaxTaskId(NewFileOutputFormat.getOutputPath(job).toString, job.getConfiguration, "parquet") + 1
     }

   def writeShard(context: TaskContext, iter: Iterator[Row]): Int = {
@@ -488,7 +488,7 @@ private[parquet] object FilteringParquetRowInputFormat {
     .build[FileStatus, Array[BlockLocation]]()
 }

-private[parquet] object FileSystemHelper {
+private[sql] object FileSystemHelper {
   def listFiles(pathStr: String, conf: Configuration): Seq[Path] = {
     val origPath = new Path(pathStr)
     val fs = origPath.getFileSystem(conf)
@@ -504,19 +504,41 @@ private[parquet] object FileSystemHelper {
     fs.listStatus(path).map(_.getPath)
   }

-  /**
-   * Finds the maximum taskid in the output file names at the given path.
-   */
-  def findMaxTaskId(pathStr: String, conf: Configuration): Int = {
+  /**
+   * List files with special extension
+   */
+  def listFiles(origPath: Path, conf: Configuration, extension: String): Seq[Path] = {
+    val fs = origPath.getFileSystem(conf)
+    if (fs == null) {
+      throw new IllegalArgumentException(
+        s"OrcTableOperations: Path $origPath is incorrectly formatted")
+    }
+    val path = origPath.makeQualified(fs)
+    if (fs.exists(path) && fs.getFileStatus(path).isDir) {
+      fs.listStatus(path).map(_.getPath).filter(p => p.getName.endsWith(extension))
+    } else {
+      Seq.empty
+    }
+  }
+
+  /**
+   * Finds the maximum taskid in the output file names at the given path.
+   */
+  def findMaxTaskId(pathStr: String, conf: Configuration, extension: String): Int = {
     val files = FileSystemHelper.listFiles(pathStr, conf)
-    // filename pattern is part-r-<int>.parquet
-    val nameP = new scala.util.matching.Regex("""part-r-(\d{1,}).parquet""", "taskid")
+    // filename pattern is part-r-<int>.$extension
+    val nameP = extension match {
+      case "parquet" => new scala.util.matching.Regex( """part-r-(\d{1,}).parquet""", "taskid")
+      case "orc" => new scala.util.matching.Regex( """part-r-(\d{1,}).orc""", "taskid")
+      case _ =>
+        sys.error(s"ERROR: unsupported extension: $extension")
+    }
     val hiddenFileP = new scala.util.matching.Regex("_.*")
     files.map(_.getName).map {
       case nameP(taskid) => taskid.toInt
       case hiddenFileP() => 0
       case other: String => {
-        sys.error("ERROR: attempting to append to set of Parquet files and found file" +
+        sys.error(s"ERROR: attempting to append to set of $extension files and found file" +
           s"that does not match name pattern: $other")
         0
       }
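
To make the extension-aware findMaxTaskId concrete, here is a standalone sketch of the same regex-based task-id extraction run over plain file names (an illustrative re-implementation, not the FileSystemHelper object itself):

object FindMaxTaskIdSketch {
  def maxTaskId(fileNames: Seq[String], extension: String): Int = {
    // Filename pattern is part-r-<int>.<extension>, e.g. part-r-00007.orc
    val nameP = new scala.util.matching.Regex("""part-r-(\d{1,}).""" + extension, "taskid")
    val hiddenFileP = new scala.util.matching.Regex("_.*")
    fileNames.map {
      case nameP(taskid) => taskid.toInt
      case hiddenFileP() => 0 // metadata files such as _SUCCESS count as task id 0
      case _ => 0
    }.reduceOption(_ max _).getOrElse(0)
  }

  def main(args: Array[String]): Unit = {
    val files = Seq("_SUCCESS", "part-r-00001.orc", "part-r-00007.orc")
    // InsertIntoParquetTable adds 1 to this value to pick the next task id when appending.
    println(maxTaskId(files, "orc")) // prints 7
  }
}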

0 comments