Commit d38c502

[SPARK-9100] [SQL] Adds DataFrame reader/writer shortcut methods for ORC
This PR adds DataFrame reader/writer shortcut methods for ORC in both Scala and Python.

Author: Cheng Lian <[email protected]>

Closes #7444 from liancheng/spark-9100 and squashes the following commits:

284d043 [Cheng Lian] Fixes PySpark test cases and addresses PR comments
e0b09fb [Cheng Lian] Adds DataFrame reader/writer shortcut methods for ORC
1 parent 1ddd0f2 commit d38c502
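
In short, the commit makes format("orc") reachable through the same one-word shortcuts that parquet() already provides in the diffs below. A minimal Scala sketch of the intended usage (the paths are hypothetical, and a HiveContext named hiveContext is assumed, since ORC support currently requires the Hive module):

    // Read ORC data with the new shortcut; equivalent to format("orc").load(path)
    val df = hiveContext.read.orc("/tmp/orc_demo/input")

    // Write it back with the new shortcut; equivalent to format("orc").save(path)
    df.write.mode("overwrite").orc("/tmp/orc_demo/output")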

File tree

13 files changed (+79, -23 lines)

python/pyspark/sql/readwriter.py

Lines changed: 41 additions & 3 deletions
@@ -146,14 +146,28 @@ def table(self, tableName):
         return self._df(self._jreader.table(tableName))
 
     @since(1.4)
-    def parquet(self, *path):
+    def parquet(self, *paths):
         """Loads a Parquet file, returning the result as a :class:`DataFrame`.
 
         >>> df = sqlContext.read.parquet('python/test_support/sql/parquet_partitioned')
         >>> df.dtypes
         [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
         """
-        return self._df(self._jreader.parquet(_to_seq(self._sqlContext._sc, path)))
+        return self._df(self._jreader.parquet(_to_seq(self._sqlContext._sc, paths)))
+
+    @since(1.5)
+    def orc(self, path):
+        """
+        Loads an ORC file, returning the result as a :class:`DataFrame`.
+
+        ::Note: Currently ORC support is only available together with
+        :class:`HiveContext`.
+
+        >>> df = hiveContext.read.orc('python/test_support/sql/orc_partitioned')
+        >>> df.dtypes
+        [('a', 'bigint'), ('b', 'int'), ('c', 'int')]
+        """
+        return self._df(self._jreader.orc(path))
 
     @since(1.4)
     def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPartitions=None,

@@ -378,6 +392,29 @@ def parquet(self, path, mode=None, partitionBy=None):
             self.partitionBy(partitionBy)
         self._jwrite.parquet(path)
 
+    def orc(self, path, mode=None, partitionBy=None):
+        """Saves the content of the :class:`DataFrame` in ORC format at the specified path.
+
+        ::Note: Currently ORC support is only available together with
+        :class:`HiveContext`.
+
+        :param path: the path in any Hadoop supported file system
+        :param mode: specifies the behavior of the save operation when data already exists.
+
+            * ``append``: Append contents of this :class:`DataFrame` to existing data.
+            * ``overwrite``: Overwrite existing data.
+            * ``ignore``: Silently ignore this operation if data already exists.
+            * ``error`` (default case): Throw an exception if data already exists.
+        :param partitionBy: names of partitioning columns
+
+        >>> orc_df = hiveContext.read.orc('python/test_support/sql/orc_partitioned')
+        >>> orc_df.write.orc(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        self.mode(mode)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
+        self._jwrite.orc(path)
+
     @since(1.4)
     def jdbc(self, url, table, mode=None, properties={}):
         """Saves the content of the :class:`DataFrame` to a external database table via JDBC.

@@ -408,7 +445,7 @@ def _test():
     import os
     import tempfile
     from pyspark.context import SparkContext
-    from pyspark.sql import Row, SQLContext
+    from pyspark.sql import Row, SQLContext, HiveContext
     import pyspark.sql.readwriter
 
     os.chdir(os.environ["SPARK_HOME"])

@@ -420,6 +457,7 @@ def _test():
     globs['os'] = os
     globs['sc'] = sc
     globs['sqlContext'] = SQLContext(sc)
+    globs['hiveContext'] = HiveContext(sc)
     globs['df'] = globs['sqlContext'].read.parquet('python/test_support/sql/parquet_partitioned')
 
     (failure_count, test_count) = doctest.testmod(
8 Bytes; binary file not shown.

python/test_support/sql/orc_partitioned/_SUCCESS

Whitespace-only changes.

sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

Lines changed: 9 additions & 0 deletions
@@ -264,6 +264,15 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
     }
   }
 
+  /**
+   * Loads an ORC file and returns the result as a [[DataFrame]].
+   *
+   * @param path input path
+   * @since 1.5.0
+   * @note Currently, this method can only be used together with `HiveContext`.
+   */
+  def orc(path: String): DataFrame = format("orc").load(path)
+
   /**
    * Returns the specified table as a [[DataFrame]].
    *
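
A short sketch of the new reader shortcut in use (the path and the hiveContext variable are hypothetical; a HiveContext is required for ORC at this point):

    // One-word shortcut for format("orc").load(path)
    val people = hiveContext.read.orc("/tmp/people.orc")
    people.printSchema()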

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 12 additions & 0 deletions
@@ -280,6 +280,18 @@ final class DataFrameWriter private[sql](df: DataFrame) {
    */
   def parquet(path: String): Unit = format("parquet").save(path)
 
+  /**
+   * Saves the content of the [[DataFrame]] in ORC format at the specified path.
+   * This is equivalent to:
+   * {{{
+   *   format("orc").save(path)
+   * }}}
+   *
+   * @since 1.5.0
+   * @note Currently, this method can only be used together with `HiveContext`.
+   */
+  def orc(path: String): Unit = format("orc").save(path)
+
   ///////////////////////////////////////////////////////////////////////////////////////
   // Builder pattern config options
   ///////////////////////////////////////////////////////////////////////////////////////
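
Because the shortcut goes through the same DataFrameWriter builder, it composes with the existing options such as mode(). A minimal sketch (df and the output path are hypothetical):

    import org.apache.spark.sql.SaveMode

    // One-word shortcut for format("orc").save(path), here with an explicit save mode
    df.write.mode(SaveMode.Append).orc("/tmp/people.orc")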

sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala

Lines changed: 1 addition & 2 deletions
@@ -41,8 +41,7 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
         .parallelize(for (i <- 1 to 3) yield (i, s"val_$i", p1))
         .toDF("a", "b", "p1")
         .write
-        .format("orc")
-        .save(partitionDir.toString)
+        .orc(partitionDir.toString)
     }
 
     val dataSchemaWithPartition =
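
The test above writes each partition directory explicitly; the shortcut also composes with partitionBy() to let Spark lay out the partitions. A hypothetical sketch (the column name p1 is borrowed from the test, the output path is invented):

    // Writes one directory per distinct p1 value: .../p1=<value>/part-*.orc
    df.write.partitionBy("p1").orc("/tmp/orc_partitioned_out")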
