From 4471e2a40a1cb8a6815d075fe65a410af08f9083 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 1 May 2016 12:31:20 +0900 Subject: [PATCH 1/6] Add CSV documentation --- python/pyspark/sql/readwriter.py | 49 +++++++++++++++++++ .../apache/spark/sql/DataFrameReader.scala | 47 ++++++++++++++++-- .../apache/spark/sql/DataFrameWriter.scala | 8 +++ 3 files changed, 100 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index ed9e716ab78e3..d586638dd010b 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -282,6 +282,43 @@ def csv(self, paths): :param paths: string, or list of strings, for input path(s). + You can set the following CSV-specific options to deal with CSV files: + * ``sep`` or ``delimiter`` (default ``,``): sets the single character as a delimiter \ + for each field and value. + * ``quote`` (default ``"``): sets the single character used for escaping \ + quoted values where the delimiter can be part of the value. + * ``escape`` (default ``\``): sets the single character used for escaping quotes \ + inside an already quoted value. + * ``comment`` (default ````): sets the single character used for skipping lines \ + beginning with this character. By default, it is disabled. + * ``header`` (default ``false``): uses the first line as names of columns. + * ``ignoreLeadingWhiteSpace`` (default ``false``): defines whether or not leading \ + whitespaces from values being read should be skipped. + * ``ignoreTrailingWhiteSpace`` (default ``false``): defines whether or not trailing \ + whitespaces from values being read should be skipped. + * ``nullValue`` (default ````): sets the string representation of a null value. + * ``nanValue`` (default ``NaN``): sets the string representation of a non-number \ + value. + * ``positiveInf`` (default ``Inf``): sets the string representation of a positive \ + infinity value. + * ``negativeInf`` (default ``-Inf``): sets the string representation of a negative \ + infinity value. + * ``dateFormat`` (default ``null``): sets the string that indicates a date format. \ + Custom date formats follow the formats at ``java.text.SimpleDateFormat``. This \ + applies to both date type and timestamp type By default, it is `null` which means \ + trying to parse times and date by ``java.sql.Timestamp.valueOf()`` and \ + ``java.sql.Date.valueOf()``. + * ``maxColumns`` (default ``20480``): defines a hard limit of how many columns \ + a record can have. + * ``maxCharsPerColumn`` (default ``1000000``): defines the maximum number of \ + characters allowed for any given value being read. + * ``mode`` (default ``PERMISSIVE``): allows a mode for dealing with corrupt records \ + during parsing. + * ``PERMISSIVE`` : sets other fields to `null` when it meets a corrupted record. \ + When a schema is set by user, it sets `null` for extra fields. + * ``DROPMALFORMED`` : ignores the whole corrupted records. + * ``FAILFAST`` : throws an exception when it meets corrupted records. + >>> df = sqlContext.read.csv('python/test_support/sql/ages.csv') >>> df.dtypes [('C0', 'string'), ('C1', 'string')] @@ -663,6 +700,18 @@ def csv(self, path, mode=None, compression=None): known case-insensitive shorten names (none, bzip2, gzip, lz4, snappy and deflate). + You can set the following CSV-specific options to deal with CSV files: + * ``sep`` or ``delimiter`` (default ``,``): sets the single character as a delimiter \ + for each field and value. 
+ * ``encoding`` or ``charset`` (default ``UTF-8``): decodes the CSV files by the given \ + encoding type. + * ``quote`` (default ``"``): sets the single character used for escaping \ + quoted values where the delimiter can be part of the value. + * ``escape`` (default ``\``): sets the single character used for escaping quotes \ + inside an already quoted value. + * ``header`` (default ``false``): writes the names of columns as the first line. + * ``nullValue`` (default ````): sets the string representation of a null value. + >>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data')) """ self.mode(mode) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 3d43f2022f669..1e1dae1b8cd1f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -290,7 +290,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  • `allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers * (e.g. 00012)
  • *
  • `mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records - * during parsing.
  • + * during parsing.
  • * *
  • `columnNameOfCorruptRecord` (default `_corrupt_record`): allows renaming the new field * having malformed string created by `PERMISSIVE` mode. This overrides - * `spark.sql.columnNameOfCorruptRecord`.
  • + * `spark.sql.columnNameOfCorruptRecord`.
  • * * @since 1.4.0 */ @@ -326,7 +326,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  • `allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all * character using backslash quoting mechanism
  • *
  • `mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records - * during parsing.
  • + * during parsing.
  • * *
  • `columnNameOfCorruptRecord` (default `_corrupt_record`): allows renaming the new field * having malformed string created by `PERMISSIVE` mode. This overrides - * `spark.sql.columnNameOfCorruptRecord`.
  • + * `spark.sql.columnNameOfCorruptRecord`.
  • * * @since 1.6.0 */ @@ -393,6 +393,45 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * This function goes through the input once to determine the input schema. To avoid going * through the entire data once, specify the schema explicitly using [[schema]]. * + * You can set the following CSV-specific options to deal with CSV files: + *
  • `sep` or `delimiter` (default `,`): sets the single character as a delimiter for each + * field and value.
  • + *
  • `encoding` or `charset` (default `UTF-8`): decodes the CSV files by the given encoding + * type.
  • + *
  • `quote` (default `"`): sets the single character used for escaping quoted values where + * the delimiter can be part of the value.
  • + *
  • `escape` (default `\`): sets the single character used for escaping quotes inside + * an already quoted value.
  • + *
  • `comment` (default ``): sets the single character used for skipping lines beginning + * with this character. By default, it is disabled.
  • + *
  • `header` (default `false`): uses the first line as names of columns.
  • + *
  • `ignoreLeadingWhiteSpace` (default `false`): defines whether or not leading whitespaces + * from values being read should be skipped.
  • + *
  • `ignoreTrailingWhiteSpace` (default `false`): defines whether or not trailing
 + * whitespaces from values being read should be skipped.
  • + *
  • `nullValue` (default ``): sets the string representation of a null value.
  • + *
  • `nanValue` (default `NaN`): sets the string representation of a non-number value.
  • + *
  • `positiveInf` (default `Inf`): sets the string representation of a positive infinity + * value.
  • + *
  • `negativeInf` (default `-Inf`): sets the string representation of a negative infinity + * value.
  • + *
  • `dateFormat` (default `null`): sets the string that indicates a date format. Custom date + * formats follow the formats at `java.text.SimpleDateFormat`. This applies to both date type + * and timestamp type By default, it is `null` which means trying to parse times and date by + * `java.sql.Timestamp.valueOf()` and `java.sql.Date.valueOf()`.
  • + *
  • `maxColumns` (default `20480`): defines a hard limit of how many columns + * a record can have.
  • + *
  • `maxCharsPerColumn` (default `1000000`): defines the maximum number of characters allowed + * for any given value being read.
  • + *
  • `mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records + * during parsing.
  • + * + * * @since 2.0.0 */ @scala.annotation.varargs diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 28f5ccd26bc52..dac4aa73c163c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -606,6 +606,14 @@ final class DataFrameWriter private[sql](df: DataFrame) { * }}} * * You can set the following CSV-specific option(s) for writing CSV files: + *
  • `sep` or `delimiter` (default `,`): sets the single character as a delimiter for each + * field and value.
  • + *
  • `quote` (default `"`): sets the single character used for escaping quoted values where + * the delimiter can be part of the value.
  • + *
  • `escape` (default `\`): sets the single character used for escaping quotes inside + * an already quoted value.
  • + *
  • `header` (default `false`): writes the names of columns as the first line.
  • + *
  • `nullValue` (default ``): sets the string representation of a null value.
  • *
  • `compression` (default `null`): compression codec to use when saving to file. This can be * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`, * `snappy` and `deflate`).
  • From 34b52fa8ae942d5f84049fa7c788a761b459d973 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 1 May 2016 13:08:00 +0900 Subject: [PATCH 2/6] Replace `` to "empty string". --- python/pyspark/sql/readwriter.py | 8 ++++---- .../main/scala/org/apache/spark/sql/DataFrameReader.scala | 6 +++--- .../main/scala/org/apache/spark/sql/DataFrameWriter.scala | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index d586638dd010b..8c0d23e563015 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -289,14 +289,14 @@ def csv(self, paths): quoted values where the delimiter can be part of the value. * ``escape`` (default ``\``): sets the single character used for escaping quotes \ inside an already quoted value. - * ``comment`` (default ````): sets the single character used for skipping lines \ - beginning with this character. By default, it is disabled. + * ``comment`` (default empty string): sets the single character used for skipping \ + lines beginning with this character. By default, it is disabled. * ``header`` (default ``false``): uses the first line as names of columns. * ``ignoreLeadingWhiteSpace`` (default ``false``): defines whether or not leading \ whitespaces from values being read should be skipped. * ``ignoreTrailingWhiteSpace`` (default ``false``): defines whether or not trailing \ whitespaces from values being read should be skipped. - * ``nullValue`` (default ````): sets the string representation of a null value. + * ``nullValue`` (default empty string): sets the string representation of a null value. * ``nanValue`` (default ``NaN``): sets the string representation of a non-number \ value. * ``positiveInf`` (default ``Inf``): sets the string representation of a positive \ @@ -710,7 +710,7 @@ def csv(self, path, mode=None, compression=None): * ``escape`` (default ``\``): sets the single character used for escaping quotes \ inside an already quoted value. * ``header`` (default ``false``): writes the names of columns as the first line. - * ``nullValue`` (default ````): sets the string representation of a null value. + * ``nullValue`` (default empty string): sets the string representation of a null value. >>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data')) """ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 1e1dae1b8cd1f..d75cd10b1e696 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -402,14 +402,14 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * the delimiter can be part of the value. *
  • `escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.
  • - *
  • `comment` (default ``): sets the single character used for skipping lines beginning + *
  • `comment` (default empty string): sets the single character used for skipping lines beginning * with this character. By default, it is disabled.
  • - *
  • `header` (default `false`): uses the first line as names of columns.
  • + *
  • header` (default `false`): uses the first line as names of columns.
  • *
  • `ignoreLeadingWhiteSpace` (default `false`): defines whether or not leading whitespaces * from values being read should be skipped.
  • *
  • `ignoreTrailingWhiteSpace` (default `false`): defines whether or not trailing
 * whitespaces from values being read should be skipped.
  • - *
  • `nullValue` (default ``): sets the string representation of a null value.
  • + *
  • `nullValue` (default empty string): sets the string representation of a null value.
  • *
  • `nanValue` (default `NaN`): sets the string representation of a non-number value.
  • *
  • `positiveInf` (default `Inf`): sets the string representation of a positive infinity * value.
  • diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index dac4aa73c163c..ff81d7832f387 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -613,8 +613,8 @@ final class DataFrameWriter private[sql](df: DataFrame) { *
  • `escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.
  • *
  • `header` (default `false`): writes the names of columns as the first line.
  • - *
  • `nullValue` (default ``): sets the string representation of a null value.
  • - *
  • `compression` (default `null`): compression codec to use when saving to file. This can be + *
  • `nullValue` (default empty string): sets the string representation of a null value.
  • + *
  • compression` (default `null`): compression codec to use when saving to file. This can be * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`, * `snappy` and `deflate`).
  • * From b9aeac1b592c6f7af6658fea5550632f0deda79c Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 1 May 2016 13:14:17 +0900 Subject: [PATCH 3/6] Add omitted opening tag ` and max length --- .../main/scala/org/apache/spark/sql/DataFrameReader.scala | 6 +++--- .../main/scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index d75cd10b1e696..de49468e1939e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -402,9 +402,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * the delimiter can be part of the value. *
  • `escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.
  • - *
  • `comment` (default empty string): sets the single character used for skipping lines beginning - * with this character. By default, it is disabled.
  • - *
  • header` (default `false`): uses the first line as names of columns.
  • + *
  • `comment` (default empty string): sets the single character used for skipping lines + * beginning with this character. By default, it is disabled.
  • + *
  • `header` (default `false`): uses the first line as names of columns.
  • *
  • `ignoreLeadingWhiteSpace` (default `false`): defines whether or not leading whitespaces * from values being read should be skipped.
  • *
  • `ignoreTrailingWhiteSpace` (default `false`): defines whether or not trailing
 * whitespaces from values being read should be skipped.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index ff81d7832f387..dc4e28dd611ee 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -614,7 +614,7 @@ final class DataFrameWriter private[sql](df: DataFrame) {
 * an already quoted value.
  • *
  • `header` (default `false`): writes the names of columns as the first line.
  • *
  • `nullValue` (default empty string): sets the string representation of a null value.
  • - *
  • compression` (default `null`): compression codec to use when saving to file. This can be + *
  • `compression` (default `null`): compression codec to use when saving to file. This can be * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`, * `snappy` and `deflate`).
  • * From 8201f234b4d3563239e59f900ce48812cc198c0e Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 1 May 2016 16:03:04 +0900 Subject: [PATCH 4/6] Remove alias for delimiter and charset. --- python/pyspark/sql/readwriter.py | 11 +++++++---- .../scala/org/apache/spark/sql/DataFrameReader.scala | 4 ++-- .../scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 8c0d23e563015..9856c639627de 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -283,8 +283,10 @@ def csv(self, paths): :param paths: string, or list of strings, for input path(s). You can set the following CSV-specific options to deal with CSV files: - * ``sep`` or ``delimiter`` (default ``,``): sets the single character as a delimiter \ + * ``delimiter`` (default ``,``): sets the single character as a delimiter \ for each field and value. + * ``charset`` (default ``UTF-8``): decodes the CSV files by the given \ + encoding type. * ``quote`` (default ``"``): sets the single character used for escaping \ quoted values where the delimiter can be part of the value. * ``escape`` (default ``\``): sets the single character used for escaping quotes \ @@ -701,16 +703,17 @@ def csv(self, path, mode=None, compression=None): snappy and deflate). You can set the following CSV-specific options to deal with CSV files: - * ``sep`` or ``delimiter`` (default ``,``): sets the single character as a delimiter \ + * ``delimiter`` (default ``,``): sets the single character as a delimiter \ for each field and value. - * ``encoding`` or ``charset`` (default ``UTF-8``): decodes the CSV files by the given \ - encoding type. * ``quote`` (default ``"``): sets the single character used for escaping \ quoted values where the delimiter can be part of the value. * ``escape`` (default ``\``): sets the single character used for escaping quotes \ inside an already quoted value. * ``header`` (default ``false``): writes the names of columns as the first line. * ``nullValue`` (default empty string): sets the string representation of a null value. + * ``compression``: compression codec to use when saving to file. This can be one of \ + the known case-insensitive shorten names (none, bzip2, gzip, lz4, snappy and \ + deflate). >>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data')) """ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index de49468e1939e..0137084845ed5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -394,9 +394,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * through the entire data once, specify the schema explicitly using [[schema]]. * * You can set the following CSV-specific options to deal with CSV files: - *
  • `sep` or `delimiter` (default `,`): sets the single character as a delimiter for each + *
  • delimiter` (default `,`): sets the single character as a delimiter for each * field and value.
  • - *
  • `encoding` or `charset` (default `UTF-8`): decodes the CSV files by the given encoding + *
  • `charset` (default `UTF-8`): decodes the CSV files by the given encoding * type.
  • *
  • `quote` (default `"`): sets the single character used for escaping quoted values where * the delimiter can be part of the value.
  • diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index dc4e28dd611ee..17b5f308075c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -606,7 +606,7 @@ final class DataFrameWriter private[sql](df: DataFrame) { * }}} * * You can set the following CSV-specific option(s) for writing CSV files: - *
  • `sep` or `delimiter` (default `,`): sets the single character as a delimiter for each + *
  • `delimiter` (default `,`): sets the single character as a delimiter for each * field and value.
  • *
  • `quote` (default `"`): sets the single character used for escaping quoted values where * the delimiter can be part of the value.
  • From 54f58d381b47762543278fffa33fb5fd13f64b91 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Sun, 1 May 2016 20:42:42 +0900 Subject: [PATCH 5/6] Add starting tag for option, delimiter. --- .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 0137084845ed5..c03965f2d569a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -394,7 +394,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * through the entire data once, specify the schema explicitly using [[schema]]. * * You can set the following CSV-specific options to deal with CSV files: - *
  • delimiter` (default `,`): sets the single character as a delimiter for each + *
  • `delimiter` (default `,`): sets the single character as a delimiter for each * field and value.
  • *
  • `charset` (default `UTF-8`): decodes the CSV files by the given encoding * type.
  • From ab70b6d05db5744fce3fecd37a28647fd6622411 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 2 May 2016 09:32:35 +0900 Subject: [PATCH 6/6] Address comments --- python/pyspark/sql/readwriter.py | 16 ++++++++-------- .../org/apache/spark/sql/DataFrameReader.scala | 8 ++++---- .../org/apache/spark/sql/DataFrameWriter.scala | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 9856c639627de..cc5e93dcadf4d 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -283,12 +283,12 @@ def csv(self, paths): :param paths: string, or list of strings, for input path(s). You can set the following CSV-specific options to deal with CSV files: - * ``delimiter`` (default ``,``): sets the single character as a delimiter \ + * ``sep`` (default ``,``): sets the single character as a separator \ for each field and value. * ``charset`` (default ``UTF-8``): decodes the CSV files by the given \ encoding type. * ``quote`` (default ``"``): sets the single character used for escaping \ - quoted values where the delimiter can be part of the value. + quoted values where the separator can be part of the value. * ``escape`` (default ``\``): sets the single character used for escaping quotes \ inside an already quoted value. * ``comment`` (default empty string): sets the single character used for skipping \ @@ -305,9 +305,9 @@ def csv(self, paths): infinity value. * ``negativeInf`` (default ``-Inf``): sets the string representation of a negative \ infinity value. - * ``dateFormat`` (default ``null``): sets the string that indicates a date format. \ + * ``dateFormat`` (default ``None``): sets the string that indicates a date format. \ Custom date formats follow the formats at ``java.text.SimpleDateFormat``. This \ - applies to both date type and timestamp type By default, it is `null` which means \ + applies to both date type and timestamp type. By default, it is None which means \ trying to parse times and date by ``java.sql.Timestamp.valueOf()`` and \ ``java.sql.Date.valueOf()``. * ``maxColumns`` (default ``20480``): defines a hard limit of how many columns \ @@ -316,8 +316,8 @@ def csv(self, paths): characters allowed for any given value being read. * ``mode`` (default ``PERMISSIVE``): allows a mode for dealing with corrupt records \ during parsing. - * ``PERMISSIVE`` : sets other fields to `null` when it meets a corrupted record. \ - When a schema is set by user, it sets `null` for extra fields. + * ``PERMISSIVE`` : sets other fields to ``null`` when it meets a corrupted record. \ + When a schema is set by user, it sets ``null`` for extra fields. * ``DROPMALFORMED`` : ignores the whole corrupted records. * ``FAILFAST`` : throws an exception when it meets corrupted records. @@ -703,10 +703,10 @@ def csv(self, path, mode=None, compression=None): snappy and deflate). You can set the following CSV-specific options to deal with CSV files: - * ``delimiter`` (default ``,``): sets the single character as a delimiter \ + * ``sep`` (default ``,``): sets the single character as a separator \ for each field and value. * ``quote`` (default ``"``): sets the single character used for escaping \ - quoted values where the delimiter can be part of the value. + quoted values where the separator can be part of the value. * ``escape`` (default ``\``): sets the single character used for escaping quotes \ inside an already quoted value. 
* ``header`` (default ``false``): writes the names of columns as the first line. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index c03965f2d569a..2d4a68f3c3a94 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -394,12 +394,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * through the entire data once, specify the schema explicitly using [[schema]]. * * You can set the following CSV-specific options to deal with CSV files: - *
  • `delimiter` (default `,`): sets the single character as a delimiter for each + *
  • `sep` (default `,`): sets the single character as a separator for each * field and value.
  • - *
  • `charset` (default `UTF-8`): decodes the CSV files by the given encoding + *
  • `encoding` (default `UTF-8`): decodes the CSV files by the given encoding * type.
  • *
  • `quote` (default `"`): sets the single character used for escaping quoted values where - * the delimiter can be part of the value.
  • + * the separator can be part of the value. *
  • `escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.
  • *
  • `comment` (default empty string): sets the single character used for skipping lines @@ -417,7 +417,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * value.
  • *
  • `dateFormat` (default `null`): sets the string that indicates a date format. Custom date * formats follow the formats at `java.text.SimpleDateFormat`. This applies to both date type - * and timestamp type By default, it is `null` which means trying to parse times and date by + * and timestamp type. By default, it is `null` which means trying to parse times and date by * `java.sql.Timestamp.valueOf()` and `java.sql.Date.valueOf()`.
  • *
  • `maxColumns` (default `20480`): defines a hard limit of how many columns * a record can have.
  • diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 17b5f308075c2..a57d47d28ceb5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -606,10 +606,10 @@ final class DataFrameWriter private[sql](df: DataFrame) { * }}} * * You can set the following CSV-specific option(s) for writing CSV files: - *
  • `delimiter` (default `,`): sets the single character as a delimiter for each + *
  • `sep` (default `,`): sets the single character as a separator for each * field and value.
  • *
  • `quote` (default `"`): sets the single character used for escaping quoted values where - * the delimiter can be part of the value.
  • + * the separator can be part of the value. *
  • `escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.
  • *
  • `header` (default `false`): writes the names of columns as the first line.
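
For reference (not part of the patches above): a minimal Scala sketch exercising the
option names as they stand after PATCH 6/6. It assumes Spark 2.0 with a running
`SparkSession`; the app name and the input/output paths are hypothetical.

    import org.apache.spark.sql.SparkSession

    object CsvOptionsExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("csv-options-example").getOrCreate()

        // Reader-side options documented in DataFrameReader.csv.
        val df = spark.read
          .option("sep", ",")               // single-character field separator
          .option("header", "true")         // treat the first line as column names
          .option("nullValue", "NULL")      // string to read back as null
          .option("mode", "DROPMALFORMED")  // drop records that fail to parse
          .csv("/tmp/people.csv")           // hypothetical input path

        // Writer-side options documented in DataFrameWriter.csv.
        df.write
          .option("sep", "\t")
          .option("header", "true")
          .option("nullValue", "NULL")
          .option("compression", "gzip")    // none, bzip2, gzip, lz4, snappy or deflate
          .csv("/tmp/people_out")           // hypothetical output path

        spark.stop()
      }
    }

`PERMISSIVE` (the default mode) nulls out malformed fields instead of dropping the
record; `FAILFAST` throws on the first corrupted record, which is the safer choice in
tests.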