@@ -238,34 +238,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
 
   /**
    * Loads a JSON file (one object per line) and returns the result as a [[DataFrame]].
-   *
-   * This function goes through the input once to determine the input schema. If you know the
-   * schema in advance, use the version that specifies the schema to avoid the extra scan.
-   *
-   * You can set the following JSON-specific options to deal with non-standard JSON files:
-   * <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
-   * <li>`prefersDecimal` (default `false`): infers all floating-point values as a decimal
-   * type. If the values do not fit in decimal, then it infers them as doubles.</li>
-   * <li>`allowComments` (default `false`): ignores Java/C++ style comment in JSON records</li>
-   * <li>`allowUnquotedFieldNames` (default `false`): allows unquoted JSON field names</li>
-   * <li>`allowSingleQuotes` (default `true`): allows single quotes in addition to double quotes
-   * </li>
-   * <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
-   * (e.g. 00012)</li>
-   * <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
-   * character using backslash quoting mechanism</li>
-   * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
-   * during parsing.</li>
-   * <ul>
-   * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record, and puts the
-   * malformed string into a new field configured by `columnNameOfCorruptRecord`. When
-   * a schema is set by user, it sets `null` for extra fields.</li>
-   * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
-   * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
-   * </ul>
-   * <li>`columnNameOfCorruptRecord` (default is the value specified in
-   * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string
-   * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.</li>
+   * See the documentation on the overloaded `json()` method with varargs for more details.
    *
    * @since 1.4.0
    */
@@ -281,6 +254,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * schema in advance, use the version that specifies the schema to avoid the extra scan.
    *
    * You can set the following JSON-specific options to deal with non-standard JSON files:
+   * <ul>
    * <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
    * <li>`prefersDecimal` (default `false`): infers all floating-point values as a decimal
    * type. If the values do not fit in decimal, then it infers them as doubles.</li>
@@ -304,7 +278,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * <li>`columnNameOfCorruptRecord` (default is the value specified in
    * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string
    * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.</li>
-   *
+   * </ul>
    * @since 2.0.0
    */
   @scala.annotation.varargs
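
For context, here is a minimal usage sketch of the varargs `json()` overload whose scaladoc is kept above. The path and option values are illustrative, not part of the patch:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("json-options-example").getOrCreate()

// Read line-delimited JSON, keeping malformed records in a dedicated column.
// Option names come from the scaladoc above; the input path is a placeholder.
val people = spark.read
  .option("primitivesAsString", "true")                    // infer every primitive as a string
  .option("mode", "PERMISSIVE")                            // null out fields of corrupt records
  .option("columnNameOfCorruptRecord", "_corrupt_record")  // where the raw malformed string goes
  .json("/path/to/people.json")

people.printSchema()
```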
@@ -356,54 +330,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   }
 
   /**
-   * Loads a CSV file and returns the result as a [[DataFrame]].
-   *
-   * This function will go through the input once to determine the input schema if `inferSchema`
-   * is enabled. To avoid going through the entire data once, disable `inferSchema` option or
-   * specify the schema explicitly using [[schema]].
-   *
-   * You can set the following CSV-specific options to deal with CSV files:
-   * <li>`sep` (default `,`): sets the single character as a separator for each
-   * field and value.</li>
-   * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding
-   * type.</li>
-   * <li>`quote` (default `"`): sets the single character used for escaping quoted values where
-   * the separator can be part of the value. If you would like to turn off quotations, you need to
-   * set not `null` but an empty string. This behaviour is different form
-   * `com.databricks.spark.csv`.</li>
-   * <li>`escape` (default `\`): sets the single character used for escaping quotes inside
-   * an already quoted value.</li>
-   * <li>`comment` (default empty string): sets the single character used for skipping lines
-   * beginning with this character. By default, it is disabled.</li>
-   * <li>`header` (default `false`): uses the first line as names of columns.</li>
-   * <li>`inferSchema` (default `false`): infers the input schema automatically from data. It
-   * requires one extra pass over the data.</li>
-   * <li>`ignoreLeadingWhiteSpace` (default `false`): defines whether or not leading whitespaces
-   * from values being read should be skipped.</li>
-   * <li>`ignoreTrailingWhiteSpace` (default `false`): defines whether or not trailing
-   * whitespaces from values being read should be skipped.</li>
-   * <li>`nullValue` (default empty string): sets the string representation of a null value.</li>
-   * <li>`nanValue` (default `NaN`): sets the string representation of a non-number" value.</li>
-   * <li>`positiveInf` (default `Inf`): sets the string representation of a positive infinity
-   * value.</li>
-   * <li>`negativeInf` (default `-Inf`): sets the string representation of a negative infinity
-   * value.</li>
-   * <li>`dateFormat` (default `null`): sets the string that indicates a date format. Custom date
-   * formats follow the formats at `java.text.SimpleDateFormat`. This applies to both date type
-   * and timestamp type. By default, it is `null` which means trying to parse times and date by
-   * `java.sql.Timestamp.valueOf()` and `java.sql.Date.valueOf()`.</li>
-   * <li>`maxColumns` (default `20480`): defines a hard limit of how many columns
-   * a record can have.</li>
-   * <li>`maxCharsPerColumn` (default `1000000`): defines the maximum number of characters allowed
-   * for any given value being read.</li>
-   * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
-   * during parsing.</li>
-   * <ul>
-   * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record. When
-   * a schema is set by user, it sets `null` for extra fields.</li>
-   * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
-   * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
-   * </ul>
+   * Loads a CSV file and returns the result as a [[DataFrame]]. See the documentation on the
+   * other overloaded `csv()` method for more details.
    *
    * @since 2.0.0
    */
@@ -420,6 +348,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * specify the schema explicitly using [[schema]].
    *
    * You can set the following CSV-specific options to deal with CSV files:
+   * <ul>
    * <li>`sep` (default `,`): sets the single character as a separator for each
    * field and value.</li>
    * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding
@@ -461,20 +390,15 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
    * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
    * </ul>
-   *
+   * </ul>
    * @since 2.0.0
    */
   @scala.annotation.varargs
   def csv(paths: String*): DataFrame = format("csv").load(paths: _*)
 
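
A brief usage sketch of the `csv()` reader with a few of the options listed above. The file path and option values are illustrative only:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("csv-options-example").getOrCreate()

// Parse a headered, semicolon-separated file and let Spark infer column types.
// Option names follow the scaladoc above; the path is a placeholder.
val sales = spark.read
  .option("header", "true")        // first line holds the column names
  .option("sep", ";")              // non-default field separator
  .option("inferSchema", "true")   // costs one extra pass over the data
  .option("nullValue", "NA")       // treat "NA" as null
  .csv("/path/to/sales.csv")

sales.show(5)
```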
   /**
-   * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
-   * [[DataFrame]] if no paths are passed in.
-   *
-   * You can set the following Parquet-specific option(s) for reading Parquet files:
-   * <li>`mergeSchema` (default is the value specified in `spark.sql.parquet.mergeSchema`): sets
-   * whether we should merge schemas collected from all Parquet part-files. This will override
-   * `spark.sql.parquet.mergeSchema`.</li>
+   * Loads a Parquet file, returning the result as a [[DataFrame]]. See the documentation
+   * on the other overloaded `parquet()` method for more details.
    *
    * @since 2.0.0
    */
@@ -484,14 +408,14 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   }
 
   /**
-   * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
-   * [[DataFrame]] if no paths are passed in.
+   * Loads a Parquet file, returning the result as a [[DataFrame]].
    *
    * You can set the following Parquet-specific option(s) for reading Parquet files:
+   * <ul>
    * <li>`mergeSchema` (default is the value specified in `spark.sql.parquet.mergeSchema`): sets
    * whether we should merge schemas collected from all Parquet part-files. This will override
    * `spark.sql.parquet.mergeSchema`.</li>
-   *
+   * </ul>
    * @since 1.4.0
    */
   @scala.annotation.varargs
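
A small sketch of reading Parquet with the `mergeSchema` option described above; the input path is a placeholder:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("parquet-example").getOrCreate()

// Merge the schemas found across all Parquet part-files instead of taking the first one;
// this overrides spark.sql.parquet.mergeSchema for this read only.
val events = spark.read
  .option("mergeSchema", "true")
  .parquet("/path/to/events")   // placeholder directory of Parquet part-files

events.printSchema()
```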
@@ -534,18 +458,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
 
   /**
    * Loads text files and returns a [[DataFrame]] whose schema starts with a string column named
-   * "value", and followed by partitioned columns if there are any.
-   *
-   * Each line in the text files is a new row in the resulting DataFrame. For example:
-   * {{{
-   *   // Scala:
-   *   spark.read.text("/path/to/spark/README.md")
+   * "value", and followed by partitioned columns if there are any. See the documentation on
+   * the other overloaded `text()` method for more details.
    *
-   *   // Java:
-   *   spark.read().text("/path/to/spark/README.md")
-   * }}}
-   *
-   * @param path input path
    * @since 2.0.0
    */
   def text(path: String): DataFrame = {
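
For reference, the removed scaladoc illustrated `text()` with a README file; a self-contained version of that example (the path is only an illustration):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("text-example").getOrCreate()

// Each line of the input file becomes one row in a single "value" string column.
val lines = spark.read.text("/path/to/spark/README.md")
lines.printSchema()   // shows a single "value" column of type string
```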
@@ -573,22 +488,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   def text(paths: String*): DataFrame = format("text").load(paths: _*)
 
   /**
-   * Loads text files and returns a [[Dataset]] of String. The underlying schema of the Dataset
-   * contains a single string column named "value".
-   *
-   * If the directory structure of the text files contains partitioning information, those are
-   * ignored in the resulting Dataset. To include partitioning information as columns, use `text`.
-   *
-   * Each line in the text files is a new element in the resulting Dataset. For example:
-   * {{{
-   *   // Scala:
-   *   spark.read.textFile("/path/to/spark/README.md")
-   *
-   *   // Java:
-   *   spark.read().textFile("/path/to/spark/README.md")
-   * }}}
-   *
-   * @param path input path
+   * Loads text files and returns a [[Dataset]] of String. See the documentation on the
+   * other overloaded `textFile()` method for more details.
    * @since 2.0.0
    */
   def textFile(path: String): Dataset[String] = {
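
Similarly, a sketch of `textFile()`, which yields a `Dataset[String]` rather than a DataFrame; the path is illustrative:

```scala
import org.apache.spark.sql.{Dataset, SparkSession}

val spark = SparkSession.builder().appName("textfile-example").getOrCreate()

// One String element per line; partition columns in the directory layout are ignored.
val readme: Dataset[String] = spark.read.textFile("/path/to/spark/README.md")

println(readme.count())   // number of lines in the file
```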