Commit d2c86f4

document improvement
1 parent a1301f5 commit d2c86f4

File tree

14 files changed: +89 −46 lines changed

sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java

Lines changed: 4 additions & 1 deletion
@@ -17,12 +17,15 @@

 package org.apache.spark.sql.sources.v2;

+import org.apache.spark.annotation.InterfaceStability;
+
 /**
  * The base interface for data source v2. Implementations must have a public, no arguments
  * constructor.
  *
  * Note that this is an empty interface, data source implementations should mix-in at least one of
- * the plug-in interfaces like `ReadSupport`. Otherwise it's just a dummy data source which is
+ * the plug-in interfaces like {@link ReadSupport}. Otherwise it's just a dummy data source which is
  * un-readable/writable.
  */
+@InterfaceStability.Evolving
 public interface DataSourceV2 {}
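
As the Javadoc notes, DataSourceV2 by itself is an empty marker interface, so a source only becomes readable by mixing in ReadSupport (or ReadSupportWithSchema). A minimal sketch follows; EmptySource is a hypothetical name, and the readSchema()/createReadTasks() signatures are inferred from the {@link} references added elsewhere in this commit.

import java.util.Collections;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.sources.v2.DataSourceV2;
import org.apache.spark.sql.sources.v2.DataSourceV2Options;
import org.apache.spark.sql.sources.v2.ReadSupport;
import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
import org.apache.spark.sql.sources.v2.reader.ReadTask;
import org.apache.spark.sql.types.StructType;

public class EmptySource implements DataSourceV2, ReadSupport {

  // Data source v2 implementations must have a public, no-arguments constructor.
  public EmptySource() {}

  @Override
  public DataSourceV2Reader createReader(DataSourceV2Options options) {
    return new DataSourceV2Reader() {
      @Override
      public StructType readSchema() {
        // assumed signature: report the schema of the data to be scanned
        return new StructType().add("value", "string");
      }

      @Override
      public List<ReadTask<Row>> createReadTasks() {
        return Collections.emptyList(); // an empty, zero-partition scan
      }
    };
  }
}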

sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2Options.java

Lines changed: 3 additions & 0 deletions
@@ -22,10 +22,13 @@
 import java.util.Map;
 import java.util.Optional;

+import org.apache.spark.annotation.InterfaceStability;
+
 /**
  * An immutable string-to-string map in which keys are case-insensitive. This is used to represent
  * data source options.
  */
+@InterfaceStability.Evolving
 public class DataSourceV2Options {
   private final Map<String, String> keyLowerCasedMap;
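
A short sketch of how the case-insensitive lookup is expected to behave. The Map-accepting constructor and the Optional-returning get(...) are assumptions, based on the java.util.Optional import and the keyLowerCasedMap field visible in this hunk.

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.sql.sources.v2.DataSourceV2Options;

public class OptionsExample {
  public static void main(String[] args) {
    Map<String, String> raw = new HashMap<>();
    raw.put("PATH", "/tmp/data"); // keys are stored lower-cased internally

    DataSourceV2Options options = new DataSourceV2Options(raw); // assumed constructor

    // Lookups are case-insensitive; absent keys yield Optional.empty(), not null.
    System.out.println(options.get("path").orElse("<missing>"));    // prints /tmp/data
    System.out.println(options.get("missing").orElse("<missing>")); // prints <missing>
  }
}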

sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupport.java

Lines changed: 5 additions & 3 deletions
@@ -17,16 +17,18 @@

 package org.apache.spark.sql.sources.v2;

+import org.apache.spark.annotation.InterfaceStability;
 import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;

 /**
- * A mix-in interface for `DataSourceV2`. Users can implement this interface to provide data reading
- * ability and scan the data from the data source.
+ * A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to
+ * provide data reading ability and scan the data from the data source.
  */
+@InterfaceStability.Evolving
 public interface ReadSupport {

   /**
-   * Creates a `DataSourceV2Reader` to scan the data for this data source.
+   * Creates a {@link DataSourceV2Reader} to scan the data from this data source.
    *
    * @param options the options for this data source reader, which is an immutable case-insensitive
    *                string-to-string map.
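
From the user side, Spark instantiates the class named in format() through its no-arg constructor and, because it mixes in ReadSupport, calls createReader(options). A hedged sketch, assuming the DataFrameReader integration for v2 sources and reusing the hypothetical EmptySource from the earlier sketch:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ReadSupportUsage {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("ReadSupportUsage")
        .getOrCreate();

    // Every .option(...) entry is delivered to createReader as one immutable,
    // case-insensitive DataSourceV2Options map.
    Dataset<Row> df = spark.read()
        .format("com.example.EmptySource") // hypothetical source from the sketch above
        .option("path", "/tmp/data")
        .load();

    df.show();
    spark.stop();
  }
}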

sql/core/src/main/java/org/apache/spark/sql/sources/v2/ReadSupportWithSchema.java

Lines changed: 8 additions & 6 deletions
@@ -17,21 +17,23 @@

 package org.apache.spark.sql.sources.v2;

+import org.apache.spark.annotation.InterfaceStability;
 import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
 import org.apache.spark.sql.types.StructType;

 /**
- * A mix-in interface for `DataSourceV2`. Users can implement this interface to provide data reading
- * ability and scan the data from the data source.
+ * A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to
+ * provide data reading ability and scan the data from the data source.
  *
- * This is a variant of `ReadSupport` that accepts user-specified schema when reading data. A data
- * source can implement both `ReadSupport` and `ReadSupportWithSchema` if it supports both schema
- * inference and user-specified schema.
+ * This is a variant of {@link ReadSupport} that accepts user-specified schema when reading data.
+ * A data source can implement both {@link ReadSupport} and {@link ReadSupportWithSchema} if it
+ * supports both schema inference and user-specified schema.
  */
+@InterfaceStability.Evolving
 public interface ReadSupportWithSchema {

   /**
-   * Create a `DataSourceV2Reader` to scan the data for this data source.
+   * Create a {@link DataSourceV2Reader} to scan the data from this data source.
    *
    * @param schema the full schema of this data source reader. Full schema usually maps to the
    *               physical schema of the underlying storage of this data source reader, e.g.
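
A sketch of a source that supports both schema inference and a user-specified schema by mixing in both interfaces, as the Javadoc above allows. FlexibleSource is hypothetical, and the anonymous reader's methods are the assumed DataSourceV2Reader signatures:

import java.util.Collections;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.sources.v2.DataSourceV2;
import org.apache.spark.sql.sources.v2.DataSourceV2Options;
import org.apache.spark.sql.sources.v2.ReadSupport;
import org.apache.spark.sql.sources.v2.ReadSupportWithSchema;
import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
import org.apache.spark.sql.sources.v2.reader.ReadTask;
import org.apache.spark.sql.types.StructType;

public class FlexibleSource implements DataSourceV2, ReadSupport, ReadSupportWithSchema {

  @Override // chosen when the user does not supply a schema: infer one
  public DataSourceV2Reader createReader(DataSourceV2Options options) {
    return reader(new StructType().add("value", "string")); // "inferred" schema
  }

  @Override // chosen when the user calls DataFrameReader.schema(...)
  public DataSourceV2Reader createReader(StructType schema, DataSourceV2Options options) {
    return reader(schema); // honor the user-specified schema as the full schema
  }

  private DataSourceV2Reader reader(StructType schema) {
    return new DataSourceV2Reader() {
      @Override
      public StructType readSchema() { return schema; }

      @Override
      public List<ReadTask<Row>> createReadTasks() {
        return Collections.emptyList();
      }
    };
  }
}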

sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataReader.java

Lines changed: 5 additions & 1 deletion
@@ -19,9 +19,13 @@

 import java.io.Closeable;

+import org.apache.spark.annotation.InterfaceStability;
+
 /**
- * A data reader returned by a read task and is responsible for outputting data for a RDD partition.
+ * A data reader returned by {@link ReadTask#createReader()} and is responsible for outputting data
+ * for a RDD partition.
  */
+@InterfaceStability.Evolving
 public interface DataReader<T> extends Closeable {

   /**
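
A sketch of the executor-side half of the contract. The next()/get() iteration methods are assumptions in line with the Iterator analogy in ReadTask's Javadoc; close() comes from Closeable. RangeDataReader is a hypothetical reader that emits the integers [start, end) as single-column rows:

import java.io.IOException;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.sources.v2.reader.DataReader;

public class RangeDataReader implements DataReader<Row> {
  private final int end;
  private int current;

  public RangeDataReader(int start, int end) {
    this.current = start - 1; // positioned before the first row until next() is called
    this.end = end;
  }

  @Override
  public boolean next() { // advance; false once this partition is exhausted
    current += 1;
    return current < end;
  }

  @Override
  public Row get() { // return the row the reader is currently positioned on
    return RowFactory.create(current);
  }

  @Override
  public void close() throws IOException {
    // release connections or file handles here; nothing to do for an in-memory reader
  }
}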

sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java

Lines changed: 12 additions & 5 deletions
@@ -19,26 +19,33 @@

 import java.util.List;

+import org.apache.spark.annotation.InterfaceStability;
 import org.apache.spark.sql.Row;
+import org.apache.spark.sql.sources.v2.DataSourceV2Options;
+import org.apache.spark.sql.sources.v2.ReadSupport;
+import org.apache.spark.sql.sources.v2.ReadSupportWithSchema;
 import org.apache.spark.sql.types.StructType;

 /**
- * A data source reader that can mix in various query optimization interfaces and implement these
- * optimizations. The actual scan logic should be delegated to `ReadTask`s that are returned by
- * this data source reader.
+ * A data source reader that is returned by
+ * {@link ReadSupport#createReader(DataSourceV2Options)} or
+ * {@link ReadSupportWithSchema#createReader(StructType, DataSourceV2Options)}.
+ * It can mix in various query optimization interfaces to speed up the data scan. The actual scan
+ * logic should be delegated to {@link ReadTask}s that are returned by {@link #createReadTasks()}.
  *
  * There are mainly 3 kinds of query optimizations:
  *   1. Operators push-down. E.g., filter push-down, required columns push-down(aka column
  *      pruning), etc. These push-down interfaces are named like `SupportsPushDownXXX`.
  *   2. Information Reporting. E.g., statistics reporting, ordering reporting, etc. These
  *      reporting interfaces are named like `SupportsReportingXXX`.
- *   3. Special scan. E.g, columnar scan, unsafe row scan, etc. Note that a data source reader can
- *      implement at most one special scan. These scan interfaces are named like `SupportsScanXXX`.
+ *   3. Special scans. E.g, columnar scan, unsafe row scan, etc. These scan interfaces are named
+ *      like `SupportsScanXXX`.
  *
  * Spark first applies all operator push-down optimizations that this data source supports. Then
  * Spark collects information this data source reported for further optimizations. Finally Spark
  * issues the scan request and does the actual data reading.
  */
+@InterfaceStability.Evolving
 public interface DataSourceV2Reader {

   /**
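
A sketch of the delegation this Javadoc describes: the reader plans the scan on the driver, and each ReadTask is shipped to an executor, where it creates the DataReader for one partition. RangeReader and RangeReadTask are hypothetical names; RangeDataReader is the sketch under DataReader.java above. readSchema() is an assumed signature.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.sources.v2.reader.DataReader;
import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
import org.apache.spark.sql.sources.v2.reader.ReadTask;
import org.apache.spark.sql.types.StructType;

public class RangeReader implements DataSourceV2Reader {

  @Override
  public StructType readSchema() {
    return new StructType().add("i", "int");
  }

  @Override
  public List<ReadTask<Row>> createReadTasks() {
    // One ReadTask per RDD partition: here [0, 5) and [5, 10).
    return Arrays.<ReadTask<Row>>asList(new RangeReadTask(0, 5), new RangeReadTask(5, 10));
  }
}

// Serializable because Spark ships each task to an executor before running it.
class RangeReadTask implements ReadTask<Row> {
  private final int start;
  private final int end;

  RangeReadTask(int start, int end) {
    this.start = start;
    this.end = end;
  }

  @Override
  public DataReader<Row> createReader() {
    return new RangeDataReader(start, end); // created on the executor
  }
}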

sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ReadTask.java

Lines changed: 6 additions & 2 deletions
@@ -19,13 +19,17 @@

 import java.io.Serializable;

+import org.apache.spark.annotation.InterfaceStability;
+
 /**
- * A read task returned by a data source reader and is responsible to create the data reader.
- * The relationship between `ReadTask` and `DataReader` is similar to `Iterable` and `Iterator`.
+ * A read task returned by {@link DataSourceV2Reader#createReadTasks()} and is responsible for
+ * creating the actual data reader. The relationship between {@link ReadTask} and {@link DataReader}
+ * is similar to the relationship between {@link Iterable} and {@link java.util.Iterator}.
  *
  * Note that, the read task will be serialized and sent to executors, then the data reader will be
  * created on executors and do the actual reading.
  */
+@InterfaceStability.Evolving
 public interface ReadTask<T> extends Serializable {

   /**
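
The serialization note above is the main design constraint on ReadTask: the task should carry only Serializable state, while non-serializable resources (connections, file handles) belong in the DataReader it creates on the executor. A hedged sketch; preferredLocations() is an assumption (a locality-hint method this API exposes in some versions), and the HDFS-flavored fields are purely illustrative:

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.sources.v2.reader.DataReader;
import org.apache.spark.sql.sources.v2.reader.ReadTask;

public class BlockReadTask implements ReadTask<Row> {
  // Only Serializable state here: Strings, primitives, arrays.
  private final String path;
  private final long offset;
  private final long length;
  private final String[] hosts; // machines that hold this block locally

  public BlockReadTask(String path, long offset, long length, String[] hosts) {
    this.path = path;
    this.offset = offset;
    this.length = length;
    this.hosts = hosts;
  }

  // Assumed API: lets Spark schedule this task close to the data.
  public String[] preferredLocations() {
    return hosts;
  }

  @Override
  public DataReader<Row> createReader() {
    // Runs on the executor; a real task would open path at [offset, offset + length) here.
    return new DataReader<Row>() {
      @Override public boolean next() { return false; } // empty stand-in partition
      @Override public Row get() { return RowFactory.create(); }
      @Override public void close() { }
    };
  }
}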

sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java

Lines changed: 4 additions & 1 deletion
@@ -19,10 +19,13 @@

 import java.util.OptionalLong;

+import org.apache.spark.annotation.InterfaceStability;
+
 /**
  * An interface to represent statistics for a data source, which is returned by
- * `SupportsReportStatistics`.
+ * {@link SupportsReportStatistics#getStatistics()}.
  */
+@InterfaceStability.Evolving
 public interface Statistics {
   OptionalLong sizeInBytes();
   OptionalLong numRows();
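
The two OptionalLong getters let a source report only the statistics it actually knows. A reader exposes them by mixing in SupportsReportStatistics and returning an instance from getStatistics(), per the reference above. KnownSizeStatistics is a hypothetical carrier:

import java.util.OptionalLong;

import org.apache.spark.sql.sources.v2.reader.Statistics;

public class KnownSizeStatistics implements Statistics {
  private final long sizeInBytes;

  public KnownSizeStatistics(long sizeInBytes) {
    this.sizeInBytes = sizeInBytes;
  }

  @Override
  public OptionalLong sizeInBytes() {
    return OptionalLong.of(sizeInBytes); // known: e.g. total size of the files on disk
  }

  @Override
  public OptionalLong numRows() {
    return OptionalLong.empty(); // unknown: Spark falls back to its own estimates
  }
}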

sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java

Lines changed: 8 additions & 5 deletions
@@ -22,13 +22,16 @@
 import org.apache.spark.sql.catalyst.expressions.Expression;

 /**
- * A mix-in interface for `DataSourceV2Reader`. Users can implement this interface to push down
- * arbitrary expressions as predicates to the data source. This is an experimental and unstable
- * interface as `Expression` is not public and may get changed in future Spark versions.
+ * A mix-in interface for {@link DataSourceV2Reader}. Data source readers can implement this
+ * interface to push down arbitrary expressions as predicates to the data source.
+ * This is an experimental and unstable interface as {@link Expression} is not public and may get
+ * changed in the future Spark versions.
  *
- * Note that, if users implement both this interface and `SupportsPushDownFilters`, Spark will
- * ignore `SupportsPushDownFilters` and only process this interface.
+ * Note that, if data source readers implement both this interface and
+ * {@link SupportsPushDownFilters}, Spark will ignore {@link SupportsPushDownFilters} and only
+ * process this interface.
  */
+@InterfaceStability.Evolving
 @Experimental
 @InterfaceStability.Unstable
 public interface SupportsPushDownCatalystFilters {

sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java

Lines changed: 7 additions & 4 deletions
@@ -17,15 +17,18 @@

 package org.apache.spark.sql.sources.v2.reader;

+import org.apache.spark.annotation.InterfaceStability;
 import org.apache.spark.sql.sources.Filter;

 /**
- * A mix-in interface for `DataSourceV2Reader`. Users can implement this interface to push down
- * filters to the data source and reduce the size of the data to be read.
+ * A mix-in interface for {@link DataSourceV2Reader}. Data source readers can implement this
+ * interface to push down filters to the data source and reduce the size of the data to be read.
  *
- * Note that, if users implement both this interface and `SupportsPushDownCatalystFilters`, Spark
- * will ignore this interface and only process `SupportsPushDownCatalystFilters`.
+ * Note that, if data source readers implement both this interface and
+ * {@link SupportsPushDownCatalystFilters}, Spark will ignore this interface and only process
+ * {@link SupportsPushDownCatalystFilters}.
  */
+@InterfaceStability.Evolving
 public interface SupportsPushDownFilters {

   /**
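
A sketch of the split this interface implies: the source keeps the filters it can evaluate during the scan and hands the rest back for Spark to re-apply. The pushFilters(Filter[]) signature (returning the filters the source could NOT handle) is an assumption, since the method itself is outside this hunk; FilteringReader accepts only GreaterThan as an example, and readSchema()/createReadTasks() are the assumed reader signatures:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.sources.GreaterThan;
import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
import org.apache.spark.sql.sources.v2.reader.ReadTask;
import org.apache.spark.sql.sources.v2.reader.SupportsPushDownFilters;
import org.apache.spark.sql.types.StructType;

public class FilteringReader implements DataSourceV2Reader, SupportsPushDownFilters {
  private final List<Filter> pushed = new ArrayList<>();

  // Assumed signature: return the filters this source could NOT handle, so that
  // Spark still evaluates them after the scan.
  public Filter[] pushFilters(Filter[] filters) {
    List<Filter> unsupported = new ArrayList<>();
    for (Filter f : filters) {
      if (f instanceof GreaterThan) {
        pushed.add(f);      // evaluated inside the scan, shrinking the data read
      } else {
        unsupported.add(f); // Spark applies this one itself
      }
    }
    return unsupported.toArray(new Filter[0]);
  }

  @Override
  public StructType readSchema() {
    return new StructType().add("i", "int");
  }

  @Override
  public List<ReadTask<Row>> createReadTasks() {
    // A real implementation would use `pushed` here to prune partitions or rows.
    return Collections.emptyList();
  }
}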
