diff --git a/core/src/main/scala/org/apache/spark/SSLOptions.scala b/core/src/main/scala/org/apache/spark/SSLOptions.scala
index be19179b00a4..5f14102c3c36 100644
--- a/core/src/main/scala/org/apache/spark/SSLOptions.scala
+++ b/core/src/main/scala/org/apache/spark/SSLOptions.scala
@@ -150,8 +150,8 @@ private[spark] object SSLOptions extends Logging {
* $ - `[ns].enabledAlgorithms` - a comma separated list of ciphers
*
* For a list of protocols and ciphers supported by particular Java versions, you may go to
- * [[https://blogs.oracle.com/java-platform-group/entry/diagnosing_tls_ssl_and_https Oracle
- * blog page]].
+ * <a href="https://blogs.oracle.com/java-platform-group/entry/diagnosing_tls_ssl_and_https">
+ * Oracle blog page</a>.
*
* You can optionally specify the default configuration. If you do, for each setting which is
* missing in SparkConf, the corresponding setting is used from the default configuration.
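
The doc comment above describes namespaced SSL settings with a default-configuration fallback. A minimal sketch of how such settings might be supplied through SparkConf; the exact keys and the `ui` namespace are shown purely for illustration:

    import org.apache.spark.SparkConf

    val conf = new SparkConf()
      // Default SSL settings, consulted when a namespace does not override them.
      .set("spark.ssl.enabled", "true")
      .set("spark.ssl.enabledAlgorithms",
        "TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA")
      // A namespace-specific override, e.g. for one subsystem.
      .set("spark.ssl.ui.enabledAlgorithms", "TLS_RSA_WITH_AES_256_CBC_SHA")
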
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index bff5a29bb60f..d7e3a1b1be48 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -405,7 +405,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* partitioning of the resulting key-value pair RDD by passing a Partitioner.
*
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
- * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
+ * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey`
* will provide much better performance.
*/
def groupByKey(partitioner: Partitioner): JavaPairRDD[K, JIterable[V]] =
@@ -416,7 +416,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* resulting RDD with into `numPartitions` partitions.
*
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
- * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
+ * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey`
* will provide much better performance.
*/
def groupByKey(numPartitions: Int): JavaPairRDD[K, JIterable[V]] =
@@ -546,7 +546,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* resulting RDD with the existing partitioner/parallelism level.
*
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
- * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
+ * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey`
* will provide much better performance.
*/
def groupByKey(): JavaPairRDD[K, JIterable[V]] =
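
The @note repeated in the hunks above recommends reduceByKey or combineByKey over groupByKey for aggregations. A small Scala sketch of the difference, assuming an existing SparkContext `sc`:

    // Hypothetical pair RDD of (word, count) records.
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

    // groupByKey shuffles every value for a key before summing.
    val viaGroup = pairs.groupByKey().mapValues(_.sum)

    // reduceByKey combines values map-side first, so far less data is shuffled.
    val viaReduce = pairs.reduceByKey(_ + _)
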
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
index ccd94f876e0b..a20d264be5af 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
@@ -103,10 +103,10 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
* @param fraction expected size of the sample as a fraction of this RDD's size
* without replacement: probability that each element is chosen; fraction must be [0, 1]
- * with replacement: expected number of times each element is chosen; fraction must be >= 0
+ * with replacement: expected number of times each element is chosen; fraction must be &gt;= 0
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
- * of the given [[RDD]].
+ * of the given `RDD`.
*/
def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] =
sample(withReplacement, fraction, Utils.random.nextLong)
@@ -117,11 +117,11 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
* @param fraction expected size of the sample as a fraction of this RDD's size
* without replacement: probability that each element is chosen; fraction must be [0, 1]
- * with replacement: expected number of times each element is chosen; fraction must be >= 0
+ * with replacement: expected number of times each element is chosen; fraction must be &gt;= 0
* @param seed seed for the random number generator
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
- * of the given [[RDD]].
+ * of the given `RDD`.
*/
def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] =
wrapRDD(rdd.sample(withReplacement, fraction, seed))
@@ -167,7 +167,7 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
* Return an RDD with the elements from `this` that are not in `other`.
*
* Uses `this` partitioner/partition size, because even if `other` is huge, the resulting
- * RDD will be <= us.
+ * RDD will be &lt;= us.
*/
def subtract(other: JavaRDD[T]): JavaRDD[T] = wrapRDD(rdd.subtract(other))
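
To illustrate the fraction semantics documented for sample above, a brief sketch assuming an existing SparkContext `sc` (the values are arbitrary):

    val data = sc.parallelize(1 to 1000)

    // Without replacement: each element is kept with probability ~0.1;
    // the resulting count is only approximately 100.
    val tenPercent = data.sample(withReplacement = false, fraction = 0.1, seed = 42L)

    // With replacement: each element is drawn about twice on average,
    // so duplicates appear and fraction may exceed 1.0.
    val doubled = data.sample(withReplacement = true, fraction = 2.0, seed = 42L)
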
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 38d347aeab8c..9481156bc93a 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -238,7 +238,9 @@ class JavaSparkContext(val sc: SparkContext)
* }}}
*
* Do
- * `JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
+ * {{{
+ * JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")
+ * }}}
*
* then `rdd` contains
* {{{
@@ -270,7 +272,9 @@ class JavaSparkContext(val sc: SparkContext)
* }}}
*
* Do
- * `JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
+ * {{{
+ * JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")
+ * }}}
*
* then `rdd` contains
* {{{
@@ -749,7 +753,7 @@ class JavaSparkContext(val sc: SparkContext)
/**
* Get a local property set in this thread, or null if it is missing. See
- * [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]].
+ * `org.apache.spark.api.java.JavaSparkContext.setLocalProperty`.
*/
def getLocalProperty(key: String): String = sc.getLocalProperty(key)
@@ -769,7 +773,7 @@ class JavaSparkContext(val sc: SparkContext)
* Application programmers can use this method to group all those jobs together and give a
* group description. Once set, the Spark web UI will associate such jobs with this group.
*
- * The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]]
+ * The application can also use `org.apache.spark.api.java.JavaSparkContext.cancelJobGroup`
* to cancel all running jobs in this group. For example,
* {{{
* // In the main thread:
@@ -802,7 +806,7 @@ class JavaSparkContext(val sc: SparkContext)
/**
* Cancel active jobs for the specified group. See
- * [[org.apache.spark.api.java.JavaSparkContext.setJobGroup]] for more information.
+ * `org.apache.spark.api.java.JavaSparkContext.setJobGroup` for more information.
*/
def cancelJobGroup(groupId: String): Unit = sc.cancelJobGroup(groupId)
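
The setJobGroup/cancelJobGroup pairing referenced above can be exercised roughly as follows, mirroring the example embedded in the doc comment; the group id is hypothetical and `sc` is an existing SparkContext:

    // In the main thread: tag subsequent jobs with a group id and description.
    sc.setJobGroup("nightly-aggregation", "recompute nightly aggregates")
    val total = sc.parallelize(1 to 100000).map(_ * 2L).reduce(_ + _)

    // In a separate thread (e.g. a UI handler): cancel everything in that group.
    sc.cancelJobGroup("nightly-aggregation")
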
diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
index 6ba79e506a64..2e991ce394c4 100644
--- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
+++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
@@ -172,7 +172,7 @@ private final object SnappyCompressionCodec {
}
/**
- * Wrapper over [[SnappyOutputStream]] which guards against write-after-close and double-close
+ * Wrapper over `SnappyOutputStream` which guards against write-after-close and double-close
* issues. See SPARK-7660 for more details. This wrapping can be removed if we upgrade to a version
* of snappy-java that contains the fix for https://github.com/xerial/snappy-java/issues/107.
*/
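
The comment above describes a wrapper that guards SnappyOutputStream against write-after-close and double-close. A generic, illustrative guard (not Spark's actual wrapper class) might look like this:

    import java.io.{IOException, OutputStream}

    // Rejects writes after close and makes close() idempotent.
    class CloseGuardedOutputStream(out: OutputStream) extends OutputStream {
      private var closed = false

      override def write(b: Int): Unit = {
        if (closed) throw new IOException("Stream is already closed")
        out.write(b)
      }

      override def flush(): Unit = {
        if (!closed) out.flush()
      }

      override def close(): Unit = {
        if (!closed) {
          closed = true
          out.close()
        }
      }
    }
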
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index bff2b8f1d06c..8e673447581c 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -70,8 +70,8 @@ import org.apache.spark.util.random.{BernoulliCellSampler, BernoulliSampler, Poi
* All of the scheduling and execution in Spark is done based on these methods, allowing each RDD
* to implement its own way of computing itself. Indeed, users can implement custom RDDs (e.g. for
* reading data from a new storage system) by overriding these functions. Please refer to the
- * [[http://people.csail.mit.edu/matei/papers/2012/nsdi_spark.pdf Spark paper]] for more details
- * on RDD internals.
+ * <a href="http://people.csail.mit.edu/matei/papers/2012/nsdi_spark.pdf">Spark paper</a>
+ * for more details on RDD internals.
*/
abstract class RDD[T: ClassTag](
@transient private var _sc: SparkContext,
@@ -469,7 +469,7 @@ abstract class RDD[T: ClassTag](
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
* @param fraction expected size of the sample as a fraction of this RDD's size
* without replacement: probability that each element is chosen; fraction must be [0, 1]
- * with replacement: expected number of times each element is chosen; fraction must be >= 0
+ * with replacement: expected number of times each element is chosen; fraction must be &gt;= 0
* @param seed seed for the random number generator
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
@@ -675,8 +675,8 @@ abstract class RDD[T: ClassTag](
* may even differ each time the resulting RDD is evaluated.
*
* @note This operation may be very expensive. If you are grouping in order to perform an
- * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]]
- * or [[PairRDDFunctions.reduceByKey]] will provide much better performance.
+ * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey`
+ * or `PairRDDFunctions.reduceByKey` will provide much better performance.
*/
def groupBy[K](f: T => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] = withScope {
groupBy[K](f, defaultPartitioner(this))
@@ -688,8 +688,8 @@ abstract class RDD[T: ClassTag](
* may even differ each time the resulting RDD is evaluated.
*
* @note This operation may be very expensive. If you are grouping in order to perform an
- * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]]
- * or [[PairRDDFunctions.reduceByKey]] will provide much better performance.
+ * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey`
+ * or `PairRDDFunctions.reduceByKey` will provide much better performance.
*/
def groupBy[K](
f: T => K,
@@ -703,8 +703,8 @@ abstract class RDD[T: ClassTag](
* may even differ each time the resulting RDD is evaluated.
*
* @note This operation may be very expensive. If you are grouping in order to perform an
- * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]]
- * or [[PairRDDFunctions.reduceByKey]] will provide much better performance.
+ * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey`
+ * or `PairRDDFunctions.reduceByKey` will provide much better performance.
*/
def groupBy[K](f: T => K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K] = null)
: RDD[(K, Iterable[T])] = withScope {
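
As a sketch of the @note above, the per-key average below is computed twice, once with groupBy and once with keyBy plus aggregateByKey; an existing SparkContext `sc` is assumed:

    val nums = sc.parallelize(1 to 100)

    // Expensive: ships every element for a key to one task before averaging.
    val avgViaGroupBy = nums.groupBy(_ % 10).mapValues(vs => vs.sum.toDouble / vs.size)

    // Cheaper: combines (sum, count) pairs map-side before shuffling.
    val avgViaAggregate = nums
      .keyBy(_ % 10)
      .aggregateByKey((0L, 0L))(
        (acc, v) => (acc._1 + v, acc._2 + 1),
        (a, b) => (a._1 + b._1, a._2 + b._2))
      .mapValues { case (sum, count) => sum.toDouble / count }
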
diff --git a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
index 8f15f50bee81..f41fc38be208 100644
--- a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
+++ b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
@@ -46,7 +46,7 @@ private[spark] object CryptoStreamUtils extends Logging {
val COMMONS_CRYPTO_CONF_PREFIX = "commons.crypto."
/**
- * Helper method to wrap [[OutputStream]] with [[CryptoOutputStream]] for encryption.
+ * Helper method to wrap `OutputStream` with `CryptoOutputStream` for encryption.
*/
def createCryptoOutputStream(
os: OutputStream,
@@ -62,7 +62,7 @@ private[spark] object CryptoStreamUtils extends Logging {
}
/**
- * Helper method to wrap [[InputStream]] with [[CryptoInputStream]] for decryption.
+ * Helper method to wrap `InputStream` with `CryptoInputStream` for decryption.
*/
def createCryptoInputStream(
is: InputStream,
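
The helpers above wrap plain streams with commons-crypto streams. Purely to illustrate the wrapping pattern (not the commons-crypto API that CryptoStreamUtils actually uses), here is a round trip with the JDK's javax.crypto stream wrappers:

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
    import javax.crypto.{Cipher, CipherInputStream, CipherOutputStream, KeyGenerator}

    // Generate an AES key and an encrypting cipher.
    val key = KeyGenerator.getInstance("AES").generateKey()
    val encrypt = Cipher.getInstance("AES")
    encrypt.init(Cipher.ENCRYPT_MODE, key)

    // Wrap a plain OutputStream so that everything written is encrypted.
    val buffer = new ByteArrayOutputStream()
    val encrypted = new CipherOutputStream(buffer, encrypt)
    encrypted.write("secret payload".getBytes("UTF-8"))
    encrypted.close()

    // Wrap an InputStream so that reads yield the decrypted bytes.
    val decrypt = Cipher.getInstance("AES")
    decrypt.init(Cipher.DECRYPT_MODE, key)
    val decrypted = new CipherInputStream(new ByteArrayInputStream(buffer.toByteArray), decrypt)
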
diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
index 19e020c968a9..7eb2da1c2748 100644
--- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -43,7 +43,8 @@ import org.apache.spark.util.{BoundedPriorityQueue, SerializableConfiguration, S
import org.apache.spark.util.collection.CompactBuffer
/**
- * A Spark serializer that uses the [[https://code.google.com/p/kryo/ Kryo serialization library]].
+ * A Spark serializer that uses the
+ * <a href="https://code.google.com/p/kryo/">Kryo serialization library</a>.
*
* @note This serializer is not guaranteed to be wire-compatible across different versions of
* Spark. It is intended to be used to serialize/de-serialize data within a single
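
A brief sketch of how an application might opt into this serializer via the standard spark.serializer setting; `MyRecord` is a hypothetical user class:

    import org.apache.spark.SparkConf

    case class MyRecord(id: Long, name: String)

    val conf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      // Registration lets Kryo write compact class ids instead of full class names.
      .registerKryoClasses(Array(classOf[MyRecord]))
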
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
index bf087af16a5b..bb8a684b4c7a 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
@@ -89,17 +89,18 @@ class RandomBlockReplicationPolicy
prioritizedPeers
}
+ // scalastyle:off line.size.limit
/**
* Uses sampling algorithm by Robert Floyd. Finds a random sample in O(n) while
- * minimizing space usage
- * [[http://math.stackexchange.com/questions/178690/
- * whats-the-proof-of-correctness-for-robert-floyds-algorithm-for-selecting-a-sin]]
+ * minimizing space usage. Please see <a href="http://math.stackexchange.com/questions/178690/whats-the-proof-of-correctness-for-robert-floyds-algorithm-for-selecting-a-sin">
+ * here</a>.
*
* @param n total number of indices
* @param m number of samples needed
* @param r random number generator
* @return list of m random unique indices
*/
+ // scalastyle:on line.size.limit
private def getSampleIds(n: Int, m: Int, r: Random): List[Int] = {
val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) {case (set, i) =>
val t = r.nextInt(i) + 1
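
For reference alongside getSampleIds, a standalone sketch of Floyd's sampling algorithm as described in the rewritten comment; the 1-based index convention is illustrative rather than the private helper's exact contract:

    import scala.util.Random

    // Pick m distinct values from 1..n using O(m) extra space.
    def floydSample(n: Int, m: Int, r: Random): Set[Int] = {
      (n - m + 1 to n).foldLeft(Set.empty[Int]) { case (set, i) =>
        val t = r.nextInt(i) + 1          // uniform in 1..i
        if (set.contains(t)) set + i else set + t
      }
    }
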
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 57f6f2f0a9be..dbeb970c81df 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -422,8 +422,8 @@ private[spark] object UIUtils extends Logging {
* the whole string will rendered as a simple escaped text.
*
* Note: In terms of security, only anchor tags with root relative links are supported. So any
- * attempts to embed links outside Spark UI, or other tags like