1 change: 0 additions & 1 deletion python/docs/conf.py
@@ -31,7 +31,6 @@
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.viewcode',
'epytext',
'sphinx.ext.mathjax',
]

30 changes: 0 additions & 30 deletions python/docs/epytext.py

This file was deleted.
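
The deleted helper was evidently a small Sphinx extension that rewrote epytext markup into reST while autodoc processed docstrings; with the docstrings now written in reST directly, both the hook and its entry in conf.py's extensions list become unnecessary. A minimal sketch of that kind of hook follows (the regexes and names are illustrative, not the original file's contents):

import re

# Illustrative epytext -> reST rewrite rules (not the original file's rules).
RULES = (
    (r"C\{([^}]*)\}", r"``\1``"),     # C{code}          -> ``code``
    (r"[LU]\{([^}]*)\}", r"`\1`"),    # L{link} / U{url} -> `link`
)

def _convert_epytext(line):
    for pattern, replacement in RULES:
        line = re.sub(pattern, replacement, line)
    return line

def _process_docstring(app, what, name, obj, options, lines):
    # Sphinx passes each docstring as a mutable list of lines; rewrite in place.
    for i in range(len(lines)):
        lines[i] = _convert_epytext(lines[i])

def setup(app):
    # Hook into autodoc so every docstring is rewritten before rendering.
    app.connect("autodoc-process-docstring", _process_docstring)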

14 changes: 7 additions & 7 deletions python/pyspark/accumulators.py
@@ -123,13 +123,13 @@ class Accumulator(object):

"""
A shared variable that can be accumulated, i.e., has a commutative and associative "add"
operation. Worker tasks on a Spark cluster can add values to an Accumulator with the C{+=}
operator, but only the driver program is allowed to access its value, using C{value}.
operation. Worker tasks on a Spark cluster can add values to an Accumulator with the `+=`
operator, but only the driver program is allowed to access its value, using `value`.
Updates from the workers get propagated automatically to the driver program.

While C{SparkContext} supports accumulators for primitive data types like C{int} and
C{float}, users can also define accumulators for custom types by providing a custom
L{AccumulatorParam} object. Refer to the doctest of this module for an example.
While :class:`SparkContext` supports accumulators for primitive data types like :class:`int` and
:class:`float`, users can also define accumulators for custom types by providing a custom
:class:`AccumulatorParam` object. Refer to the doctest of this module for an example.
"""

def __init__(self, aid, value, accum_param):
@@ -185,14 +185,14 @@ class AccumulatorParam(object):
def zero(self, value):
"""
Provide a "zero value" for the type, compatible in dimensions with the
provided C{value} (e.g., a zero vector)
provided `value` (e.g., a zero vector)
"""
raise NotImplementedError

def addInPlace(self, value1, value2):
"""
Add two values of the accumulator's data type, returning a new value;
for efficiency, can also update C{value1} in place and return it.
for efficiency, can also update `value1` in place and return it.
"""
raise NotImplementedError

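For reference, a minimal sketch of the AccumulatorParam contract described above (a zero value plus an in-place add), assuming a local SparkContext; the list-valued accumulator is illustrative:

from pyspark import SparkContext
from pyspark.accumulators import AccumulatorParam

class ListParam(AccumulatorParam):
    """Accumulate Python lists by concatenation."""
    def zero(self, value):
        # A "zero" compatible with the provided value: an empty list.
        return []

    def addInPlace(self, value1, value2):
        # Update value1 in place for efficiency and return it.
        value1.extend(value2)
        return value1

sc = SparkContext("local", "accumulator-sketch")
acc = sc.accumulator([], ListParam())      # custom AccumulatorParam
sc.parallelize([1, 2, 3, 4]).foreach(lambda x: acc.add([x * x]))
print(sorted(acc.value))                   # only the driver may read .value
sc.stop()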
6 changes: 3 additions & 3 deletions python/pyspark/broadcast.py
@@ -49,8 +49,8 @@ def _from_id(bid):
class Broadcast(object):

"""
A broadcast variable created with L{SparkContext.broadcast()}.
Access its value through C{.value}.
A broadcast variable created with :meth:`SparkContext.broadcast`.
Access its value through :attr:`value`.

Examples:

@@ -69,7 +69,7 @@ class Broadcast(object):
def __init__(self, sc=None, value=None, pickle_registry=None, path=None,
sock_file=None):
"""
Should not be called directly by users -- use L{SparkContext.broadcast()}
Should not be called directly by users -- use :meth:`SparkContext.broadcast`
instead.
"""
if sc is not None:
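As a usage sketch of the API described above (assuming a local SparkContext), a broadcast variable is created once on the driver and read through .value inside tasks:

from pyspark import SparkContext

sc = SparkContext("local", "broadcast-sketch")

# Ship a read-only lookup table to every executor exactly once.
lookup = sc.broadcast({"a": 1, "b": 2, "c": 3})

rdd = sc.parallelize(["a", "b", "c", "a"])
counts = rdd.map(lambda k: lookup.value[k]).sum()   # tasks read .value
print(counts)        # 7

lookup.unpersist()   # optionally release executor-side copies
sc.stop()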
8 changes: 4 additions & 4 deletions python/pyspark/conf.py
@@ -79,16 +79,16 @@ class SparkConf(object):
parameters as key-value pairs.

Most of the time, you would create a SparkConf object with
C{SparkConf()}, which will load values from C{spark.*} Java system
``SparkConf()``, which will load values from `spark.*` Java system
properties as well. In this case, any parameters you set directly on
the C{SparkConf} object take priority over system properties.
the :class:`SparkConf` object take priority over system properties.

For unit tests, you can also call C{SparkConf(false)} to skip
For unit tests, you can also call ``SparkConf(false)`` to skip
loading external settings and get the same configuration no matter
what the system properties are.

All setter methods in this class support chaining. For example,
you can write C{conf.setMaster("local").setAppName("My app")}.
you can write ``conf.setMaster("local").setAppName("My app")``.

.. note:: Once a SparkConf object is passed to Spark, it is cloned
and can no longer be modified by the user.
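A short sketch of the chaining style the docstring mentions (the master URL, app name, and property values are placeholders):

from pyspark import SparkConf, SparkContext

# Setters return the SparkConf itself, so configuration chains naturally.
conf = (SparkConf()
        .setMaster("local[2]")
        .setAppName("My app")
        .set("spark.executor.memory", "1g"))

sc = SparkContext(conf=conf)
print(sc.getConf().get("spark.app.name"))   # "My app"
sc.stop()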
56 changes: 29 additions & 27 deletions python/pyspark/context.py
@@ -61,7 +61,7 @@ class SparkContext(object):

"""
Main entry point for Spark functionality. A SparkContext represents the
connection to a Spark cluster, and can be used to create L{RDD} and
connection to a Spark cluster, and can be used to create :class:`RDD` and
broadcast variables on that cluster.

.. note:: Only one :class:`SparkContext` should be active per JVM. You must `stop()`
@@ -86,7 +86,7 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
gateway=None, jsc=None, profiler_cls=BasicProfiler):
"""
Create a new SparkContext. At least the master and app name should be set,
either through the named parameters here or through C{conf}.
either through the named parameters here or through `conf`.

:param master: Cluster URL to connect to
(e.g. mesos://host:port, spark://host:port, local[4]).
@@ -102,7 +102,7 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
the batch size based on object sizes, or -1 to use an unlimited
batch size
:param serializer: The serializer for RDDs.
:param conf: A L{SparkConf} object setting Spark properties.
:param conf: A :class:`SparkConf` object setting Spark properties.
:param gateway: Use an existing gateway and JVM, otherwise a new JVM
will be instantiated.
:param jsc: The JavaSparkContext instance (optional).
@@ -576,7 +576,7 @@ def _serialize_to_jvm(self, data, serializer, reader_func, createRDDServer):

def pickleFile(self, name, minPartitions=None):
"""
Load an RDD previously saved using L{RDD.saveAsPickleFile} method.
Load an RDD previously saved using :meth:`RDD.saveAsPickleFile` method.

>>> tmpFile = NamedTemporaryFile(delete=True)
>>> tmpFile.close()
@@ -624,20 +624,24 @@ def wholeTextFiles(self, path, minPartitions=None, use_unicode=True):
as `utf-8`), which is faster and smaller than unicode. (Added in
Spark 1.2)

For example, if you have the following files::
For example, if you have the following files:

hdfs://a-hdfs-path/part-00000
hdfs://a-hdfs-path/part-00001
...
hdfs://a-hdfs-path/part-nnnnn
.. code-block:: text

Do C{rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")},
then C{rdd} contains::
hdfs://a-hdfs-path/part-00000
hdfs://a-hdfs-path/part-00001
...
hdfs://a-hdfs-path/part-nnnnn

Do ``rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")``,
then ``rdd`` contains:

(a-hdfs-path/part-00000, its content)
(a-hdfs-path/part-00001, its content)
...
(a-hdfs-path/part-nnnnn, its content)
.. code-block:: text

(a-hdfs-path/part-00000, its content)
(a-hdfs-path/part-00001, its content)
...
(a-hdfs-path/part-nnnnn, its content)

.. note:: Small files are preferred, as each file will be loaded
fully in memory.
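
A small usage sketch of the (path, content) pairs described above, assuming a local SparkContext and a temporary directory of text files (paths and contents are illustrative):

import os
import tempfile
from pyspark import SparkContext

sc = SparkContext("local", "wholeTextFiles-sketch")

# Write a couple of small files into a temporary directory.
d = tempfile.mkdtemp()
for name, text in [("part-00000", "hello"), ("part-00001", "world")]:
    with open(os.path.join(d, name), "w") as f:
        f.write(text)

# Each element is a (filename, file content) pair, one per file.
pairs = sc.wholeTextFiles(d).collect()
for path, content in sorted(pairs):
    print(os.path.basename(path), content)   # part-00000 hello / part-00001 world
sc.stop()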
@@ -705,7 +709,7 @@ def sequenceFile(self, path, keyClass=None, valueClass=None, keyConverter=None,
and value Writable classes
2. Serialization is attempted via Pyrolite pickling
3. If this fails, the fallback is to call 'toString' on each key and value
4. C{PickleSerializer} is used to deserialize pickled objects on the Python side
4. :class:`PickleSerializer` is used to deserialize pickled objects on the Python side

:param path: path to sequencefile
:param keyClass: fully qualified classname of key Writable class
Expand Down Expand Up @@ -872,17 +876,16 @@ def union(self, rdds):

def broadcast(self, value):
"""
Broadcast a read-only variable to the cluster, returning a
L{Broadcast<pyspark.broadcast.Broadcast>}
Broadcast a read-only variable to the cluster, returning a :class:`Broadcast`
object for reading it in distributed functions. The variable will
be sent to each cluster only once.
"""
return Broadcast(self, value, self._pickled_broadcast_vars)

def accumulator(self, value, accum_param=None):
"""
Create an L{Accumulator} with the given initial value, using a given
L{AccumulatorParam} helper object to define how to add values of the
Create an :class:`Accumulator` with the given initial value, using a given
:class:`AccumulatorParam` helper object to define how to add values of the
data type if provided. Default AccumulatorParams are used for integers
and floating-point numbers if you do not provide one. For other types,
a custom AccumulatorParam can be used.
@@ -902,12 +905,11 @@ def accumulator(self, value, accum_param=None):
def addFile(self, path, recursive=False):
"""
Add a file to be downloaded with this Spark job on every node.
The C{path} passed can be either a local file, a file in HDFS
The `path` passed can be either a local file, a file in HDFS
(or other Hadoop-supported filesystems), or an HTTP, HTTPS or
FTP URI.

To access the file in Spark jobs, use
L{SparkFiles.get(fileName)<pyspark.files.SparkFiles.get>} with the
To access the file in Spark jobs, use :meth:`SparkFiles.get` with the
filename to find its download location.

A directory can be given if the recursive option is set to True.
@@ -932,7 +934,7 @@ def addFile(self, path, recursive=False):
def addPyFile(self, path):
"""
Add a .py or .zip dependency for all tasks to be executed on this
SparkContext in the future. The C{path} passed can be either a local
SparkContext in the future. The `path` passed can be either a local
file, a file in HDFS (or other Hadoop-supported filesystems), or an
HTTP, HTTPS or FTP URI.

@@ -978,7 +980,7 @@ def setJobGroup(self, groupId, description, interruptOnCancel=False):
Application programmers can use this method to group all those jobs together and give a
group description. Once set, the Spark web UI will associate such jobs with this group.

The application can use L{SparkContext.cancelJobGroup} to cancel all
The application can use :meth:`SparkContext.cancelJobGroup` to cancel all
running jobs in this group.

>>> import threading
@@ -1023,7 +1025,7 @@ def setLocalProperty(self, key, value):
def getLocalProperty(self, key):
"""
Get a local property set in this thread, or null if it is missing. See
L{setLocalProperty}
:meth:`setLocalProperty`.
"""
return self._jsc.getLocalProperty(key)

@@ -1041,7 +1043,7 @@ def sparkUser(self):

def cancelJobGroup(self, groupId):
"""
Cancel active jobs for the specified group. See L{SparkContext.setJobGroup}
Cancel active jobs for the specified group. See :meth:`SparkContext.setJobGroup`.
for more information.
"""
self._jsc.sc().cancelJobGroup(groupId)
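As a sketch of the job-group calls touched above (the group id, description, and workload are placeholders), jobs submitted from a thread after setJobGroup can later be cancelled together:

import threading
import time
from pyspark import SparkContext

sc = SparkContext("local[2]", "jobgroup-sketch")

def run_long_job():
    # Every job started from this thread now belongs to the group.
    sc.setJobGroup("group_a", "long-running demo job", interruptOnCancel=True)
    try:
        sc.parallelize(range(10)).map(lambda x: time.sleep(10) or x).count()
    except Exception:
        pass  # a cancelled job surfaces as an exception in the submitting thread

t = threading.Thread(target=run_long_job)
t.start()
time.sleep(2)                     # give the job time to start
sc.cancelJobGroup("group_a")      # cancel everything tagged "group_a"
t.join()
sc.stop()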
7 changes: 3 additions & 4 deletions python/pyspark/files.py
@@ -24,8 +24,7 @@
class SparkFiles(object):

"""
Resolves paths to files added through
L{SparkContext.addFile()<pyspark.context.SparkContext.addFile>}.
Resolves paths to files added through :meth:`SparkContext.addFile`.

SparkFiles contains only classmethods; users should not create SparkFiles
instances.
@@ -41,7 +40,7 @@ def __init__(self):
@classmethod
def get(cls, filename):
"""
Get the absolute path of a file added through C{SparkContext.addFile()}.
Get the absolute path of a file added through :meth:`SparkContext.addFile`.
"""
path = os.path.join(SparkFiles.getRootDirectory(), filename)
return os.path.abspath(path)
@@ -50,7 +49,7 @@ def get(cls, filename):
def getRootDirectory(cls):
"""
Get the root directory that contains files added through
C{SparkContext.addFile()}.
:meth:`SparkContext.addFile`.
"""
if cls._is_running_on_worker:
return cls._root_directory
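A usage sketch tying SparkContext.addFile to SparkFiles.get, assuming a local SparkContext and a small local file (the file name and contents are illustrative):

import os
import tempfile
from pyspark import SparkContext, SparkFiles

sc = SparkContext("local", "sparkfiles-sketch")

# Create a small data file and register it with the job.
path = os.path.join(tempfile.mkdtemp(), "offsets.txt")
with open(path, "w") as f:
    f.write("100")
sc.addFile(path)

def add_offset(x):
    # Inside a task, resolve the shipped file by its base name only.
    with open(SparkFiles.get("offsets.txt")) as f:
        return x + int(f.read())

print(sc.parallelize([1, 2, 3]).map(add_offset).collect())   # [101, 102, 103]
sc.stop()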
2 changes: 1 addition & 1 deletion python/pyspark/ml/feature.py
@@ -2560,7 +2560,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
corresponding string values.
The index-string mapping is either from the ML attributes of the input column,
or from user-supplied labels (which take precedence over ML attributes).
See L{StringIndexer} for converting strings into indices.
See :class:`StringIndexer` for converting strings into indices.

.. versionadded:: 1.6.0
"""
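A round-trip sketch of the pairing mentioned above (column names and data are illustrative): StringIndexer encodes strings to indices, IndexToString decodes them back using the learned labels.

from pyspark.sql import SparkSession
from pyspark.ml.feature import StringIndexer, IndexToString

spark = SparkSession.builder.master("local").appName("indextostring-sketch").getOrCreate()
df = spark.createDataFrame(
    [(0, "a"), (1, "b"), (2, "c"), (3, "a")], ["id", "category"])

# Strings -> indices (the most frequent label gets index 0.0).
indexer_model = StringIndexer(inputCol="category", outputCol="categoryIndex").fit(df)
indexed = indexer_model.transform(df)

# Indices -> original strings, reusing the labels learned by the indexer.
converter = IndexToString(inputCol="categoryIndex", outputCol="originalCategory",
                          labels=indexer_model.labels)
converter.transform(indexed).select("id", "categoryIndex", "originalCategory").show()
spark.stop()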
8 changes: 4 additions & 4 deletions python/pyspark/ml/linalg/__init__.py
@@ -17,9 +17,9 @@

"""
MLlib utilities for linear algebra. For dense vectors, MLlib
uses the NumPy C{array} type, so you can simply pass NumPy arrays
around. For sparse vectors, users can construct a L{SparseVector}
object from MLlib or pass SciPy C{scipy.sparse} column vectors if
uses the NumPy `array` type, so you can simply pass NumPy arrays
around. For sparse vectors, users can construct a :class:`SparseVector`
object from MLlib or pass SciPy `scipy.sparse` column vectors if
SciPy is available in their environment.
"""

@@ -758,7 +758,7 @@ class Vectors(object):
.. note:: Dense vectors are simply represented as NumPy array objects,
so there is no need to convert them for use in MLlib. For sparse vectors,
the factory methods in this class create an MLlib-compatible type, or users
can pass in SciPy's C{scipy.sparse} column vectors.
can pass in SciPy's `scipy.sparse` column vectors.
"""

@staticmethod
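A short sketch of the dense and sparse vector construction described above (values are illustrative):

import numpy as np
from pyspark.ml.linalg import Vectors, SparseVector

dense = Vectors.dense([1.0, 0.0, 3.0])        # backed by a NumPy array
also_dense = np.array([1.0, 0.0, 3.0])        # plain NumPy arrays work too

# Sparse: total size plus (index, value) pairs for the non-zero entries.
sparse = Vectors.sparse(3, [0, 2], [1.0, 3.0])
same_sparse = SparseVector(3, {0: 1.0, 2: 3.0})

print(sparse.toArray())          # [1. 0. 3.]
print(dense.dot(sparse))         # 1*1 + 3*3 = 10.0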
4 changes: 2 additions & 2 deletions python/pyspark/mllib/classification.py
@@ -659,11 +659,11 @@ def train(cls, data, lambda_=1.0):
Train a Naive Bayes model given an RDD of (label, features)
vectors.

This is the Multinomial NB (U{http://tinyurl.com/lsdw6p}) which
This is the `Multinomial NB <http://tinyurl.com/lsdw6p>`_ which
can handle all kinds of discrete data. For example, by
converting documents into TF-IDF vectors, it can be used for
document classification. By making every vector a 0-1 vector,
it can also be used as Bernoulli NB (U{http://tinyurl.com/p7c96j6}).
it can also be used as `Bernoulli NB <http://tinyurl.com/p7c96j6>`_.
The input feature values must be nonnegative.

:param data:
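A tiny sketch of multinomial Naive Bayes training on nonnegative, count-like features, assuming a local SparkContext (the data is illustrative):

from pyspark import SparkContext
from pyspark.mllib.classification import NaiveBayes
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext("local", "naivebayes-sketch")

# Nonnegative feature values, e.g. word counts or TF-IDF weights.
data = sc.parallelize([
    LabeledPoint(0.0, [1.0, 0.0, 0.0]),
    LabeledPoint(0.0, [2.0, 0.0, 1.0]),
    LabeledPoint(1.0, [0.0, 1.0, 2.0]),
    LabeledPoint(1.0, [0.0, 2.0, 1.0]),
])

model = NaiveBayes.train(data, lambda_=1.0)   # lambda_ is the smoothing parameter
print(model.predict([0.0, 1.0, 1.0]))         # most likely label for this toy data: 1.0
sc.stop()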
6 changes: 3 additions & 3 deletions python/pyspark/mllib/clustering.py
@@ -130,9 +130,9 @@ class BisectingKMeans(object):
clusters, larger clusters get higher priority.

Based on
U{http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf}
Steinbach, Karypis, and Kumar, A comparison of document clustering
techniques, KDD Workshop on Text Mining, 2000.
`Steinbach, Karypis, and Kumar, A comparison of document clustering
techniques, KDD Workshop on Text Mining, 2000
<http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf>`_.

.. versionadded:: 2.0.0
"""
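A minimal sketch of bisecting k-means on a toy dataset, assuming a local SparkContext (points and parameters are illustrative):

from pyspark import SparkContext
from pyspark.mllib.clustering import BisectingKMeans

sc = SparkContext("local", "bisecting-kmeans-sketch")

# Two obvious groups of 2-D points.
points = sc.parallelize([
    [0.0, 0.0], [0.5, 0.5], [1.0, 1.0],
    [9.0, 9.0], [9.5, 9.5], [10.0, 10.0],
])

model = BisectingKMeans.train(points, k=2, maxIterations=5, seed=42)
print(model.clusterCenters)             # two centers, one per group
print(model.predict([0.25, 0.25]))      # index of the cluster nearest this point
sc.stop()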
8 changes: 4 additions & 4 deletions python/pyspark/mllib/linalg/__init__.py
@@ -17,9 +17,9 @@

"""
MLlib utilities for linear algebra. For dense vectors, MLlib
uses the NumPy C{array} type, so you can simply pass NumPy arrays
around. For sparse vectors, users can construct a L{SparseVector}
object from MLlib or pass SciPy C{scipy.sparse} column vectors if
uses the NumPy `array` type, so you can simply pass NumPy arrays
around. For sparse vectors, users can construct a :class:`SparseVector`
object from MLlib or pass SciPy `scipy.sparse` column vectors if
SciPy is available in their environment.
"""

@@ -847,7 +847,7 @@ class Vectors(object):
.. note:: Dense vectors are simply represented as NumPy array objects,
so there is no need to convert them for use in MLlib. For sparse vectors,
the factory methods in this class create an MLlib-compatible type, or users
can pass in SciPy's C{scipy.sparse} column vectors.
can pass in SciPy's `scipy.sparse` column vectors.
"""

@staticmethod
6 changes: 2 additions & 4 deletions python/pyspark/mllib/random.py
@@ -54,8 +54,7 @@ def uniformRDD(sc, size, numPartitions=None, seed=None):

To transform the distribution in the generated RDD from U(0.0, 1.0)
to U(a, b), use
C{RandomRDDs.uniformRDD(sc, n, p, seed)\
.map(lambda v: a + (b - a) * v)}
``RandomRDDs.uniformRDD(sc, n, p, seed).map(lambda v: a + (b - a) * v)``

:param sc: SparkContext used to create the RDD.
:param size: Size of the RDD.
@@ -85,8 +84,7 @@ def normalRDD(sc, size, numPartitions=None, seed=None):

To transform the distribution in the generated RDD from standard normal
to some other normal N(mean, sigma^2), use
C{RandomRDDs.normal(sc, n, p, seed)\
.map(lambda v: mean + sigma * v)}
``RandomRDDs.normal(sc, n, p, seed).map(lambda v: mean + sigma * v)``

:param sc: SparkContext used to create the RDD.
:param size: Size of the RDD.
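A sketch of the uniform and normal generators plus the rescaling trick quoted above, assuming a local SparkContext (a, b, mean, and sigma are placeholders):

from pyspark import SparkContext
from pyspark.mllib.random import RandomRDDs

sc = SparkContext("local", "randomrdds-sketch")

# U(0, 1) samples, rescaled to U(a, b) as the docstring suggests.
a, b = 10.0, 20.0
uniform = RandomRDDs.uniformRDD(sc, 1000, seed=1).map(lambda v: a + (b - a) * v)

# N(0, 1) samples, shifted and scaled to N(mean, sigma^2).
mean, sigma = 5.0, 2.0
normal = RandomRDDs.normalRDD(sc, 1000, seed=1).map(lambda v: mean + sigma * v)

print(round(uniform.mean(), 1))   # roughly (a + b) / 2 = 15.0
print(round(normal.mean(), 1))    # roughly mean = 5.0
sc.stop()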