Beautify losses doc (#2062)

WindQAQ · web-flow · commit e62add5e6999 · 2020-08-17T10:58:46.000-04:00
* Beautify losses doc
diff --git a/tensorflow_addons/losses/contrastive.py b/tensorflow_addons/losses/contrastive.py
@@ -33,7 +33,7 @@ def contrastive_loss(
     by the margin constant for the samples of different labels.
 
     The euclidean distances `y_pred` between two embedding matrices
-    `a` and `b` with shape [batch_size, hidden_size] can be computed
+    `a` and `b` with shape `[batch_size, hidden_size]` can be computed
     as follows:
 
     ```python
@@ -44,14 +44,14 @@ def contrastive_loss(
     See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
 
     Args:
-      y_true: 1-D integer `Tensor` with shape [batch_size] of
+      y_true: 1-D integer `Tensor` with shape `[batch_size]` of
         binary labels indicating positive vs negative pair.
-      y_pred: 1-D float `Tensor` with shape [batch_size] of
+      y_pred: 1-D float `Tensor` with shape `[batch_size]` of
         distances between two embedding matrices.
       margin: margin term in the loss definition.
 
     Returns:
-      contrastive_loss: 1-D float `Tensor` with shape [batch_size].
+      contrastive_loss: 1-D float `Tensor` with shape `[batch_size]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.dtypes.cast(y_true, y_pred.dtype)
@@ -71,12 +71,12 @@ class ContrastiveLoss(LossFunctionWrapper):
     See: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
 
     We expect labels `y_true` to be provided as 1-D integer `Tensor`
-    with shape [batch_size] of binary integer labels. And `y_pred` must be
-    1-D float `Tensor` with shape [batch_size] of distances between two
+    with shape `[batch_size]` of binary integer labels. And `y_pred` must be
+    1-D float `Tensor` with shape `[batch_size]` of distances between two
     embedding matrices.
 
     The euclidean distances `y_pred` between two embedding matrices
-    `a` and `b` with shape [batch_size, hidden_size] can be computed
+    `a` and `b` with shape `[batch_size, hidden_size]` can be computed
     as follows:
 
     ```python
diff --git a/tensorflow_addons/losses/focal_loss.py b/tensorflow_addons/losses/focal_loss.py
@@ -46,24 +46,25 @@ class SigmoidFocalCrossEntropy(LossFunctionWrapper):
                                             1.9097870e-04,
                                             2.0559824e-05]
     ```
-    Usage with tf.keras API:
+
+    Usage with the `tf.keras` API:
 
     ```python
     model = tf.keras.Model(inputs, outputs)
     model.compile('sgd', loss=tfa.losses.SigmoidFocalCrossEntropy())
     ```
 
-    Args
-      alpha: balancing factor, default value is 0.25
-      gamma: modulating factor, default value is 2.0
+    Args:
+      alpha: balancing factor, default value is 0.25.
+      gamma: modulating factor, default value is 2.0.
 
     Returns:
       Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
           shape as `y_true`; otherwise, it is scalar.
 
     Raises:
         ValueError: If the shape of `sample_weight` is invalid or value of
-          `gamma` is less than zero
+          `gamma` is less than zero.
     """
 
     @typechecked
@@ -105,7 +106,7 @@ def sigmoid_focal_crossentropy(
     best use-cases of focal loss is its usage in object detection where the
     imbalance between the background class and other classes is extremely high.
 
-    Args
+    Args:
         y_true: true targets tensor.
         y_pred: predictions tensor.
         alpha: balancing factor.
diff --git a/tensorflow_addons/losses/giou_loss.py b/tensorflow_addons/losses/giou_loss.py
@@ -40,7 +40,8 @@ class GIoULoss(LossFunctionWrapper):
     loss = gl(boxes1, boxes2)
     print('Loss: ', loss.numpy())  # Loss: [1.07500000298023224, 1.9333333373069763]
     ```
-    Usage with tf.keras API:
+
+    Usage with the `tf.keras` API:
 
     ```python
     model = tf.keras.Model(inputs, outputs)
diff --git a/tensorflow_addons/losses/kappa_loss.py b/tensorflow_addons/losses/kappa_loss.py
@@ -25,15 +25,15 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class WeightedKappaLoss(tf.keras.losses.Loss):
-    """Implements the Weighted Kappa loss function.
+    r"""Implements the Weighted Kappa loss function.
 
     Weighted Kappa loss was introduced in the
     [Weighted kappa loss function for multi-class classification
     of ordinal data in deep learning]
     (https://www.sciencedirect.com/science/article/abs/pii/S0167865517301666).
     Weighted Kappa is widely used in Ordinal Classification Problems.
-    The loss value lies in [-inf, log 2], where log 2
-     means the random prediction.
+    The loss value lies in $ (-\infty, \log 2] $, where $ \log 2 $
+    corresponds to a random prediction.
 
     Usage:
 
@@ -67,18 +67,17 @@ def __init__(
         dtype: Optional[tf.DType] = tf.float32,
         reduction: str = tf.keras.losses.Reduction.NONE,
     ):
-        """Creates a `WeightedKappa` instance.
+        r"""Creates a `WeightedKappaLoss` instance.
 
         Args:
           num_classes: Number of unique classes in your dataset.
           weightage: (Optional) Weighting to be considered for calculating
             kappa statistics. A valid value is one of
-            ['linear', 'quadratic']. Defaults to `quadratic` since it's
-            mostly used.
+            ['linear', 'quadratic']. Defaults to 'quadratic'.
           name: (Optional) String name of the metric instance.
           epsilon: (Optional) increment to avoid log zero,
-            so the loss will be log(1 - k + epsilon), where k belongs to
-            [-1, 1], usually you can use the default value which is 1e-6.
+            so the loss will be $ \log(1 - k + \epsilon) $, where $ k $ lies
+            in $ [-1, 1] $. Defaults to 1e-6.
           dtype: (Optional) Data type of the metric result.
             Defaults to `tf.float32`.
         Raises:
diff --git a/tensorflow_addons/losses/lifted.py b/tensorflow_addons/losses/lifted.py
@@ -31,7 +31,7 @@ def lifted_struct_loss(
     """Computes the lifted structured loss.
 
     Args:
-      labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
+      labels: 1-D tf.int32 `Tensor` with shape `[batch_size]` of
         multiclass integer labels.
       embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
         not be l2 normalized.
diff --git a/tensorflow_addons/losses/metric_learning.py b/tensorflow_addons/losses/metric_learning.py
@@ -25,11 +25,11 @@ def pairwise_distance(feature: TensorLike, squared: bool = False):
     output[i, j] = || feature[i, :] - feature[j, :] ||_2
 
     Args:
-      feature: 2-D Tensor of size [number of data, feature dimension].
+      feature: 2-D Tensor of size `[number of data, feature dimension]`.
       squared: Boolean, whether or not to square the pairwise distances.
 
     Returns:
-      pairwise_distances: 2-D Tensor of size [number of data, number of data].
+      pairwise_distances: 2-D Tensor of size `[number of data, number of data]`.
     """
     pairwise_distances_squared = tf.math.add(
         tf.math.reduce_sum(tf.math.square(feature), axis=[1], keepdims=True),
@@ -74,10 +74,10 @@ def angular_distance(feature: TensorLike):
     output[i, j] = 1 - cosine_similarity(feature[i, :], feature[j, :])
 
     Args:
-      feature: 2-D Tensor of size [number of data, feature dimension].
+      feature: 2-D Tensor of size `[number of data, feature dimension]`.
 
     Returns:
-      angular_distances: 2-D Tensor of size [number of data, number of data].
+      angular_distances: 2-D Tensor of size `[number of data, number of data]`.
     """
     # normalize input
     feature = tf.math.l2_normalize(feature, axis=1)
diff --git a/tensorflow_addons/losses/quantiles.py b/tensorflow_addons/losses/quantiles.py
@@ -94,7 +94,7 @@ class PinballLoss(LossFunctionWrapper):
     print('Loss: ', loss.numpy())  # Loss: 0.475
     ```
 
-    Usage with the `compile` API:
+    Usage with the `tf.keras` API:
 
     ```python
     model = tf.keras.Model(inputs, outputs)
diff --git a/tensorflow_addons/losses/sparsemax_loss.py b/tensorflow_addons/losses/sparsemax_loss.py
@@ -33,7 +33,7 @@ def sparsemax_loss(
     Computes the generalized multi-label classification loss for the sparsemax
     function. The implementation is a reformulation of the original loss
     function such that it uses the sparsemax properbility output instead of the
-    internal \tau variable. However, the output is identical to the original
+    internal $ \tau $ variable. However, the output is identical to the original
     loss function.
 
     [1]: https://arxiv.org/abs/1602.02068
diff --git a/tensorflow_addons/losses/triplet.py b/tensorflow_addons/losses/triplet.py
@@ -26,8 +26,8 @@ def _masked_maximum(data, mask, dim=1):
     """Computes the axis wise maximum over chosen elements.
 
     Args:
-      data: 2-D float `Tensor` of size [n, m].
-      mask: 2-D Boolean `Tensor` of size [n, m].
+      data: 2-D float `Tensor` of shape `[n, m]`.
+      mask: 2-D Boolean `Tensor` of shape `[n, m]`.
       dim: The dimension over which to compute the maximum.
 
     Returns:
@@ -48,8 +48,8 @@ def _masked_minimum(data, mask, dim=1):
     """Computes the axis wise minimum over chosen elements.
 
     Args:
-      data: 2-D float `Tensor` of size [n, m].
-      mask: 2-D Boolean `Tensor` of size [n, m].
+      data: 2-D float `Tensor` of shape `[n, m]`.
+      mask: 2-D Boolean `Tensor` of shape `[n, m]`.
       dim: The dimension over which to compute the minimum.
 
     Returns:
@@ -74,33 +74,36 @@ def triplet_semihard_loss(
     margin: FloatTensorLike = 1.0,
     distance_metric: Union[str, Callable] = "L2",
 ) -> tf.Tensor:
-    """Computes the triplet loss with semi-hard negative mining.
+    r"""Computes the triplet loss with semi-hard negative mining.
+
+    Usage:
+
+    >>> y_true = tf.convert_to_tensor([0, 0])
+    >>> y_pred = tf.convert_to_tensor([[0.0, 1.0], [1.0, 0.0]])
+    >>> tfa.losses.triplet_semihard_loss(y_true, y_pred, distance_metric="L2")
+    <tf.Tensor: shape=(), dtype=float32, numpy=2.4142137>
+
+    >>> # Calling with callable `distance_metric`
+    >>> distance_metric = lambda x: tf.linalg.matmul(x, x, transpose_b=True)
+    >>> tfa.losses.triplet_semihard_loss(y_true, y_pred, distance_metric=distance_metric)
+    <tf.Tensor: shape=(), dtype=float32, numpy=1.0>
 
     Args:
-      y_true: 1-D integer `Tensor` with shape [batch_size] of
+      y_true: 1-D integer `Tensor` with shape `[batch_size]` of
         multiclass integer labels.
       y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
         be l2 normalized.
       margin: Float, margin term in the loss definition.
-      distance_metric: str or function, determines distance metric:
-                       "L2" for l2-norm distance
-                       "squared-L2" for squared l2-norm distance
-                       "angular" for cosine similarity
-                        A custom function returning a 2d adjacency
-                          matrix of a chosen distance metric can
-                          also be passed here. e.g.
-
-                          def custom_distance(batch):
-                              batch = 1 - batch @ batch.T
-                              return batch
-
-                          triplet_semihard_loss(batch, labels,
-                                        distance_metric=custom_distance
-                                    )
+      distance_metric: `str` or a `Callable` that determines the distance metric.
+        Valid strings are "L2" for l2-norm distance,
+        "squared-L2" for squared l2-norm distance,
+        and "angular" for angular distance (one minus cosine similarity).
 
+        A `Callable` should take a batch of embeddings as input and
+        return the pairwise distance matrix.
 
     Returns:
-      triplet_loss: float scalar with dtype of y_pred.
+      triplet_loss: float scalar with dtype of `y_pred`.
     """
 
     labels, embeddings = y_true, y_pred
@@ -207,33 +210,37 @@ def triplet_hard_loss(
     soft: bool = False,
     distance_metric: Union[str, Callable] = "L2",
 ) -> tf.Tensor:
-    """Computes the triplet loss with hard negative and hard positive mining.
+    r"""Computes the triplet loss with hard negative and hard positive mining.
+
+    Usage:
+
+    >>> y_true = tf.convert_to_tensor([0, 0])
+    >>> y_pred = tf.convert_to_tensor([[0.0, 1.0], [1.0, 0.0]])
+    >>> tfa.losses.triplet_hard_loss(y_true, y_pred, distance_metric="L2")
+    <tf.Tensor: shape=(), dtype=float32, numpy=1.0>
+
+    >>> # Calling with callable `distance_metric`
+    >>> distance_metric = lambda x: tf.linalg.matmul(x, x, transpose_b=True)
+    >>> tfa.losses.triplet_hard_loss(y_true, y_pred, distance_metric=distance_metric)
+    <tf.Tensor: shape=(), dtype=float32, numpy=0.0>
 
     Args:
-      y_true: 1-D integer `Tensor` with shape [batch_size] of
+      y_true: 1-D integer `Tensor` with shape `[batch_size]` of
         multiclass integer labels.
       y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
         be l2 normalized.
       margin: Float, margin term in the loss definition.
       soft: Boolean, if set, use the soft margin version.
-      distance_metric: str or function, determines distance metric:
-                       "L2" for l2-norm distance
-                       "squared-L2" for squared l2-norm distance
-                       "angular" for cosine similarity
-                        A custom function returning a 2d adjacency
-                          matrix of a chosen distance metric can
-                          also be passed here. e.g.
-
-                          def custom_distance(batch):
-                              batch = 1 - batch @ batch.T
-                              return batch
-
-                          triplet_semihard_loss(batch, labels,
-                                        distance_metric=custom_distance
-                                    )
+      distance_metric: `str` or a `Callable` that determines the distance metric.
+        Valid strings are "L2" for l2-norm distance,
+        "squared-L2" for squared l2-norm distance,
+        and "angular" for angular distance (one minus cosine similarity).
+
+        A `Callable` should take a batch of embeddings as input and
+        return the pairwise distance matrix.
 
     Returns:
-      triplet_loss: float scalar with dtype of y_pred.
+      triplet_loss: float scalar with dtype of `y_pred`.
     """
     labels, embeddings = y_true, y_pred
 
@@ -311,7 +318,7 @@ class TripletSemiHardLoss(LossFunctionWrapper):
     See: https://arxiv.org/abs/1503.03832.
 
     We expect labels `y_true` to be provided as 1-D integer `Tensor` with shape
-    [batch_size] of multi-class integer labels. And embeddings `y_pred` must be
+    `[batch_size]` of multi-class integer labels. And embeddings `y_pred` must be
     2-D float `Tensor` of l2 normalized embedding vectors.
 
     Args:
@@ -348,7 +355,7 @@ class TripletHardLoss(LossFunctionWrapper):
     See: https://arxiv.org/pdf/1703.07737.
 
     We expect labels `y_true` to be provided as 1-D integer `Tensor` with shape
-    [batch_size] of multi-class integer labels. And embeddings `y_pred` must be
+    `[batch_size]` of multi-class integer labels. And embeddings `y_pred` must be
     2-D float `Tensor` of l2 normalized embedding vectors.
 
     Args: