|
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow_addons.utils.python import keras_utils


@tf.function
@keras_utils.register_keras_custom_object
def sparsemax(logits, axis=-1, name=None):
  """Sparsemax activation function [1].

  For each batch `i` and class `j` we have
    $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$

  [1]: https://arxiv.org/abs/1602.02068

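  For example, applying the formula by hand to the logits `[-1.0, 0.0, 1.0]`
  gives `tau = 0.0`, so `sparsemax([-1.0, 0.0, 1.0]) = [0.0, 0.0, 1.0]`;
  unlike softmax, the smaller logits receive exactly zero probability.
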
  Args:
    logits: Input tensor.
    axis: Integer, axis along which the sparsemax operation is applied.
    name: A name for the operation (optional).
  Returns:
    Tensor, output of sparsemax transformation. Has the same type and
    shape as `logits`.
  Raises:
    ValueError: In case `dim(logits) == 1`.
  """
  logits = tf.convert_to_tensor(logits, name="logits")

  # We need its original shape for shape inference.
  shape = logits.get_shape()
  rank = shape.rank
  is_last_axis = (axis == -1) or (axis == rank - 1)

  if is_last_axis:
    output = _compute_2d_sparsemax(logits, name=name)
    output.set_shape(shape)
    return output

  # If axis is not the last dimension, we have to do a transpose so that we
  # can still perform the sparsemax on the last dimension.

  # Swap logits' `axis` dimension with its last dimension.
  rank_op = tf.rank(logits)
  axis_norm = axis % rank
  logits = _swap_axis(logits, axis_norm, tf.math.subtract(rank_op, 1))

  # Do the actual sparsemax on the last dimension.
  output = _compute_2d_sparsemax(logits)
  output = _swap_axis(
      output, axis_norm, tf.math.subtract(rank_op, 1), name=name)

  # Make shape inference work since transpose may erase its static shape.
  output.set_shape(shape)
  return output


def _swap_axis(logits, dim_index, last_index, **kwargs):
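  """Swaps the `dim_index` and `last_index` axes of `logits`."""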
  return tf.transpose(
      logits,
      tf.concat([
          tf.range(dim_index), [last_index],
          tf.range(dim_index + 1, last_index), [dim_index]
      ], 0), **kwargs)


@tf.function
def _compute_2d_sparsemax(logits, name=None):
  """Performs the sparsemax operation when axis=-1."""
  shape_op = tf.shape(logits)
  obs = tf.math.reduce_prod(shape_op[:-1])
  dims = shape_op[-1]

  # In the paper, they call the logits z.
  # The mean(logits) can be subtracted from logits to make the algorithm
  # more numerically stable; the instability in this algorithm comes mostly
  # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close
  # to zero. However, in practice the numerical instability issues are very
  # minor and subtracting the mean causes extra issues with inf and nan
  # input.
  # Reshape to [obs, dims] as it is almost free and means the remaining
  # code doesn't need to worry about the rank.
  z = tf.reshape(logits, [obs, dims])

  # Sort z.
  z_sorted, _ = tf.nn.top_k(z, k=dims)

  # Calculate k(z).
  z_cumsum = tf.math.cumsum(z_sorted, axis=-1)
  k = tf.range(1, tf.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
  z_check = 1 + k * z_sorted > z_cumsum
  # Because the z_check vector is always [1,1,...1,0,0,...0], finding the
  # (index + 1) of the last `1` is the same as just summing the number of 1s.
  k_z = tf.math.reduce_sum(tf.cast(z_check, tf.int32), axis=-1)
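  # Example (hypothetical row): z = [-1, 0, 1] gives z_sorted = [1, 0, -1],
  # z_cumsum = [1, 1, 0] and z_check = [True, False, False], so k_z = 1.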

  # Calculate tau(z).
  # If there are inf values or all values are -inf, the k_z will be zero;
  # this is mathematically invalid and will also cause the gather_nd to fail.
  # Prevent this issue for now by setting k_z = 1 if k_z = 0; this is then
  # fixed later (see p_safe) by returning p = nan. This results in the same
  # behavior as softmax.
  k_z_safe = tf.math.maximum(k_z, 1)
  indices = tf.stack(
      [tf.range(0, obs), tf.reshape(k_z_safe, [-1]) - 1], axis=1)
  tau_sum = tf.gather_nd(z_cumsum, indices)
  tau_z = (tau_sum - 1) / tf.cast(k_z, logits.dtype)
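  # Continuing the example above: tau_sum = z_cumsum[k_z - 1] = 1, so
  # tau_z = (1 - 1) / 1 = 0 and p = max(z - tau_z, 0) = [0, 0, 1].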

  # Calculate p.
  p = tf.math.maximum(
      tf.cast(0, logits.dtype), z - tf.expand_dims(tau_z, -1))
  # If k_z = 0 or if z = nan, then the input is invalid.
  p_safe = tf.where(
      tf.math.logical_or(
          tf.math.equal(k_z, 0), tf.math.is_nan(z_cumsum[:, -1])),
      tf.fill([obs, dims], tf.cast(float("nan"), logits.dtype)), p)

  # Reshape back to the original size.
  p_safe = tf.reshape(p_safe, shape_op, name=name)
  return p_safe