Commit 682ec5a (parent 1bd2ba6)

implement sparsemax and sparsemax_loss

* ENH: Including a Sparsemax layer and a SparsemaxLoss class

File tree

17 files changed: +1027 −0 lines changed


BUILD

Lines changed: 1 addition & 0 deletions

@@ -6,6 +6,7 @@ sh_binary(
         "MANIFEST.in",
         "setup.py",
         "tensorflow_addons/__init__.py",
+        "//tensorflow_addons/activations:activations_py",
         "//tensorflow_addons/custom_ops:custom_ops_py",
         "//tensorflow_addons/layers:layers_py",
         "//tensorflow_addons/losses:losses_py",
tensorflow_addons/activations/BUILD

Lines changed: 29 additions & 0 deletions

@@ -0,0 +1,29 @@
licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:public"])

py_library(
    name = "activations_py",
    srcs = [
        "__init__.py",
        "python/__init__.py",
        "python/sparsemax.py"
    ],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow_addons/utils:utils_py",
    ],
)

py_test(
    name = "sparsemax_py_test",
    size = "small",
    srcs = [
        "python/sparsemax_test.py",
    ],
    main = "python/sparsemax_test.py",
    srcs_version = "PY2AND3",
    deps = [
        ":activations_py",
    ],
)
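
With these targets in place, the test should be runnable from the repository root with `bazel test //tensorflow_addons/activations:sparsemax_py_test`, assuming a configured Bazel workspace.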
tensorflow_addons/activations/__init__.py

Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
A module containing activation routines.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow_addons.activations.python.sparsemax import sparsemax
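
This re-export makes the function importable as `from tensorflow_addons.activations import sparsemax`, so callers presumably never need to reach into the internal `python` subpackage.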

tensorflow_addons/activations/python/__init__.py

Whitespace-only changes.
tensorflow_addons/activations/python/sparsemax.py

Lines changed: 141 additions & 0 deletions

@@ -0,0 +1,141 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow_addons.utils.python import keras_utils


@keras_utils.register_keras_custom_object
def sparsemax(logits, axis=-1, name=None):
    """Sparsemax activation function [1].

    For each batch `i` and class `j` we have
      $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$

    [1]: https://arxiv.org/abs/1602.02068

    Args:
        logits: Input tensor.
        axis: Integer, axis along which the sparsemax operation is applied.
        name: A name for the operation (optional).
    Returns:
        Tensor, output of the sparsemax transformation. Has the same type
        and shape as `logits`.
    Raises:
        ValueError: In case `dim(logits) == 1`.
    """
    logits = tf.convert_to_tensor(logits, name="logits")

    # We need the original shape for shape inference.
    shape = logits.get_shape()
    rank = shape.rank
    is_last_axis = (axis == -1) or (axis == rank - 1)

    if is_last_axis:
        output = _compute_2d_sparsemax(logits, name=name)
        output.set_shape(shape)
        return output

    # If `axis` is not the last dimension, we have to do a transpose so
    # that we can still perform sparsemax on the last dimension.

    # Swap the `axis` dimension of logits with its last dimension.
    rank_op = tf.rank(logits)
    axis_norm = axis % rank
    logits = _swap_axis(logits, axis_norm, tf.math.subtract(rank_op, 1))

    # Do the actual sparsemax on the last dimension.
    output = _compute_2d_sparsemax(logits)
    output = _swap_axis(output, axis_norm, tf.math.subtract(rank_op, 1),
                        name=name)

    # Make shape inference work, since the transpose may erase the static
    # shape.
    output.set_shape(shape)
    return output


def _swap_axis(logits, dim_index, last_index, **kwargs):
    return tf.transpose(
        logits,
        tf.concat([
            tf.range(dim_index), [last_index],
            tf.range(dim_index + 1, last_index), [dim_index]
        ], 0),
        **kwargs)


@tf.function
def _compute_2d_sparsemax(logits, name=None):
    """Performs the sparsemax operation when axis=-1."""
    shape_op = tf.shape(logits)
    obs = tf.math.reduce_prod(shape_op[:-1])
    dims = shape_op[-1]

    # In the paper, the logits are called z.
    # The mean(logits) can be subtracted from the logits to make the
    # algorithm more numerically stable. The instability in this algorithm
    # comes mostly from z_cumsum, and subtracting the mean keeps z_cumsum
    # close to zero. However, in practice the numerical instability issues
    # are very minor and subtracting the mean causes extra issues with inf
    # and nan input.
    # Reshape to [obs, dims] as it is almost free and means the remaining
    # code doesn't need to worry about the rank.
    z = tf.reshape(logits, [obs, dims])

    # Sort z.
    z_sorted, _ = tf.nn.top_k(z, k=dims)

    # Calculate k(z).
    z_cumsum = tf.math.cumsum(z_sorted, axis=-1)
    k = tf.range(
        1, tf.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
    z_check = 1 + k * z_sorted > z_cumsum
    # Because the z_check vector is always [1, 1, ..., 1, 0, 0, ..., 0],
    # finding the (index + 1) of the last `1` is the same as just summing
    # the number of ones.
    k_z = tf.math.reduce_sum(tf.cast(z_check, tf.int32), axis=-1)

    # Calculate tau(z).
    # If there are inf values, or all values are -inf, then k_z will be
    # zero; this is mathematically invalid and will also cause the
    # gather_nd to fail. Prevent this issue for now by setting k_z = 1 if
    # k_z = 0; this is then fixed later (see p_safe) by returning p = nan,
    # which gives the same behavior as softmax.
    k_z_safe = tf.math.maximum(k_z, 1)
    indices = tf.stack([
        tf.range(0, obs),
        tf.reshape(k_z_safe, [-1]) - 1
    ], axis=1)
    tau_sum = tf.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / tf.cast(k_z, logits.dtype)

    # Calculate p.
    p = tf.math.maximum(
        tf.cast(0, logits.dtype), z - tf.expand_dims(tau_z, -1))
    # If k_z = 0 or z = nan, then the input is invalid.
    p_safe = tf.where(
        tf.math.logical_or(
            tf.math.equal(k_z, 0),
            tf.math.is_nan(z_cumsum[:, -1])
        ),
        tf.fill([obs, dims], tf.cast(float("nan"), logits.dtype)),
        p)

    # Reshape back to the original size.
    p_safe = tf.reshape(p_safe, shape_op, name=name)
    return p_safe
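
For orientation, a minimal usage sketch (not part of this commit; it assumes TF 2.x eager execution with this change installed). The printed values follow from hand-applying the k(z) and tau(z) steps above, and the exact zeros in the output are what distinguish sparsemax from softmax:

# A minimal usage sketch of the new activation.
import tensorflow as tf
from tensorflow_addons.activations import sparsemax

logits = tf.constant([[0.5, 1.0, 1.2],
                      [1.0, 2.0, 3.0]])

# Worked tau(z) for the first row: z_sorted = [1.2, 1.0, 0.5],
# z_cumsum = [1.2, 2.2, 2.7], and 1 + k * z_sorted > z_cumsum holds only
# for k = 1, 2, so k(z) = 2 and tau(z) = (2.2 - 1) / 2 = 0.6.
print(sparsemax(logits).numpy())
# -> [[0.  0.4 0.6]    max(z - 0.6, 0): two classes share the mass
#     [0.  0.  1. ]]   a large margin collapses to a one-hot vector

# Unlike softmax, which assigns nonzero probability everywhere:
print(tf.nn.softmax(logits).numpy())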
