From 0575cfae0f81112f6154e500a9c82468a6b27995 Mon Sep 17 00:00:00 2001
From: Alexander Dicke
Date: Fri, 25 Aug 2023 11:22:33 +0200
Subject: [PATCH 1/5] adds sigmoid-like activation functions

---
 .../activation_functions/sigmoid_like.py | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 neural_network/activation_functions/sigmoid_like.py

diff --git a/neural_network/activation_functions/sigmoid_like.py b/neural_network/activation_functions/sigmoid_like.py
new file mode 100644
index 000000000000..a73e99b4b2ba
--- /dev/null
+++ b/neural_network/activation_functions/sigmoid_like.py
@@ -0,0 +1,64 @@
+import numpy as np
+
+
+def _base_activation(vector: np.ndarray, alpha: float, beta: float) -> np.ndarray:
+    """
+    Base activation for sigmoid, swish, and SiLU.
+    """
+    return np.power(vector, alpha) / (1 + np.exp(-beta * vector))
+
+
+def sigmoid(vector: np.ndarray) -> np.ndarray:
+    """
+    The standard sigmoid function.
+    Args:
+        vector: (np.ndarray): The input array.
+    Returns:
+        np.ndarray: The result of the sigmoid activation applied to the input array.
+    Examples:
+    >>> result = sigmoid(vector=np.array([0, np.log(2), np.log(5)]))
+    >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333]) - result) < 10**(-5)
+    True
+    """
+    return _base_activation(vector, 0, 1)
+
+
+def swish(vector: np.ndarray, beta: float) -> np.ndarray:
+    """
+    Swish activation: https://arxiv.org/abs/1710.05941v2
+    Args:
+        vector: (np.ndarray): The input array.
+        beta: (float)
+    Returns:
+        np.ndarray: The result of the swish activation applied to the input array.
+    Examples:
+    >>> result = swish(np.array([1, 2, 3]), 0)
+    >>> np.linalg.norm(np.array([0.5, 1., 1.5]) - result) < 10**(-5)
+    True
+    >>> result = swish(np.array([0, 1, 2]), np.log(2))
+    >>> np.linalg.norm(np.array([0, 0.66666667, 1.6]) - result) < 10**(-5)
+    True
+    """
+    return _base_activation(vector, 1, beta)
+
+
+def sigmoid_linear_unit(vector: np.ndarray) -> np.ndarray:
+    """
+    SiLU activation: https://arxiv.org/abs/1606.08415
+    Args:
+        vector: (np.ndarray): The input array.
+
+    Returns:
+        np.ndarray: The result of the sigmoid linear unit applied to the input array.
+    Examples:
+    >>> result = sigmoid_linear_unit(np.array([0, 1, np.log(2)]))
+    >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098]) - result) < 10**(-5)
+    True
+    """
+    return swish(vector, 1)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()

From 63943a72d8bbfde2972b18aa85c766d408b50909 Mon Sep 17 00:00:00 2001
From: Alexander Dicke
Date: Thu, 7 Sep 2023 17:00:32 +0200
Subject: [PATCH 2/5] addresses review feedback

---
 neural_network/activation_functions/sigmoid_like.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/neural_network/activation_functions/sigmoid_like.py b/neural_network/activation_functions/sigmoid_like.py
index a73e99b4b2ba..8daa0ae73f6f 100644
--- a/neural_network/activation_functions/sigmoid_like.py
+++ b/neural_network/activation_functions/sigmoid_like.py
@@ -20,7 +20,7 @@ def sigmoid(vector: np.ndarray) -> np.ndarray:
     >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333]) - result) < 10**(-5)
     True
     """
-    return _base_activation(vector, 0, 1)
+    return _base_activation(vector, alpha=0, beta=1)
 
 
 def swish(vector: np.ndarray, beta: float) -> np.ndarray:
@@ -39,7 +39,7 @@ def swish(vector: np.ndarray, beta: float) -> np.ndarray:
     >>> np.linalg.norm(np.array([0, 0.66666667, 1.6]) - result) < 10**(-5)
     True
     """
-    return _base_activation(vector, 1, beta)
+    return _base_activation(vector, alpha=1, beta=beta)
 
 
 def sigmoid_linear_unit(vector: np.ndarray) -> np.ndarray:
@@ -55,7 +55,7 @@ def sigmoid_linear_unit(vector: np.ndarray) -> np.ndarray:
     >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098]) - result) < 10**(-5)
     True
     """
-    return swish(vector, 1)
+    return swish(vector, beta=1)
 
 
 if __name__ == "__main__":

From 6cb23f9be9d7f70a358b7a73dfdc13b2b8413bac Mon Sep 17 00:00:00 2001
From: Alexander Dicke
Date: Thu, 7 Sep 2023 17:07:56 +0200
Subject: [PATCH 3/5] adds tests for _base_activation

---
 neural_network/activation_functions/sigmoid_like.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/neural_network/activation_functions/sigmoid_like.py b/neural_network/activation_functions/sigmoid_like.py
index 8daa0ae73f6f..0fc1630782df 100644
--- a/neural_network/activation_functions/sigmoid_like.py
+++ b/neural_network/activation_functions/sigmoid_like.py
@@ -4,6 +4,19 @@
 def _base_activation(vector: np.ndarray, alpha: float, beta: float) -> np.ndarray:
     """
     Base activation for sigmoid, swish, and SiLU.
+    Examples:
+    >>> result = _base_activation(np.array([0, np.log(2), np.log(5)]), 0, 1)
+    >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333]) - result) < 10**(-5)
+    True
+    >>> result = _base_activation(np.array([1, 2, 3]), 1, 0)
+    >>> np.linalg.norm(np.array([0.5, 1., 1.5]) - result) < 10**(-5)
+    True
+    >>> result = _base_activation(np.array([0, 1, 2]), 1, np.log(2))
+    >>> np.linalg.norm(np.array([0, 0.66666667, 1.6]) - result) < 10**(-5)
+    True
+    >>> result = _base_activation(np.array([0, 1, np.log(2)]), 1, 1)
+    >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098]) - result) < 10**(-5)
+    True
     """
     return np.power(vector, alpha) / (1 + np.exp(-beta * vector))
 

From 8f37073e9aa000436f1307bf74f83ab10e9c6fc3 Mon Sep 17 00:00:00 2001
From: Alexander Dicke
Date: Sat, 9 Sep 2023 09:38:33 +0200
Subject: [PATCH 4/5] removes _base_activation

---
 .../activation_functions/sigmoid_like.py | 49 ++++++-------------
 1 file changed, 14 insertions(+), 35 deletions(-)

diff --git a/neural_network/activation_functions/sigmoid_like.py b/neural_network/activation_functions/sigmoid_like.py
index 0fc1630782df..0b5b9b776c97 100644
--- a/neural_network/activation_functions/sigmoid_like.py
+++ b/neural_network/activation_functions/sigmoid_like.py
@@ -1,26 +1,6 @@
 import numpy as np
 
 
-def _base_activation(vector: np.ndarray, alpha: float, beta: float) -> np.ndarray:
-    """
-    Base activation for sigmoid, swish, and SiLU.
-    Examples:
-    >>> result = _base_activation(np.array([0, np.log(2), np.log(5)]), 0, 1)
-    >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333]) - result) < 10**(-5)
-    True
-    >>> result = _base_activation(np.array([1, 2, 3]), 1, 0)
-    >>> np.linalg.norm(np.array([0.5, 1., 1.5]) - result) < 10**(-5)
-    True
-    >>> result = _base_activation(np.array([0, 1, 2]), 1, np.log(2))
-    >>> np.linalg.norm(np.array([0, 0.66666667, 1.6]) - result) < 10**(-5)
-    True
-    >>> result = _base_activation(np.array([0, 1, np.log(2)]), 1, 1)
-    >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098]) - result) < 10**(-5)
-    True
-    """
-    return np.power(vector, alpha) / (1 + np.exp(-beta * vector))
-
-
 def sigmoid(vector: np.ndarray) -> np.ndarray:
     """
     The standard sigmoid function.
@@ -28,12 +8,12 @@ def sigmoid(vector: np.ndarray) -> np.ndarray:
         vector: (np.ndarray): The input array.
     Returns:
         np.ndarray: The result of the sigmoid activation applied to the input array.
-    Examples:
-    >>> result = sigmoid(vector=np.array([0, np.log(2), np.log(5)]))
-    >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333]) - result) < 10**(-5)
+
+    >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333]) \
+    - sigmoid(vector=np.array([0, np.log(2), np.log(5)]))) < 10**(-5)
     True
     """
-    return _base_activation(vector, alpha=0, beta=1)
+    return 1 / (1 + np.exp(-1 * vector))
 
 
 def swish(vector: np.ndarray, beta: float) -> np.ndarray:
@@ -44,15 +24,15 @@ def swish(vector: np.ndarray, beta: float) -> np.ndarray:
         beta: (float)
     Returns:
         np.ndarray: The result of the swish activation applied to the input array.
-    Examples:
-    >>> result = swish(np.array([1, 2, 3]), 0)
-    >>> np.linalg.norm(np.array([0.5, 1., 1.5]) - result) < 10**(-5)
+
+    >>> np.linalg.norm(np.array([0.5, 1., 1.5]) \
+    - swish(np.array([1, 2, 3]), 0)) < 10**(-5)
     True
-    >>> result = swish(np.array([0, 1, 2]), np.log(2))
-    >>> np.linalg.norm(np.array([0, 0.66666667, 1.6]) - result) < 10**(-5)
+    >>> np.linalg.norm(np.array([0, 0.66666667, 1.6]) \
+    - swish(np.array([0, 1, 2]), np.log(2))) < 10**(-5)
     True
     """
-    return _base_activation(vector, alpha=1, beta=beta)
+    return vector / (1 + np.exp(-beta * vector))
 
 
 def sigmoid_linear_unit(vector: np.ndarray) -> np.ndarray:
@@ -60,15 +40,14 @@ def sigmoid_linear_unit(vector: np.ndarray) -> np.ndarray:
     SiLU activation: https://arxiv.org/abs/1606.08415
     Args:
         vector: (np.ndarray): The input array.
-
     Returns:
         np.ndarray: The result of the sigmoid linear unit applied to the input array.
-    Examples:
-    >>> result = sigmoid_linear_unit(np.array([0, 1, np.log(2)]))
-    >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098]) - result) < 10**(-5)
+
+    >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098]) \
+    - sigmoid_linear_unit(np.array([0, 1, np.log(2)]))) < 10**(-5)
     True
     """
-    return swish(vector, beta=1)
+    return vector / (1 + np.exp(-1 * vector))
 
 
 if __name__ == "__main__":

From 8de62d97785a8993670851e450ec82b3f8c066c1 Mon Sep 17 00:00:00 2001
From: Alexander Dicke
Date: Sun, 17 Sep 2023 11:06:13 +0200
Subject: [PATCH 5/5] addresses review feedback

---
 .../activation_functions/sigmoid_like.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/neural_network/activation_functions/sigmoid_like.py b/neural_network/activation_functions/sigmoid_like.py
index 0b5b9b776c97..c18481fbb49a 100644
--- a/neural_network/activation_functions/sigmoid_like.py
+++ b/neural_network/activation_functions/sigmoid_like.py
@@ -9,11 +9,11 @@ def sigmoid(vector: np.ndarray) -> np.ndarray:
     Returns:
         np.ndarray: The result of the sigmoid activation applied to the input array.
 
-    >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333]) \
-    - sigmoid(vector=np.array([0, np.log(2), np.log(5)]))) < 10**(-5)
+    >>> np.linalg.norm(np.array([0.5, 0.66666667, 0.83333333])
+    ... - sigmoid(vector=np.array([0, np.log(2), np.log(5)]))) < 10**(-5)
     True
     """
-    return 1 / (1 + np.exp(-1 * vector))
+    return 1 / (1 + np.exp(-vector))
 
 
 def swish(vector: np.ndarray, beta: float) -> np.ndarray:
@@ -25,11 +25,11 @@ def swish(vector: np.ndarray, beta: float) -> np.ndarray:
     Returns:
         np.ndarray: The result of the swish activation applied to the input array.
 
-    >>> np.linalg.norm(np.array([0.5, 1., 1.5]) \
-    - swish(np.array([1, 2, 3]), 0)) < 10**(-5)
+    >>> np.linalg.norm(np.array([0.5, 1., 1.5])
+    ... - swish(np.array([1, 2, 3]), 0)) < 10**(-5)
     True
-    >>> np.linalg.norm(np.array([0, 0.66666667, 1.6]) \
-    - swish(np.array([0, 1, 2]), np.log(2))) < 10**(-5)
+    >>> np.linalg.norm(np.array([0, 0.66666667, 1.6])
+    ... - swish(np.array([0, 1, 2]), np.log(2))) < 10**(-5)
     True
     """
     return vector / (1 + np.exp(-beta * vector))
@@ -43,11 +43,11 @@ def sigmoid_linear_unit(vector: np.ndarray) -> np.ndarray:
     Returns:
         np.ndarray: The result of the sigmoid linear unit applied to the input array.
 
-    >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098]) \
-    - sigmoid_linear_unit(np.array([0, 1, np.log(2)]))) < 10**(-5)
+    >>> np.linalg.norm(np.array([0, 0.7310585, 0.462098])
+    ... - sigmoid_linear_unit(np.array([0, 1, np.log(2)]))) < 10**(-5)
     True
     """
-    return vector / (1 + np.exp(-1 * vector))
+    return vector / (1 + np.exp(-vector))
 
 
 if __name__ == "__main__":
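
Note on the merged implementations: each of them evaluates a term of the form
1 / (1 + np.exp(-beta * vector)). For float64 inputs, once -beta * vector
exceeds roughly 709, np.exp overflows to inf and NumPy emits a RuntimeWarning,
although the returned values still land on the correct limits (for example,
sigmoid(np.array([-1000.0])) evaluates to 0.0). Callers who need to avoid the
warning can split the evaluation by sign. The sketch below is a minimal
illustration of that idea, not part of the patches above; the name
stable_sigmoid is hypothetical.

    import numpy as np


    def stable_sigmoid(vector: np.ndarray) -> np.ndarray:
        # Hypothetical sketch: np.exp is only ever evaluated on non-positive
        # arguments, so it stays within (0, 1] and cannot overflow.
        positive = vector >= 0
        exp_term = np.exp(np.where(positive, -vector, vector))
        # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
        # Both branches are algebraically equal to the sigmoid of x.
        return np.where(positive, 1.0, exp_term) / (1 + exp_term)


    print(stable_sigmoid(np.array([-1000.0, 0.0, 1000.0])))  # [0.  0.5 1. ], no warning

The same sign-split trick carries over to swish and sigmoid_linear_unit by
multiplying the result by vector (and by scaling the argument with beta first).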