From 8ca9043b952a3526336f6a1b57b16733ef67f0e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6kdeniz=20G=C3=BClmez?= <60228478+Goekdeniz-Guelmez@users.noreply.github.com>
Date: Wed, 17 Sep 2025 10:03:05 +0200
Subject: [PATCH 1/3] in. com.

---
 python/mlx/nn/layers/activations.py | 62 ++++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/python/mlx/nn/layers/activations.py b/python/mlx/nn/layers/activations.py
index 360bb113d5..051784499c 100644
--- a/python/mlx/nn/layers/activations.py
+++ b/python/mlx/nn/layers/activations.py
@@ -98,6 +98,34 @@ def softplus(x):
     return mx.logaddexp(x, 0)
 
 
+@partial(mx.compile, shapeless=True)
+def xielu(x, alpha_p, alpha_n, beta, eps):
+    r"""Applies the XieLU activation function.
+
+    This function applies a learnable quadratic scaling to positive inputs and
+    a learnable exponential scaling to negative inputs, both with a shared linear term.
+
+    .. math::
+        \text{XieLU}(x) = \begin{cases}
+            \alpha_p x^2 + \beta x & \text{if } x > 0 \\
+            \alpha_n \left(\exp(\min(x, \epsilon)) - 1 - x\right) + \beta x & \text{if } x \leq 0
+        \end{cases}
+
+    Args:
+        alpha_p: Positive scaling parameter (softplus is applied).
+        alpha_n: Negative scaling parameter (softplus is applied, then shifted by ``beta``).
+        beta: Linear scaling factor.
+        eps: Clamping value for numerical stability in the negative region.
+    """
+    alpha_p = mx.logaddexp(alpha_p, 0)  # softplus(alpha_p)
+    alpha_n = beta + mx.logaddexp(alpha_n, 0)  # softplus(alpha_n) shifted by beta
+    return mx.where(
+        x > 0,
+        alpha_p * mx.square(x) + beta * x,
+        (mx.expm1(mx.minimum(x, eps)) - x) * alpha_n + beta * x,
+    )
+
+
 @partial(mx.compile, shapeless=True)
 def softsign(x):
     r"""Applies the Softsign function.
@@ -541,6 +569,38 @@ def __call__(self, x: mx.array):
         return prelu(x, self.weight)
 
 
+class XieLU(Module):
+    r"""Applies the XieLU activation function.
+
+    See :func:`xielu` for the functional equivalent.
+
+    Args:
+        alpha_p_init (float): Initial value for the positive scaling parameter. Default: ``0.8``
+        alpha_n_init (float): Initial value for the negative scaling parameter. Default: ``0.8``
+        beta (float): Linear scaling factor. Default: ``0.5``
+        eps (float): Clamping value for numerical stability in the negative region. Default: ``-1e-6``
+    """
+
+    def __init__(
+        self,
+        alpha_p_init=0.8,
+        alpha_n_init=0.8,
+        beta=0.5,
+        eps=-1e-6,
+    ):
+        super().__init__()
+        alpha_p_tensor = mx.array(alpha_p_init)
+        alpha_n_tensor = mx.array(alpha_n_init - beta)
+        self.alpha_p = mx.log(mx.exp(alpha_p_tensor) - 1)  # inverse softplus of alpha_p_init
+        self.alpha_n = mx.log(mx.exp(alpha_n_tensor) - 1)  # inverse softplus of (alpha_n_init - beta)
+
+        self.beta = mx.array(beta)
+        self.eps = mx.array(eps)
+
+    def __call__(self, x: mx.array) -> mx.array:
+        return xielu(x, self.alpha_p, self.alpha_n, self.beta, self.eps)
+
+
 class GELU(Module):
     r"""Applies the Gaussian Error Linear Units.
 
@@ -658,4 +718,4 @@ class Softmin(Module):
     r"""Applies the Softmin function.
 
     See :func:`softmin` for the functional equivalent.
-    """
+    """
\ No newline at end of file

From 658aa35eb91bd87fb20e3f5add960e2ccc3a2a8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6kdeniz=20G=C3=BClmez?= <60228478+Goekdeniz-Guelmez@users.noreply.github.com>
Date: Wed, 17 Sep 2025 10:05:00 +0200
Subject: [PATCH 2/3] update ackn. + docs + init

---
 ACKNOWLEDGMENTS.md               | 2 +-
 docs/src/python/nn/layers.rst    | 1 +
 python/mlx/nn/layers/__init__.py | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ACKNOWLEDGMENTS.md b/ACKNOWLEDGMENTS.md
index 186908f09c..bc14077f7f 100644
--- a/ACKNOWLEDGMENTS.md
+++ b/ACKNOWLEDGMENTS.md
@@ -19,7 +19,7 @@ MLX was developed with contributions from the following individuals:
 - Gleb Pobudzey: Added the `where` primitive, and groups in 1D and 2D convolutions.
 - Paul Paczuski: Improved stability of BCE loss calculation
 - Max-Heinrich Laves: Added `conv_transpose1d`, `conv_transpose2d`, and `conv_transpose3d` ops.
-- Gökdeniz Gülmez: Added the `Muon (MomentUm Orthogonalized by Newton-schulz)` optimizer, and the `ReLU²` activation function.
+- Gökdeniz Gülmez: Added the `Muon (MomentUm Orthogonalized by Newton-schulz)` optimizer, and the `XieLU` and `ReLU²` activation functions.
diff --git a/docs/src/python/nn/layers.rst b/docs/src/python/nn/layers.rst
index 1469481477..b7d065fdf4 100644
--- a/docs/src/python/nn/layers.rst
+++ b/docs/src/python/nn/layers.rst
@@ -58,6 +58,7 @@ Layers
    Sequential
    Sigmoid
    SiLU
+   XieLU
    SinusoidalPositionalEncoding
    Softmin
    Softshrink
diff --git a/python/mlx/nn/layers/__init__.py b/python/mlx/nn/layers/__init__.py
index ea2d3029d8..e01e155eec 100644
--- a/python/mlx/nn/layers/__init__.py
+++ b/python/mlx/nn/layers/__init__.py
@@ -26,6 +26,7 @@
     Softsign,
     Step,
     Tanh,
+    XieLU,
     celu,
     elu,
     gelu,

From be4c1ac9d52cfa02d106b42fbd87ccacb75e7054 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6kdeniz=20G=C3=BClmez?= <60228478+Goekdeniz-Guelmez@users.noreply.github.com>
Date: Wed, 17 Sep 2025 10:05:14 +0200
Subject: [PATCH 3/3] format

---
 python/mlx/nn/layers/activations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mlx/nn/layers/activations.py b/python/mlx/nn/layers/activations.py
index 051784499c..903bf7b081 100644
--- a/python/mlx/nn/layers/activations.py
+++ b/python/mlx/nn/layers/activations.py
@@ -718,4 +718,4 @@ class Softmin(Module):
     r"""Applies the Softmin function.
 
     See :func:`softmin` for the functional equivalent.
-    """
\ No newline at end of file
+    """
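
Usage note (not part of the patches): a minimal sketch of how the new activation could be exercised once the series above is applied. It assumes `XieLU` is re-exported from `mlx.nn` as in patch 2, while the functional `xielu` is reached through `mlx.nn.layers.activations`; the sample input is illustrative.

    import mlx.core as mx
    import mlx.nn as nn
    from mlx.nn.layers.activations import xielu

    x = mx.linspace(-3, 3, num=7)

    # Layer form: alpha_p / alpha_n are stored in inverse-softplus space, so the
    # softplus inside `xielu` recovers the configured initial scales.
    act = nn.XieLU()
    y_layer = act(x)

    # Functional form with the layer's raw parameters should match exactly.
    y_fn = xielu(x, act.alpha_p, act.alpha_n, act.beta, act.eps)

    assert mx.allclose(y_layer, y_fn).item()
    print(y_layer)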