From 8ca9043b952a3526336f6a1b57b16733ef67f0e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6kdeniz=20G=C3=BClmez?= <60228478+Goekdeniz-Guelmez@users.noreply.github.com>
Date: Wed, 17 Sep 2025 10:03:05 +0200
Subject: [PATCH 1/3] in. com.

---
 python/mlx/nn/layers/activations.py | 62 ++++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/python/mlx/nn/layers/activations.py b/python/mlx/nn/layers/activations.py
index 360bb113d5..051784499c 100644
--- a/python/mlx/nn/layers/activations.py
+++ b/python/mlx/nn/layers/activations.py
@@ -98,6 +98,34 @@ def softplus(x):
     return mx.logaddexp(x, 0)
 
 
+@partial(mx.compile, shapeless=True)
+def xielu(x, alpha_p, alpha_n, beta, eps):
+    r"""Applies the XieLU activation function.
+
+    This function applies a learnable quadratic scaling to positive inputs and
+    a learnable exponential scaling to negative inputs, both with a shared linear term.
+
+    .. math::
+        \text{XieLU}(x) = \begin{cases}
+            \alpha_p x^2 + \beta x & \text{if } x > 0 \\
+            \alpha_n \left(\exp(\min(x, \epsilon)) - 1 - x\right) + \beta x & \text{if } x \leq 0
+        \end{cases}
+
+    Args:
+        alpha_p: Positive scaling parameter (softplus is applied).
+        alpha_n: Negative scaling parameter (softplus is applied, then shifted by ``beta``).
+        beta: Linear scaling factor.
+        eps: Clamping value for numerical stability in the negative region.
+    """
+    alpha_p = mx.logaddexp(alpha_p, 0)  # softplus(alpha_p)
+    alpha_n = beta + mx.logaddexp(alpha_n, 0)  # softplus(alpha_n) shifted by beta
+    return mx.where(
+        x > 0,
+        alpha_p * mx.square(x) + beta * x,
+        (mx.expm1(mx.minimum(x, eps)) - x) * alpha_n + beta * x,
+    )
+
+
 @partial(mx.compile, shapeless=True)
 def softsign(x):
     r"""Applies the Softsign function.
@@ -541,6 +569,38 @@ def __call__(self, x: mx.array):
         return prelu(x, self.weight)
 
 
+class XieLU(Module):
+    r"""Applies the XieLU activation function.
+
+    See :func:`xielu` for the functional equivalent.
+
+    Args:
+        alpha_p_init (float): Initial value for the positive scaling parameter. Default: ``0.8``
+        alpha_n_init (float): Initial value for the negative scaling parameter. Default: ``0.8``
+        beta (float): Linear scaling factor. Default: ``0.5``
+        eps (float): Clamping value for numerical stability in the negative region. Default: ``-1e-6``
+    """
+
+    def __init__(
+        self,
+        alpha_p_init=0.8,
+        alpha_n_init=0.8,
+        beta=0.5,
+        eps=-1e-6,
+    ):
+        super().__init__()
+        alpha_p_tensor = mx.array(alpha_p_init)
+        alpha_n_tensor = mx.array(alpha_n_init - beta)
+        self.alpha_p = mx.log(mx.exp(alpha_p_tensor) - 1)  # inverse softplus of alpha_p_init
+        self.alpha_n = mx.log(mx.exp(alpha_n_tensor) - 1)  # inverse softplus of (alpha_n_init - beta)
+
+        self.beta = mx.array(beta)
+        self.eps = mx.array(eps)
+
+    def __call__(self, x: mx.array) -> mx.array:
+        return xielu(x, self.alpha_p, self.alpha_n, self.beta, self.eps)
+
+
 class GELU(Module):
     r"""Applies the Gaussian Error Linear Units.
 
@@ -658,4 +718,4 @@ class Softmin(Module):
     r"""Applies the Softmin function.
 
     See :func:`softmin` for the functional equivalent.
-    """
+    """
\ No newline at end of file

From 658aa35eb91bd87fb20e3f5add960e2ccc3a2a8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6kdeniz=20G=C3=BClmez?= <60228478+Goekdeniz-Guelmez@users.noreply.github.com>
Date: Wed, 17 Sep 2025 10:05:00 +0200
Subject: [PATCH 2/3] update ackn. + docs + init

---
 ACKNOWLEDGMENTS.md               | 2 +-
 docs/src/python/nn/layers.rst    | 1 +
 python/mlx/nn/layers/__init__.py | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ACKNOWLEDGMENTS.md b/ACKNOWLEDGMENTS.md
index 186908f09c..bc14077f7f 100644
--- a/ACKNOWLEDGMENTS.md
+++ b/ACKNOWLEDGMENTS.md
@@ -19,7 +19,7 @@ MLX was developed with contributions from the following individuals:
 - Gleb Pobudzey: Added the `where` primitive, and groups in 1D and 2D convolutions.
 - Paul Paczuski: Improved stability of BCE loss calculation
 - Max-Heinrich Laves: Added `conv_transpose1d`, `conv_transpose2d`, and `conv_transpose3d` ops.
-- Gökdeniz Gülmez: Added the `Muon (MomentUm Orthogonalized by Newton-schulz)` optimizer, and the `ReLU²` activation function.
+- Gökdeniz Gülmez: Added the `Muon (MomentUm Orthogonalized by Newton-schulz)` optimizer, and the `XieLU` and `ReLU²` activation functions.
diff --git a/docs/src/python/nn/layers.rst b/docs/src/python/nn/layers.rst
index 1469481477..b7d065fdf4 100644
--- a/docs/src/python/nn/layers.rst
+++ b/docs/src/python/nn/layers.rst
@@ -58,6 +58,7 @@ Layers
    Sequential
    Sigmoid
    SiLU
+   XieLU
    SinusoidalPositionalEncoding
    Softmin
    Softshrink
diff --git a/python/mlx/nn/layers/__init__.py b/python/mlx/nn/layers/__init__.py
index ea2d3029d8..e01e155eec 100644
--- a/python/mlx/nn/layers/__init__.py
+++ b/python/mlx/nn/layers/__init__.py
@@ -26,6 +26,7 @@
     Softsign,
     Step,
     Tanh,
+    XieLU,
     celu,
     elu,
     gelu,

From be4c1ac9d52cfa02d106b42fbd87ccacb75e7054 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6kdeniz=20G=C3=BClmez?= <60228478+Goekdeniz-Guelmez@users.noreply.github.com>
Date: Wed, 17 Sep 2025 10:05:14 +0200
Subject: [PATCH 3/3] format

---
 python/mlx/nn/layers/activations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mlx/nn/layers/activations.py b/python/mlx/nn/layers/activations.py
index 051784499c..903bf7b081 100644
--- a/python/mlx/nn/layers/activations.py
+++ b/python/mlx/nn/layers/activations.py
@@ -718,4 +718,4 @@ class Softmin(Module):
     r"""Applies the Softmin function.
 
     See :func:`softmin` for the functional equivalent.
-    """
\ No newline at end of file
+    """
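
Usage note (not part of the patches): a minimal sketch of how the new activation could be exercised once the series above is applied. It assumes `XieLU` is re-exported from `mlx.nn` as in patch 2, while the functional `xielu` is reached through `mlx.nn.layers.activations`; the sample input is illustrative.

    import mlx.core as mx
    import mlx.nn as nn
    from mlx.nn.layers.activations import xielu

    x = mx.linspace(-3, 3, num=7)

    # Layer form: alpha_p / alpha_n are stored in inverse-softplus space, so the
    # softplus inside `xielu` recovers the configured initial scales.
    act = nn.XieLU()
    y_layer = act(x)

    # Functional form with the layer's raw parameters should match exactly.
    y_fn = xielu(x, act.alpha_p, act.alpha_n, act.beta, act.eps)

    assert mx.allclose(y_layer, y_fn).item()
    print(y_layer)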