From 0aa7e786ac180db330984b6481b1694a9340bbb2 Mon Sep 17 00:00:00 2001 From: MyznikovFD Date: Thu, 18 Dec 2025 15:12:54 +0300 Subject: [PATCH 1/3] refactor(families): implementations and tests of concrete parametric families are divided into separate files --- src/pysatl_core/families/builtins/__init__.py | 21 + .../families/builtins/continuous/__init__.py | 18 + .../families/builtins/continuous/normal.py | 326 +++++++++ .../families/builtins/continuous/uniform.py | 344 +++++++++ src/pysatl_core/families/configuration.py | 624 +--------------- tests/unit/families/builtins/__init__.py | 12 + .../families/builtins/continuous/__init__.py | 12 + .../unit/families/builtins/continuous/base.py | 30 + .../builtins/continuous/test_normal.py | 248 +++++++ .../builtins/continuous/test_uniform.py | 302 ++++++++ tests/unit/families/test_configuration.py | 671 ++---------------- 11 files changed, 1376 insertions(+), 1232 deletions(-) create mode 100644 src/pysatl_core/families/builtins/__init__.py create mode 100644 src/pysatl_core/families/builtins/continuous/__init__.py create mode 100644 src/pysatl_core/families/builtins/continuous/normal.py create mode 100644 src/pysatl_core/families/builtins/continuous/uniform.py create mode 100644 tests/unit/families/builtins/__init__.py create mode 100644 tests/unit/families/builtins/continuous/__init__.py create mode 100644 tests/unit/families/builtins/continuous/base.py create mode 100644 tests/unit/families/builtins/continuous/test_normal.py create mode 100644 tests/unit/families/builtins/continuous/test_uniform.py diff --git a/src/pysatl_core/families/builtins/__init__.py b/src/pysatl_core/families/builtins/__init__.py new file mode 100644 index 0000000..49ea81d --- /dev/null +++ b/src/pysatl_core/families/builtins/__init__.py @@ -0,0 +1,21 @@ +""" +Built-in distribution families for PySATL. + +This package contains implementations of standard statistical distribution families +that are available by default in PySATL. 
+""" + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + + +from pysatl_core.families.builtins.continuous import ( + configure_normal_family, + configure_uniform_family, +) + +__all__ = [ + "configure_normal_family", + "configure_uniform_family", +] diff --git a/src/pysatl_core/families/builtins/continuous/__init__.py b/src/pysatl_core/families/builtins/continuous/__init__.py new file mode 100644 index 0000000..bfdd481 --- /dev/null +++ b/src/pysatl_core/families/builtins/continuous/__init__.py @@ -0,0 +1,18 @@ +""" +Built-in continuous distribution families. + +This module contains implementations of continuous parametric families. +""" + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + + +from pysatl_core.families.builtins.continuous.normal import configure_normal_family +from pysatl_core.families.builtins.continuous.uniform import configure_uniform_family + +__all__ = [ + "configure_normal_family", + "configure_uniform_family", +] diff --git a/src/pysatl_core/families/builtins/continuous/normal.py b/src/pysatl_core/families/builtins/continuous/normal.py new file mode 100644 index 0000000..ee26136 --- /dev/null +++ b/src/pysatl_core/families/builtins/continuous/normal.py @@ -0,0 +1,326 @@ +""" +Normal distribution family implementation. + +Contains the Normal family with multiple parameterizations. 
+""" + +from __future__ import annotations + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +import math +from typing import TYPE_CHECKING, cast + +import numpy as np +from scipy.special import erf, erfinv + +from pysatl_core.distributions.strategies import DefaultSamplingUnivariateStrategy +from pysatl_core.distributions.support import ContinuousSupport +from pysatl_core.families.parametric_family import ParametricFamily +from pysatl_core.families.parametrizations import ( + Parametrization, + constraint, + parametrization, +) +from pysatl_core.families.registry import ParametricFamilyRegister +from pysatl_core.types import ( + CharacteristicName, + ComplexArray, + FamilyName, + NumericArray, + UnivariateContinuous, +) + +if TYPE_CHECKING: + from typing import Any + + +def configure_normal_family() -> None: + """ + Configure and register the Normal distribution family. + """ + NORMAL_DOC = """ + Normal (Gaussian) distribution. + + The normal distribution is a continuous probability distribution characterized + by its bell-shaped curve. It is symmetric about its mean and is defined by + two parameters: mean (μ) and standard deviation (σ). + + Probability density function: + f(x) = 1/(σ√(2π)) * exp(-(x-μ)²/(2σ²)) + + The normal distribution is widely used in statistics, natural sciences, + and social sciences as a simple model for complex random phenomena. + """ + + def pdf(parameters: Parametrization, x: NumericArray) -> NumericArray: + """ + Probability density function for normal distribution. 
+ + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - mu: float (mean) + - sigma: float (standard deviation) + x : NumericArray + Points at which to evaluate the probability density function + + Returns + ------- + NumericArray + Probability density values at points x + """ + parameters = cast(_MeanStd, parameters) + + sigma = parameters.sigma + mu = parameters.mu + + coefficient = 1.0 / (sigma * np.sqrt(2 * np.pi)) + exponent = -((x - mu) ** 2) / (2 * sigma**2) + + return cast(NumericArray, coefficient * np.exp(exponent)) + + def cdf(parameters: Parametrization, x: NumericArray) -> NumericArray: + """ + Cumulative distribution function for normal distribution. + + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - mu: float (mean) + - sigma: float (standard deviation) + x : NumericArray + Points at which to evaluate the cumulative distribution function + + Returns + ------- + NumericArray + Probabilities P(X ≤ x) for each point x + """ + parameters = cast(_MeanStd, parameters) + + z = (x - parameters.mu) / (parameters.sigma * np.sqrt(2)) + return cast(NumericArray, 0.5 * (1 + erf(z))) + + def ppf(parameters: Parametrization, p: NumericArray) -> NumericArray: + """ + Percent point function (inverse CDF) for normal distribution.
+ + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - mu: float (mean) + - sigma: float (standard deviation) + p : NumericArray + Probability from [0, 1] + + Returns + ------- + NumericArray + Quantiles corresponding to probabilities p + If p[i] is 0 or 1, then the result[i] is -inf or inf respectively + + Raises + ------ + ValueError + If probability is outside [0, 1] + """ + if np.any((p < 0) | (p > 1)): + raise ValueError("Probability must be in [0, 1]") + + parameters = cast(_MeanStd, parameters) + + result = cast( + NumericArray, + parameters.mu + parameters.sigma * np.sqrt(2) * erfinv(2 * p - 1), + ) + return result + + def char_func(parameters: Parametrization, t: NumericArray) -> ComplexArray: + """ + Characteristic function of normal distribution. + + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - mu: float (mean) + - sigma: float (standard deviation) + t : NumericArray + Points at which to evaluate the characteristic function + + Returns + ------- + ComplexArray + Characteristic function values at points t + """ + parameters = cast(_MeanStd, parameters) + + sigma = parameters.sigma + mu = parameters.mu + return cast(ComplexArray, np.exp(1j * mu * t - 0.5 * (sigma**2) * (t**2))) + + def mean_func(parameters: Parametrization, _: Any) -> float: + """Mean of normal distribution.""" + parameters = cast(_MeanStd, parameters) + return parameters.mu + + def var_func(parameters: Parametrization, _: Any) -> float: + """Variance of normal distribution.""" + parameters = cast(_MeanStd, parameters) + return parameters.sigma**2 + + def skew_func(_1: Parametrization, _2: Any) -> int: + """Skewness of normal distribution (always 0).""" + return 0 + + def kurt_func(_1: Parametrization, _2: Any, excess: bool = False) -> int: + """Raw or excess kurtosis of normal distribution.
+ + Parameters + ---------- + _1 : Parametrization + Needed by architecture parameter + excess : bool + A value defines if there will be raw or excess kurtosis + default is False + + Returns + ------- + int + Kurtosis value + """ + if not excess: + return 3 + else: + return 0 + + def _support(_: Parametrization) -> ContinuousSupport: + """Support of normal distribution""" + return ContinuousSupport() + + Normal = ParametricFamily( + name=FamilyName.NORMAL, + distr_type=UnivariateContinuous, + distr_parametrizations=["meanStd", "meanPrec", "exponential"], + distr_characteristics={ + CharacteristicName.PDF: pdf, + CharacteristicName.CDF: cdf, + CharacteristicName.PPF: ppf, + CharacteristicName.CF: char_func, + CharacteristicName.MEAN: mean_func, + CharacteristicName.VAR: var_func, + CharacteristicName.SKEW: skew_func, + CharacteristicName.KURT: kurt_func, + }, + sampling_strategy=DefaultSamplingUnivariateStrategy(), + support_by_parametrization=_support, + ) + Normal.__doc__ = NORMAL_DOC + + @parametrization(family=Normal, name="meanStd") + class _MeanStd(Parametrization): + """ + Standard parametrization of normal distribution. + + Parameters + ---------- + mu : float + Mean of the distribution + sigma : float + Standard deviation of the distribution + """ + + mu: float + sigma: float + + @constraint(description="sigma > 0") + def check_sigma_positive(self) -> bool: + """Check that standard deviation is positive.""" + return self.sigma > 0 + + @parametrization(family=Normal, name="meanPrec") + class _MeanPrec(Parametrization): + """ + Mean-precision parametrization of normal distribution. 
+ + Parameters + ---------- + mu : float + Mean of the distribution + tau : float + Precision parameter (inverse variance) + """ + + mu: float + tau: float + + @constraint(description="tau > 0") + def check_tau_positive(self) -> bool: + """Check that precision parameter is positive.""" + return self.tau > 0 + + def transform_to_base_parametrization(self) -> Parametrization: + """ + Transform to Standard parametrization. + + Returns + ------- + Parametrization + Standard parametrization instance + """ + sigma = math.sqrt(1 / self.tau) + return _MeanStd(mu=self.mu, sigma=sigma) + + @parametrization(family=Normal, name="exponential") + class _Exp(Parametrization): + """ + Exponential family parametrization of normal distribution. + Uses the form: y = exp(a*x² + b*x + c) + + Parameters + ---------- + a : float + Quadratic term coefficient in exponential form + b : float + Linear term coefficient in exponential form + """ + + a: float + b: float + + @property + def c(self) -> float: + """ + Calculate the normalization constant c. + + Returns + ------- + float + Normalization constant + """ + return (self.b**2) / (4 * self.a) - (1 / 2) * math.log(math.pi / (-self.a)) + + @constraint(description="a < 0") + def check_a_negative(self) -> bool: + """Check that quadratic term coefficient is negative.""" + return self.a < 0 + + def transform_to_base_parametrization(self) -> Parametrization: + """ + Transform to Standard parametrization. + Returns + ------- + Parametrization + Standard parametrization instance + """ + mu = -self.b / (2 * self.a) + sigma = math.sqrt(-1 / (2 * self.a)) + return _MeanStd(mu=mu, sigma=sigma) + + ParametricFamilyRegister.register(Normal) diff --git a/src/pysatl_core/families/builtins/continuous/uniform.py b/src/pysatl_core/families/builtins/continuous/uniform.py new file mode 100644 index 0000000..f35f289 --- /dev/null +++ b/src/pysatl_core/families/builtins/continuous/uniform.py @@ -0,0 +1,344 @@ +""" +Uniform distribution family implementation. 
+ +Contains the Uniform family with multiple parameterizations. +""" + +from __future__ import annotations + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from typing import TYPE_CHECKING, cast + +import numpy as np + +from pysatl_core.distributions.strategies import DefaultSamplingUnivariateStrategy +from pysatl_core.distributions.support import ContinuousSupport +from pysatl_core.families.parametric_family import ParametricFamily +from pysatl_core.families.parametrizations import ( + Parametrization, + constraint, + parametrization, +) +from pysatl_core.families.registry import ParametricFamilyRegister +from pysatl_core.types import ( + CharacteristicName, + ComplexArray, + FamilyName, + NumericArray, + UnivariateContinuous, +) + +if TYPE_CHECKING: + from typing import Any + + +def configure_uniform_family() -> None: + """ + Configure and register the Uniform distribution family. + """ + UNIFORM_DOC = """ + Uniform (continuous) distribution. + + The uniform distribution is a continuous probability distribution where + all intervals of the same length are equally probable. It is defined by + two parameters: lower bound and upper bound. + + Probability density function: + f(x) = 1/(upper_bound - lower_bound) for x in [lower_bound, upper_bound], 0 otherwise + + The uniform distribution is often used when there is no prior knowledge + about the possible values of a variable, representing maximum uncertainty. + """ + + def pdf(parameters: Parametrization, x: NumericArray) -> NumericArray: + """ + Probability density function for uniform distribution. 
+ - For x < lower_bound: returns 0 + - For x > upper_bound: returns 0 + - Otherwise: returns (1 / (upper_bound - lower_bound)) + + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lower_bound: float (lower bound) + - upper_bound: float (upper bound) + x : NumericArray + Points at which to evaluate the probability density function + + Returns + ------- + NumericArray + Probability density values at points x + """ + parameters = cast(_Standard, parameters) + + lower_bound = parameters.lower_bound + upper_bound = parameters.upper_bound + + return np.where( + (x >= lower_bound) & (x <= upper_bound), 1.0 / (upper_bound - lower_bound), 0.0 + ) + + def cdf(parameters: Parametrization, x: NumericArray) -> NumericArray: + """ + Cumulative distribution function for uniform distribution. + Uses np.clip for vectorized computation: + - For x < lower_bound: returns 0 + - For x > upper_bound: returns 1 + + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lower_bound: float (lower bound) + - upper_bound: float (upper bound) + x : NumericArray + Points at which to evaluate the cumulative distribution function + + Returns + ------- + NumericArray + Probabilities P(X ≤ x) for each point x + """ + parameters = cast(_Standard, parameters) + + lower_bound = parameters.lower_bound + upper_bound = parameters.upper_bound + + return cast( + NumericArray, np.clip((x - lower_bound) / (upper_bound - lower_bound), 0.0, 1.0) + ) + + def ppf(parameters: Parametrization, p: NumericArray) -> NumericArray: + """ + Percent point function (inverse CDF) for uniform distribution. 
+ + For uniform distribution on [lower_bound, upper_bound]: + - For p = 0: returns lower_bound + - For p = 1: returns upper_bound + - For p in (0, 1): returns lower_bound + p × (upper_bound - lower_bound) + + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lower_bound: float (lower bound) + - upper_bound: float (upper bound) + p : NumericArray + Probability from [0, 1] + + Returns + ------- + NumericArray + Quantiles corresponding to probabilities p + + Raises + ------ + ValueError + If probability is outside [0, 1] + """ + if np.any((p < 0) | (p > 1)): + raise ValueError("Probability must be in [0, 1]") + + parameters = cast(_Standard, parameters) + lower_bound = parameters.lower_bound + upper_bound = parameters.upper_bound + + return cast(NumericArray, lower_bound + p * (upper_bound - lower_bound)) + + def char_func(parameters: Parametrization, t: NumericArray) -> ComplexArray: + """ + Characteristic function of uniform distribution. + + Characteristic function formula for uniform distribution on [lower_bound, upper_bound]: + φ(t) = sinc((upper_bound - lower_bound) * t / (2π)) * + * exp(i * (lower_bound + upper_bound) * t / 2) + where sinc(x) = sin(πx)/(πx) as defined by numpy.
+ + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lower_bound: float (lower bound) + - upper_bound: float (upper bound) + t : NumericArray + Points at which to evaluate the characteristic function + + Returns + ------- + ComplexArray + Characteristic function values at points t + """ + parameters = cast(_Standard, parameters) + + lower_bound = parameters.lower_bound + upper_bound = parameters.upper_bound + + width = upper_bound - lower_bound + center = (lower_bound + upper_bound) / 2 + + t_arr = np.asarray(t, dtype=np.float64) + + x = width * t_arr / (2 * np.pi) + sinc_val = np.sinc(x) + + return cast(ComplexArray, sinc_val * np.exp(1j * center * t_arr)) + + def mean_func(parameters: Parametrization, _: Any) -> float: + """Mean of uniform distribution.""" + parameters = cast(_Standard, parameters) + return (parameters.lower_bound + parameters.upper_bound) / 2 + + def var_func(parameters: Parametrization, _: Any) -> float: + """Variance of uniform distribution.""" + parameters = cast(_Standard, parameters) + width = parameters.upper_bound - parameters.lower_bound + return width**2 / 12 + + def skew_func(_1: Parametrization, _2: Any) -> int: + """Skewness of uniform distribution (always 0).""" + return 0 + + def kurt_func(_1: Parametrization, _2: Any, excess: bool = False) -> float: + """Raw or excess kurtosis of uniform distribution. 
+ + Parameters + ---------- + _1 : Parametrization + Needed by architecture parameter + _2 : Any + Needed by architecture parameter + excess : bool + A value defines if there will be raw or excess kurtosis + default is False + + Returns + ------- + float + Kurtosis value + """ + if not excess: + return 1.8 + else: + return -1.2 + + def _support(parameters: Parametrization) -> ContinuousSupport: + """Support of uniform distribution""" + parameters = cast(_Standard, parameters.transform_to_base_parametrization()) + return ContinuousSupport( + left=parameters.lower_bound, + right=parameters.upper_bound, + left_closed=True, + right_closed=True, + ) + + Uniform = ParametricFamily( + name=FamilyName.CONTINUOUS_UNIFORM, + distr_type=UnivariateContinuous, + distr_parametrizations=["standard", "meanWidth", "minRange"], + distr_characteristics={ + CharacteristicName.PDF: pdf, + CharacteristicName.CDF: cdf, + CharacteristicName.PPF: ppf, + CharacteristicName.CF: char_func, + CharacteristicName.MEAN: mean_func, + CharacteristicName.VAR: var_func, + CharacteristicName.SKEW: skew_func, + CharacteristicName.KURT: kurt_func, + }, + sampling_strategy=DefaultSamplingUnivariateStrategy(), + support_by_parametrization=_support, + ) + Uniform.__doc__ = UNIFORM_DOC + + @parametrization(family=Uniform, name="standard") + class _Standard(Parametrization): + """ + Standard parametrization of uniform distribution. + + Parameters + ---------- + lower_bound : float + Lower bound of the distribution + upper_bound : float + Upper bound of the distribution + """ + + lower_bound: float + upper_bound: float + + @constraint(description="lower_bound < upper_bound") + def check_lower_less_than_upper(self) -> bool: + """Check that lower bound is less than upper bound.""" + return self.lower_bound < self.upper_bound + + @parametrization(family=Uniform, name="meanWidth") + class _MeanWidth(Parametrization): + """ + Mean-width parametrization of uniform distribution. 
+ + Parameters + ---------- + mean : float + Mean (center) of the distribution + width : float + Width of the distribution (upper_bound - lower_bound) + """ + + mean: float + width: float + + @constraint(description="width > 0") + def check_width_positive(self) -> bool: + """Check that width is positive.""" + return self.width > 0 + + def transform_to_base_parametrization(self) -> Parametrization: + """ + Transform to Standard parametrization. + + Returns + ------- + Parametrization + Standard parametrization instance + """ + half_width = self.width / 2 + return _Standard(lower_bound=self.mean - half_width, upper_bound=self.mean + half_width) + + @parametrization(family=Uniform, name="minRange") + class _MinRange(Parametrization): + """ + Minimum-range parametrization of uniform distribution. + + Parameters + ---------- + minimum : float + Minimum value (lower bound) + range_val : float + Range of the distribution (upper_bound - lower_bound) + """ + + minimum: float + range_val: float + + @constraint(description="range_val > 0") + def check_range_positive(self) -> bool: + """Check that range is positive.""" + return self.range_val > 0 + + def transform_to_base_parametrization(self) -> Parametrization: + """ + Transform to Standard parametrization. + + Returns + ------- + Parametrization + Standard parametrization instance + """ + return _Standard(lower_bound=self.minimum, upper_bound=self.minimum + self.range_val) + + ParametricFamilyRegister.register(Uniform) diff --git a/src/pysatl_core/families/configuration.py b/src/pysatl_core/families/configuration.py index 043e791..3754be7 100644 --- a/src/pysatl_core/families/configuration.py +++ b/src/pysatl_core/families/configuration.py @@ -5,7 +5,7 @@ This module defines and configures parametric distribution families for the PySATL library: - :class:`Normal Family` — Gaussian distribution with multiple parameterizations. -- :class:`Uniform Family` — Gaussian distribution with multiple parameterizations. 
+- :class:`Uniform Family` — Uniform distribution with multiple parameterizations. Notes ----- @@ -21,32 +21,13 @@ __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" -import math from functools import lru_cache -from typing import TYPE_CHECKING, cast -import numpy as np -from scipy.special import erf, erfinv - -from pysatl_core.distributions.strategies import DefaultSamplingUnivariateStrategy -from pysatl_core.distributions.support import ContinuousSupport -from pysatl_core.families.parametric_family import ParametricFamily -from pysatl_core.families.parametrizations import ( - Parametrization, - constraint, - parametrization, +from pysatl_core.families.builtins import ( + configure_normal_family, + configure_uniform_family, ) from pysatl_core.families.registry import ParametricFamilyRegister -from pysatl_core.types import ( - CharacteristicName, - ComplexArray, - FamilyName, - NumericArray, - UnivariateContinuous, -) - -if TYPE_CHECKING: - from typing import Any @lru_cache(maxsize=1) @@ -63,603 +44,14 @@ def configure_families_register() -> ParametricFamilyRegister: ParametricFamilyRegister The global registry of parametric families. """ - _configure_normal_family() - _configure_uniform_family() + configure_normal_family() + configure_uniform_family() return ParametricFamilyRegister() -def _configure_normal_family() -> None: - NORMAL_DOC = """ - Normal (Gaussian) distribution. - - The normal distribution is a continuous probability distribution characterized - by its bell-shaped curve. It is symmetric about its mean and is defined by - two parameters: mean (μ) and standard deviation (σ). - - Probability density function: - f(x) = 1/(σ√(2π)) * exp(-(x-μ)²/(2σ²)) - - The normal distribution is widely used in statistics, natural sciences, - and social sciences as a simple model for complex random phenomena. 
+def reset_families_register() -> None: """ - - def normal_pdf(parameters: Parametrization, x: NumericArray) -> NumericArray: - """ - Probability density function for normal distribution. - - Parameters - ---------- - parameters : Parametrization () - Distribution parameters object with fields: - - mu: float (mean) - - sigma: float (standard deviation) - x : NumericArray - Points at which to evaluate the probability density function - - Returns - ------- - NumericArray - Probability density values at points x - """ - parameters = cast(_MeanStd, parameters) - - sigma = parameters.sigma - mu = parameters.mu - - coefficient = 1.0 / (sigma * np.sqrt(2 * np.pi)) - exponent = -((x - mu) ** 2) / (2 * sigma**2) - - return cast(NumericArray, coefficient * np.exp(exponent)) - - def normal_cdf(parameters: Parametrization, x: NumericArray) -> NumericArray: - """ - Cumulative distribution function for normal distribution. - - Parameters - ---------- - parameters : Parametrization - Distribution parameters object with fields: - - mu: float (mean) - - sigma: float (standard deviation) - x : NumericArray - Points at which to evaluate the cumulative distribution function - - Returns - ------- - NumericArray - Probabilities P(X ≤ x) for each point x - """ - parameters = cast(_MeanStd, parameters) - - z = (x - parameters.mu) / (parameters.sigma * np.sqrt(2)) - return cast(NumericArray, 0.5 * (1 + erf(z))) - - def normal_ppf(parameters: Parametrization, p: NumericArray) -> NumericArray: - """ - Percent point function (inverse CDF) for normal distribution. 
- - Parameters - ---------- - parameters : Parametrization - Distribution parameters object with fields: - - mu: float (mean) - - sigma: float (standard deviation) - p : NumericArray - Probability from [0, 1] - - Returns - ------- - NumericArray - Quantiles corresponding to probabilities p - If p[i] is 0 or 1, then the result[i] is -inf and inf correspondingly - - Raises - ------ - ValueError - If probability is outside [0, 1] - """ - if np.any((p < 0) | (p > 1)): - raise ValueError("Probability must be in [0, 1]") - - parameters = cast(_MeanStd, parameters) - - result = cast( - NumericArray, - parameters.mu + parameters.sigma * np.sqrt(2) * erfinv(2 * p - 1), - ) - return result - - def normal_char_func(parameters: Parametrization, t: NumericArray) -> ComplexArray: - """ - Characteristic function of normal distribution. - - Parameters - ---------- - parameters : Parametrization - Distribution parameters object with fields: - - mu: float (mean) - - sigma: float (standard deviation) - x : NumericArray - Points at which to evaluate the characteristic function - - Returns - ------- - ComplexArray - Characteristic function values at points x - """ - parameters = cast(_MeanStd, parameters) - - sigma = parameters.sigma - mu = parameters.mu - return cast(ComplexArray, np.exp(1j * mu * t - 0.5 * (sigma**2) * (t**2))) - - def mean_func(parameters: Parametrization, _: Any) -> float: - """Mean of normal distribution.""" - parameters = cast(_MeanStd, parameters) - return parameters.mu - - def var_func(parameters: Parametrization, _: Any) -> float: - """Variance of normal distribution.""" - parameters = cast(_MeanStd, parameters) - return parameters.sigma**2 - - def skew_func(_1: Parametrization, _2: Any) -> int: - """Skewness of normal distribution (always 0).""" - return 0 - - def kurt_func(_1: Parametrization, _2: Any, excess: bool = False) -> int: - """Raw or excess kurtosis of normal distribution. 
- - Parameters - ---------- - _1 : Parametrization - Needed by architecture parameter - excess : bool - A value defines if there will be raw or excess kurtosis - default is False - - Returns - ------- - int - Kurtosis value - """ - if not excess: - return 3 - else: - return 0 - - def _normal_support(_: Parametrization) -> ContinuousSupport: - """Support of normal distribution""" - return ContinuousSupport() - - Normal = ParametricFamily( - name=FamilyName.NORMAL, - distr_type=UnivariateContinuous, - distr_parametrizations=["meanStd", "meanPrec", "exponential"], - distr_characteristics={ - CharacteristicName.PDF: normal_pdf, - CharacteristicName.CDF: normal_cdf, - CharacteristicName.PPF: normal_ppf, - CharacteristicName.CF: normal_char_func, - CharacteristicName.MEAN: mean_func, - CharacteristicName.VAR: var_func, - CharacteristicName.SKEW: skew_func, - CharacteristicName.KURT: kurt_func, - }, - sampling_strategy=DefaultSamplingUnivariateStrategy(), - support_by_parametrization=_normal_support, - ) - Normal.__doc__ = NORMAL_DOC - - @parametrization(family=Normal, name="meanStd") - class _MeanStd(Parametrization): - """ - Standard parametrization of normal distribution. - - Parameters - ---------- - mu : float - Mean of the distribution - sigma : float - Standard deviation of the distribution - """ - - mu: float - sigma: float - - @constraint(description="sigma > 0") - def check_sigma_positive(self) -> bool: - """Check that standard deviation is positive.""" - return self.sigma > 0 - - @parametrization(family=Normal, name="meanPrec") - class _MeanPrec(Parametrization): - """ - Mean-precision parametrization of normal distribution. 
- - Parameters - ---------- - mu : float - Mean of the distribution - tau : float - Precision parameter (inverse variance) - """ - - mu: float - tau: float - - @constraint(description="tau > 0") - def check_tau_positive(self) -> bool: - """Check that precision parameter is positive.""" - return self.tau > 0 - - def transform_to_base_parametrization(self) -> Parametrization: - """ - Transform to Standard parametrization. - - Returns - ------- - Parametrization - Standard parametrization instance - """ - sigma = math.sqrt(1 / self.tau) - return _MeanStd(mu=self.mu, sigma=sigma) - - @parametrization(family=Normal, name="exponential") - class _Exp(Parametrization): - """ - Exponential family parametrization of normal distribution. - Uses the form: y = exp(a*x² + b*x + c) - - Parameters - ---------- - a : float - Quadratic term coefficient in exponential form - b : float - Linear term coefficient in exponential form - """ - - a: float - b: float - - @property - def c(self) -> float: - """ - Calculate the normalization constant c. - - Returns - ------- - float - Normalization constant - """ - return (self.b**2) / (4 * self.a) - (1 / 2) * math.log(math.pi / (-self.a)) - - @constraint(description="a < 0") - def check_a_negative(self) -> bool: - """Check that quadratic term coefficient is negative.""" - return self.a < 0 - - def transform_to_base_parametrization(self) -> Parametrization: - """ - Transform to Standard parametrization. - Returns - ------- - Parametrization - Standard parametrization instance - """ - mu = -self.b / (2 * self.a) - sigma = math.sqrt(-1 / (2 * self.a)) - return _MeanStd(mu=mu, sigma=sigma) - - ParametricFamilyRegister.register(Normal) - - -def _configure_uniform_family() -> None: - UNIFORM_DOC = """ - Uniform (continuous) distribution. - - The uniform distribution is a continuous probability distribution where - all intervals of the same length are equally probable. It is defined by - two parameters: lower bound and upper bound. 
- - Probability density function: - f(x) = 1/(upper_bound - lower_bound) for x in [lower_bound, upper_bound], 0 otherwise - - The uniform distribution is often used when there is no prior knowledge - about the possible values of a variable, representing maximum uncertainty. + Reset the cached families registry. """ - - def uniform_pdf(parameters: Parametrization, x: NumericArray) -> NumericArray: - """ - Probability density function for uniform distribution. - - For x < lower_bound: returns 0 - - For x > upper_bound: returns 0 - - Otherwise: returns (1 / (upper_bound - lower_bound)) - - Parameters - ---------- - parameters : Parametrization - Distribution parameters object with fields: - - lower_bound: float (lower bound) - - upper_bound: float (upper bound) - x : NumericArray - Points at which to evaluate the probability density function - - Returns - ------- - NumericArray - Probability density values at points x - """ - parameters = cast(_Standard, parameters) - - lower_bound = parameters.lower_bound - upper_bound = parameters.upper_bound - - return np.where( - (x >= lower_bound) & (x <= upper_bound), 1.0 / (upper_bound - lower_bound), 0.0 - ) - - def uniform_cdf(parameters: Parametrization, x: NumericArray) -> NumericArray: - """ - Cumulative distribution function for uniform distribution. 
- Uses np.clip for vectorized computation: - - For x < lower_bound: returns 0 - - For x > upper_bound: returns 1 - - Parameters - ---------- - parameters : Parametrization - Distribution parameters object with fields: - - lower_bound: float (lower bound) - - upper_bound: float (upper bound) - x : NumericArray - Points at which to evaluate the cumulative distribution function - - Returns - ------- - NumericArray - Probabilities P(X ≤ x) for each point x - """ - parameters = cast(_Standard, parameters) - - lower_bound = parameters.lower_bound - upper_bound = parameters.upper_bound - - return cast( - NumericArray, np.clip((x - lower_bound) / (upper_bound - lower_bound), 0.0, 1.0) - ) - - def uniform_ppf(parameters: Parametrization, p: NumericArray) -> NumericArray: - """ - Percent point function (inverse CDF) for uniform distribution. - - For uniform distribution on [lower_bound, upper_bound]: - - For p = 0: returns lower_bound - - For p = 1: returns upper_bound - - For p in (0, 1): returns lower_bound + p × (upper_bound - lower_bound) - - Parameters - ---------- - parameters : Parametrization - Distribution parameters object with fields: - - lower_bound: float (lower bound) - - upper_bound: float (upper bound) - p : NumericArray - Probability from [0, 1] - - Returns - ------- - NumericArray - Quantiles corresponding to probabilities p - - Raises - ------ - ValueError - If probability is outside [0, 1] - """ - if np.any((p < 0) | (p > 1)): - raise ValueError("Probability must be in [0, 1]") - - parameters = cast(_Standard, parameters) - lower_bound = parameters.lower_bound - upper_bound = parameters.upper_bound - - return cast(NumericArray, lower_bound + p * (upper_bound - lower_bound)) - - def uniform_char_func(parameters: Parametrization, t: NumericArray) -> ComplexArray: - """ - Characteristic function of uniform distribution. 
- - Characteristic function formula for uniform distribution on [lower_bound, upper bound]: - φ(t) = sinc((upper bound - lower_bound) * t / 2) * - * exp(i * (lower_bound + upper bound) * t / 2) - where sinc(x) = sin(πx)/(πx) as defined by numpy. - - Parameters - ---------- - parameters : Parametrization - Distribution parameters object with fields: - - lower_bound: float (lower bound) - - upper_bound: float (upper bound) - t : NumericArray - Points at which to evaluate the characteristic function - - Returns - ------- - ComplexArray - Characteristic function values at points t - """ - parameters = cast(_Standard, parameters) - - lower_bound = parameters.lower_bound - upper_bound = parameters.upper_bound - - width = upper_bound - lower_bound - center = (lower_bound + upper_bound) / 2 - - t_arr = np.asarray(t, dtype=np.float64) - - x = width * t_arr / (2 * np.pi) - sinc_val = np.sinc(x) - - return cast(ComplexArray, sinc_val * np.exp(1j * center * t_arr)) - - def mean_func(parameters: Parametrization, _: Any) -> float: - """Mean of uniform distribution.""" - parameters = cast(_Standard, parameters) - return (parameters.lower_bound + parameters.upper_bound) / 2 - - def var_func(parameters: Parametrization, _: Any) -> float: - """Variance of uniform distribution.""" - parameters = cast(_Standard, parameters) - width = parameters.upper_bound - parameters.lower_bound - return width**2 / 12 - - def skew_func(_1: Parametrization, _2: Any) -> int: - """Skewness of uniform distribution (always 0).""" - return 0 - - def kurt_func(_1: Parametrization, _2: Any, excess: bool = False) -> float: - """Raw or excess kurtosis of uniform distribution. 
- - Parameters - ---------- - _1 : Parametrization - Needed by architecture parameter - _2 : Any - Needed by architecture parameter - excess : bool - A value defines if there will be raw or excess kurtosis - default is False - - Returns - ------- - float - Kurtosis value - """ - if not excess: - return 1.8 - else: - return -1.2 - - def _uniform_support(parameters: Parametrization) -> ContinuousSupport: - """Support of uniform distribution""" - parameters = cast(_Standard, parameters.transform_to_base_parametrization()) - return ContinuousSupport( - left=parameters.lower_bound, - right=parameters.upper_bound, - left_closed=True, - right_closed=True, - ) - - Uniform = ParametricFamily( - name=FamilyName.CONTINUOUS_UNIFORM, - distr_type=UnivariateContinuous, - distr_parametrizations=["standard", "meanWidth", "minRange"], - distr_characteristics={ - CharacteristicName.PDF: uniform_pdf, - CharacteristicName.CDF: uniform_cdf, - CharacteristicName.PPF: uniform_ppf, - CharacteristicName.CF: uniform_char_func, - CharacteristicName.MEAN: mean_func, - CharacteristicName.VAR: var_func, - CharacteristicName.SKEW: skew_func, - CharacteristicName.KURT: kurt_func, - }, - sampling_strategy=DefaultSamplingUnivariateStrategy(), - support_by_parametrization=_uniform_support, - ) - Uniform.__doc__ = UNIFORM_DOC - - @parametrization(family=Uniform, name="standard") - class _Standard(Parametrization): - """ - Standard parametrization of uniform distribution. 
- - Parameters - ---------- - lower_bound : float - Lower bound of the distribution - upper_bound : float - Upper bound of the distribution - """ - - lower_bound: float - upper_bound: float - - @constraint(description="lower_bound < upper_bound") - def check_lower_less_than_upper(self) -> bool: - """Check that lower bound is less than upper bound.""" - return self.lower_bound < self.upper_bound - - @parametrization(family=Uniform, name="meanWidth") - class _MeanWidth(Parametrization): - """ - Mean-width parametrization of uniform distribution. - - Parameters - ---------- - mean : float - Mean (center) of the distribution - width : float - Width of the distribution (upper_bound - lower_bound) - """ - - mean: float - width: float - - @constraint(description="width > 0") - def check_width_positive(self) -> bool: - """Check that width is positive.""" - return self.width > 0 - - def transform_to_base_parametrization(self) -> Parametrization: - """ - Transform to Standard parametrization. - - Returns - ------- - Parametrization - Standard parametrization instance - """ - half_width = self.width / 2 - return _Standard(lower_bound=self.mean - half_width, upper_bound=self.mean + half_width) - - @parametrization(family=Uniform, name="minRange") - class _MinRange(Parametrization): - """ - Minimum-range parametrization of uniform distribution. - - Parameters - ---------- - minimum : float - Minimum value (lower bound) - range_val : float - Range of the distribution (upper_bound - lower_bound) - """ - - minimum: float - range_val: float - - @constraint(description="range_val > 0") - def check_range_positive(self) -> bool: - """Check that range is positive.""" - return self.range_val > 0 - - def transform_to_base_parametrization(self) -> Parametrization: - """ - Transform to Standard parametrization. 
- - Returns - ------- - Parametrization - Standard parametrization instance - """ - return _Standard(lower_bound=self.minimum, upper_bound=self.minimum + self.range_val) - - ParametricFamilyRegister.register(Uniform) - - -def reset_families_register() -> None: configure_families_register.cache_clear() ParametricFamilyRegister._reset() diff --git a/tests/unit/families/builtins/__init__.py b/tests/unit/families/builtins/__init__.py new file mode 100644 index 0000000..c39b160 --- /dev/null +++ b/tests/unit/families/builtins/__init__.py @@ -0,0 +1,12 @@ +""" +PySATL Core +=========== + +Core framework for probabilistic distributions providing type definitions, +distribution abstractions, characteristic computation graphs, and parametric +family management. +""" + +__author__ = "Leonid Elkin, Mikhail Mikhailov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" diff --git a/tests/unit/families/builtins/continuous/__init__.py b/tests/unit/families/builtins/continuous/__init__.py new file mode 100644 index 0000000..c39b160 --- /dev/null +++ b/tests/unit/families/builtins/continuous/__init__.py @@ -0,0 +1,12 @@ +""" +PySATL Core +=========== + +Core framework for probabilistic distributions providing type definitions, +distribution abstractions, characteristic computation graphs, and parametric +family management. +""" + +__author__ = "Leonid Elkin, Mikhail Mikhailov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" diff --git a/tests/unit/families/builtins/continuous/base.py b/tests/unit/families/builtins/continuous/base.py new file mode 100644 index 0000000..06b3e6b --- /dev/null +++ b/tests/unit/families/builtins/continuous/base.py @@ -0,0 +1,30 @@ +""" +Common fixtures and utilities for continuous distribution tests. 
+""" + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + + +import math +from typing import Any + +import numpy as np + + +class BaseDistributionTest: + """Base class for all distribution families' tests""" + + # Precision for floating point comparisons + CALCULATION_PRECISION = 1e-10 + + @staticmethod + def assert_arrays_almost_equal( + actual: np.ndarray[Any, Any], expected: np.ndarray[Any, Any], precision: float | None = None + ) -> None: + """Helper method to assert arrays are almost equal.""" + if precision is None: + precision = BaseDistributionTest.CALCULATION_PRECISION + + np.testing.assert_array_almost_equal(actual, expected, decimal=int(-math.log10(precision))) diff --git a/tests/unit/families/builtins/continuous/test_normal.py b/tests/unit/families/builtins/continuous/test_normal.py new file mode 100644 index 0000000..cefe0f3 --- /dev/null +++ b/tests/unit/families/builtins/continuous/test_normal.py @@ -0,0 +1,248 @@ +""" +Tests for Normal Distribution Family + +This module tests the functionality of the normal distribution family, +including parameterizations, characteristics, and sampling. 
+""" + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +import math + +import numpy as np +import pytest +from scipy.stats import norm + +from pysatl_core.distributions.support import ContinuousSupport +from pysatl_core.families.configuration import configure_families_register +from pysatl_core.types import ( + CharacteristicName, + ContinuousSupportShape1D, + FamilyName, + UnivariateContinuous, +) + +from .base import BaseDistributionTest + + +class TestNormalFamily(BaseDistributionTest): + """Test suite for Normal distribution family.""" + + def setup_method(self): + """Setup before each test method.""" + registry = configure_families_register() + self.normal_family = registry.get(FamilyName.NORMAL) + self.normal_dist_example = self.normal_family(mu=2.0, sigma=1.5) + + def test_family_properties(self): + """Test basic properties of normal family.""" + assert self.normal_family.name == FamilyName.NORMAL + + # Check parameterizations + expected_parametrizations = {"meanStd", "meanPrec", "exponential"} + assert set(self.normal_family.parametrization_names) == expected_parametrizations + assert self.normal_family.base_parametrization_name == "meanStd" + + def test_mean_std_parametrization_creation(self): + """Test creation of distribution with standard parametrization.""" + dist = self.normal_family(mu=2.0, sigma=1.5) + + assert dist.family_name == FamilyName.NORMAL + assert dist.distribution_type == UnivariateContinuous + assert dist.parameters == {"mu": 2.0, "sigma": 1.5} + assert dist.parametrization_name == "meanStd" + + def test_mean_prec_parametrization_creation(self): + """Test creation of distribution with mean-precision parametrization.""" + dist = self.normal_family(mu=2.0, tau=0.25, parametrization_name="meanPrec") + + assert dist.parameters == {"mu": 2.0, "tau": 0.25} + assert dist.parametrization_name == "meanPrec" + + def test_exponential_parametrization_creation(self): 
+ """Test creation of distribution with exponential parametrization.""" + # For N(2, 1.5): a = -1/(2*1.5²) = -0.222..., b = 2/1.5² = 0.888... + dist = self.normal_family(a=-0.222, b=0.888, parametrization_name="exponential") + + assert dist.parameters == {"a": -0.222, "b": 0.888} + assert dist.parametrization_name == "exponential" + + def test_parametrization_constraints(self): + """Test parameter constraints validation.""" + # Sigma must be positive + with pytest.raises(ValueError, match="sigma > 0"): + self.normal_family(mu=0, sigma=-1.0) + + # Tau must be positive + with pytest.raises(ValueError, match="tau > 0"): + self.normal_family(mu=0, tau=-1.0, parametrization_name="meanPrec") + + # a must be negative + with pytest.raises(ValueError, match="a < 0"): + self.normal_family(a=1.0, b=0.0, parametrization_name="exponential") + + @pytest.mark.parametrize( + "char_func_getter, expected", + [ + (lambda distr: distr.query_method(CharacteristicName.MEAN)(None), 2.0), + (lambda distr: distr.query_method(CharacteristicName.VAR)(None), 2.25), + (lambda distr: distr.query_method(CharacteristicName.SKEW)(None), 0.0), + ], + ) + def test_moments(self, char_func_getter, expected): + """Test moment calculations using parameterized tests.""" + actual = char_func_getter(self.normal_dist_example) + assert abs(actual - expected) < self.CALCULATION_PRECISION + + def test_kurtosis_calculation(self): + """Test kurtosis calculation with excess parameter.""" + kurt_func = self.normal_dist_example.query_method(CharacteristicName.KURT) + + raw_kurt = kurt_func(None) + assert abs(raw_kurt - 3.0) < self.CALCULATION_PRECISION + + excess_kurt = kurt_func(None, excess=True) + assert abs(excess_kurt - 0.0) < self.CALCULATION_PRECISION + + raw_kurt_explicit = kurt_func(None, excess=False) + assert abs(raw_kurt_explicit - 3.0) < self.CALCULATION_PRECISION + + @pytest.mark.parametrize( + "parametrization_name, params, expected_mu, expected_sigma", + [ + ("meanStd", {"mu": 2.0, "sigma": 1.5}, 
2.0, 1.5), + ("meanPrec", {"mu": 2.0, "tau": 0.25}, 2.0, math.sqrt(1 / 0.25)), + ("exponential", {"a": -1 / (2 * 1.5**2), "b": 2 / (1.5**2)}, 2.0, 1.5), + ], + ) + def test_parametrization_conversions( + self, parametrization_name, params, expected_mu, expected_sigma + ): + """Test conversions between different parameterizations.""" + base_params = self.normal_family.to_base( + self.normal_family.get_parametrization(parametrization_name)(**params) + ) + + assert abs(base_params.parameters["mu"] - expected_mu) < self.CALCULATION_PRECISION + assert abs(base_params.parameters["sigma"] - expected_sigma) < self.CALCULATION_PRECISION + + def test_analytical_computations_availability(self): + """Test that analytical computations are available for normal distribution.""" + comp = self.normal_family(mu=0.0, sigma=1.0).analytical_computations + + expected_chars = { + CharacteristicName.PDF, + CharacteristicName.CDF, + CharacteristicName.PPF, + CharacteristicName.CF, + CharacteristicName.MEAN, + CharacteristicName.VAR, + CharacteristicName.SKEW, + CharacteristicName.KURT, + } + assert set(comp.keys()) == expected_chars + + def test_pdf_array_input(self): + """Test PDF calculation with array input.""" + pdf = self.normal_dist_example.query_method(CharacteristicName.PDF) + x_array = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]) + + pdf_array = pdf(x_array) + assert pdf_array.shape == x_array.shape + scipy_pdf = norm.pdf(x_array, loc=2.0, scale=1.5) + + self.assert_arrays_almost_equal(pdf_array, scipy_pdf) + + def test_cdf_array_input(self): + """Test CDF calculation with array input.""" + cdf = self.normal_dist_example.query_method(CharacteristicName.CDF) + x_array = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]) + + cdf_array = cdf(x_array) + assert cdf_array.shape == x_array.shape + scipy_cdf = norm.cdf(x_array, loc=2.0, scale=1.5) + + self.assert_arrays_almost_equal(cdf_array, scipy_cdf) + + def test_ppf_array_input(self): + """Test PPF calculation with array input.""" + ppf =
self.normal_dist_example.query_method(CharacteristicName.PPF) + p_array = np.array([0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 0.999]) + + ppf_array = ppf(p_array) + assert ppf_array.shape == p_array.shape + scipy_ppf = norm.ppf(p_array, loc=2.0, scale=1.5) + + self.assert_arrays_almost_equal(ppf_array, scipy_ppf) + + def test_characteristic_function_array_input(self): + """Test characteristic function calculation with array input.""" + char_func = self.normal_dist_example.query_method(CharacteristicName.CF) + t_array = np.array([-2.0, -1.0, 0.0, 1.0, 2.0]) + + cf_array = char_func(t_array) + assert cf_array.shape == t_array.shape + + mu, sigma = 2.0, 1.5 + expected = np.exp(1j * mu * t_array - 0.5 * (sigma**2) * (t_array**2)) + + self.assert_arrays_almost_equal(cf_array.real, expected.real) + self.assert_arrays_almost_equal(cf_array.imag, expected.imag) + + def test_normal_support(self): + """Test that normal distribution has correct support (entire real line).""" + dist = self.normal_dist_example + + assert dist.support is not None + assert isinstance(dist.support, ContinuousSupport) + + assert dist.support.left == float("-inf") + assert dist.support.right == float("inf") + assert not dist.support.left_closed + assert not dist.support.right_closed + + assert dist.support.contains(0) is True + assert dist.support.contains(float("inf")) is False + assert dist.support.contains(float("-inf")) is False + + test_points = np.array([-500, 0, 5]) + results = dist.support.contains(test_points) + assert np.all(results) + + assert dist.support.shape == ContinuousSupportShape1D.REAL_LINE + + +class TestNormalFamilyEdgeCases(BaseDistributionTest): + """Test edge cases and error conditions.""" + + def setup_method(self): + """Setup before each test method.""" + registry = configure_families_register() + self.normal_family = registry.get(FamilyName.NORMAL) + + def test_invalid_parameterization(self): + """Test error for invalid parameterization name.""" + with 
pytest.raises(KeyError): + self.normal_family.distribution(parametrization_name="invalid_name", mu=0, sigma=1) + + def test_missing_parameters(self): + """Test error for missing required parameters.""" + with pytest.raises(TypeError): + self.normal_family.distribution(mu=0) # Missing sigma + + def test_invalid_probability_ppf(self): + """Test PPF with invalid probability values.""" + dist = self.normal_family(mu=2.0, sigma=1.5) + ppf = dist.query_method(CharacteristicName.PPF) + + # Test boundaries + assert ppf(0.0) == float("-inf") + assert ppf(1.0) == float("inf") + + # Test invalid probabilities + with pytest.raises(ValueError): + ppf(-0.1) + with pytest.raises(ValueError): + ppf(1.1) diff --git a/tests/unit/families/builtins/continuous/test_uniform.py b/tests/unit/families/builtins/continuous/test_uniform.py new file mode 100644 index 0000000..c492fe4 --- /dev/null +++ b/tests/unit/families/builtins/continuous/test_uniform.py @@ -0,0 +1,302 @@ +""" +Tests for Uniform Distribution Family + +This module tests the functionality of the uniform distribution family, +including parameterizations, characteristics, and sampling. 
+""" + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + + +import numpy as np +import pytest +from scipy.stats import uniform + +from pysatl_core.distributions.support import ContinuousSupport +from pysatl_core.families.configuration import configure_families_register +from pysatl_core.types import ( + CharacteristicName, + ContinuousSupportShape1D, + FamilyName, + UnivariateContinuous, +) + +from .base import BaseDistributionTest + + +class TestUniformFamily(BaseDistributionTest): + """Test suite for Uniform distribution family.""" + + def setup_method(self): + """Setup before each test method.""" + registry = configure_families_register() + self.uniform_family = registry.get(FamilyName.CONTINUOUS_UNIFORM) + self.uniform_dist_example = self.uniform_family(lower_bound=2.0, upper_bound=5.0) + + def test_family_properties(self): + """Test basic properties of uniform family.""" + assert self.uniform_family.name == FamilyName.CONTINUOUS_UNIFORM + + # Check parameterizations + expected_parametrizations = {"standard", "meanWidth", "minRange"} + assert set(self.uniform_family.parametrization_names) == expected_parametrizations + assert self.uniform_family.base_parametrization_name == "standard" + + def test_standard_parametrization_creation(self): + """Test creation of distribution with standard parametrization.""" + dist = self.uniform_family(lower_bound=2.0, upper_bound=5.0) + + assert dist.family_name == FamilyName.CONTINUOUS_UNIFORM + assert dist.distribution_type == UnivariateContinuous + assert dist.parameters == {"lower_bound": 2.0, "upper_bound": 5.0} + assert dist.parametrization_name == "standard" + + def test_mean_width_parametrization_creation(self): + """Test creation of distribution with mean-width parametrization.""" + dist = self.uniform_family(mean=3.5, width=3.0, parametrization_name="meanWidth") + + assert dist.parameters == {"mean": 3.5, "width": 3.0} + assert 
dist.parametrization_name == "meanWidth" + + def test_min_range_parametrization_creation(self): + """Test creation of distribution with min-range parametrization.""" + dist = self.uniform_family(minimum=2.0, range_val=3.0, parametrization_name="minRange") + + assert dist.parameters == {"minimum": 2.0, "range_val": 3.0} + assert dist.parametrization_name == "minRange" + + def test_parametrization_constraints(self): + """Test parameter constraints validation.""" + # lower_bound must be less than upper_bound + with pytest.raises(ValueError, match="lower_bound < upper_bound"): + self.uniform_family(lower_bound=5.0, upper_bound=2.0) + + # width must be positive + with pytest.raises(ValueError, match="width > 0"): + self.uniform_family(mean=3.5, width=0.0, parametrization_name="meanWidth") + + # range_val must be positive + with pytest.raises(ValueError, match="range_val > 0"): + self.uniform_family(minimum=2.0, range_val=0.0, parametrization_name="minRange") + + def test_characteristic_function_at_zero(self): + """Test characteristic function at zero returns 1.""" + char_func = self.uniform_dist_example.query_method(CharacteristicName.CF) + + cf_value_zero = char_func(0.0) + assert abs(cf_value_zero.real - 1.0) < self.CALCULATION_PRECISION + assert abs(cf_value_zero.imag) < self.CALCULATION_PRECISION + + def test_moments(self): + """Test moment calculations.""" + # Mean + mean_func = self.uniform_dist_example.query_method(CharacteristicName.MEAN) + assert abs(mean_func(None) - 3.5) < self.CALCULATION_PRECISION + + # Variance + var_func = self.uniform_dist_example.query_method(CharacteristicName.VAR) + assert abs(var_func(None) - 0.75) < self.CALCULATION_PRECISION + + # Skewness + skew_func = self.uniform_dist_example.query_method(CharacteristicName.SKEW) + assert abs(skew_func(None) - 0.0) < self.CALCULATION_PRECISION + + def test_kurtosis_calculation(self): + """Test kurtosis calculation with excess parameter.""" + kurt_func = 
self.uniform_dist_example.query_method(CharacteristicName.KURT) + + raw_kurt = kurt_func(None) + assert abs(raw_kurt - 1.8) < self.CALCULATION_PRECISION + + excess_kurt = kurt_func(None, excess=True) + assert abs(excess_kurt + 1.2) < self.CALCULATION_PRECISION + + raw_kurt_explicit = kurt_func(None, excess=False) + assert abs(raw_kurt_explicit - 1.8) < self.CALCULATION_PRECISION + + @pytest.mark.parametrize( + "parametrization_name, params, expected_lower, expected_upper", + [ + ("standard", {"lower_bound": 2.0, "upper_bound": 5.0}, 2.0, 5.0), + ("meanWidth", {"mean": 3.5, "width": 3.0}, 2.0, 5.0), + ("minRange", {"minimum": 2.0, "range_val": 3.0}, 2.0, 5.0), + ], + ) + def test_parametrization_conversions( + self, parametrization_name, params, expected_lower, expected_upper + ): + """Test conversions between different parameterizations.""" + base_params = self.uniform_family.to_base( + self.uniform_family.get_parametrization(parametrization_name)(**params) + ) + + assert ( + abs(base_params.parameters["lower_bound"] - expected_lower) < self.CALCULATION_PRECISION + ) + assert ( + abs(base_params.parameters["upper_bound"] - expected_upper) < self.CALCULATION_PRECISION + ) + + def test_analytical_computations_availability(self): + """Test that analytical computations are available for uniform distribution.""" + comp = self.uniform_family(lower_bound=0.0, upper_bound=1.0).analytical_computations + + expected_chars = { + CharacteristicName.PDF, + CharacteristicName.CDF, + CharacteristicName.PPF, + CharacteristicName.CF, + CharacteristicName.MEAN, + CharacteristicName.VAR, + CharacteristicName.SKEW, + CharacteristicName.KURT, + } + assert set(comp.keys()) == expected_chars + + def test_pdf_array_input(self): + """Test PDF calculation with array input.""" + pdf = self.uniform_dist_example.query_method(CharacteristicName.PDF) + x_array = np.array([1.0, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0]) + + pdf_array = pdf(x_array) + assert pdf_array.shape == x_array.shape + scipy_pdf =
uniform.pdf(x_array, loc=2.0, scale=3.0) + + self.assert_arrays_almost_equal(pdf_array, scipy_pdf) + + def test_cdf_array_input(self): + """Test CDF calculation with array input.""" + cdf = self.uniform_dist_example.query_method(CharacteristicName.CDF) + x_array = np.array([1.0, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0]) + + cdf_array = cdf(x_array) + assert cdf_array.shape == x_array.shape + scipy_cdf = uniform.cdf(x_array, loc=2.0, scale=3.0) + + self.assert_arrays_almost_equal(cdf_array, scipy_cdf) + + def test_ppf_array_input(self): + """Test PPF calculation with array input.""" + ppf = self.uniform_dist_example.query_method(CharacteristicName.PPF) + p_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) + + ppf_array = ppf(p_array) + assert ppf_array.shape == p_array.shape + scipy_ppf = uniform.ppf(p_array, loc=2.0, scale=3.0) + + self.assert_arrays_almost_equal(ppf_array, scipy_ppf) + + def test_characteristic_function_array_input(self): + """Test characteristic function calculation with array input.""" + char_func = self.uniform_dist_example.query_method(CharacteristicName.CF) + t_array = np.array([-2.0, -1.0, 1.0, 2.0, 0.0]) + + cf_array = char_func(t_array) + assert cf_array.shape == t_array.shape + + a, b = 2.0, 5.0 + width = b - a + center = (a + b) / 2 + + x = width * t_array / (2 * np.pi) + expected = np.sinc(x) * np.exp(1j * center * t_array) + + self.assert_arrays_almost_equal(cf_array.real, expected.real) + self.assert_arrays_almost_equal(cf_array.imag, expected.imag) + + def test_uniform_support(self): + """Test that uniform distribution has correct support [lower_bound, upper_bound].""" + dist = self.uniform_dist_example + + assert dist.support is not None + assert isinstance(dist.support, ContinuousSupport) + + assert dist.support.left == 2.0 + assert dist.support.right == 5.0 + assert dist.support.left_closed + assert dist.support.right_closed + + # Test containment + assert dist.support.contains(2.0) is True + assert dist.support.contains(5.0) is True + assert 
dist.support.contains(3.5) is True + assert dist.support.contains(1.9) is False + assert dist.support.contains(5.1) is False + + # Test array + test_points = np.array([1.9, 2.0, 3.5, 5.0, 5.1]) + expected = np.array([False, True, True, True, False]) + results = dist.support.contains(test_points) + np.testing.assert_array_equal(results, expected) + + assert dist.support.shape == ContinuousSupportShape1D.BOUNDED_INTERVAL + + +class TestUniformFamilyEdgeCases(BaseDistributionTest): + """Test edge cases and error conditions for uniform distribution.""" + + def setup_method(self): + """Setup before each test method.""" + registry = configure_families_register() + self.uniform_family = registry.get(FamilyName.CONTINUOUS_UNIFORM) + + def test_invalid_parameterization(self): + """Test error for invalid parameterization name.""" + with pytest.raises(KeyError): + self.uniform_family.distribution( + parametrization_name="invalid_name", lower_bound=0.0, upper_bound=1.0 + ) + + def test_missing_parameters(self): + """Test error for missing required parameters.""" + with pytest.raises(TypeError): + self.uniform_family.distribution(lower_bound=0.0) # Missing upper_bound + + with pytest.raises(TypeError): + self.uniform_family.distribution(upper_bound=1.0) # Missing lower_bound + + def test_invalid_probability_ppf(self): + """Test PPF with invalid probability values.""" + dist = self.uniform_family(lower_bound=0.0, upper_bound=1.0) + ppf = dist.query_method(CharacteristicName.PPF) + + # Test boundaries + assert ppf(0.0) == 0.0 + assert ppf(1.0) == 1.0 + + # Test invalid probabilities + with pytest.raises(ValueError): + ppf(-0.1) + with pytest.raises(ValueError): + ppf(1.1) + + def test_single_value_uniform(self): + """Test uniform distribution with single value (lower_bound == upper_bound).""" + with pytest.raises(ValueError, match="lower_bound < upper_bound"): + self.uniform_family(lower_bound=2.0, upper_bound=2.0) + + def test_characteristic_function_edge_cases(self): + """Test 
characteristic function at edge cases.""" + dist = self.uniform_family(lower_bound=0.0, upper_bound=1.0) + char_func = dist.query_method(CharacteristicName.CF) + + # Test with very small t + cf_value_small = char_func(self.CALCULATION_PRECISION) + assert abs(cf_value_small.real - 1.0) < self.CALCULATION_PRECISION + + # Test with large t + cf_value_large = char_func(1000.0) + assert isinstance(cf_value_large, complex) + assert abs(cf_value_large) <= 1.0 + + def test_negative_width(self): + """Test that negative width is rejected.""" + with pytest.raises(ValueError, match="width > 0"): + self.uniform_family(mean=0.0, width=-1.0, parametrization_name="meanWidth") + + def test_negative_range(self): + """Test that negative range is rejected.""" + with pytest.raises(ValueError, match="range_val > 0"): + self.uniform_family(minimum=0.0, range_val=-1.0, parametrization_name="minRange") diff --git a/tests/unit/families/test_configuration.py b/tests/unit/families/test_configuration.py index b4dd69a..0a7f2d8 100644 --- a/tests/unit/families/test_configuration.py +++ b/tests/unit/families/test_configuration.py @@ -1,642 +1,81 @@ """ -Tests for Normal Distribution Family Configuration +Tests for Distribution Families Configuration -This module tests the functionality of the normal distribution family -defined in configuration.py, including parameterizations, characteristics, -and sampling. +This module tests the configuration and registration of distribution families +in the global ParametricFamilyRegister. 
""" __author__ = "Fedor Myznikov" __copyright__ = "Copyright (c) 2025 PySATL project" __license__ = "SPDX-License-Identifier: MIT" -import math - -import numpy as np import pytest -from scipy.stats import norm, uniform -from pysatl_core.distributions.support import ContinuousSupport -from pysatl_core.families.configuration import configure_families_register -from pysatl_core.families.registry import ParametricFamilyRegister -from pysatl_core.types import ( - CharacteristicName, - ContinuousSupportShape1D, - FamilyName, - UnivariateContinuous, +from pysatl_core.families.configuration import ( + configure_families_register, + reset_families_register, ) +from pysatl_core.families.registry import ParametricFamilyRegister +from pysatl_core.types import FamilyName -class BaseDistributionTest: - """Based class for all distribution families' tests""" - - # Precision for floating point comparisons - CALCULATION_PRECISION = 1e-10 - - -class TestNormalFamily(BaseDistributionTest): - """Test suite for Normal distribution family.""" +class TestConfiguration: + """Test suite for configuration functionality.""" def setup_method(self): """Setup before each test method.""" - registry = configure_families_register() - self.normal_family = registry.get(FamilyName.NORMAL) - self.normal_dist_example = self.normal_family(mu=2.0, sigma=1.5) - - def test_family_registration(self): - """Test that normal family is properly registered.""" - family = ParametricFamilyRegister.get(FamilyName.NORMAL) - assert family.name == FamilyName.NORMAL - - # Check parameterizations - expected_parametrizations = {"meanStd", "meanPrec", "exponential"} - assert set(family.parametrization_names) == expected_parametrizations - assert family.base_parametrization_name == "meanStd" - - def test_mean_var_parametrization_creation(self): - """Test creation of distribution with standard parametrization.""" - dist = self.normal_family(mu=2.0, sigma=1.5) - - assert dist.family_name == FamilyName.NORMAL - assert 
dist.distribution_type == UnivariateContinuous - assert dist.parameters == {"mu": 2.0, "sigma": 1.5} - assert dist.parametrization_name == "meanStd" - - def test_mean_prec_parametrization_creation(self): - """Test creation of distribution with mean-precision parametrization.""" - dist = self.normal_family(mu=2.0, tau=0.25, parametrization_name="meanPrec") - - assert dist.parameters == {"mu": 2.0, "tau": 0.25} - assert dist.parametrization_name == "meanPrec" - - def test_exponential_parametrization_creation(self): - """Test creation of distribution with exponential parametrization.""" - # For N(2, 1.5): a = -1/(2*1.5²) = -0.222..., b = 2/1.5² = 0.888... - dist = self.normal_family(a=-0.222, b=0.888, parametrization_name="exponential") - - assert dist.parameters == {"a": -0.222, "b": 0.888} - assert dist.parametrization_name == "exponential" - - def test_parametrization_constraints(self): - """Test parameter constraints validation.""" - # Sigma must be positive - with pytest.raises(ValueError, match="sigma > 0"): - self.normal_family(mu=0, sigma=-1.0) - - # Tau must be positive - with pytest.raises(ValueError, match="tau > 0"): - self.normal_family(mu=0, tau=-1.0, parametrization_name="meanPrec") - - # a must be negative - with pytest.raises(ValueError, match="a < 0"): - self.normal_family(a=1.0, b=0.0, parametrization_name="exponential") - - def test_pdf_calculation(self): - """Test PDF calculation against scipy.stats.norm.""" - pdf = self.normal_dist_example.query_method(CharacteristicName.PDF) - test_points = [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0] - - for x in test_points: - # Our implementation - our_pdf = pdf(x) - # Scipy reference - scipy_pdf = norm.pdf(x, loc=2.0, scale=1.5) - - assert abs(our_pdf - scipy_pdf) < self.CALCULATION_PRECISION - - def test_cdf_calculation(self): - """Test CDF calculation against scipy.stats.norm.""" - cdf = self.normal_dist_example.query_method(CharacteristicName.CDF) - test_points = [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0] - - for x in 
test_points: - our_cdf = cdf(x) - scipy_cdf = norm.cdf(x, loc=2.0, scale=1.5) - - assert abs(our_cdf - scipy_cdf) < self.CALCULATION_PRECISION - - def test_ppf_calculation(self): - """Test PPF calculation against scipy.stats.norm.""" - ppf = self.normal_dist_example.query_method(CharacteristicName.PPF) - test_probabilities = [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 0.999] - - for p in test_probabilities: - our_ppf = ppf(p) - scipy_ppf = norm.ppf(p, loc=2.0, scale=1.5) - - assert abs(our_ppf - scipy_ppf) < self.CALCULATION_PRECISION - - @pytest.mark.parametrize( - "char_func_arg", - [ - -2.0, - -1.0, - 0.0, - 1.0, - 2.0, - ], - ) - def test_characteristic_function(self, char_func_arg): - """Test characteristic function calculation at specific points.""" - char_func = self.normal_dist_example.query_method(CharacteristicName.CF) - cf_value = char_func(char_func_arg) - - expected_real = math.exp(-0.5 * (1.5 * char_func_arg) ** 2) * math.cos(2.0 * char_func_arg) - expected_imag = math.exp(-0.5 * (1.5 * char_func_arg) ** 2) * math.sin(2.0 * char_func_arg) - - assert abs(cf_value.real - expected_real) < self.CALCULATION_PRECISION - assert abs(cf_value.imag - expected_imag) < self.CALCULATION_PRECISION - - @pytest.mark.parametrize( - "char_func_getter, expected", - [ - ( - lambda distr: distr.query_method(CharacteristicName.MEAN)(None), - 2.0, - ), - ( - lambda distr: distr.query_method(CharacteristicName.VAR)(None), - 2.25, - ), - ( - lambda distr: distr.query_method(CharacteristicName.SKEW)(None), - 0.0, - ), - ], - ) - def test_moments(self, char_func_getter, expected): - """Test moment calculations using parameterized tests.""" - actual = char_func_getter(self.normal_dist_example) - assert abs(actual - expected) < self.CALCULATION_PRECISION - - def test_kurtosis_calculation(self): - """Test kurtosis calculation with excess parameter.""" - kurt_func = self.normal_dist_example.query_method(CharacteristicName.KURT) - - raw_kurt = kurt_func(None) - assert 
abs(raw_kurt - 3.0) < self.CALCULATION_PRECISION - - excess_kurt = kurt_func(None, excess=True) - assert abs(excess_kurt - 0.0) < self.CALCULATION_PRECISION - - raw_kurt_explicit = kurt_func(None, excess=False) - assert abs(raw_kurt_explicit - 3.0) < self.CALCULATION_PRECISION - - @pytest.mark.parametrize( - "parametrization_name, params, expected_mu, expected_sigma", - [ - ("meanStd", {"mu": 2.0, "sigma": 1.5}, 2.0, 1.5), - ("meanPrec", {"mu": 2.0, "tau": 0.25}, 2.0, math.sqrt(1 / 0.25)), - ("exponential", {"a": -1 / (2 * 1.5**2), "b": 2 / (1.5**2)}, 2.0, 1.5), - ], - ) - def test_parametrization_conversions( - self, parametrization_name, params, expected_mu, expected_sigma - ): - """Test conversions between different parameterizations.""" - base_params = self.normal_family.to_base( - self.normal_family.get_parametrization(parametrization_name)(**params) - ) - - assert abs(base_params.parameters["mu"] - expected_mu) < self.CALCULATION_PRECISION - assert abs(base_params.parameters["sigma"] - expected_sigma) < self.CALCULATION_PRECISION - - def test_analytical_computations_caching(self): - """Test that analytical computations are properly cached.""" - comp = self.normal_family(mu=0.0, sigma=1.0).analytical_computations - - expected_chars = { - CharacteristicName.PDF, - CharacteristicName.CDF, - CharacteristicName.PPF, - CharacteristicName.CF, - CharacteristicName.MEAN, - CharacteristicName.VAR, - CharacteristicName.SKEW, - CharacteristicName.KURT, + self.registry = configure_families_register() + + def test_configure_families_register_returns_registry(self): + """Test that configure_families_register returns a ParametricFamilyRegister.""" + assert isinstance(self.registry, ParametricFamilyRegister) + + def test_configure_families_register_is_singleton(self): + """Test that configure_families_register returns the same instance.""" + registry2 = configure_families_register() + assert self.registry is registry2 + + def test_families_registered(self): + """Test that all 
expected families are registered.""" + expected_families = { + FamilyName.NORMAL, + FamilyName.CONTINUOUS_UNIFORM, } - assert set(comp.keys()) == expected_chars - - def test_array_input_support(self): - """Test that PDF supports array inputs.""" - dist = self.normal_family(mu=0.0, sigma=1.0) - x_array = np.array([-2.0, -1.0, 0.0, 1.0, 2.0]) - - pdf = dist.query_method(CharacteristicName.PDF) - pdf_array = pdf(x_array) - - assert pdf_array.shape == x_array.shape - scipy_pdf = norm.pdf(x_array, loc=0.0, scale=1.0) - - np.testing.assert_array_almost_equal( - pdf_array, scipy_pdf, decimal=int(-math.log10(self.CALCULATION_PRECISION)) - ) - - def test_normal_support(self): - """Test that normal distribution has correct support (entire real line).""" - dist = self.normal_dist_example - assert dist.support is not None - assert isinstance(dist.support, ContinuousSupport) - - assert dist.support.left == float("-inf") - assert dist.support.right == float("inf") - assert not dist.support.left_closed # -∞ is always open - assert not dist.support.right_closed # +∞ is always open - - assert dist.support.contains(0) is True - assert dist.support.contains(float("inf")) is False # ∞ is not in the support - assert dist.support.contains(float("-inf")) is False # -∞ is not in the support - - test_points = np.array([-500, 0, 5]) - results = dist.support.contains(test_points) - assert np.all(results) - - assert dist.support.shape == ContinuousSupportShape1D.REAL_LINE - - -class TestNormalFamilyEdgeCases: - """Test edge cases and error conditions.""" - - def setup_method(self): - """Setup before each test method.""" - registry = configure_families_register() - self.normal_family = registry.get(FamilyName.NORMAL) - self.normal_dist_example = self.normal_family(mu=2.0, sigma=1.5) + registered_families = set(self.registry._registered_families.keys()) + assert expected_families.issubset(registered_families) - def test_invalid_parameterization(self): - """Test error for invalid 
parameterization name.""" - with pytest.raises(KeyError): - self.normal_family.distribution(parametrization_name="invalid_name", mu=0, sigma=1) + def test_reset_families_register(self): + """Test that reset_families_register clears the cache.""" + registry1 = configure_families_register() + reset_families_register() + registry2 = configure_families_register() - def test_missing_parameters(self): - """Test error for missing required parameters.""" - with pytest.raises(TypeError): - self.normal_family.distribution(mu=0) # Missing sigma + # They should be different instances after reset + assert registry1 is not registry2 - def test_invalid_probability_ppf(self): - """Test PPF with invalid probability values.""" - ppf = self.normal_dist_example.query_method(CharacteristicName.PPF) + def test_registry_singleton_pattern(self): + """Test that ParametricFamilyRegister itself follows singleton pattern.""" + registry1 = ParametricFamilyRegister() + registry2 = ParametricFamilyRegister() + assert registry1 is registry2 - # Test boundaries - assert ppf(0.0) == float("-inf") - assert ppf(1.0) == float("inf") + def test_registry_get_family_method(self): + """Test the get method of ParametricFamilyRegister.""" + # Test getting existing family + normal_family = self.registry.get(FamilyName.NORMAL) + assert normal_family is not None + assert normal_family.name == FamilyName.NORMAL - # Test invalid probabilities - with pytest.raises(ValueError): - ppf(-0.1) + # Test getting non-existent family with pytest.raises(ValueError): - ppf(1.1) - - -class TestUniformFamily(BaseDistributionTest): - """Test suite for Uniform distribution family.""" - - def setup_method(self): - """Setup before each test method.""" - registry = configure_families_register() - self.uniform_family = registry.get(FamilyName.CONTINUOUS_UNIFORM) - self.uniform_dist_example = self.uniform_family(lower_bound=2.0, upper_bound=5.0) - - def test_family_registration(self): - """Test that uniform family is properly 
registered.""" - family = ParametricFamilyRegister.get(FamilyName.CONTINUOUS_UNIFORM) - assert family.name == FamilyName.CONTINUOUS_UNIFORM - - # Check parameterizations - expected_parametrizations = {"standard", "meanWidth", "minRange"} - assert set(family.parametrization_names) == expected_parametrizations - assert family.base_parametrization_name == "standard" - - def test_standard_parametrization_creation(self): - """Test creation of distribution with standard parametrization.""" - dist = self.uniform_family(lower_bound=2.0, upper_bound=5.0) - - assert dist.family_name == FamilyName.CONTINUOUS_UNIFORM - assert dist.distribution_type == UnivariateContinuous - - assert dist.parameters == {"lower_bound": 2.0, "upper_bound": 5.0} - assert dist.parametrization_name == "standard" - - def test_mean_width_parametrization_creation(self): - """Test creation of distribution with mean-width parametrization.""" - dist = self.uniform_family(mean=3.5, width=3.0, parametrization_name="meanWidth") - - assert dist.parameters == {"mean": 3.5, "width": 3.0} - assert dist.parametrization_name == "meanWidth" - - def test_min_range_parametrization_creation(self): - """Test creation of distribution with min-range parametrization.""" - dist = self.uniform_family(minimum=2.0, range_val=3.0, parametrization_name="minRange") - - assert dist.parameters == {"minimum": 2.0, "range_val": 3.0} - assert dist.parametrization_name == "minRange" - - def test_parametrization_constraints(self): - """Test parameter constraints validation.""" - # lower_bound must be less than upper_bound - with pytest.raises(ValueError, match="lower_bound < upper_bound"): - self.uniform_family(lower_bound=5.0, upper_bound=2.0) - - # width must be positive - with pytest.raises(ValueError, match="width > 0"): - self.uniform_family(mean=3.5, width=0.0, parametrization_name="meanWidth") - - # range_val must be positive - with pytest.raises(ValueError, match="range_val > 0"): - self.uniform_family(minimum=2.0, 
range_val=0.0, parametrization_name="minRange") - - def test_pdf_calculation(self): - """Test PDF calculation against scipy.stats.uniform.""" - pdf = self.uniform_dist_example.query_method(CharacteristicName.PDF) - test_points = [1.0, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0] - - for x in test_points: - # Our implementation - our_pdf = pdf(x) - # Scipy reference - scipy_pdf = uniform.pdf(x, loc=2.0, scale=3.0) - - assert abs(our_pdf - scipy_pdf) < self.CALCULATION_PRECISION - - def test_cdf_calculation(self): - """Test CDF calculation against scipy.stats.uniform.""" - cdf = self.uniform_dist_example.query_method(CharacteristicName.CDF) - test_points = [1.0, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0] - - for x in test_points: - our_cdf = cdf(x) - scipy_cdf = uniform.cdf(x, loc=2.0, scale=3.0) - - assert abs(our_cdf - scipy_cdf) < self.CALCULATION_PRECISION - - def test_ppf_calculation(self): - """Test PPF calculation against scipy.stats.uniform.""" - ppf = self.uniform_dist_example.query_method(CharacteristicName.PPF) - test_probabilities = [0.0, 0.25, 0.5, 0.75, 1.0] - - for p in test_probabilities: - our_ppf = ppf(p) - scipy_ppf = uniform.ppf(p, loc=2.0, scale=3.0) - - assert abs(our_ppf - scipy_ppf) < self.CALCULATION_PRECISION - - @pytest.mark.parametrize( - "char_func_arg", - [ - -2.0, - -1.0, - 1.0, - 2.0, - ], - ) - def test_characteristic_function(self, char_func_arg): - """Test characteristic function calculation at specific points.""" - char_func = self.uniform_dist_example.query_method(CharacteristicName.CF) - cf_value = char_func(char_func_arg) - - # Analytical formula for characteristic function of uniform distribution - a, b = 2.0, 5.0 - width = b - a - - # φ(t) = (e^{itb} - e^{ita}) / (it(b-a)) - # Re(φ(t)) = (sin(tb) - sin(ta)) / (t(b-a)) - # Im(φ(t)) = -(cos(tb) - cos(ta)) / (t(b-a)) - if abs(char_func_arg) < self.CALCULATION_PRECISION: - expected_real = 1.0 - expected_imag = 0.0 - else: - expected_real = (math.sin(b * char_func_arg) - math.sin(a * char_func_arg)) / ( - 
char_func_arg * width - ) - expected_imag = -(math.cos(b * char_func_arg) - math.cos(a * char_func_arg)) / ( - char_func_arg * width - ) - - assert abs(cf_value.real - expected_real) < self.CALCULATION_PRECISION - assert abs(cf_value.imag - expected_imag) < self.CALCULATION_PRECISION - - def test_characteristic_function_at_zero(self): - """Test characteristic function at zero returns 1.""" - char_func = self.uniform_dist_example.query_method(CharacteristicName.CF) - - cf_value_zero = char_func(0.0) - assert abs(cf_value_zero.real - 1.0) < self.CALCULATION_PRECISION - assert abs(cf_value_zero.imag) < self.CALCULATION_PRECISION - - cf_value_small = char_func(self.CALCULATION_PRECISION) - assert abs(cf_value_small.real - 1.0) < self.CALCULATION_PRECISION - - cf_value_large = char_func(1000.0) - assert isinstance(cf_value_large, complex) - assert abs(cf_value_large) <= 1 - - def test_moments(self): - """Test moment calculations.""" - dist = self.uniform_dist_example - - # Mean - mean_func = dist.query_method(CharacteristicName.MEAN) - assert abs(mean_func(None) - 3.5) < self.CALCULATION_PRECISION - - # Variance - var_func = dist.query_method(CharacteristicName.VAR) - assert abs(var_func(None) - 0.75) < self.CALCULATION_PRECISION - - # Skewness - skew_func = dist.query_method(CharacteristicName.SKEW) - assert abs(skew_func(None) - 0.0) < self.CALCULATION_PRECISION - - def test_kurtosis_calculation(self): - """Test kurtosis calculation with excess parameter.""" - kurt_func = self.uniform_dist_example.query_method(CharacteristicName.KURT) - - raw_kurt = kurt_func(None) - assert abs(raw_kurt - 1.8) < self.CALCULATION_PRECISION - - excess_kurt = kurt_func(None, excess=True) - assert abs(excess_kurt + 1.2) < self.CALCULATION_PRECISION - - raw_kurt_explicit = kurt_func(None, excess=False) - assert abs(raw_kurt_explicit - 1.8) < self.CALCULATION_PRECISION - - @pytest.mark.parametrize( - "parametrization_name, params, expected_lower, expected_upper", - [ - ("standard", 
{"lower_bound": 2.0, "upper_bound": 5.0}, 2.0, 5.0), - ("meanWidth", {"mean": 3.5, "width": 3.0}, 2.0, 5.0), - ("minRange", {"minimum": 2.0, "range_val": 3.0}, 2.0, 5.0), - ], - ) - def test_parametrization_conversions( - self, parametrization_name, params, expected_lower, expected_upper - ): - """Test conversions between different parameterizations.""" - base_params = self.uniform_family.to_base( - self.uniform_family.get_parametrization(parametrization_name)(**params) - ) - - assert ( - abs(base_params.parameters["lower_bound"] - expected_lower) < self.CALCULATION_PRECISION - ) - assert ( - abs(base_params.parameters["upper_bound"] - expected_upper) < self.CALCULATION_PRECISION - ) - - def test_analytical_computations_caching(self): - """Test that analytical computations are properly cached.""" - comp = self.uniform_family(lower_bound=0.0, upper_bound=1.0).analytical_computations - - expected_chars = { - CharacteristicName.PDF, - CharacteristicName.CDF, - CharacteristicName.PPF, - CharacteristicName.CF, - CharacteristicName.MEAN, - CharacteristicName.VAR, - CharacteristicName.SKEW, - CharacteristicName.KURT, - } - assert set(comp.keys()) == expected_chars - - def test_array_input_support_pdf(self): - """Test that PDF supports array inputs.""" - dist = self.uniform_family(lower_bound=0.0, upper_bound=1.0) - x_array = np.array([-0.5, 0.0, 0.25, 0.5, 0.75, 1.0, 1.5]) - - pdf = dist.query_method(CharacteristicName.PDF) - pdf_array = pdf(x_array) - - assert pdf_array.shape == x_array.shape - scipy_pdf = uniform.pdf(x_array, loc=0.0, scale=1.0) - - np.testing.assert_array_almost_equal( - pdf_array, scipy_pdf, decimal=int(-math.log10(self.CALCULATION_PRECISION)) - ) - - def test_array_input_support_cdf(self): - """Test that CDF supports array inputs.""" - dist = self.uniform_family(lower_bound=0.0, upper_bound=1.0) - x_array = np.array([-0.5, 0.0, 0.25, 0.5, 0.75, 1.0, 1.5]) - - cdf = dist.query_method(CharacteristicName.CDF) - cdf_array = cdf(x_array) - - assert 
cdf_array.shape == x_array.shape - scipy_cdf = uniform.cdf(x_array, loc=0.0, scale=1.0) - - np.testing.assert_array_almost_equal( - cdf_array, scipy_cdf, decimal=int(-math.log10(self.CALCULATION_PRECISION)) - ) - - def test_array_input_support_ppf(self): - """Test that PPF supports array inputs.""" - dist = self.uniform_family(lower_bound=0.0, upper_bound=1.0) - p_array = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) - - ppf = dist.query_method(CharacteristicName.PPF) - ppf_array = ppf(p_array) - - assert ppf_array.shape == p_array.shape - scipy_ppf = uniform.ppf(p_array, loc=0.0, scale=1.0) - - np.testing.assert_array_almost_equal( - ppf_array, scipy_ppf, decimal=int(-math.log10(self.CALCULATION_PRECISION)) - ) - - def test_uniform_support(self): - """Test that uniform distribution has correct support [lower_bound, upper_bound].""" - dist = self.uniform_dist_example - - assert dist.support is not None - assert isinstance(dist.support, ContinuousSupport) - - assert dist.support.left == 2.0 - assert dist.support.right == 5.0 - assert dist.support.left_closed # [a, b] - inclusive - assert dist.support.right_closed # [a, b] - inclusive - - # Test containment - assert dist.support.contains(2.0) is True # boundary included - assert dist.support.contains(5.0) is True # boundary included - assert dist.support.contains(3.5) is True # inside - assert dist.support.contains(1.9) is False # outside left - assert dist.support.contains(5.1) is False # outside right - - # Test array - test_points = np.array([1.9, 2.0, 3.5, 5.0, 5.1]) - expected = np.array([False, True, True, True, False]) - results = dist.support.contains(test_points) - np.testing.assert_array_equal(results, expected) - - assert dist.support.shape == ContinuousSupportShape1D.BOUNDED_INTERVAL - - -class TestUniformFamilyEdgeCases(BaseDistributionTest): - """Test edge cases and error conditions for uniform distribution.""" - - def setup_method(self): - """Setup before each test method.""" - registry = 
configure_families_register() - self.uniform_family = registry.get(FamilyName.CONTINUOUS_UNIFORM) - self.uniform_dist = self.uniform_family(lower_bound=0.0, upper_bound=1.0) - - def test_invalid_parameterization(self): - """Test error for invalid parameterization name.""" - with pytest.raises(KeyError): - self.uniform_family.distribution( - parametrization_name="invalid_name", lower_bound=0.0, upper_bound=1.0 - ) - - def test_missing_parameters(self): - """Test error for missing required parameters.""" - with pytest.raises(TypeError): - self.uniform_family.distribution(lower_bound=0.0) # Missing upper_bound - - with pytest.raises(TypeError): - self.uniform_family.distribution(upper_bound=1.0) # Missing lower_bound - - def test_invalid_probability_ppf(self): - """Test PPF with invalid probability values.""" - ppf = self.uniform_dist.query_method(CharacteristicName.PPF) - - # Test boundaries - assert ppf(0.0) == 0.0 - assert ppf(1.0) == 1.0 - - # Test invalid probabilities - with pytest.raises(ValueError): - ppf(-0.1) - with pytest.raises(ValueError): - ppf(1.1) - - def test_single_value_uniform(self): - """Test uniform distribution with single value (lower_bound == upper_bound).""" - # This should fail validation - with pytest.raises(ValueError, match="lower_bound < upper_bound"): - self.uniform_family(lower_bound=2.0, upper_bound=2.0) - - def test_characteristic_function_edge_cases(self): - """Test characteristic function at edge cases.""" - char_func = self.uniform_dist.query_method(CharacteristicName.CF) - - # Test with very small t - cf_value_small = char_func(self.CALCULATION_PRECISION) - # Should be close to 1, but may have numerical issues - assert abs(cf_value_small.real - 1.0) < self.CALCULATION_PRECISION - - # Test with large t - cf_value_large = char_func(1000.0) - # Characteristic function should still be a complex number - assert isinstance(cf_value_large, complex) - assert abs(cf_value_large) <= 1.0 # |φ(t)| ≤ 1 for all t + 
self.registry.get("NonExistentFamily") - def test_negative_width(self): - """Test that negative width is rejected.""" - with pytest.raises(ValueError, match="width > 0"): - self.uniform_family(mean=0.0, width=-1.0, parametrization_name="meanWidth") + def test_registry_list_registered_families(self): + """Test the list_registered_families method of ParametricFamilyRegister.""" + families_list = ParametricFamilyRegister.list_registered_families() - def test_negative_range(self): - """Test that negative range is rejected.""" - with pytest.raises(ValueError, match="range_val > 0"): - self.uniform_family(minimum=0.0, range_val=-1.0, parametrization_name="minRange") + assert isinstance(families_list, list) + assert FamilyName.NORMAL in families_list + assert FamilyName.CONTINUOUS_UNIFORM in families_list + assert "NonExistentFamily" not in families_list From c19925fe6743e3213a418e62e1a093782fcf462a Mon Sep 17 00:00:00 2001 From: MyznikovFD Date: Thu, 18 Dec 2025 15:35:44 +0300 Subject: [PATCH 2/3] feat(families): add exponential family implementation --- src/pysatl_core/families/builtins/__init__.py | 2 + .../families/builtins/continuous/__init__.py | 2 + .../builtins/continuous/exponential.py | 270 ++++++++++++++++++ src/pysatl_core/families/configuration.py | 9 +- src/pysatl_core/types.py | 1 + 5 files changed, 281 insertions(+), 3 deletions(-) create mode 100644 src/pysatl_core/families/builtins/continuous/exponential.py diff --git a/src/pysatl_core/families/builtins/__init__.py b/src/pysatl_core/families/builtins/__init__.py index 49ea81d..b7c58b6 100644 --- a/src/pysatl_core/families/builtins/__init__.py +++ b/src/pysatl_core/families/builtins/__init__.py @@ -11,6 +11,7 @@ from pysatl_core.families.builtins.continuous import ( + configure_exponential_family, configure_normal_family, configure_uniform_family, ) @@ -18,4 +19,5 @@ __all__ = [ "configure_normal_family", "configure_uniform_family", + "configure_exponential_family", ] diff --git 
a/src/pysatl_core/families/builtins/continuous/__init__.py b/src/pysatl_core/families/builtins/continuous/__init__.py index bfdd481..d7f9492 100644 --- a/src/pysatl_core/families/builtins/continuous/__init__.py +++ b/src/pysatl_core/families/builtins/continuous/__init__.py @@ -9,10 +9,12 @@ __license__ = "SPDX-License-Identifier: MIT" +from pysatl_core.families.builtins.continuous.exponential import configure_exponential_family from pysatl_core.families.builtins.continuous.normal import configure_normal_family from pysatl_core.families.builtins.continuous.uniform import configure_uniform_family __all__ = [ "configure_normal_family", "configure_uniform_family", + "configure_exponential_family", ] diff --git a/src/pysatl_core/families/builtins/continuous/exponential.py b/src/pysatl_core/families/builtins/continuous/exponential.py new file mode 100644 index 0000000..a14dc9b --- /dev/null +++ b/src/pysatl_core/families/builtins/continuous/exponential.py @@ -0,0 +1,270 @@ +""" +Exponential distribution family implementation. + +Contains the Exponential family with rate and scale parameterizations. 
+""" + +from __future__ import annotations + +__author__ = "Fedor Myznikov" +__copyright__ = "Copyright (c) 2025 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from typing import TYPE_CHECKING, cast + +import numpy as np + +from pysatl_core.distributions.strategies import DefaultSamplingUnivariateStrategy +from pysatl_core.distributions.support import ContinuousSupport +from pysatl_core.families.parametric_family import ParametricFamily +from pysatl_core.families.parametrizations import ( + Parametrization, + constraint, + parametrization, +) +from pysatl_core.families.registry import ParametricFamilyRegister +from pysatl_core.types import ( + CharacteristicName, + ComplexArray, + FamilyName, + NumericArray, + UnivariateContinuous, +) + +if TYPE_CHECKING: + from typing import Any + + +def configure_exponential_family() -> None: + """ + Configure and register the Exponential distribution family. + """ + EXPONENTIAL_DOC = """ + Exponential distribution. + + The exponential distribution is a continuous probability distribution that + describes the time between events in a Poisson process. It has a single + parameter: rate (λ) or scale (β = 1/λ). + + Probability density function (rate parametrization): + f(x) = λ * exp(-λ * x) for x ≥ 0 + + The exponential distribution is memoryless and is widely used in reliability + engineering, queuing theory, and survival analysis. + """ + + def pdf(parameters: Parametrization, x: NumericArray) -> NumericArray: + """ + Probability density function for exponential distribution. 
+ + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lambda_: float (rate parameter) + x : NumericArray + Points at which to evaluate the probability density function + + Returns + ------- + NumericArray + Probability density values at points x + """ + parameters = cast(_Rate, parameters) + + lambda_ = parameters.lambda_ + return np.where(x >= 0, lambda_ * np.exp(-lambda_ * x), 0.0) + + def cdf(parameters: Parametrization, x: NumericArray) -> NumericArray: + """ + Cumulative distribution function for exponential distribution. + + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lambda_: float (rate parameter) + x : NumericArray + Points at which to evaluate the cumulative distribution function + + Returns + ------- + NumericArray + Probabilities P(X ≤ x) for each point x + """ + parameters = cast(_Rate, parameters) + + lambda_ = parameters.lambda_ + return np.where(x >= 0, 1.0 - np.exp(-lambda_ * x), 0.0) + + def ppf(parameters: Parametrization, p: NumericArray) -> NumericArray: + """ + Percent point function (inverse CDF) for exponential distribution. 
+ + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lambda_: float (rate parameter) + p : NumericArray + Probability from [0, 1] + + Returns + ------- + NumericArray + Quantiles corresponding to probabilities p: + - For p = 0: returns 0.0 + - For p = 1: returns np.inf + - For p in (0, 1): returns -ln(1-p)/λ + + Raises + ------ + ValueError + If probability is outside [0, 1] + """ + if np.any((p < 0) | (p > 1)): + raise ValueError("Probability must be in [0, 1]") + + parameters = cast(_Rate, parameters) + lambda_ = parameters.lambda_ + + with np.errstate(divide="ignore", invalid="ignore"): + return np.where(p < 1.0, -np.log1p(-p) / lambda_, np.inf) + + def char_func(parameters: Parametrization, t: NumericArray) -> ComplexArray: + """ + Characteristic function of exponential distribution. + + Parameters + ---------- + parameters : Parametrization + Distribution parameters object with fields: + - lambda_: float (rate parameter) + t : NumericArray + Points at which to evaluate the characteristic function + + Returns + ------- + ComplexArray + Characteristic function values at points t + """ + CALCULATION_PRECISION = 1e-10 + + parameters = cast(_Rate, parameters) + lambda_ = parameters.lambda_ + t_arr = np.asarray(t, dtype=np.float64) + + denominator = lambda_ - 1j * t_arr + result = np.where( + np.abs(t_arr) < CALCULATION_PRECISION, + 1.0 + 0j, + lambda_ / denominator, + ) + return cast(ComplexArray, result) + + def mean_func(parameters: Parametrization, _: Any) -> float: + """Mean of exponential distribution.""" + parameters = cast(_Rate, parameters) + return 1.0 / parameters.lambda_ + + def var_func(parameters: Parametrization, _: Any) -> float: + """Variance of exponential distribution.""" + parameters = cast(_Rate, parameters) + return 1.0 / (parameters.lambda_**2) + + def skew_func(_1: Parametrization, _2: Any) -> float: + """Skewness of exponential distribution (always 2).""" + return 2.0 + + def 
kurt_func(_1: Parametrization, _2: Any, excess: bool = False) -> float: + """Raw or excess kurtosis of exponential distribution. + + Parameters + ---------- + _1 : Parametrization + Unused, like ``_2``; kept for signature compatibility with other characteristics + excess : bool + If True, return the excess kurtosis; otherwise return the raw kurtosis + Default is False + + Returns + ------- + float + Kurtosis value + """ + if not excess: + return 9.0 + else: + return 6.0 + + def _support(_: Parametrization) -> ContinuousSupport: + """Support of exponential distribution""" + return ContinuousSupport(left=0.0) + + Exponential = ParametricFamily( + name=FamilyName.EXPONENTIAL, + distr_type=UnivariateContinuous, + distr_parametrizations=["rate", "scale"], + distr_characteristics={ + CharacteristicName.PDF: pdf, + CharacteristicName.CDF: cdf, + CharacteristicName.PPF: ppf, + CharacteristicName.CF: char_func, + CharacteristicName.MEAN: mean_func, + CharacteristicName.VAR: var_func, + CharacteristicName.SKEW: skew_func, + CharacteristicName.KURT: kurt_func, + }, + sampling_strategy=DefaultSamplingUnivariateStrategy(), + support_by_parametrization=_support, + ) + Exponential.__doc__ = EXPONENTIAL_DOC + + @parametrization(family=Exponential, name="rate") + class _Rate(Parametrization): + """ + Rate parametrization of exponential distribution. + + Parameters + ---------- + lambda_ : float + Rate parameter (λ) of the distribution + """ + + lambda_: float + + @constraint(description="lambda_ > 0") + def check_lambda_positive(self) -> bool: + """Check that rate parameter is positive.""" + return self.lambda_ > 0 + + @parametrization(family=Exponential, name="scale") + class _Scale(Parametrization): + """ + Scale parametrization of exponential distribution. 
+ + Parameters + ---------- + beta : float + Scale parameter (β) of the distribution, β = 1/λ + """ + + beta: float + + @constraint(description="beta > 0") + def check_beta_positive(self) -> bool: + """Check that scale parameter is positive.""" + return self.beta > 0 + + def transform_to_base_parametrization(self) -> Parametrization: + """ + Transform to Rate parametrization. + + Returns + ------- + Parametrization + Rate parametrization instance + """ + return _Rate(lambda_=1.0 / self.beta) + + ParametricFamilyRegister.register(Exponential) diff --git a/src/pysatl_core/families/configuration.py b/src/pysatl_core/families/configuration.py index 3754be7..efb029a 100644 --- a/src/pysatl_core/families/configuration.py +++ b/src/pysatl_core/families/configuration.py @@ -4,8 +4,9 @@ This module defines and configures parametric distribution families for the PySATL library: -- :class:`Normal Family` — Gaussian distribution with multiple parameterizations. -- :class:`Uniform Family` — Uniform distribution with multiple parameterizations. +- **Normal Family** — Gaussian distribution with multiple parameterizations. +- **Uniform Family** — Uniform distribution with multiple parameterizations. +- **Exponential Family** — Exponential distribution with multiple parameterizations. Notes ----- @@ -24,6 +25,7 @@ from functools import lru_cache from pysatl_core.families.builtins import ( + configure_exponential_family, configure_normal_family, configure_uniform_family, ) @@ -44,8 +46,9 @@ def configure_families_register() -> ParametricFamilyRegister: ParametricFamilyRegister The global registry of parametric families. 
""" - configure_normal_family() + configure_exponential_family() configure_uniform_family() + configure_normal_family() return ParametricFamilyRegister() diff --git a/src/pysatl_core/types.py b/src/pysatl_core/types.py index ead403a..c79a03d 100644 --- a/src/pysatl_core/types.py +++ b/src/pysatl_core/types.py @@ -276,6 +276,7 @@ class CharacteristicName(StrEnum): class FamilyName(StrEnum): NORMAL = "Normal" CONTINUOUS_UNIFORM = "ContinuousUniform" + EXPONENTIAL = "Exponential" __all__ = [ From 7de7ffe996ca850888779fc2fcfb0be103cef6ee Mon Sep 17 00:00:00 2001 From: MyznikovFD Date: Thu, 18 Dec 2025 15:36:32 +0300 Subject: [PATCH 3/3] test(families): tests for exponential family --- .../builtins/continuous/test_exponential.py | 261 ++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 tests/unit/families/builtins/continuous/test_exponential.py diff --git a/tests/unit/families/builtins/continuous/test_exponential.py b/tests/unit/families/builtins/continuous/test_exponential.py new file mode 100644 index 0000000..3944595 --- /dev/null +++ b/tests/unit/families/builtins/continuous/test_exponential.py @@ -0,0 +1,261 @@ +""" +Tests for Exponential Distribution Family + +This module tests the functionality of the exponential distribution family, +including parameterizations, characteristics, and sampling. 
+"""
+
+__author__ = "Fedor Myznikov"
+__copyright__ = "Copyright (c) 2025 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+
+import numpy as np
+import pytest
+from scipy.stats import expon
+
+from pysatl_core.distributions.support import ContinuousSupport
+from pysatl_core.families.configuration import configure_families_register
+from pysatl_core.types import (
+    CharacteristicName,
+    ContinuousSupportShape1D,
+    FamilyName,
+    UnivariateContinuous,
+)
+
+from .base import BaseDistributionTest
+
+
+class TestExponentialFamily(BaseDistributionTest):
+    """Test suite for Exponential distribution family."""
+
+    def setup_method(self):
+        """Set up the family and a sample distribution (lambda_=0.5) before each test."""
+        registry = configure_families_register()
+        self.exponential_family = registry.get(FamilyName.EXPONENTIAL)
+        self.exponential_dist_example = self.exponential_family(lambda_=0.5)
+
+    def test_family_properties(self):
+        """Test basic properties of exponential family."""
+        assert self.exponential_family.name == FamilyName.EXPONENTIAL
+
+        # Check parameterizations
+        expected_parametrizations = {"rate", "scale"}
+        assert set(self.exponential_family.parametrization_names) == expected_parametrizations
+        assert self.exponential_family.base_parametrization_name == "rate"
+
+    def test_rate_parametrization_creation(self):
+        """Test creation of distribution with rate parametrization."""
+        dist = self.exponential_family(lambda_=0.5)
+
+        assert dist.family_name == FamilyName.EXPONENTIAL
+        assert dist.distribution_type == UnivariateContinuous
+        assert dist.parameters == {"lambda_": 0.5}
+        assert dist.parametrization_name == "rate"
+
+    def test_scale_parametrization_creation(self):
+        """Test creation of distribution with scale parametrization."""
+        dist = self.exponential_family(beta=2.0, parametrization_name="scale")
+
+        assert dist.parameters == {"beta": 2.0}
+        assert dist.parametrization_name == "scale"
+
+    def test_parametrization_constraints(self):
+        """Test parameter constraints validation."""
+        # lambda_ must be positive
+        with pytest.raises(ValueError, match="lambda_ > 0"):
+            self.exponential_family(lambda_=-1.0)
+
+        # beta must be positive
+        with pytest.raises(ValueError, match="beta > 0"):
+            self.exponential_family(beta=0.0, parametrization_name="scale")
+
+    def test_moments(self):
+        """Test mean, variance and skewness of the lambda_=0.5 example distribution."""
+        # Mean: 1 / lambda_ = 2.0
+        mean_func = self.exponential_dist_example.query_method(CharacteristicName.MEAN)
+        assert abs(mean_func(None) - 2.0) < self.CALCULATION_PRECISION
+
+        # Variance: 1 / lambda_**2 = 4.0
+        var_func = self.exponential_dist_example.query_method(CharacteristicName.VAR)
+        assert abs(var_func(None) - 4.0) < self.CALCULATION_PRECISION
+
+        # Skewness: 2.0 for an exponential distribution
+        skew_func = self.exponential_dist_example.query_method(CharacteristicName.SKEW)
+        assert abs(skew_func(None) - 2.0) < self.CALCULATION_PRECISION
+
+    def test_kurtosis_calculation(self):
+        """Test kurtosis calculation with excess parameter."""
+        kurt_func = self.exponential_dist_example.query_method(CharacteristicName.KURT)
+
+        raw_kurt = kurt_func(None)
+        assert abs(raw_kurt - 9.0) < self.CALCULATION_PRECISION
+
+        excess_kurt = kurt_func(None, excess=True)
+        assert abs(excess_kurt - 6.0) < self.CALCULATION_PRECISION
+
+        raw_kurt_explicit = kurt_func(None, excess=False)
+        assert abs(raw_kurt_explicit - 9.0) < self.CALCULATION_PRECISION
+
+    @pytest.mark.parametrize(
+        "parametrization_name, params, expected_lambda",
+        [
+            ("rate", {"lambda_": 0.5}, 0.5),
+            ("scale", {"beta": 2.0}, 0.5),  # lambda = 1/beta = 0.5
+        ],
+    )
+    def test_parametrization_conversions(self, parametrization_name, params, expected_lambda):
+        """Test that each parametrization converts to the expected base (rate) lambda_."""
+        base_params = self.exponential_family.to_base(
+            self.exponential_family.get_parametrization(parametrization_name)(**params)
+        )
+
+        assert abs(base_params.parameters["lambda_"] - expected_lambda) < self.CALCULATION_PRECISION
+
+    def test_analytical_computations_availability(self):
+        """Test that analytical computations are available for exponential distribution."""
+        comp = self.exponential_family(lambda_=1.0).analytical_computations
+
+        expected_chars = {
+            CharacteristicName.PDF,
+            CharacteristicName.CDF,
+            CharacteristicName.PPF,
+            CharacteristicName.CF,
+            CharacteristicName.MEAN,
+            CharacteristicName.VAR,
+            CharacteristicName.SKEW,
+            CharacteristicName.KURT,
+        }
+        assert set(comp.keys()) == expected_chars
+
+    def test_pdf_array_input(self):
+        """Test PDF calculation with array input against scipy.stats.expon."""
+        pdf = self.exponential_dist_example.query_method(CharacteristicName.PDF)
+        x_array = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0])
+
+        pdf_array = pdf(x_array)
+        assert pdf_array.shape == x_array.shape
+        scipy_pdf = expon.pdf(x_array, scale=2.0)  # scale = 1/lambda = 2.0
+
+        self.assert_arrays_almost_equal(pdf_array, scipy_pdf)
+
+    def test_cdf_array_input(self):
+        """Test CDF calculation with array input against scipy.stats.expon."""
+        cdf = self.exponential_dist_example.query_method(CharacteristicName.CDF)
+        x_array = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0])
+
+        cdf_array = cdf(x_array)
+        assert cdf_array.shape == x_array.shape
+        scipy_cdf = expon.cdf(x_array, scale=2.0)  # scale = 1/lambda = 2.0
+
+        self.assert_arrays_almost_equal(cdf_array, scipy_cdf)
+
+    def test_ppf_array_input(self):
+        """Test PPF calculation with array input against scipy.stats.expon."""
+        ppf = self.exponential_dist_example.query_method(CharacteristicName.PPF)
+        p_array = np.array([0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 0.999])
+
+        ppf_array = ppf(p_array)
+        assert ppf_array.shape == p_array.shape
+        scipy_ppf = expon.ppf(p_array, scale=2.0)  # scale = 1/lambda = 2.0
+
+        self.assert_arrays_almost_equal(ppf_array, scipy_ppf)
+
+    def test_characteristic_function_array_input(self):
+        """Test characteristic function calculation with array input."""
+        char_func = self.exponential_dist_example.query_method(CharacteristicName.CF)
+        t_array = np.array([-2.0, -1.0, 0.0, 1.0, 2.0])
+
+        cf_array = char_func(t_array)
+        assert cf_array.shape == t_array.shape
+
+        lambda_ = 0.5  # rate from setup_method; expected CF is lambda / (lambda - 1j * t)
+        denominator = lambda_**2 + t_array**2
+        expected_real = lambda_**2 / denominator
+        expected_imag = lambda_ * t_array / denominator
+
+        expected_real = np.where(np.abs(t_array) < self.CALCULATION_PRECISION, 1.0, expected_real)
+        expected_imag = np.where(np.abs(t_array) < self.CALCULATION_PRECISION, 0.0, expected_imag)
+
+        expected = expected_real + 1j * expected_imag
+
+        self.assert_arrays_almost_equal(cf_array.real, expected.real)
+        self.assert_arrays_almost_equal(cf_array.imag, expected.imag)
+
+    def test_exponential_support(self):
+        """Test that exponential distribution has correct support [0, ∞)."""
+        dist = self.exponential_dist_example
+
+        assert dist.support is not None
+        assert isinstance(dist.support, ContinuousSupport)
+
+        assert dist.support.left == 0.0
+        assert dist.support.right == float("inf")
+        assert dist.support.left_closed
+        assert not dist.support.right_closed
+
+        # Test containment for scalar points (boundary 0.0 included, +inf excluded)
+        assert dist.support.contains(0.0) is True
+        assert dist.support.contains(1.0) is True
+        assert dist.support.contains(-0.1) is False
+        assert dist.support.contains(float("inf")) is False
+
+        # Test containment for array input (expected element-wise result)
+        test_points = np.array([-0.1, 0.0, 1.0, 10.0])
+        expected = np.array([False, True, True, True])
+        results = dist.support.contains(test_points)
+        np.testing.assert_array_equal(results, expected)
+
+        assert dist.support.shape == ContinuousSupportShape1D.RAY_RIGHT
+
+
+class TestExponentialFamilyEdgeCases(BaseDistributionTest):
+    """Test edge cases and error conditions for exponential distribution."""
+
+    def setup_method(self):
+        """Set up the exponential family before each test method."""
+        registry = configure_families_register()
+        self.exponential_family = registry.get(FamilyName.EXPONENTIAL)
+
+    def test_invalid_parameterization(self):
+        """Test error for invalid parameterization name."""
+        with pytest.raises(KeyError):
+            self.exponential_family.distribution(parametrization_name="invalid_name", lambda_=1.0)
+
+    def test_missing_parameters(self):
+        """Test error for missing required parameters."""
+        with pytest.raises(TypeError):
+            self.exponential_family.distribution()  # Missing lambda_
+
+    def test_invalid_probability_ppf(self):
+        """Test PPF at the boundaries p=0 and p=1 and with out-of-range probabilities."""
+        dist = self.exponential_family(lambda_=1.0)
+        ppf = dist.query_method(CharacteristicName.PPF)
+
+        # Boundaries: p=0 maps to 0.0 and p=1 maps to +inf
+        assert ppf(0.0) == 0.0
+        assert ppf(1.0) == float("inf")
+
+        # Probabilities outside [0, 1] must raise ValueError
+        with pytest.raises(ValueError):
+            ppf(-0.1)
+        with pytest.raises(ValueError):
+            ppf(1.1)
+
+    def test_characteristic_function_at_zero(self):
+        """Test characteristic function at zero returns 1."""
+        dist = self.exponential_family(lambda_=1.0)
+        char_func = dist.query_method(CharacteristicName.CF)
+
+        cf_value_zero = char_func(0.0)
+        assert abs(cf_value_zero.real - 1.0) < self.CALCULATION_PRECISION
+        assert abs(cf_value_zero.imag) < self.CALCULATION_PRECISION
+
+    def test_characteristic_function_large_t(self):
+        """Test characteristic function at large t is complex with modulus at most 1."""
+        dist = self.exponential_family(lambda_=1.0)
+        char_func = dist.query_method(CharacteristicName.CF)
+
+        cf_value_large = char_func(1000.0)
+        assert np.iscomplexobj(cf_value_large)
+        assert abs(cf_value_large) <= 1.0