Skip to content

Commit 034eac4

Browse files
authored
Merge pull request #13 from Davide-Ruffini/main
Multivariate Binomial and Poisson distributions
2 parents bbf140c + 5511131 commit 034eac4

3 files changed

Lines changed: 385 additions & 1 deletion

File tree

gemact/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,7 @@
6060
'uniform': 'distributions.Uniform',
6161
'multinomial': 'distributions.Multinomial',
6262
'dirichlet multinomial' : 'distributions.Dirichlet_Multinomial',
63-
'negative multinomial' : 'distributions.NegMultinom'
63+
'negative multinomial' : 'distributions.NegMultinom',
64+
'multivariate binomial' : 'distributions.MultivariateBinomial',
65+
'multivariate poisson' : 'distributions.MultivariatePoisson'
6466
}

gemact/distributions.py

Lines changed: 380 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6667,4 +6667,384 @@ def mgf(self, t):
66676667
"""
66686668
exponent = np.sum(self.p * np.exp(t))
66696669
return (self.p0 / (1 - exponent)) ** self.x0
6670+
6671+
6672+
class MultivariateBinomial:
    """
    Multivariate Binomial distribution.
    Implementation from "Multivariate Binomial and Poisson Distributions",
    A. S. Krishnamoorthy.

    :param \\**kwargs:
        See below

    :Keyword Arguments:
        * *n* (``int``) --
          Number of trials.
        * *p_joint* (``numpy.ndarray``) --
          Joint probability matrix where p_joint[i, j] = P(Xi=1, Xj=1);
          the diagonal holds the marginal success probabilities p_i.

    """

    def __init__(self, **kwargs):
        self.n = kwargs['n']
        # Marginal success probabilities sit on the diagonal of p_joint.
        self.p = np.diag(kwargs['p_joint'])
        self.k = len(self.p)
        self.q = 1 - self.p

        self.p_joint = np.asarray(kwargs['p_joint'])
        assert self.p_joint.shape == (self.k, self.k), "p_joint must be k x k"

        # Dependence measures d_ij = P(Xi=1, Xj=1) - p_i * p_j
        # (zero when margins i and j are independent).
        self.d = self.p_joint - np.outer(self.p, self.p)

    @property
    def n(self):
        return self.__n

    @n.setter
    def n(self, value):
        hf.assert_type_value(value, 'n', logger, (float, int), lower_bound=0, lower_close=False)
        self.__n = value

    @property
    def p(self):
        return self.__p

    @p.setter
    def p(self, value):
        for element in value:
            hf.assert_type_value(element, 'p', logger, (float, np.floating), lower_bound=0, upper_bound=1, lower_close=True, upper_close=True)
        value = np.array(value)

        # Message now matches the condition (it rejects sums >= 1, not > 1).
        if sum(value) >= 1:
            raise ValueError("success probabilities must sum to less than 1.")

        self.__p = value

    @staticmethod
    def name():
        return 'multivariate binomial'

    @staticmethod
    def category():
        return {''}

    def pmf(self, x):
        """
        Probability mass function.

        Product of the marginal binomial pmfs, corrected for dependence via a
        first-order G-polynomial expansion.

        :param x: Quantile where PMF is evaluated, one count per margin.
        :type x: ``numpy.ndarray``
        :return: Probability mass function evaluated at x.
        :rtype: ``numpy.float64``
        """
        x = np.asarray(x)
        # Independent part: product of the k marginal binomial pmfs.
        # Fixed: uses math.comb (imported in libraries) instead of the
        # undefined name `binom`; comb requires integer arguments, hence the
        # int casts (n and the counts are assumed integral).
        marg_prod = np.prod([comb(int(self.n), int(xi)) * (self.p[i]**xi) * (self.q[i]**(self.n - xi))
                             for i, xi in enumerate(x)])

        # First-order dependence correction, one term per correlated pair.
        correction = 1.0
        for i in range(self.k):
            for j in range(i + 1, self.k):
                if np.abs(self.d[i, j]) > 1e-10:
                    g_i = self._G_polynomial(1, x[i], self.p[i])
                    g_j = self._G_polynomial(1, x[j], self.p[j])
                    denom = self.n * self.p[i] * self.q[i] * self.p[j] * self.q[j]
                    correction += self.d[i, j] * g_i * g_j / denom

        return marg_prod * correction

    def logpmf(self, x):
        """
        Natural logarithm of the probability mass function.

        :param x: Quantile where the log-PMF is evaluated.
        :type x: ``numpy.ndarray``
        :return: Log of the probability mass function evaluated at x.
        :rtype: ``float``
        """
        return mt.log(self.pmf(x))

    def mean(self):
        """
        Mean vector of the distribution.

        :return: Mean vector.
        :rtype: ``numpy.ndarray``
        """
        return self.n * self.p

    def cov(self):
        """
        Covariance matrix of the distribution.

        :return: Covariance matrix.
        :rtype: ``numpy.ndarray``
        """
        cov_mat = np.zeros((self.k, self.k))
        for i in range(self.k):
            for j in range(self.k):
                if i == j:
                    # Marginal binomial variance n * p * q.
                    cov_mat[i, j] = self.n * self.p[i] * self.q[i]
                else:
                    cov_mat[i, j] = self.n * (self.p_joint[i, j] - self.p[i] * self.p[j])
        return cov_mat

    def var(self):
        """
        Variance.

        :return: Variance array.
        :rtype: ``numpy.ndarray``
        """
        return np.diag(self.cov())

    def correlation(self):
        """
        Correlation matrix of the distribution.

        :return: Correlation matrix.
        :rtype: ``numpy.ndarray``
        """
        cov = self.cov()
        std_devs = np.sqrt(np.diag(cov))
        return cov / np.outer(std_devs, std_devs)

    def _G_polynomial(self, r, x, p):
        """
        Compute the G_r(x; p) orthogonal polynomial (recursive definition).

        :param r: Order of the polynomial.
        :param x: Evaluation point.
        :param p: Probability parameter.
        :return: Polynomial value.
        :rtype: ``float``
        """
        q = 1 - p
        if r == 0:
            return 1.0
        elif r == 1:
            # Standardized count: (x - n p) / sqrt(n p q).
            return (x - self.n * p) / np.sqrt(self.n * p * q)
        else:
            return ((x - self.n * p) * self._G_polynomial(r - 1, x, p) -
                    (r - 1) * q * self._G_polynomial(r - 2, x, p)) / np.sqrt(self.n * p * q)

    def rvs(self, size=1, random_state=None):
        """
        Random variates generator function.

        Uses a Gaussian-copula construction: correlated normals are mapped to
        uniforms, thresholded at p to obtain dependent Bernoulli trials, and
        summed over the n trials.
        NOTE(review): this matches the marginal means exactly but reproduces
        the pairwise dependence only approximately -- confirm against the
        target correlation if that matters for the use case.

        :param size: Number of random variates to generate (default=1).
        :type size: ``int``
        :param random_state: Random state for reproducibility.
        :type random_state: ``int``, optional
        :return: Array of random variates, shape (size, k).
        :rtype: ``numpy.ndarray``
        """
        if random_state is not None:
            np.random.seed(random_state)

        corr = self.correlation()
        np.fill_diagonal(corr, 1.0)  # guard against round-off on the diagonal

        uniforms = norm.cdf(np.random.multivariate_normal(
            mean=np.zeros(self.k),
            cov=corr,
            size=(size, self.n)
        ))

        # Threshold the copula uniforms at p and sum the n Bernoulli trials.
        return np.sum(uniforms < self.p, axis=1).astype(int)
6855+
6856+
6857+
class MultivariatePoisson:

    """
    Multivariate Poisson distribution.
    Implementation from "Multivariate Binomial and Poisson Distributions",
    A. S. Krishnamoorthy.

    :param \\**kwargs:
        See below

    :Keyword Arguments:
        * *mu* (``numpy.ndarray``) --
          Array (k,) of marginal means.
        * *mu_joint* (``numpy.ndarray``) --
          Matrix (k, k) of Cov(Xi, Xj); the diagonal is ignored (zeroed),
          since the marginal variances equal mu.

    """

    def __init__(self, **kwargs):
        required_params = ['mu', 'mu_joint']
        for param in required_params:
            if param not in kwargs:
                raise ValueError(f"Missing required parameter: {param}")

        self.mu = np.array(kwargs["mu"])
        self.k = len(self.mu)
        self.mu_joint = np.asarray(kwargs["mu_joint"])

    @property
    def mu(self):
        return self.__mu

    @mu.setter
    def mu(self, value):
        for element in value:
            hf.assert_type_value(element, 'mu', logger, (float, int), lower_bound=None, upper_bound=None, lower_close=True, upper_close=True)
        value = np.array(value)

        self.__mu = value

    @property
    def mu_joint(self):
        return self.__mu_joint

    @mu_joint.setter
    def mu_joint(self, value):
        # Copy (np.array, not np.asarray) so that zeroing the diagonal below
        # does not mutate the caller's input array in place.
        value = np.array(value, dtype=float)

        for element in value.flatten():
            hf.assert_type_value(element, 'mu_joint', logger, (float, np.floating, int), lower_bound=None, upper_bound=None, lower_close=True, upper_close=True)

        if value.shape != (self.k, self.k):
            raise ValueError("mu_joint must be k x k")

        # Only cross covariances are kept; the variances come from mu.
        np.fill_diagonal(value, 0)
        self.__mu_joint = value

    @staticmethod
    def name():
        return 'multivariate poisson'

    @staticmethod
    def category():
        return {''}

    def pmf(self, x):
        """
        Probability mass function using a first-order Charlier polynomial
        expansion around the product of independent Poisson margins.

        :param x: Quantile where PMF is evaluated, one count per margin.
        :type x: ``numpy.ndarray``
        :return: Probability mass function evaluated at x.
        :rtype: ``numpy.float64``
        """
        x = np.asarray(x)

        # Independent part: product of the k marginal Poisson pmfs.
        marg_prod = np.prod([poisson.pmf(xi, self.mu[i])
                             for i, xi in enumerate(x)])

        # First-order dependence correction, one term per covarying pair.
        correction = 1.0
        for i in range(self.k):
            for j in range(i + 1, self.k):
                if np.abs(self.mu_joint[i, j]) > 1e-10:
                    k_i = self._charlier_poly(x[i], self.mu[i], 1)
                    k_j = self._charlier_poly(x[j], self.mu[j], 1)
                    correction += self.mu_joint[i, j] * k_i * k_j / (self.mu[i] * self.mu[j])

        return marg_prod * correction

    def logpmf(self, x):
        """
        Natural logarithm of the probability mass function.

        :param x: Quantile where the log-PMF is evaluated.
        :type x: ``numpy.ndarray``
        :return: Log of the probability mass function evaluated at x.
        :rtype: ``float``
        """
        return mt.log(self.pmf(x))

    def mean(self):
        """
        Mean vector of the distribution.

        :return: Mean vector.
        :rtype: ``numpy.ndarray``
        """
        return self.mu

    def cov(self):
        """
        Covariance matrix of the distribution.

        :return: Covariance matrix.
        :rtype: ``numpy.ndarray``
        """
        # Cast to float: np.diag of an integer mu would otherwise truncate
        # the float cross covariances assigned below.
        cov_mat = np.diag(self.mu).astype(float)

        for i in range(self.k):
            for j in range(i + 1, self.k):
                cov_mat[i, j] = self.mu_joint[i, j]
                cov_mat[j, i] = self.mu_joint[i, j]

        return cov_mat

    def var(self):
        """
        Variance.

        :return: Variance array.
        :rtype: ``numpy.ndarray``
        """
        return np.diag(self.cov())

    def correlation(self):
        """
        Correlation matrix of the distribution.

        :return: Correlation matrix.
        :rtype: ``numpy.ndarray``
        """
        # Fixed: was self.covariance(), which does not exist (AttributeError);
        # the covariance method is named cov().
        cov = self.cov()
        std_devs = np.sqrt(np.diag(cov))
        return cov / np.outer(std_devs, std_devs)

    def _charlier_poly(self, x, mu, r):
        """
        Charlier polynomial K_r(x; mu) (recursive definition).

        :param x: Evaluation point.
        :param mu: Mean parameter.
        :param r: Order of the polynomial.
        :return: Polynomial value.
        :rtype: ``float``
        """
        if r == 0:
            return 1.0
        elif r == 1:
            # Standardized count: (x - mu) / sqrt(mu).
            return (x - mu) / np.sqrt(mu)
        else:
            return ((x - mu) * self._charlier_poly(x, mu, r - 1) -
                    (r - 1) * mu * self._charlier_poly(x, mu, r - 2)) / np.sqrt(mu)

    def rvs(self, size=1, random_state=None):
        """
        Random variates generator function (common-shock construction).

        Each margin is an independent Poisson plus shared Poisson shocks, so
        that Cov(Xi, Xj) = mu_joint[i, j]. This construction requires
        non-negative cross covariances and mu_i >= sum_j mu_joint[i, j].

        :param size: Number of random variates to generate (default=1).
        :type size: ``int``
        :param random_state: Random state for reproducibility.
        :type random_state: ``int``, optional
        :return: Array of random variates.
        :rtype: ``numpy.ndarray``
        """
        if random_state is not None:
            np.random.seed(random_state)
        samples = np.zeros((size, self.k), dtype=int)

        # One shared shock per positively covarying pair.
        common_shocks = {}
        for i in range(self.k):
            for j in range(i + 1, self.k):
                if self.mu_joint[i, j] > 0:
                    common_shocks[(i, j)] = np.random.poisson(
                        self.mu_joint[i, j], size=size)

        for i in range(self.k):
            # Independent component: rate left over after the shared shocks.
            lam = self.mu[i] - np.sum([self.mu_joint[i, j] for j in range(self.k) if j != i])
            if lam < 0:
                # Explicit error instead of numpy's cryptic "lam < 0" failure.
                raise ValueError(
                    f"mu[{i}] must be at least the sum of its joint components for sampling.")
            samples[:, i] = np.random.poisson(lam, size=size)

            # Add the shared shocks this margin participates in.
            for j in range(self.k):
                if i < j and (i, j) in common_shocks:
                    samples[:, i] += common_shocks[(i, j)]
                elif i > j and (j, i) in common_shocks:
                    samples[:, i] += common_shocks[(j, i)]
        return samples.squeeze()
66707050

gemact/libraries.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@
1515
from itertools import groupby
1616
from scipy.special import gammaln
1717
import math as mt
18+
from math import comb
19+

0 commit comments

Comments
 (0)