Skip to content

Commit 1e2d6ce

Browse files
0.24.23
1 parent 4b3d60d commit 1e2d6ce

2 files changed

Lines changed: 144 additions & 1 deletion

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotpython"
10-
version = "0.24.22"
10+
version = "0.24.23"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotpython/utils/stats.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import pandas as pd
2+
import numpy as np
3+
from scipy.stats import norm, t
4+
from numpy.linalg import pinv, inv, LinAlgError
5+
6+
7+
def cov_to_cor(covariance_matrix) -> np.ndarray:
8+
"""Convert a covariance matrix to a correlation matrix.
9+
10+
Args:
11+
covariance_matrix (numpy.ndarray): A square matrix of covariance values.
12+
13+
Returns:
14+
numpy.ndarray: A corresponding square matrix of correlation coefficients.
15+
16+
Examples:
17+
>>> from spotpython.utils.stats import cov_to_cor
18+
>>> import numpy as np
19+
>>> cov_matrix = np.array([[1, 0.8], [0.8, 1]])
20+
>>> cov_to_cor(cov_matrix)
21+
array([[1. , 0.8],
22+
[0.8, 1. ]])
23+
"""
24+
d = np.sqrt(np.diag(covariance_matrix))
25+
return covariance_matrix / np.outer(d, d)
26+
27+
28+
def partial_correlation(x, method="pearson") -> dict:
29+
"""Calculate the partial correlation matrix for a given data set.
30+
31+
Args:
32+
x (pandas.DataFrame or numpy.ndarray): The data matrix with variables as columns.
33+
method (str): Correlation method, one of 'pearson', 'kendall', or 'spearman'.
34+
35+
Returns:
36+
dict: A dictionary containing the partial correlation estimates, p-values,
37+
statistics, sample size (n), number of given parameters (gp), and method used.
38+
39+
Raises:
40+
ValueError: If input is not a matrix-like structure or not numeric.
41+
42+
References:
43+
1. Kim, S. ppcor: An R package for a fast calculation to semi-partial correlation coefficients.
44+
Commun Stat Appl Methods 22, 6 (Nov 2015), 665–674.
45+
46+
Examples:
47+
>>> from spotpython.utils.stats import cov_to_cor
48+
>>> import numpy as np
49+
>>> import pandas as pd
50+
>>> data = pd.DataFrame({
51+
>>> 'A': [1, 2, 3],
52+
>>> 'B': [4, 5, 6],
53+
>>> 'C': [7, 8, 9]
54+
>>> })
55+
>>> partial_correlation(data, method='pearson')
56+
{'estimate': array([[ 1. , -1. , 1. ],
57+
[-1. , 1. , -1. ],
58+
[ 1. , -1. , 1. ]]),
59+
'p_value': array([[0. , 0. , 0. ],
60+
[0. , 0. , 0. ],
61+
[0. , 0. , 0. ]]), ...
62+
}
63+
"""
64+
if isinstance(x, pd.DataFrame):
65+
x = x.to_numpy()
66+
if not isinstance(x, np.ndarray):
67+
raise ValueError("Supply a matrix-like 'x'")
68+
if not np.issubdtype(x.dtype, np.number):
69+
raise ValueError("'x' must be numeric")
70+
71+
n = x.shape[0]
72+
gp = x.shape[1] - 2
73+
cvx = np.cov(x, rowvar=False, bias=True)
74+
75+
try:
76+
if np.linalg.det(cvx) < np.finfo(float).eps:
77+
icvx = pinv(cvx)
78+
else:
79+
icvx = inv(cvx)
80+
except LinAlgError:
81+
icvx = pinv(cvx)
82+
83+
p_cor = -cov_to_cor(icvx)
84+
np.fill_diagonal(p_cor, 1)
85+
86+
epsilon = 1e-10 # small value to prevent division by zero
87+
if method == "kendall":
88+
denominator = np.sqrt(2 * (2 * (n - gp) + 5) / (9 * (n - gp) * (n - 1 - gp)))
89+
statistic = p_cor / denominator
90+
p_value = 2 * norm.cdf(-np.abs(statistic))
91+
else:
92+
factor = np.sqrt((n - 2 - gp) / (1 - p_cor**2 + epsilon))
93+
statistic = p_cor * factor
94+
p_value = 2 * t.cdf(-np.abs(statistic), df=n - 2 - gp)
95+
96+
np.fill_diagonal(statistic, 0)
97+
np.fill_diagonal(p_value, 0)
98+
99+
return {"estimate": p_cor, "p_value": p_value, "statistic": statistic, "n": n, "gp": gp, "method": method}
100+
101+
102+
def pairwise_partial_correlation(x, y, z, method="pearson") -> dict:
103+
"""Calculate the pairwise partial correlation between two variables given others.
104+
105+
Args:
106+
x (array-like): The first variable as a 1-dimensional array or list.
107+
y (array-like): The second variable as a 1-dimensional array or list.
108+
z (pandas.DataFrame): A data frame containing other conditional variables.
109+
method (str): Correlation method, one of 'pearson', 'kendall', or 'spearman'.
110+
111+
Returns:
112+
dict: A dictionary with the partial correlation estimate, p-value, statistic,
113+
sample size (n), number of given parameters (gp), and method used.
114+
115+
References:
116+
1. Kim, S. ppcor: An R package for a fast calculation to semi-partial correlation coefficients.
117+
Commun Stat Appl Methods 22, 6 (Nov 2015), 665–674.
118+
119+
Examples:
120+
>>> from spotpython.utils.stats import pairwise_partial_correlation
121+
>>> import pandas as pd
122+
>>> x = [1, 2, 3]
123+
>>> y = [4, 5, 6]
124+
>>> z = pd.DataFrame({'C': [7, 8, 9]})
125+
>>> pairwise_partial_correlation(x, y, z)
126+
{'estimate': -1.0, 'p_value': 0.0, 'statistic': -inf, 'n': 3, 'gp': 1, 'method': 'pearson'}
127+
"""
128+
x = np.asarray(x)
129+
y = np.asarray(y)
130+
z = pd.DataFrame(z)
131+
132+
xyz = pd.concat([pd.Series(x), pd.Series(y), z], axis=1)
133+
134+
pcor_result = partial_correlation(xyz, method=method)
135+
136+
return {
137+
"estimate": pcor_result["estimate"][0, 1],
138+
"p_value": pcor_result["p_value"][0, 1],
139+
"statistic": pcor_result["statistic"][0, 1],
140+
"n": pcor_result["n"],
141+
"gp": pcor_result["gp"],
142+
"method": method,
143+
}

0 commit comments

Comments
 (0)