-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinverse_normal.py
More file actions
122 lines (92 loc) · 3.59 KB
/
inverse_normal.py
File metadata and controls
122 lines (92 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 23 10:36:20 2023
@author: ing
"""
import numpy as np
import scipy as scipy
import math
import scipy.special
def replace_repeated_with_mean(arr):
""" The purpose of this script is to replace repeated values in an array,
with the mean of those values.
"""
# Find the indices where the elements change
change_indices = np.where(np.diff(arr) != 0)[0] + 1
# Split the array into sets of repeated elements
sets = np.split(arr, change_indices)
# Calculate the mean of each set and replace the elements
for i in range(len(sets)):
set_mean = np.mean(sets[i])
sets[i].fill(set_mean)
# Concatenate the sets back into a single array
result = np.concatenate(sets)
return result
def inverse_normal(X, method='Blom', repeat_val=False):
""" Applies a rank-based inverse normal transform to an numpy input. The
method performs faster if there are no repeated values in columns. Repeated
column values are problematic, as they cannot be ranked against one another.
Here, they are replaced by the mean rank value. The user should specify if
there are repeat values in the array provided
Inputs:
X: A 1-D numpy array or 2-D matrix. This function transforms this input data
so that it is normally distributed.
c: Constant to be used in the transformation
method: method to choose c:
'Blom':c=3/8
'Tukey':c=1/3
'Bliss':c=1/2
'Waerden':c=0
Outputs:
X_trans: Transformed data
References:
- Van der Waerden BL. Order tests for the two-sample
problem and their power. Proc Koninklijke Nederlandse
Akademie van Wetenschappen. Ser A. 1952; 55:453-458
- Blom G. Statistical estimates and transformed
beta-variables. Wiley, New York, 1958.
- Tukey JW. The future of data analysis.
Ann Math Stat. 1962; 33:1-67.
- Bliss CI. Statistics in biology. McGraw-Hill,
New York, 1967.
___________________________________
Alex Ing, 2023, partly based on MATLAB scripts by
Anderson M. Winkler (http://brainder.org)
"""
if np.sum(np.isnan(X))>0:
raise ValueError("input contains nans")
if method == 'Blom':
c=3/8
elif method == 'Tukey':
c=1/3
elif method == 'Bliss':
c=1/2
elif method == 'Waerden':
c=0
else:
print('method unknown, using Blom as default')
c=3/8
if repeat_val==False:
ix = np.argsort(X,axis=0) ## get the rank indices
ri = np.argsort(ix,axis=0) ## get the indices of ranked values
N = X.shape[0] ## get the size of the numpy array
p = (ri-c)/(N-2*c+1)
X_trans = math.sqrt(2)*scipy.special.erfinv(2*p-1)
else:
## If we have repeated values, it is necessary to loop over columns
## to deal with them
X_trans = np.zeros(shape = X.shape)
for i in range(X.shape[1]):
print(i)
Y = X[:,i]
iy = np.argsort(Y,axis=0) ## get the rank indices
ri = np.argsort(iy,axis=0) ## get the indices of ranked values
N = Y.shape[0] ## get the size of the numpy array
p = (ri-c)/(N-2*c+1)
Y_trans = math.sqrt(2)*scipy.special.erfinv(2*p-1)
Y_trans = replace_repeated_with_mean(Y_trans)
## Here, we check for repeated values, repeated values are
## replaced by the mean of repeated values
X_trans[:,i] = Y_trans
return X_trans