Machine-Learning-for-Asset-Management/denoisingAndDetoning.py at master · andres-torres-dev/Machine-Learning-for-Asset-Management · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import pandas as pd
import numpy as np
from sklearn.neighbors import KernelDensity

def getPCA(matrix):
    """Eigendecompose a symmetric matrix, largest eigenvalue first.

    Args:
        matrix: Square symmetric (Hermitian) array handed to
            ``np.linalg.eigh``.

    Returns:
        Tuple ``(eVal, eVec)``. ``eVal`` is a diagonal matrix holding the
        eigenvalues in descending order; column ``i`` of ``eVec`` is the
        eigenvector belonging to ``eVal[i, i]``.
    """
    values, vectors = np.linalg.eigh(matrix)

    # eigh reports eigenvalues in ascending order; reverse to descending
    # and reorder the eigenvector columns to match.
    order = np.argsort(values)[::-1]
    sortedValues = values[order]
    sortedVectors = vectors[:, order]

    return np.diagflat(sortedValues), sortedVectors

def mpPDF(x, pts=1000):
    """Evaluate the Marcenko-Pastur density implied by a data matrix.

    Args:
        x: 2-D array. Its overall variance (``np.var``) and the ratio
            ``x.shape[0] / x.shape[1]`` parameterise the distribution.
        pts: Number of evenly spaced points at which the density is
            evaluated.

    Returns:
        ``pd.Series`` of density values, indexed by the grid of eigenvalues
        running from the lower to the upper edge of the distribution.
    """
    sigma2 = np.var(x)
    q = x.shape[0] / x.shape[1]

    # Lower and upper edges of the distribution's support.
    spread = (1. / q) ** .5
    lower = sigma2 * (1 - spread) ** 2
    upper = sigma2 * (1 + spread) ** 2

    grid = np.linspace(lower, upper, pts)
    scale = q / (2 * np.pi * sigma2 * grid)
    density = scale * ((upper - grid) * (grid - lower)) ** .5

    return pd.Series(density, index=grid)

def fitKDE(obs, bWidth=.25, kernel="gaussian", x=None):
    """Fit a kernel density estimate to ``obs`` and evaluate it on ``x``.

    Args:
        obs: Array of observations; a 1-D array is reshaped into a single
            column before fitting.
        bWidth: Bandwidth passed to ``KernelDensity``.
        kernel: Kernel name passed to ``KernelDensity``.
        x: Array of points on which the fitted density is evaluated.
            Defaults to the unique values of ``obs``.

    Returns:
        ``pd.Series`` of density values indexed by the flattened evaluation
        points.
    """
    if len(obs.shape) == 1:
        obs = obs.reshape(-1, 1)

    estimator = KernelDensity(kernel=kernel, bandwidth=bWidth)
    estimator.fit(obs)

    if x is None:
        x = np.unique(obs).reshape(-1, 1)
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)

    # score_samples returns log-densities; exponentiate to get the density.
    logDensity = estimator.score_samples(x)

    return pd.Series(np.exp(logDensity), index=x.flatten())

def covariance(matrix, rowvar=True):
    """Estimate the sample covariance matrix of ``matrix``.

    Args:
        matrix: 1-D or 2-D array of data, forwarded to ``np.cov``.
        rowvar: If True (the default, and the only behaviour previously
            available), each row is a variable and each column an
            observation. Pass False for data laid out as
            observations x variables.

    Returns:
        The covariance matrix computed by ``np.cov``.
    """
    return np.cov(matrix, rowvar=rowvar)

def cov2corr(cov):
    """Convert a covariance matrix into a correlation matrix.

    Args:
        cov: Square covariance matrix.

    Returns:
        Matrix with entries ``cov[i, j] / (std[i] * std[j])``, where ``std``
        is the square root of the diagonal of ``cov``, clamped to [-1, 1].
    """
    sigma = np.sqrt(np.diag(cov))
    scale = np.outer(sigma, sigma)
    result = cov / scale

    # Floating-point rounding can push entries marginally outside [-1, 1].
    result[result < -1] = -1
    result[result > 1] = 1

    return result

def corr2cov(corr, std):
    """Rebuild a covariance matrix from correlations and standard deviations.

    Args:
        corr: Correlation matrix.
        std: 1-D array of standard deviations, one per row/column of ``corr``.

    Returns:
        Matrix with entries ``corr[i, j] * std[i] * std[j]``.
    """
    scale = np.outer(std, std)

    return corr * scale

def conditionNumber(eVal):
    """Return the condition number implied by a set of eigenvalues.

    The condition number is the ratio between the largest and the smallest
    eigenvalue modulus. The previous implementation divided the second
    eigenvalue by the first, which is not the condition number for matrices
    larger than 2x2 and is its reciprocal for 2x2 ones.

    Args:
        eVal: Eigenvalues, either as a diagonal matrix (the form returned by
            ``getPCA``) or as a 1-D array. Ordering does not matter.

    Returns:
        ``max(|eVal|) / min(|eVal|)``; ``inf`` when the smallest modulus is
        zero.
    """
    eVal = np.asarray(eVal)
    if eVal.ndim == 2:
        # Pull the eigenvalues off the diagonal of the eigenvalue matrix.
        eVal = np.diag(eVal)

    moduli = np.abs(eVal)

    return moduli.max() / moduli.min()

def denoiseCorrelation(eVal, eVec, nFacts):
    """Denoise a correlation matrix with the constant residual eigenvalue method.

    The first ``nFacts`` eigenvalues are kept as they are; every remaining
    eigenvalue is replaced by the average of the remaining ones, so the sum
    of the eigenvalues is unchanged. The matrix is then rebuilt from the
    adjusted spectrum and rescaled to a correlation matrix.

    Args:
        eVal: Eigenvalues ordered so that the ones to keep come first (for
            example the descending order produced by ``getPCA``), given
            either as a diagonal matrix or as a 1-D array. The input is not
            modified.
        eVec: Matrix whose columns are the eigenvectors matching ``eVal``.
        nFacts: Number of leading eigenvalues to keep untouched.

    Returns:
        The denoised correlation matrix.
    """
    eVal = np.asarray(eVal, dtype=float)
    # Work on a private 1-D copy: slicing a diagonal matrix directly would
    # overwrite whole rows, and writing in place would alter the caller's
    # array.
    spectrum = np.diag(eVal).copy() if eVal.ndim == 2 else eVal.copy()

    nNoise = spectrum.shape[0] - nFacts
    if nNoise > 0:
        spectrum[nFacts:] = spectrum[nFacts:].sum() / float(nNoise)

    cov = np.dot(eVec, np.diag(spectrum)).dot(eVec.T)

    return cov2corr(cov)

def detoneCorrelation(eVal, eVec, nFacts):
    """Remove the leading (market) components from a correlation matrix.

    The matrix is rebuilt from the eigenvalues and eigenvectors that come
    after the first ``nFacts`` components, and the result is rescaled so its
    diagonal is one again.

    Args:
        eVal: Eigenvalues of the (denoised) correlation matrix with the
            market components first, given either as a diagonal matrix (the
            form returned by ``getPCA``) or as a 1-D array.
        eVec: Matrix whose columns are the eigenvectors matching ``eVal``.
        nFacts: Number of leading components associated with the market
            that are removed.

    Returns:
        The detoned correlation matrix. Because components were removed it
        is singular whenever ``nFacts > 0``.
    """
    eVal = np.asarray(eVal)
    eVec = np.asarray(eVec)

    residualVec = eVec[:, nFacts:]
    if eVal.ndim == 2:
        # Take the square sub-block of eigenvalues; slicing columns only
        # would leave a shape that cannot be multiplied with residualVec.
        residualVal = eVal[nFacts:, nFacts:]
    else:
        residualVal = np.diag(eVal[nFacts:])

    c2_ = np.dot(residualVec, residualVal).dot(residualVec.T)

    # Rescale element-wise by the outer product of the standard deviations
    # so the diagonal becomes one.
    std = np.sqrt(np.diag(c2_))
    c2 = c2_ / np.outer(std, std)
    c2[c2 < -1] = -1  # fix numerical error
    c2[c2 > 1] = 1

    return c2