-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembedder.py
More file actions
54 lines (39 loc) · 1.43 KB
/
embedder.py
File metadata and controls
54 lines (39 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from scipy.fft import fft
import numpy as np
from utility import aa_to_complex
def normalize(x: np.ndarray) -> np.ndarray:
    """Return ``x`` divided by its norm, leaving zero-norm input unscaled.

    The norm is whatever ``np.linalg.norm`` returns with default arguments.

    Args:
        x: Array to rescale.

    Returns:
        A new array equal to ``x / norm`` when the norm is positive, and
        ``x / 1`` otherwise (so an all-zero input comes back as zeros instead
        of producing a division-by-zero result).
    """
    magnitude = np.linalg.norm(x)
    if magnitude > 0:
        return x / magnitude
    # Divide by 1 rather than returning ``x`` itself so the result is still a
    # fresh array with the same dtype promotion as the scaled case.
    return x / 1
class ProteinEmbedder:
    """Embed protein sequences as the FFT of per-residue complex codes.

    Every amino-acid letter is mapped to a complex number taken from the
    ``aa_to_complex`` lookup table (selected by ``dim_reduct`` and
    ``dist_func``), and the resulting complex sequence is passed through
    ``scipy.fft.fft``.

    Attributes are plain instance attributes:
        aa_to_complex: Per-instance copy of the selected letter -> complex
            table, extended with an ``'X'`` entry used for unknown letters.
        n: Transform length handed to ``scipy.fft.fft``.
    """

    def __init__(self, dim_reduct: str = 'MDS', dist_func: str = 'SMS', n: int = 2048):
        """
        Initialize the ProteinEmbedder with the specified lookup table.

        Args:
            dim_reduct: First-level key into ``aa_to_complex``; the embedding
                method used to build the table ('t-SNE', 'MDS', 'UMAP').
            dist_func: Second-level key into ``aa_to_complex``.
                NOTE(review): presumably names the distance/similarity
                function behind the table -- confirm against ``utility``.
            n: Length of the FFT output. ``scipy.fft.fft`` zero-pads shorter
                sequences and truncates longer ones to this length.

        Raises:
            KeyError: If ``dim_reduct`` or ``dist_func`` is not present in
                ``aa_to_complex``.
        """
        # Work on a shallow copy: the selected table is module-level data
        # shared by every importer, and adding the 'X' fallback in place would
        # silently modify it for all other users.
        table = dict(aa_to_complex[dim_reduct][dist_func])
        # 'X' (unknown residue) is encoded as the mean of all table entries.
        table['X'] = sum(table.values()) / len(table)
        self.aa_to_complex = table
        self.n = n

    def encode(self, sequence: str) -> np.ndarray:
        """
        Encode a protein sequence into a complex vector.

        The sequence is upper-cased, each letter is replaced by its complex
        code (letters missing from the table use the ``'X'`` code), and the
        FFT of that complex sequence is returned.

        Args:
            sequence: Protein sequence as string

        Returns:
            complex vector representing the sequence (the output of
            ``scipy.fft.fft`` called with ``n=self.n``)
        """
        table = self.aa_to_complex
        # Hoist the fallback so unknown letters cost a single dict lookup.
        unknown = table['X']

        # Convert sequence to complex numbers
        complex_seq = np.array([table.get(aa, unknown) for aa in sequence.upper()])

        # Apply FFT
        return fft(complex_seq, n=self.n)
if __name__ == "__main__":
    # Example usage: construct an embedder with the default table and length.
    embedder = ProteinEmbedder()