Skip to content

Commit feebdcd

Browse files
committed
Clean up before release
1 parent 71bfc80 commit feebdcd

24 files changed

Lines changed: 38103 additions & 3891 deletions
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
2+
3+
on: push
4+
5+
jobs:
6+
build:
7+
name: Build distribution 📦
8+
runs-on: ubuntu-latest
9+
10+
steps:
11+
- uses: actions/checkout@v4
12+
- name: Set up Python
13+
uses: actions/setup-python@v5
14+
with:
15+
python-version: "3.x"
16+
- name: Install pypa/build
17+
run: >-
18+
python3 -m
19+
pip install
20+
build
21+
--user
22+
- name: Install dependencies
23+
run: python3 -m pip install -r requirements.txt
24+
- name: Build a binary wheel and a source tarball
25+
run: python3 -m build
26+
- name: Store the distribution packages
27+
uses: actions/upload-artifact@v4
28+
with:
29+
name: python-package-distributions
30+
path: dist/
31+
32+
publish-to-pypi:
33+
name: >-
34+
Publish Python 🐍 distribution 📦 to PyPI
35+
if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
36+
needs:
37+
- build
38+
runs-on: ubuntu-latest
39+
environment:
40+
name: pypi
41+
url: https://pypi.org/p/Allohubpy
42+
permissions:
43+
id-token: write # IMPORTANT: mandatory for trusted publishing
44+
45+
steps:
46+
- name: Download all the dists
47+
uses: actions/download-artifact@v4
48+
with:
49+
name: python-package-distributions
50+
path: dist/
51+
- name: Publish distribution 📦 to PyPI
52+
uses: pypa/gh-action-pypi-publish@release/v1
53+
54+
github-release:
55+
name: >-
56+
Sign the Python 🐍 distribution 📦 with Sigstore
57+
and upload them to GitHub Release
58+
needs:
59+
- publish-to-pypi
60+
runs-on: ubuntu-latest
61+
62+
permissions:
63+
contents: write # IMPORTANT: mandatory for making GitHub Releases
64+
id-token: write # IMPORTANT: mandatory for sigstore
65+
66+
steps:
67+
- name: Download all the dists
68+
uses: actions/download-artifact@v4
69+
with:
70+
name: python-package-distributions
71+
path: dist/
72+
- name: Sign the dists with Sigstore
73+
uses: sigstore/gh-action-sigstore-python@v3.0.0
74+
with:
75+
inputs: >-
76+
./dist/*.tar.gz
77+
./dist/*.whl
78+
- name: Create GitHub Release
79+
env:
80+
GITHUB_TOKEN: ${{ github.token }}
81+
run: >-
82+
gh release create
83+
'${{ github.ref_name }}'
84+
--repo '${{ github.repository }}'
85+
--notes ""
86+
- name: Upload artifact signatures to GitHub Release
87+
env:
88+
GITHUB_TOKEN: ${{ github.token }}
89+
# Upload to GitHub Release using the `gh` CLI.
90+
# `dist/` contains the built packages, and the
91+
# sigstore-produced signatures and certificates.
92+
run: >-
93+
gh release upload
94+
'${{ github.ref_name }}' dist/**
95+
--repo '${{ github.repository }}'
96+

Allohubpy/SAtraj.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,13 +165,13 @@ def combine(self, other):
165165

166166
def compute_entropy(self, bootstrap=0):
167167
"""
168-
Computes the shanon entropy for each alphabet fragment
168+
Computes the shannon entropy for each alphabet fragment
169169
170170
Args:
171171
bootstrap (int): Number of samples to create to estimate statistics
172172
173173
Returns:
174-
List containing the shanon entropies for each position or list of list if bootstrap !=0
174+
List containing the shannon entropies for each position or list of list if bootstrap !=0
175175
"""
176176
results = []
177177
iterations = 1
@@ -200,7 +200,7 @@ def fragments_probabilities(self, column):
200200
Computes the probabilities for each letter given an array
201201
202202
Args:
203-
column (np.array): data array for which the shanon entropy should be computed.
203+
column (np.array): data array for which the shannon entropy should be computed.
204204
205205
Returns:
206206
np.array with the probabilities for each possible fragment

Allohubpy/plotter/Allohub_plots.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66

77
# Example plots to use in analysis or to use as inspiration for personalized plots for Allohubpy
88

9-
def plot_shanon_entropy(entropy, action="save", ylim=(0,4), name="SA_shanon_entropy.png"):
9+
def plot_shannon_entropy(entropy, action="save", ylim=(0,4), name="SA_shannon_entropy.png"):
1010
"""
11-
Plots the shanon entropies
11+
Plots the shannon entropies
1212
1313
Args:
14-
entropy (np.array or list): Shanon entropies for each fragment.
14+
entropy (np.array or list): shannon entropies for each fragment.
1515
action (str): What to do with the plot, 'save' for saving it, 'show' for displaying it
1616
ylim (tuple of floats): Maximum and minimum value for the y axis.
1717
name (str): name to use to save the plot, including format.
@@ -39,7 +39,7 @@ def plot_shanon_entropy(entropy, action="save", ylim=(0,4), name="SA_shanon_entr
3939
plt.close()
4040

4141

42-
def plot_shanon_entropy_sd(entropy_arrays, ylim = (0,4), action="save", name="SA_shanon_entropy.png"):
42+
def plot_shannon_entropy_sd(entropy_arrays, ylim = (0,4), action="save", name="SA_shannon_entropy.png"):
4343
"""
4444
Plots the Shannon entropies for multiple entropy arrays, with error shade representing the
4545
standard deviation of the mean at each fragment index.

Allohubpy/src/encodeframe_extension_build.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
ffibuilder.cdef("void encode_frame(unsigned int n_windows, unsigned int n_fragments, unsigned int f_size, float (*MDframe)[3], float (*Fragments)[3], int *Encoding);")
77

8-
src_dir = os.path.abspath("Allohubpy/src")
8+
#src_dir = os.path.abspath("Allohubpy/src")
9+
src_dir = os.path.join("Allohubpy", "src")
910
ffibuilder.set_source(
1011
"Allohubpy._encodeframe",
1112
""" #include "encodeframe.h" """,

Allohubpy/src/kabsch_extension_build.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
ffibuilder.cdef("double wrmsd_kabsch(unsigned int size, float (*Xarray)[3], float (*Yarray)[3]);")
77

8-
src_dir = os.path.abspath("Allohubpy/src")
8+
#src_dir = os.path.abspath("Allohubpy/src")
9+
src_dir = os.path.join("Allohubpy", "src")
910
ffibuilder.set_source(
1011
"Allohubpy._kabsch",
1112
"""#include "kabsch.h" """,

MANIFEST.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
include Allohubpy/Allohubpy_cython.pyx
2+
include Allohubpy/src/kabsch_extension_build.py
3+
include Allohubpy/src/encodeframe_extension_build.py
4+
include Allohubpy/src/kabsch.c
5+
include Allohubpy/src/kabsch.h
6+
include Allohubpy/src/encodeframe.h
7+
include Allohubpy/src/encodeframe.c

README.md

Lines changed: 186 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,196 @@
11
# Allohubpy
22
Allohubpy is a python package for the detection and charectarization of allosteric signals using a information theoric approach.
33

4-
The method captures local conformational changes associated with global motions from molecular dynamics simulations through the use of a Structural Alphabet, which simplifies the complexity of the Cartesian space by reducing the dimensionality down to a string of encoded fragments. These encoded fragments can then be used to compute the shanon entropy, mutual information between positions and build networks of correlated motions.
4+
The method captures local conformational changes associated with global motions from molecular dynamics simulations through the use of a Structural Alphabet, which simplifies the complexity of the Cartesian space by reducing the dimensionality down to a string of encoded fragments. These encoded fragments can then be used to compute the shannon entropy, mutual information between positions and build networks of correlated motions.
55

66
The folder notebooks contains examples for how to run the package with some sample data.
77

88
## Installation
99

1010
The package can be installed through pip with pip install Allohubpy.
11+
To install all the required packages: pip install -r requirements.txt
1112

12-
Alternatively, one can compile the required code by running python setup.py build_ext --inplace and manualy adding the package to the PYTHONPATH.
13+
Alternatively, one can compile the required code by running python setup.py such as build_ext --inplace and manualy adding the package to the PYTHONPATH.
14+
One can also clone the repository and run pip install .
15+
16+
## Examples
17+
18+
Examples on how to run the code can be found on the notebooks folder.
19+
All necessary data is provided in the respctive data folders under notebook.
20+
21+
## Usage
22+
23+
### Plotting
24+
25+
The package comes with premade plotting functions that can be used directly or as a template.
26+
All plotting functions are found under allohubpy/plotter/Allohub_plots.py
27+
28+
## TrajProcessor
29+
30+
The TrajProcessor module offers encoder for 3di and M32K25.
31+
One can choose how many frames to skip as equilibration and the frequency (Stride) of the frames to be used.
32+
33+
34+
35+
```python
36+
from Allohubpy import TrajProcessor
37+
38+
enc_3di = TrajProcessor.Encoder3DI("outputname_3di.sa")
39+
40+
# Encoder for M32K25
41+
enc_mk = TrajProcessor.SAEncoder("outputname_mk.sa")
42+
43+
# Trajectory is saved every 10 ps. with stride = 10 only the 1-th frames will be processed producing an spacing of 100 ps between frames
44+
# We skip 100 frames -> 1ns as extra equilibration
45+
46+
## Load repl1 of condition 1
47+
enc_3di.load_trajectory(topology="topo.pdb", mdtraj="mdtraj.xtc", skip=100, stride=10)
48+
enc_3di.encode()
49+
50+
enc_mk.load_trajectory(topology="topo.pdb", mdtraj="mdtraj.xtc", skip=100, stride=10)
51+
enc_mk.encode()
52+
```
53+
54+
55+
### SA handler
56+
57+
The SA handler for a SA trajectory can be initialized as follows:
58+
Block_size is the number of frames that will be used for each mutual information estimation and alphabet is the list of possible tokens for the selected alphabet. M32K25 and 3DI alphabets are provided by default.
59+
60+
The SA trajectory can be loaded with .load_data. Each encoded frame should be one line.
61+
62+
```python
63+
from Allohubpy import SAtraj
64+
sahandler = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
65+
sahandler.load_data("safile")
66+
```
67+
68+
After loading the data one can compute:
69+
The probabilities of each fragment with: .get_probabilities()
70+
The transition matrix between fragments with: .compute_transitions()
71+
And the Shannon entropy: .compute_entropy()
72+
73+
Plots can be created using the provided plotting functions.
74+
75+
```python
76+
from Allohubpy.plotter import Allohub_plots
77+
78+
entropy = sahandler.compute_entropy()
79+
Allohub_plots.plot_shannon_entropy_sd(entropy, ylim=(0,4), action="show")
80+
81+
fragment_probs = sahandler.get_probabilities()
82+
Allohub_plots.plot_fragment_probabilities(probability_matrix=fragment_probs, vocabulary=SAtraj.ALPHABETS["M32K25"], action="show")
83+
84+
transition_matrix = sahandler.compute_transitions()
85+
Allohub_plots.plot_transition_probabilities(trans_matrix=transition_matrix, vocabulary=SAtraj.ALPHABETS["M32K25"], action="show")
86+
```
87+
88+
Finaly, the mutual information matrices can be obtained by running:
89+
90+
```python
91+
mi_array = sahandler.compute_mis(max_workers=8)
92+
```
93+
94+
### Mutual information object
95+
96+
The computed mutual information matrices are stored in a MI object. One can retrive the matrix by doing .get_mi_matrix()
97+
The eigenvector decomposition can be done by calling .compute_eigensystem()
98+
99+
Mi matrices can also be added together using addition and substraction.
100+
101+
### Overlap object
102+
103+
The obtained mi matrices with their eigenvectors and eienvalues computed can then be passed to the overlap handler which can be used to assess convergence and find up and down regulated fragments.
104+
105+
```python
106+
from Allohubpy import Overlap
107+
108+
overlap = Overlap.Overlap([mi_array1, mi_array2, ....], ev_list=[0,1,2])
109+
overlap.fill_overlap_matrix()
110+
111+
# plot overlap
112+
Allohub_plots.plot_overlap(overlap.get_overlap_matrix(), vmax=0.4, action="show")
113+
114+
# compute similarities
115+
similarity_matrix = overlap.compute_similarities()
116+
```
117+
118+
For the up and down regulated fragments one needs to provide a mapping of the mi_arrays to the condition they belong.
119+
The method will return a dictionary of pandas dataframes for each combination of conditions.
120+
121+
Each dataframe has the following columns:
122+
FragmentPairs, log2FoldChange, AdjustedPValues and PValues
123+
124+
```python
125+
pdown_regulated_fragments = overlap.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
126+
t12_updown = updown_regulated_fragments[(0,1)]
127+
128+
Allohub_plots.plot_updownregulation(t12_updown, fold_threshold=2, ylim=10, pvalue_threshold=0.01, action="show")
129+
```
130+
131+
### SA Network
132+
133+
One can also create graph representations based on the mutual information.
134+
For that a matrix of distances from all c alphas to all c alphas is needed.
135+
136+
The selected top pairs with higher signal at a given distance will be selected to build the network.
137+
138+
```python
139+
from Allohubpy import SANetwork
140+
141+
SAgraph = SANetwork.SANetWork(traj_list= mi_array1 + mi_array2 + ..., distance_limit=7)
142+
143+
SAgraph.set_distance_matrix(matrix=distance_matrix, fragment_size=4)
144+
145+
SAgraph.create_graph(threshold=90)
146+
```
147+
148+
The graph can be extracted with .get_graph() and analyzed with .compute_centrality() or by extracting the shortest path from a set of selected residues.
149+
150+
```python
151+
centrality_df = SAgraph_fbp.compute_centrality()
152+
Allohub_plots.plot_network_centrality(centrality_df, action="show")
153+
154+
site1_fragments = [476, 509]
155+
site2_fragments = [260, 281]
156+
157+
# Subgraph is a networkx object with the nodes and edges of the shortest paths connecting those residues
158+
# Shortest_pathd ans shortest_distances are list of the shortest paths and their distances respectively.
159+
# z_score provides an estimate of how statistically coupled the two sites are
160+
subgraph, shortest_paths, shortest_distances, z_score = SAgraph_fbp.identify_preferential_connections(start_fragments=site1_fragments,
161+
end_fragments=site2_fragments)
162+
163+
Allohub_plots.plot_SA_graph(subgraph, site1_fragments, site2_fragments, action="show")
164+
165+
```
166+
167+
## Incorporating own alphabets
168+
169+
The package also provides base classes to be used to create new alphabet encodings.
170+
171+
```python
172+
from Allohubpy.TrajProcessor import AbsEncoder
173+
174+
class MyEncoder(AbsEncoder):
175+
176+
177+
# Atoms to keep should be the list of atom names that your encoding needs from the md trajectory
178+
def __init__(self, output_file_name):
179+
super().__init__(atoms_to_keep=["CA", "CB", "N", "C"], output_file_name=output_file_name)
180+
181+
182+
183+
def process_frame(self, frame_dict):
184+
# Process frame is called on every MD frame when one call .encode()
185+
# frame_dict is a dictionary with the following keys:
186+
# "residues" containing the list of residues present
187+
# One key for each atom name in atoms to keep. for example "CA", "CB", etc.
188+
# The function needs to return the encoded string for that frame.
189+
# Under each key there is a list for all the elements under that group,f or example:
190+
# "CA" will have all c alphas and "CB" all c betas.
191+
# If one residue does not contain that atom name, then it will not be present in the array, so len(CA) != len(CB)
192+
# One can use the residues list (Three letter code) to deal with it.
193+
194+
return encoded_string
195+
196+
```

0 commit comments

Comments
 (0)