Fraternalilab
diff --git a/‎.github/workflows/python-publish.yml‎
Lines changed: 96 additions & 0 deletions b/‎.github/workflows/python-publish.yml‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎Allohubpy/SAtraj.py‎
Lines changed: 3 additions & 3 deletions b/‎Allohubpy/SAtraj.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎Allohubpy/plotter/Allohub_plots.py‎
Lines changed: 4 additions & 4 deletions b/‎Allohubpy/plotter/Allohub_plots.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎Allohubpy/src/encodeframe_extension_build.py‎
Lines changed: 2 additions & 1 deletion b/‎Allohubpy/src/encodeframe_extension_build.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎Allohubpy/src/kabsch_extension_build.py‎
Lines changed: 2 additions & 1 deletion b/‎Allohubpy/src/kabsch_extension_build.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎MANIFEST.in‎
Lines changed: 7 additions & 0 deletions b/‎MANIFEST.in‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 186 additions & 2 deletions b/‎README.md‎
Lines changed: 186 additions & 2 deletions
@@ -0,0 +1,96 @@
+# Runs on every push. Only the `build` job is unconditional; publishing is
+# gated on tag pushes by the `if:` guard on `publish-to-pypi`, and the
+# release job depends on that publish job.
+name: Publish Python 🐍 distribution 📦 to PyPI
+
+on: push
+
+jobs:
+  # Builds the sdist and wheel once and hands them to the later jobs through
+  # the `python-package-distributions` artifact.
+  build:
+    name: Build distribution 📦
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        # Do not keep the job token in .git/config while build code runs.
+        persist-credentials: false
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.x"
+    - name: Install pypa/build
+      run: >-
+        python3 -m
+        pip install
+        build
+        --user
+    # NOTE(review): `python -m build` uses an isolated build environment by
+    # default, so packages installed here are presumably not visible to the
+    # build itself - confirm whether this step is still required.
+    - name: Install dependencies
+      run: python3 -m pip install -r requirements.txt
+    # NOTE(review): if the package compiles C extensions (the cffi/Cython
+    # sources suggest so), the wheel built on this runner is tagged
+    # `linux_x86_64`, which PyPI does not accept - confirm, and consider
+    # publishing the sdist only or building manylinux wheels.
+    - name: Build a binary wheel and a source tarball
+      run: python3 -m build
+    - name: Store the distribution packages
+      uses: actions/upload-artifact@v4
+      with:
+        name: python-package-distributions
+        path: dist/
+
+  # Uploads the artifacts produced by `build` to PyPI.
+  publish-to-pypi:
+    name: Publish Python 🐍 distribution 📦 to PyPI
+    needs: build
+    # Skip the upload unless the push that started the run was a tag push.
+    if: startsWith(github.ref, 'refs/tags/')
+    runs-on: ubuntu-latest
+    permissions:
+      # IMPORTANT: mandatory for trusted publishing
+      id-token: write
+    environment:
+      name: pypi
+      url: https://pypi.org/p/Allohubpy
+
+    steps:
+    - name: Download all the dists
+      uses: actions/download-artifact@v4
+      with:
+        path: dist/
+        name: python-package-distributions
+    - name: Publish distribution 📦 to PyPI
+      uses: pypa/gh-action-pypi-publish@release/v1
+
+  # Signs the built distributions and attaches them, together with the
+  # signatures, to a GitHub Release named after the pushed tag.
+  # Depends on `publish-to-pypi`, which itself only runs for tag pushes.
+  github-release:
+    name: >-
+      Sign the Python 🐍 distribution 📦 with Sigstore
+      and upload them to GitHub Release
+    needs:
+    - publish-to-pypi
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: write  # IMPORTANT: mandatory for making GitHub Releases
+      id-token: write  # IMPORTANT: mandatory for sigstore
+
+    steps:
+    - name: Download all the dists
+      uses: actions/download-artifact@v4
+      with:
+        name: python-package-distributions
+        path: dist/
+    - name: Sign the dists with Sigstore
+      uses: sigstore/gh-action-sigstore-python@v3.0.0
+      with:
+        inputs: >-
+          ./dist/*.tar.gz
+          ./dist/*.whl
+    - name: Create GitHub Release
+      env:
+        GITHUB_TOKEN: ${{ github.token }}
+      # The tag and repository names are read from the runner-provided
+      # environment variables instead of being interpolated into the
+      # script text, so a crafted ref name cannot inject shell commands.
+      run: >-
+        gh release create
+        "$GITHUB_REF_NAME"
+        --repo "$GITHUB_REPOSITORY"
+        --notes ""
+    - name: Upload artifact signatures to GitHub Release
+      env:
+        GITHUB_TOKEN: ${{ github.token }}
+      # Upload to GitHub Release using the `gh` CLI.
+      # `dist/` contains the built packages, and the
+      # sigstore-produced signatures and certificates.
+      run: >-
+        gh release upload
+        "$GITHUB_REF_NAME" dist/**
+        --repo "$GITHUB_REPOSITORY"
+
@@ -165,13 +165,13 @@ def combine(self, other):
 
     def compute_entropy(self, bootstrap=0):
         """
-        Computes the shanon entropy for each alphabet fragment
+        Computes the shannon entropy for each alphabet fragment
 
         Args:
             bootstrap (int): Number of samples to create to estimate statistics
 
         Returns:
-            List containing the shanon entropies for each position or list of list if bootstrap !=0
+            List containing the shannon entropies for each position or list of list if bootstrap !=0
         """
         results = []
         iterations = 1
@@ -200,7 +200,7 @@ def fragments_probabilities(self, column):
         Computes the probabilities for each letter given an array
 
         Args:
-            column (np.array): data array for which the shanon entropy should be computed.
+            column (np.array): data array for which the shannon entropy should be computed.
 
         Returns:
             np.array with the probabilities for each possible fragment
 
@@ -6,12 +6,12 @@
 
 # Example plots to use in analysis or to use as inspiration for personalized plots for Allohubpy
 
-def plot_shanon_entropy(entropy, action="save", ylim=(0,4), name="SA_shanon_entropy.png"):
+def plot_shannon_entropy(entropy, action="save", ylim=(0,4), name="SA_shannon_entropy.png"):
     """
-    Plots the shanon entropies
+    Plots the shannon entropies
 
     Args:
-        entropy (np.array or list): Shanon entropies for each fragment. 
+        entropy (np.array or list): shannon entropies for each fragment. 
         action (str): What to do with the plot, 'save' for saving it, 'show' for displaying it
         ylim (tuple of floats): Maximum and minimum value for the y axis.
         name (str): name to use to save the plot, including format.
@@ -39,7 +39,7 @@ def plot_shanon_entropy(entropy, action="save", ylim=(0,4), name="SA_shanon_entr
     plt.close()
 
 
-def plot_shanon_entropy_sd(entropy_arrays, ylim = (0,4), action="save", name="SA_shanon_entropy.png"):
+def plot_shannon_entropy_sd(entropy_arrays, ylim = (0,4), action="save", name="SA_shannon_entropy.png"):
     """
     Plots the Shannon entropies for multiple entropy arrays, with error shade representing the 
     standard deviation of the mean at each fragment index.
 
@@ -5,7 +5,8 @@
 
 ffibuilder.cdef("void encode_frame(unsigned int n_windows, unsigned int n_fragments, unsigned int f_size, float (*MDframe)[3], float (*Fragments)[3], int *Encoding);")
 
-src_dir = os.path.abspath("Allohubpy/src")
+#src_dir = os.path.abspath("Allohubpy/src")
+src_dir = os.path.join("Allohubpy", "src")
 ffibuilder.set_source(
     "Allohubpy._encodeframe",
     """ #include "encodeframe.h" """,
 
@@ -5,7 +5,8 @@
 
 ffibuilder.cdef("double wrmsd_kabsch(unsigned int size,  float (*Xarray)[3], float (*Yarray)[3]);")
 
-src_dir = os.path.abspath("Allohubpy/src")
+#src_dir = os.path.abspath("Allohubpy/src")
+src_dir = os.path.join("Allohubpy", "src")
 ffibuilder.set_source(
     "Allohubpy._kabsch",
     """#include "kabsch.h" """,
 
@@ -0,0 +1,7 @@
+# Extra files listed for inclusion in the source distribution.
+# Cython source
+include Allohubpy/Allohubpy_cython.pyx
+# cffi extension build scripts
+include Allohubpy/src/encodeframe_extension_build.py Allohubpy/src/kabsch_extension_build.py
+# C sources and headers used by the extensions
+include Allohubpy/src/encodeframe.c Allohubpy/src/encodeframe.h
+include Allohubpy/src/kabsch.c Allohubpy/src/kabsch.h
@@ -1,12 +1,196 @@
 # Allohubpy
 Allohubpy is a python package for the detection and characterization of allosteric signals using an information-theoretic approach. 
 
-The method captures local conformational changes associated with global motions from molecular dynamics simulations through the use of a Structural Alphabet, which simplifies the complexity of the Cartesian space by reducing the dimensionality down to a string of encoded fragments. These encoded fragments can then be used to compute the shanon entropy, mutual information between positions and build networks of correlated motions.
+The method captures local conformational changes associated with global motions from molecular dynamics simulations through the use of a Structural Alphabet, which simplifies the complexity of the Cartesian space by reducing the dimensionality down to a string of encoded fragments. These encoded fragments can then be used to compute the shannon entropy, mutual information between positions and build networks of correlated motions.
 
 The folder notebooks contains examples for how to run the package with some sample data.
 
 ## Installation
 
 The package can be installed through pip with pip install Allohubpy.
+To install all the required packages: pip install -r requirements.txt
 
-Alternatively, one can compile the required code by running python setup.py build_ext --inplace and manualy adding the package to the PYTHONPATH.
+Alternatively, one can compile the required code by running `python setup.py build_ext --inplace` and manually adding the package to the PYTHONPATH.
+One can also clone the repository and run pip install .
+
+## Examples
+
+Examples on how to run the code can be found on the notebooks folder.
+All necessary data is provided in the respective data folders under notebooks.
+
+## Usage
+
+### Plotting
+
+The package comes with premade plotting functions that can be used directly or as a template.
+All plotting functions are found under Allohubpy/plotter/Allohub_plots.py
+
+### TrajProcessor
+
+The TrajProcessor module offers encoders for the 3DI and M32K25 alphabets.
+One can choose how many frames to skip as equilibration and the frequency (Stride) of the frames to be used.
+
+
+
+```python
+from Allohubpy import TrajProcessor
+
+enc_3di = TrajProcessor.Encoder3DI("outputname_3di.sa")
+
+# Encoder for M32K25
+enc_mk = TrajProcessor.SAEncoder("outputname_mk.sa")
+
+# The trajectory is saved every 10 ps. With stride=10 only every 10th frame will be processed, producing a spacing of 100 ps between frames
+# We skip 100 frames -> 1ns as extra equilibration
+
+# Load and encode the trajectory with each encoder
+enc_3di.load_trajectory(topology="topo.pdb", mdtraj="mdtraj.xtc", skip=100, stride=10)
+enc_3di.encode()
+
+enc_mk.load_trajectory(topology="topo.pdb", mdtraj="mdtraj.xtc", skip=100, stride=10)
+enc_mk.encode()
+```
+
+
+### SA handler
+
+The SA handler for a SA trajectory can be initialized as follows:
+Block_size is the number of frames that will be used for each mutual information estimation and alphabet is the list of possible tokens for the selected alphabet. M32K25 and 3DI alphabets are provided by default.
+
+The SA trajectory can be loaded with .load_data. Each encoded frame should be one line.
+
+```python
+from Allohubpy import SAtraj
+sahandler = SAtraj.SAtraj(block_size=100, alphabet=SAtraj.ALPHABETS["M32K25"])
+sahandler.load_data("safile")
+```
+
+After loading the data one can compute:
+The probabilities of each fragment with: .get_probabilities()
+The transition matrix between fragments with: .compute_transitions()
+And the Shannon entropy: .compute_entropy()
+
+Plots can be created using the provided plotting functions.
+
+```python
+from Allohubpy.plotter import Allohub_plots
+
+entropy = sahandler.compute_entropy()
+Allohub_plots.plot_shannon_entropy_sd(entropy, ylim=(0,4), action="show")
+
+fragment_probs = sahandler.get_probabilities()
+Allohub_plots.plot_fragment_probabilities(probability_matrix=fragment_probs, vocabulary=SAtraj.ALPHABETS["M32K25"], action="show")
+
+transition_matrix = sahandler.compute_transitions()
+Allohub_plots.plot_transition_probabilities(trans_matrix=transition_matrix, vocabulary=SAtraj.ALPHABETS["M32K25"], action="show")
+```
+
+Finally, the mutual information matrices can be obtained by running:
+
+```python
+mi_array = sahandler.compute_mis(max_workers=8)
+```
+
+### Mutual information object
+
+The computed mutual information matrices are stored in an MI object. One can retrieve the matrix by calling .get_mi_matrix()
+The eigenvector decomposition can be done by calling .compute_eigensystem()
+
+MI matrices can also be combined using addition and subtraction.
+
+### Overlap object
+
+The obtained MI matrices, with their eigenvectors and eigenvalues computed, can then be passed to the overlap handler, which can be used to assess convergence and find up- and down-regulated fragments.
+
+```python
+from Allohubpy import Overlap
+
+overlap = Overlap.Overlap([mi_array1, mi_array2, ....], ev_list=[0,1,2])
+overlap.fill_overlap_matrix()
+
+# plot overlap
+Allohub_plots.plot_overlap(overlap.get_overlap_matrix(), vmax=0.4, action="show")
+
+# compute similarities
+similarity_matrix = overlap.compute_similarities()
+```
+
+For the up- and down-regulated fragments, one needs to provide a mapping of the mi_arrays to the condition they belong to.
+The method will return a dictionary of pandas dataframes for each combination of conditions.
+
+Each dataframe has the following columns:
+FragmentPairs, log2FoldChange, AdjustedPValues and PValues
+
+```python
+pdown_regulated_fragments = overlap.updown_regulation(traj_mapping=[0,0,0,1,1,1],splitting=True)
+t12_updown = updown_regulated_fragments[(0,1)]
+
+Allohub_plots.plot_updownregulation(t12_updown,  fold_threshold=2, ylim=10, pvalue_threshold=0.01, action="show")
+```
+
+### SA Network
+
+One can also create graph representations based on the mutual information.
+For that a matrix of distances from all c alphas to all c alphas is needed.
+
+The top pairs with the highest signal at a given distance will be selected to build the network.
+
+```python
+from Allohubpy import SANetwork
+
+SAgraph = SANetwork.SANetWork(traj_list= mi_array1 +  mi_array2 + ..., distance_limit=7)
+
+SAgraph.set_distance_matrix(matrix=distance_matrix, fragment_size=4)
+
+SAgraph.create_graph(threshold=90)
+```
+
+The graph can be extracted with .get_graph() and analyzed with .compute_centrality() or by extracting the shortest path from a set of selected residues.
+
+```python
+centrality_df = SAgraph_fbp.compute_centrality()
+Allohub_plots.plot_network_centrality(centrality_df, action="show")
+
+site1_fragments = [476, 509] 
+site2_fragments = [260, 281]
+
+# Subgraph is a networkx object with the nodes and edges of the shortest paths connecting those residues
+# Shortest_pathd ans shortest_distances are list of the shortest paths and their distances respectively.
+# z_score provides an estimate of how statistically coupled the two sites are
+subgraph, shortest_paths, shortest_distances, z_score = SAgraph_fbp.identify_preferential_connections(start_fragments=site1_fragments,
+                                                                                                       end_fragments=site2_fragments)
+
+Allohub_plots.plot_SA_graph(subgraph, site1_fragments, site2_fragments, action="show")
+
+```
+
+## Incorporating own alphabets
+
+The package also provides base classes to be used to create new alphabet encodings.
+
+```python
+from Allohubpy.TrajProcessor import AbsEncoder
+
+# Template for a custom alphabet encoder built on the AbsEncoder base class.
+class MyEncoder(AbsEncoder):
+
+
+    # atoms_to_keep should be the list of atom names that your encoding needs from the MD trajectory
+    def __init__(self, output_file_name):
+        super().__init__(atoms_to_keep=["CA", "CB", "N", "C"], output_file_name=output_file_name)
+
+
+
+    def process_frame(self, frame_dict):
+        # process_frame is called on every MD frame when one calls .encode()
+        # The function needs to return the encoded string for that frame.
+        # frame_dict is a dictionary with the following keys:
+        # "residues", containing the list of residues present
+        # One key for each atom name in atoms_to_keep, for example "CA", "CB", etc.
+        # Under each key there is a list of all the elements in that group, for example:
+        # "CA" will have all c alphas and "CB" all c betas.
+        # If a residue does not contain that atom name, it will not be present in the array, so len(CA) may differ from len(CB)
+        # One can use the residues list (three-letter codes) to deal with it.
+        
+        return encoded_string
+
+```