1 change: 1 addition & 0 deletions .gitignore
@@ -13,6 +13,7 @@ data/
dist/
node_modules/
venv/
local/

# Administration stuff (symlinked files)
executor_p2rank
11 changes: 11 additions & 0 deletions conservation/hmm_based/conservation_hmm_based.py
@@ -68,6 +68,17 @@ def compute_conservation(
        mask_output: bool,
        max_seqs: int,
):
    if database_file is None:
        raise RuntimeError(
            "HMM sequence database file is not set."
            " Set HMM_SEQUENCE_FILE environment variable.")
    if not os.path.isfile(database_file):
        raise RuntimeError(
            f"HMM sequence database file not found: {database_file}")
    if not os.access(database_file, os.R_OK):
        raise RuntimeError(
            f"HMM sequence database file is not readable: {database_file}")

    unweighted_msa_file = _generate_msa(
        fasta_file, database_file, working_directory, execute_command)

15 changes: 15 additions & 0 deletions docker-compose.yml
@@ -72,6 +72,21 @@ services:
      - predictions:/data/prankweb/predictions
      - docking:/data/prankweb/docking
      - tunnels:/data/prankweb/tunnels
  conservation-server:
    # TODO: Mount the conservation volume so the HMM database is available.
    # Use `docker compose run` with `-v` as described in conservation/README.md
    # for now. To make `docker compose up` work, add:
    # volumes:
    #   - conservation:/data/conservation
    build:
      context: ./
      dockerfile: ./executor-p2rank/Dockerfile
      args:
        UID: ${UID:-5988}
        GID: ${GID:-5988}
    command: ["uvicorn", "conservation.conservation_server.main:app", "--host", "0.0.0.0", "--port", "8030"]
    ports:
      - "8030:8030"
  executor-docking:
    build:
      context: ./
3 changes: 3 additions & 0 deletions executor-p2rank/Dockerfile
@@ -192,6 +192,9 @@ RUN chmod a+x ./p2rank.sh \
    && chmod a+x ./run_p2rank.py \
    && chmod a+x ./run_p2rank_task.py

COPY --chown=user:user ./executor-p2rank/conservation/conservation_server/requirements.txt ./conservation/conservation_server/
RUN pip3 install -r ./conservation/conservation_server/requirements.txt

#
# administration tools
WORKDIR /opt/administration
179 changes: 179 additions & 0 deletions executor-p2rank/conservation/README.md
@@ -0,0 +1,179 @@
# Conservation

Tools for computing HMM-based conservation scores for protein sequences (without running P2Rank predictions).
All tools are designed to run inside the `executor-p2rank` Docker image for now.

## Conservation server

A FastAPI web server that exposes conservation computation as an HTTP endpoint.

### Running the server using Docker

```bash
# to avoid running docker as root, add the current user to the docker group (you may need to log out and back in for this to take effect)
sudo usermod -aG docker $USER

# prepare data/cache directory on the host
mkdir -p /ssd/p2rank-conservation-docker-data/hmm-based

# download latest uniref database
cd /ssd/p2rank-conservation-docker-data/hmm-based
wget https://ftp.expasy.org/databases/uniprot/current_release/uniref/uniref50/uniref50.fasta.gz && gunzip uniref50.fasta.gz

# alternatively, you can download the database inside the container using the download_database.py script from prankweb,
# but that downloads an older version of uniref50 (from 2023-11-01), which may be useful for reproducing old results
docker compose run -it --rm -p 8030:8030 \
--user "$(id -u):$(id -g)" \
-v /ssd/p2rank-conservation-docker-data:/data/conservation \
conservation-server bash
/opt/hmm-based-conservation/download_database.py
# exit container

# build/rebuild docker image (only necessary after changes in this repo)
docker compose build conservation-server

# run server with mounted data directory
docker compose run --rm -p 8030:8030 \
--user "$(id -u):$(id -g)" \
-v /ssd/p2rank-conservation-docker-data:/data/conservation \
conservation-server
```

The server listens on port 8030.
The API documentation is available at http://localhost:8030/docs.

To run with more workers for concurrent requests:

```bash
docker compose run --rm -p 8030:8030 conservation-server \
uvicorn conservation.conservation_server.main:app \
--host 0.0.0.0 --port 8030 --workers 8
```

Set `--workers` to `n_cpu * 2` for CPU-bound workloads.

### Endpoints

#### GET /health

Returns `{"status": "ok"}` when the server is running.

```bash
curl http://localhost:8030/health
```

#### POST /conservation

Accepts FASTA content, runs the HMM conservation pipeline, returns the raw
`.hom` file as plain text.

Request body (JSON):
```json
{
"fasta_content": ">sp|P00520|ABL1_MOUSE\nMLPPGPLLLLLLLSGTARLFS"
}
```

Response (text/plain):
```
0 0.583 M
1 1.247 L
2 0.891 P
...
```

Example:

```bash
curl -X POST http://localhost:8030/conservation \
-H "Content-Type: application/json" \
-d '{"fasta_content": ">test\nGSMSDQEAKPSTEDLGDKKEGEYIKLKVIGQDSSEIHFKVKMTTHLKKLKESYCQRQGVPMNSLRFLFEGQRIADNHTPKELGMEEEDVIEVYQEQTGGHSTV"}'
```
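The same request can be made from Python. Below is a minimal sketch using only the standard library; the helper names (`build_payload`, `post_conservation`) and the server URL are illustrative assumptions, not part of the repository:

```python
import json
import urllib.request

SERVER_URL = "http://localhost:8030"  # assumed from the examples above


def build_payload(fasta_content: str) -> bytes:
    """Encode the JSON request body expected by POST /conservation."""
    return json.dumps({"fasta_content": fasta_content}).encode("utf-8")


def post_conservation(fasta_content: str, url: str = SERVER_URL) -> str:
    """Send FASTA content to the server and return the raw .hom text."""
    request = urllib.request.Request(
        url + "/conservation",
        data=build_payload(fasta_content),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")
```

Using `requests` instead of `urllib` works the same way; the only contract is the `fasta_content` JSON field and the plain-text response.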

### Interactive API docs

FastAPI auto-generates interactive API documentation at:

- Swagger UI: http://localhost:8030/docs
- ReDoc: http://localhost:8030/redoc

## Contents

- `run_conservation.py` -- CLI script that computes conservation for a single FASTA file.
- `calculate_conservations_batch.sh` -- Batch script that runs `run_conservation.py` in parallel over a directory of FASTA files.
- `conservation_server/` -- FastAPI web server that exposes conservation computation via HTTP.

## Data directory

The conservation pipeline requires the HMM sequence database and optionally
a cache directory. Inside the Docker container these are configured via
environment variables baked into the image:

| Variable | Default in image | Description |
|---|---|---|
| `HMM_SEQUENCE_FILE` | `/data/conservation/hmm-based/uniref50.fasta` | UniRef50 FASTA database used by HMMER |
| `HMM_CONSERVATION_CACHE` | `/data/conservation/hmm-based-cache/` | Directory for caching computed scores |
| `HMMER_DIR` | `/opt/hmm-based-conservation-dependencies/hmmer-3.3.2/bin/` | Path to HMMER binaries (phmmer, esl-weight, esl-alistat) |

The database files are stored in the `prankweb_conservation` Docker volume,
mounted at `/data/conservation` inside the container.
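As a sketch, resolving these variables with the in-image defaults from the table looks like the following; the actual prankweb pipeline code may structure this lookup differently:

```python
import os

# Fallback values match the "Default in image" column above.
HMM_SEQUENCE_FILE = os.environ.get(
    "HMM_SEQUENCE_FILE", "/data/conservation/hmm-based/uniref50.fasta")
HMM_CONSERVATION_CACHE = os.environ.get(
    "HMM_CONSERVATION_CACHE", "/data/conservation/hmm-based-cache/")
HMMER_DIR = os.environ.get(
    "HMMER_DIR",
    "/opt/hmm-based-conservation-dependencies/hmmer-3.3.2/bin/")
```

Overriding any of these at `docker compose run` time (e.g. `-e HMM_SEQUENCE_FILE=...`) therefore takes precedence over the baked-in defaults.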

## Building the Docker image

The image is built from the project root (not from this directory) because
the Dockerfile references multiple project directories:

```bash
cd /path/to/prankweb # project root containing docker-compose.yml
docker compose build conservation-server
```

This uses the same multi-stage `executor-p2rank/Dockerfile` that builds
HMMER, BLAST, and other dependencies from source.

## run_conservation.py

Computes HMM-based conservation for a single FASTA file.

```bash
./run_conservation.py --file /path/to/input.fasta --output /path/to/output --working /tmp/work
```

Arguments:
- `--file` -- Path to a FASTA file containing a single protein sequence.
- `--output` -- Directory where the `.hom` output file will be written (default: `./`).
- `--working` -- Temporary working directory for intermediate HMMER files (default: `./working`).

Output is a `.hom` file (TSV with columns: position index, conservation score, amino acid).
The file is named after the input: `input.fasta` produces `input.hom`.
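The `.hom` rows (position index, conservation score, amino acid) are simple to consume programmatically. A sketch of a parser; the helper name `parse_hom` is illustrative, not part of the repository:

```python
def parse_hom(text: str) -> list[tuple[int, float, str]]:
    """Parse .hom content into (position, score, amino_acid) tuples."""
    rows = []
    for line in text.splitlines():
        if not line.strip():
            continue  # skip blank lines
        # Columns are whitespace-separated: index, score, amino acid.
        index, score, amino_acid = line.split()
        rows.append((int(index), float(score), amino_acid))
    return rows
```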

Example inside Docker:

```bash
docker compose run --rm conservation-server \
python conservation/run_conservation.py \
--file /data/conservation/example.fasta \
--output /tmp/out \
--working /tmp/work
```

## calculate_conservations_batch.sh

Batch wrapper that finds all `.fasta` files in a directory and processes them
in parallel using `xargs -P`.

```bash
./calculate_conservations_batch.sh <n_processes> <input_dir> <output_dir>
```

Arguments:
1. Number of parallel processes.
2. Input directory containing `.fasta` files.
3. Output directory for `.hom` files.

Example inside Docker:

```bash
docker compose run --rm conservation-server \
bash conservation/calculate_conservations_batch.sh 4 /data/input /data/output
```
22 changes: 22 additions & 0 deletions executor-p2rank/conservation/calculate_conservations_batch.sh
@@ -0,0 +1,22 @@
#!/bin/bash

n_proc=$1
input_dir=$2
output_dir=$3

work_dir="${output_dir}/.work_dir"

n=$(find "$input_dir" -name "*.fasta" | wc -l)

echo "processing $n fasta files"

mkdir -p "$output_dir"
mkdir -p "$work_dir"

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

find "$input_dir" -name "*.fasta" | xargs -P "${n_proc}" -I{} "$SCRIPT_DIR/run_conservation.py" --file {} --output "$output_dir" --working "$work_dir"

echo "processed $n fasta files"
echo "output_dir: $output_dir"
echo Done.
92 changes: 92 additions & 0 deletions executor-p2rank/conservation/conservation_server/main.py
@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
Minimal FastAPI server wrapping the HMM-based conservation computation.
"""

import logging
import os
import sys
import tempfile
import shutil
import traceback

from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel, Field

# Add executor-p2rank root to path (same pattern as run_conservation.py:15).
_executor_root = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))
if _executor_root not in sys.path:
    sys.path.insert(0, _executor_root)

from conservation.run_conservation import main as run_conservation_main

logger = logging.getLogger("conservation_server")

app = FastAPI(title="Conservation Server")


class ConservationRequest(BaseModel):
    fasta_content: str = Field(
        ...,
        description="Raw FASTA content (header line + sequence).",
    )


@app.get("/health")
def health():
    return {"status": "ok"}


@app.post("/conservation", response_class=PlainTextResponse)
def compute_conservation(request: ConservationRequest):
    """
    Accept FASTA content, run HMM conservation pipeline, return raw .hom file.

    Sync endpoint — FastAPI runs it in a thread pool automatically,
    so the subprocess-heavy computation does not block the event loop.
    """
    tmp_dir = tempfile.mkdtemp(prefix="conservation_")
    try:
        fasta_path = os.path.join(tmp_dir, "input.fasta")
        with open(fasta_path, "w") as f:
            f.write(request.fasta_content)
            if not request.fasta_content.endswith("\n"):
                f.write("\n")

        working_dir = os.path.join(tmp_dir, "working")
        output_dir = os.path.join(tmp_dir, "output")
        os.makedirs(working_dir)
        os.makedirs(output_dir)

        run_conservation_main({
            "file": fasta_path,
            "working": working_dir,
            "output": output_dir,
        })

        hom_file = os.path.join(output_dir, "input.hom")
        if not os.path.exists(hom_file):
            raise HTTPException(
                status_code=500,
                detail="Conservation computation failed: output file not produced.",
            )

        with open(hom_file) as f:
            return f.read()

    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Conservation computation failed.")
        # TODO: Reconsider exposing full traceback in production.
        # Consider returning only a generic error message and logging
        # the traceback server-side only.
        raise HTTPException(status_code=500, detail={
            "error": type(e).__name__,
            "message": str(e),
            "traceback": traceback.format_exc().splitlines(),
        })
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
2 changes: 2 additions & 0 deletions executor-p2rank/conservation/conservation_server/requirements.txt
@@ -0,0 +1,2 @@
fastapi==0.115.6
uvicorn[standard]==0.34.0