From 8c635b3b732751cf35454d3eb483466c7e5100b4 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 18 Feb 2026 09:03:45 -0500 Subject: [PATCH 01/31] Add Swagger documentation for API query parameters; improve fhr metadata and other gtars access --- refget/__init__.py | 10 ++++++-- refget/cli/store.py | 37 ++++++++++++++++++++++++++++ refget/router.py | 59 +++++++++++++++++++++++++++++---------------- refget/store.py | 17 +++++++++++-- 4 files changed, 98 insertions(+), 25 deletions(-) diff --git a/refget/__init__.py b/refget/__init__.py index 9b53cc3..8129a43 100644 --- a/refget/__init__.py +++ b/refget/__init__.py @@ -2,11 +2,10 @@ refget - GA4GH reference sequence and sequence collection tools. Import from submodules: - from refget.store import RefgetStore, digest_fasta, StorageMode + from refget.store import RefgetStore, digest_fasta, StorageMode, compute_fai, digest_sequence, SequenceCollection from refget.digests import sha512t24u_digest, md5_digest, ga4gh_digest from refget.utils import compare_seqcols, validate_seqcol, seqcol_digest from refget.clients import SequenceCollectionClient, FastaDrsClient - from refget.models import SequenceCollection from refget.router import create_refget_router from refget.agents import RefgetDBAgent """ @@ -15,10 +14,17 @@ from .exceptions import InvalidSeqColError from .const import GTARS_INSTALLED from .utils import canonical_str +from .store import RefgetStore, StorageMode, digest_fasta, compute_fai, digest_sequence, SequenceCollection __all__ = [ "__version__", "InvalidSeqColError", "GTARS_INSTALLED", "canonical_str", + "RefgetStore", + "StorageMode", + "digest_fasta", + "compute_fai", + "digest_sequence", + "SequenceCollection", ] diff --git a/refget/cli/store.py b/refget/cli/store.py index 7df195b..99370d7 100644 --- a/refget/cli/store.py +++ b/refget/cli/store.py @@ -906,6 +906,10 @@ def _remove_collection_from_store(store_path: Path, digest: str) -> bool: if collection_file.exists(): collection_file.unlink() + # Remove the FHR metadata sidecar file (if it exists) + fhr_file = store_path / "collections" / f"{digest}.fhr.json" + fhr_file.unlink(missing_ok=True) + return True @@ -946,3 +950,36 @@ def remove( } ) raise typer.Exit(EXIT_SUCCESS) + + +@app.command() +def metadata( + digest: str = typer.Argument(help="Collection digest"), + path: Optional[Path] = typer.Option( + None, "--path", "-p", help="Store path" + ), +): + """Show FHR metadata for a collection.""" + store = _load_store(path) + fhr = store.get_fhr_metadata(digest) + if fhr is None: + print_error(f"No FHR metadata for collection {digest}", EXIT_FAILURE) + import json + + print(json.dumps(fhr.to_dict(), indent=2)) + raise typer.Exit(EXIT_SUCCESS) + + +@app.command("metadata-set") +def metadata_set( + digest: str = typer.Argument(help="Collection digest"), + file: Path = typer.Argument(help="Path to FHR JSON file"), + path: Optional[Path] = typer.Option( + None, "--path", "-p", help="Store path" + ), +): + """Set FHR metadata for a collection from a JSON file.""" + store = _load_store(path) + store.load_fhr_metadata(digest, str(file)) + print(f"Set FHR metadata for collection {digest}") + raise typer.Exit(EXIT_SUCCESS) diff --git a/refget/router.py b/refget/router.py index 96dbd9b..82e768d 100644 --- a/refget/router.py +++ b/refget/router.py @@ -20,7 +20,7 @@ import logging -from fastapi import APIRouter, Response, HTTPException, Request, Depends +from fastapi import APIRouter, Response, HTTPException, Request, Depends, Query from .models import Similarities, PaginationResult, 
PaginatedDigestList from .agents import RefgetDBAgent @@ -100,8 +100,8 @@ def create_refget_router( async def sequence( dbagent=Depends(get_dbagent), sequence_digest: str = example_sequence, - start: int = None, - end: int = None, + start: int | None = Query(None, description="Start position (0-based, inclusive)"), + end: int | None = Query(None, description="End position (0-based, exclusive)"), ): return Response(content=dbagent.seq.get(sequence_digest, start, end), media_type="text/plain") @@ -126,9 +126,9 @@ async def seq_metadata(dbagent=Depends(get_dbagent), sequence_digest: str = exam async def collection( dbagent=Depends(get_dbagent), collection_digest: str = example_collection_digest, - level: int | None = None, - collated: bool = True, - attribute: str = None, + level: int | None = Query(None, description="Recursion depth (1 or 2)", ge=1, le=2), + collated: bool = Query(True, description="Return collated format (arrays) vs itemwise"), + attribute: str | None = Query(None, description="Return only this attribute (e.g., 'names', 'lengths')"), ): if level == None: level = 2 @@ -212,9 +212,9 @@ async def compare_2_digests( ) async def calc_similarities( collection_digest: str, - species: str = "human", - page_size: int = 50, - page: int = 0, + species: str = Query("human", description="Species to filter by ('human' or 'mouse')"), + page_size: int = Query(50, description="Number of results per page"), + page: int = Query(0, description="Page number (0-indexed)"), dbagent=Depends(get_dbagent), ) -> Similarities: _LOGGER.info("Calculating Jaccard similarities...") @@ -235,9 +235,9 @@ async def calc_similarities( ) async def calc_similarities_from_json( seqcolA: dict, - species: str = "human", - page_size: int = 50, - page: int = 0, + species: str = Query("human", description="Species to filter by ('human' or 'mouse')"), + page_size: int = Query(50, description="Number of results per page"), + page: int = Query(0, description="Page number (0-indexed)"), dbagent=Depends(get_dbagent), ) -> Similarities: """ @@ -341,13 +341,25 @@ async def compare_1_digest( response_model=PaginatedDigestList, ) async def list_collections_by_offset( - request: Request, dbagent=Depends(get_dbagent), - page_size: int = 100, - page: int = 0, + page_size: int = Query(100, description="Number of results per page"), + page: int = Query(0, description="Page number (0-indexed)"), + names: str | None = Query(None, description="Filter by names attribute digest"), + lengths: str | None = Query(None, description="Filter by lengths attribute digest"), + sequences: str | None = Query(None, description="Filter by sequences attribute digest"), + name_length_pairs: str | None = Query(None, description="Filter by name_length_pairs digest"), + sorted_sequences: str | None = Query(None, description="Filter by sorted_sequences digest"), ): - # Extract all query params except pagination params - filters = {k: v for k, v in request.query_params.items() if k not in ["page", "page_size"]} + # Build filters from explicit parameters + filters = { + k: v for k, v in { + "names": names, + "lengths": lengths, + "sequences": sequences, + "name_length_pairs": name_length_pairs, + "sorted_sequences": sorted_sequences, + }.items() if v is not None + } if filters: try: @@ -373,7 +385,10 @@ async def list_collections_by_offset( response_model=PaginatedDigestList, ) async def list_attributes( - dbagent=Depends(get_dbagent), attribute: str = "names", page_size: int = 100, page: int = 0 + dbagent=Depends(get_dbagent), + attribute: str = "names", + 
page_size: int = Query(100, description="Number of results per page"), + page: int = Query(0, description="Page number (0-indexed)"), ): try: res = dbagent.attribute.list(attribute, limit=page_size, offset=page * page_size) @@ -397,7 +412,9 @@ async def list_attributes( response_model=PaginatedDigestList, ) async def list_cpangenomes_by_offset( - dbagent=Depends(get_dbagent), page_size: int = 100, page: int = 0 + dbagent=Depends(get_dbagent), + page_size: int = Query(100, description="Number of results per page"), + page: int = Query(0, description="Page number (0-indexed)"), ): res = dbagent.pangenome.list_by_offset(limit=page_size, offset=page * page_size) res["results"] = [x.digest for x in res["results"]] @@ -413,8 +430,8 @@ async def list_cpangenomes_by_offset( async def pangenome( dbagent=Depends(get_dbagent), pangenome_digest: str = example_pangenome_digest, - level: int | None = None, - collated: bool = True, + level: int | None = Query(None, description="Recursion depth (1-4)", ge=1, le=4), + collated: bool = Query(True, description="Return collated format (arrays) vs itemwise"), ): if level == None: level = 2 diff --git a/refget/store.py b/refget/store.py index 504c27b..042b90c 100644 --- a/refget/store.py +++ b/refget/store.py @@ -8,15 +8,28 @@ from .const import GTARS_INSTALLED if GTARS_INSTALLED: - from gtars.refget import RefgetStore, digest_fasta, StorageMode + from gtars.refget import ( + RefgetStore, + StorageMode, + digest_fasta, + compute_fai, + digest_sequence, + SequenceCollection, + ) else: RefgetStore = None - digest_fasta = None StorageMode = None + digest_fasta = None + compute_fai = None + digest_sequence = None + SequenceCollection = None __all__ = [ "RefgetStore", "digest_fasta", "StorageMode", + "compute_fai", + "digest_sequence", + "SequenceCollection", "GTARS_INSTALLED", ] From afc7a2dc028d5e8aab410d4d0c2292ee7aa8167c Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 18 Feb 2026 10:09:08 -0500 Subject: [PATCH 02/31] add local store lookup capability to the seqcol CLI commands --- refget/cli/seqcol.py | 111 ++++++++++++++++++++++- test_fasta/base.farg | 8 ++ tests/test_cli/test_seqcol_commands.py | 119 +++++++++++++++++++++++++ 3 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 test_fasta/base.farg diff --git a/refget/cli/seqcol.py b/refget/cli/seqcol.py index 2e83ab1..0bc3881 100644 --- a/refget/cli/seqcol.py +++ b/refget/cli/seqcol.py @@ -19,7 +19,7 @@ import typer -from refget.cli.config_manager import get_seqcol_servers +from refget.cli.config_manager import get_seqcol_servers, get_store_path from refget.cli.output import ( EXIT_FAILURE, EXIT_NETWORK_ERROR, @@ -32,6 +32,7 @@ # Heavy imports moved inside functions to speed up CLI startup: # - refget.clients (requests ~51ms) # - refget.utils (jsonschema ~60ms) +# - refget.store (gtars ~100ms) def _get_client(server_override: Optional[str] = None): @@ -54,6 +55,101 @@ def _get_client(server_override: Optional[str] = None): return SequenceCollectionClient(urls=urls, raise_errors=False) +def _collection_to_seqcol_dict(store, digest: str, level: int = 2) -> Optional[dict]: + """ + Convert a RefgetStore collection to seqcol API dict format. + + Args: + store: RefgetStore instance with the collection loaded + digest: Collection digest + level: 1 for attribute digests only, 2 for full arrays + + Returns: + Seqcol dict in API format, or None if collection not found. 
+ """ + from refget.utils import canonical_str + from refget.digests import sha512t24u_digest + + names = [] + lengths = [] + sequences = [] + + for coll in store.iter_collections(): + if coll.digest == digest: + for seq in coll.sequences: + m = seq.metadata + names.append(m.name) + lengths.append(m.length) + sequences.append("SQ." + m.sha512t24u) + break + else: + # Collection not found in iteration + return None + + if not names: + return None + + if level == 1: + # Return digests of arrays instead of arrays themselves + return { + "names": sha512t24u_digest(canonical_str(names)), + "lengths": sha512t24u_digest(canonical_str(lengths)), + "sequences": sha512t24u_digest(canonical_str(sequences)), + } + else: + # Level 2: return full arrays + return { + "names": names, + "lengths": lengths, + "sequences": sequences, + } + + +def _get_local_seqcol(digest: str, level: int = 2) -> Optional[dict]: + """ + Try to get a seqcol from the local RefgetStore. + + Args: + digest: Collection digest to look up + level: 1 for attribute digests only, 2 for full arrays + + Returns: + Seqcol dict if found locally, None otherwise. + """ + try: + from refget.store import RefgetStore + except ImportError: + # gtars not installed - can't use local store + return None + + store_path = get_store_path() + rgstore_path = store_path / "rgstore.json" + + # Check if store exists + if not store_path.exists() or not rgstore_path.exists(): + return None + + try: + store = RefgetStore.open_local(str(store_path)) + store.set_quiet(True) + + # Check if collection exists + collection_digests = {meta.digest for meta in store.list_collections()} + if digest not in collection_digests: + return None + + # Load the collection (triggers lazy loading if needed) + if not store.is_collection_loaded(digest): + store.get_collection(digest) + + # Convert to seqcol dict format + return _collection_to_seqcol_dict(store, digest, level) + + except Exception: + # Any error (store corruption, etc.) - fall back to remote + return None + + def _compute_snlp_digest(seqcol_dict: dict) -> str: """ Compute the sorted_name_length_pairs digest from a seqcol dict. @@ -134,6 +230,12 @@ def _load_seqcol(input_str: str, client, level: int = 2) -> Optional[dict]: return None else: # digest + # Try local store first + result = _get_local_seqcol(input_str, level=level) + if result is not None: + return result + + # Fall back to remote result = client.get_collection(input_str, level=level) if result is None: print_error(f"Could not fetch seqcol for digest: {input_str}", EXIT_FAILURE) @@ -176,6 +278,13 @@ def show( Level 1 returns attribute digests only. Level 2 (default) returns full arrays. 
""" + # Try local store first + result = _get_local_seqcol(digest, level=level) + if result is not None: + print_json(result) + raise typer.Exit(EXIT_SUCCESS) + + # Fall back to remote servers client = _get_client(server) try: diff --git a/test_fasta/base.farg b/test_fasta/base.farg new file mode 100644 index 0000000..5a3c2fe --- /dev/null +++ b/test_fasta/base.farg @@ -0,0 +1,8 @@ +##seqcol_digest=XZlrcEGi6mlopZ2uD8ObHkQB1d0oDwKk +##names_digest=Fw1r9eRxfOZD98KKrhlYQNEdSRHoVxAG +##sequences_digest=0uDQVLuHaOZi1u76LjV__yrVUIz9Bwhr +##lengths_digest=cGRMZIb3AVgkcAfNv39RN7hnT5Chk7RX +#name length alphabet sha512t24u md5 +chrX 8 dna2bit iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +chr1 4 dna2bit YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 +chr2 4 dna2bit AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc diff --git a/tests/test_cli/test_seqcol_commands.py b/tests/test_cli/test_seqcol_commands.py index de8324e..88887d6 100644 --- a/tests/test_cli/test_seqcol_commands.py +++ b/tests/test_cli/test_seqcol_commands.py @@ -214,3 +214,122 @@ def test_invalid_file_format(self, cli, tmp_path): result = cli("seqcol", "digest", str(invalid)) assert result.exit_code != 0 + + +class TestSeqcolLocalStoreLookup: + """Tests for local store lookup in seqcol show and compare commands.""" + + def test_show_from_local_store(self, cli, populated_store): + """Show command retrieves collection from local store.""" + digest = populated_store["digest"] + store_path = populated_store["path"] + + # Use REFGET_STORE env var to point to our test store + import os + + old_env = os.environ.get("REFGET_STORE") + os.environ["REFGET_STORE"] = str(store_path) + + try: + result = cli("seqcol", "show", digest) + + assert result.exit_code == 0 + data = json.loads(result.stdout) + # Level 2 (default) should have arrays + assert "names" in data + assert "lengths" in data + assert "sequences" in data + assert isinstance(data["names"], list) + finally: + if old_env: + os.environ["REFGET_STORE"] = old_env + elif "REFGET_STORE" in os.environ: + del os.environ["REFGET_STORE"] + + def test_show_from_local_store_level1(self, cli, populated_store): + """Show command with level=1 returns digests from local store.""" + digest = populated_store["digest"] + store_path = populated_store["path"] + + import os + + old_env = os.environ.get("REFGET_STORE") + os.environ["REFGET_STORE"] = str(store_path) + + try: + result = cli("seqcol", "show", digest, "--level", "1") + + assert result.exit_code == 0 + data = json.loads(result.stdout) + # Level 1 should have string digests, not arrays + assert "names" in data + assert "lengths" in data + assert "sequences" in data + assert isinstance(data["names"], str) + assert isinstance(data["lengths"], str) + assert isinstance(data["sequences"], str) + finally: + if old_env: + os.environ["REFGET_STORE"] = old_env + elif "REFGET_STORE" in os.environ: + del os.environ["REFGET_STORE"] + + def test_compare_uses_local_store_for_digest(self, cli, populated_store): + """Compare command resolves digest inputs from local store first.""" + digest = populated_store["digest"] + store_path = populated_store["path"] + + import os + + old_env = os.environ.get("REFGET_STORE") + os.environ["REFGET_STORE"] = str(store_path) + + try: + # Compare local store collection with itself + result = cli("seqcol", "compare", digest, digest) + + # Should succeed (both resolved from local store) + assert result.exit_code == 0 + data = json.loads(result.stdout) + assert 
data.get("compatible", False) is True + finally: + if old_env: + os.environ["REFGET_STORE"] = old_env + elif "REFGET_STORE" in os.environ: + del os.environ["REFGET_STORE"] + + def test_compare_local_digest_with_fasta(self, cli, populated_store): + """Compare local store digest with FASTA file.""" + digest = populated_store["digest"] + store_path = populated_store["path"] + + import os + + old_env = os.environ.get("REFGET_STORE") + os.environ["REFGET_STORE"] = str(store_path) + + try: + # Compare local store collection with original FASTA + result = cli("seqcol", "compare", digest, str(BASE_FASTA)) + + # Should succeed and show they are compatible (same content) + assert result.exit_code == 0 + data = json.loads(result.stdout) + assert data.get("compatible") is True + finally: + if old_env: + os.environ["REFGET_STORE"] = old_env + elif "REFGET_STORE" in os.environ: + del os.environ["REFGET_STORE"] + + def test_show_nonexistent_digest_not_in_local_store(self, cli, temp_store, monkeypatch): + """Show command falls back to remote for digest not in local store.""" + # Use a digest that doesn't exist anywhere + fake_digest = "NONEXISTENT123456789012345678901234567890" + + monkeypatch.setenv("REFGET_STORE", str(temp_store)) + + result = cli("seqcol", "show", fake_digest) + + # Should fail (not in local store, not on remote servers) + assert result.exit_code != 0 From 09646a241abbf195b8412fd1249c9b96395a7f61 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 18 Feb 2026 12:36:20 -0500 Subject: [PATCH 03/31] improve fasta digest ui --- frontend/src/features/digest/DigestPage.jsx | 110 +++++++++++++++--- .../src/features/digest/fastaDigestWorker.js | 73 ++++++++++-- 2 files changed, 156 insertions(+), 27 deletions(-) diff --git a/frontend/src/features/digest/DigestPage.jsx b/frontend/src/features/digest/DigestPage.jsx index 4eb2369..88ca6f8 100644 --- a/frontend/src/features/digest/DigestPage.jsx +++ b/frontend/src/features/digest/DigestPage.jsx @@ -1,4 +1,4 @@ -import { useState, useRef, useEffect } from 'react'; +import { useState, useRef, useEffect, useCallback } from 'react'; import { useSearchParams, useNavigate } from 'react-router-dom'; import toast from 'react-hot-toast'; import FastaDropzone from './FastaDropzone'; @@ -54,6 +54,13 @@ function loadFromHistory(digest) { } } +function createWorker() { + return new Worker( + new URL('./fastaDigestWorker.js', import.meta.url), + { type: 'module' } + ); +} + export default function DigestPage() { const [searchParams] = useSearchParams(); const navigate = useNavigate(); @@ -63,6 +70,7 @@ export default function DigestPage() { const [progress, setProgress] = useState(null); const [error, setError] = useState(null); const [history, setHistory] = useState([]); + const [stats, setStats] = useState(null); const workerRef = useRef(null); // Load history on mount @@ -85,15 +93,16 @@ export default function DigestPage() { } }, [searchParams]); - // Initialize worker - useEffect(() => { - workerRef.current = new Worker( - new URL('./fastaDigestWorker.js', import.meta.url), - { type: 'module' } - ); + const setupWorker = useCallback(() => { + // Terminate existing worker if any + if (workerRef.current) { + workerRef.current.terminate(); + } + + const worker = createWorker(); - workerRef.current.onmessage = (e) => { - const { type, result, message, bytesProcessed, totalSize, percent } = e.data; + worker.onmessage = (e) => { + const { type, result, message, bytesProcessed, totalSize, percent, stats: workerStats } = e.data; if (type === 'status') { 
setStatus(message); @@ -103,8 +112,19 @@ export default function DigestPage() { setResult(result); setStatus(null); setProgress(null); + if (workerStats) { + setStats(workerStats); + if (import.meta.env.DEV) { + console.log('[FASTA Digest]', { + chunks: workerStats.chunks, + avgChunkSize: `${(workerStats.avgChunkSize / 1024).toFixed(1)} KB`, + elapsed: `${(workerStats.elapsedMs / 1000).toFixed(1)}s`, + throughput: `${(workerStats.totalBytes / workerStats.elapsedMs / 1024).toFixed(1)} MB/s` + }); + } + } // Save to localStorage - const name = workerRef.current._fileName; + const name = worker._fileName; saveToHistory(result, name); setHistory(getHistory()); // Update URL @@ -115,20 +135,47 @@ export default function DigestPage() { setStatus(null); setProgress(null); toast.error(message); + } else if (type === 'cancelled') { + setStatus(null); + setProgress(null); + setError('Processing cancelled.'); } }; - return () => workerRef.current?.terminate(); + workerRef.current = worker; + return worker; }, []); + // Initialize worker on mount + useEffect(() => { + setupWorker(); + return () => workerRef.current?.terminate(); + }, [setupWorker]); + const handleFileSelected = (file) => { + // Cancel and replace any running worker to prevent double-processing + const worker = setupWorker(); setFileName(file.name); setResult(null); setError(null); setProgress(null); + setStats(null); setStatus('Starting...'); - workerRef.current._fileName = file.name; - workerRef.current.postMessage({ file }); + worker._fileName = file.name; + worker.postMessage({ file }); + }; + + const handleCancel = () => { + if (workerRef.current) { + workerRef.current.postMessage({ type: 'cancel' }); + } + }; + + const handleClear = () => { + setError(null); + setStatus(null); + setProgress(null); + setStats(null); }; const handleHistoryClick = (digest) => { @@ -304,6 +351,12 @@ export default function DigestPage() {
{status} +
{progress && ( @@ -328,11 +381,19 @@ export default function DigestPage() { )} - {/* Error */} + {/* Error or Cancelled */} {error && ( -
- - {error} +
+
+ + {error} +
+
)} @@ -346,6 +407,21 @@ export default function DigestPage() { onDownloadRgsi={handleDownloadRgsi} /> + {/* Processing Stats (collapsed) */} + {stats && result && ( +
+ + Processing details + +
+
Chunks processed: {stats.chunks.toLocaleString()}
+
Average chunk size: {(stats.avgChunkSize / 1024).toFixed(1)} KB
+
Elapsed time: {(stats.elapsedMs / 1000).toFixed(1)}s
+
Throughput: {(stats.totalBytes / stats.elapsedMs / 1024).toFixed(1)} MB/s
+
+
+ )} + {/* History */} {history.length > 0 && (
diff --git a/frontend/src/features/digest/fastaDigestWorker.js b/frontend/src/features/digest/fastaDigestWorker.js index 616aed6..86e9178 100644 --- a/frontend/src/features/digest/fastaDigestWorker.js +++ b/frontend/src/features/digest/fastaDigestWorker.js @@ -2,7 +2,10 @@ // Runs in background thread to avoid freezing UI. // Uses streaming API for files of any size. +const PROGRESS_INTERVAL_MS = 200; // Max 5 updates/sec +let lastProgressTime = 0; let wasmModule = null; +let cancelled = false; async function initWasm() { if (wasmModule) return wasmModule; @@ -14,7 +17,17 @@ async function initWasm() { } self.onmessage = async (e) => { + const { type } = e.data; + + if (type === 'cancel') { + cancelled = true; + return; + } + const { file } = e.data; + cancelled = false; + + const stats = { chunks: 0, totalBytes: 0, startTime: Date.now() }; try { self.postMessage({ type: 'status', message: 'Loading WASM module...' }); @@ -34,25 +47,55 @@ self.onmessage = async (e) => { const totalSize = file.size; while (true) { + if (cancelled) { + reader.cancel(); + gtars.fastaHasherFree(hasher); + self.postMessage({ type: 'cancelled' }); + return; + } + const { done, value } = await reader.read(); if (done) break; - // Pass chunk directly to Rust - no parsing in JS - gtars.fastaHasherUpdate(hasher, value); + try { + gtars.fastaHasherUpdate(hasher, value); + } catch (err) { + gtars.fastaHasherFree(hasher); + const msg = err.message || ''; + if (msg.toLowerCase().includes('fasta') || msg.toLowerCase().includes('parse')) { + self.postMessage({ type: 'error', message: `Invalid FASTA format: ${msg}`, category: 'parse' }); + } else { + self.postMessage({ type: 'error', message: `WASM processing error: ${msg}`, category: 'wasm' }); + } + return; + } + stats.chunks++; bytesProcessed += value.length; - self.postMessage({ - type: 'progress', - bytesProcessed, - totalSize, - percent: Math.round(100 * bytesProcessed / totalSize) - }); + stats.totalBytes = bytesProcessed; + + const now = Date.now(); + if (now - lastProgressTime >= PROGRESS_INTERVAL_MS) { + lastProgressTime = now; + self.postMessage({ + type: 'progress', + bytesProcessed, + totalSize, + percent: Math.round(100 * bytesProcessed / totalSize) + }); + } } + // Send final progress to ensure 100% + self.postMessage({ type: 'progress', bytesProcessed: totalSize, totalSize, percent: 100 }); + // Finalize and get result self.postMessage({ type: 'status', message: 'Computing final digests...' }); const result = gtars.fastaHasherFinish(hasher); - self.postMessage({ type: 'result', result }); + + stats.elapsedMs = Date.now() - stats.startTime; + stats.avgChunkSize = stats.chunks > 0 ? 
Math.round(stats.totalBytes / stats.chunks) : 0; + self.postMessage({ type: 'result', result, stats }); } catch (err) { gtars.fastaHasherFree(hasher); // Cleanup on error @@ -60,6 +103,16 @@ self.onmessage = async (e) => { } } catch (error) { - self.postMessage({ type: 'error', message: error.message || 'Processing failed' }); + const msg = error.message || 'Processing failed'; + let category = 'unknown'; + if (msg.toLowerCase().includes('gzip') || msg.toLowerCase().includes('decompress') || msg.toLowerCase().includes('corrupt')) { + category = 'gzip'; + self.postMessage({ type: 'error', message: `File appears corrupted or is not valid gzip: ${msg}`, category }); + } else if (msg.toLowerCase().includes('stream') || msg.toLowerCase().includes('read')) { + category = 'stream'; + self.postMessage({ type: 'error', message: `Error reading file: ${msg}`, category }); + } else { + self.postMessage({ type: 'error', message: msg, category }); + } } }; From 58f57990b006d75ed4e5877c3c7f8202e11cb360 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 18 Feb 2026 12:47:23 -0500 Subject: [PATCH 04/31] fix cancel --- frontend/src/features/digest/DigestPage.jsx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/frontend/src/features/digest/DigestPage.jsx b/frontend/src/features/digest/DigestPage.jsx index 88ca6f8..0ea1f59 100644 --- a/frontend/src/features/digest/DigestPage.jsx +++ b/frontend/src/features/digest/DigestPage.jsx @@ -167,8 +167,12 @@ export default function DigestPage() { const handleCancel = () => { if (workerRef.current) { - workerRef.current.postMessage({ type: 'cancel' }); + workerRef.current.terminate(); + workerRef.current = null; } + setStatus(null); + setProgress(null); + setError('Processing cancelled.'); }; const handleClear = () => { From eb2ce703daee6541a2810f323870aa0c68a11290 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 18 Feb 2026 13:15:31 -0500 Subject: [PATCH 05/31] fix comparison links --- frontend/.env.production | 1 + frontend/src/main.jsx | 1 + frontend/src/pages/ComparisonView.jsx | 54 +++++++++++++-------------- 3 files changed, 29 insertions(+), 27 deletions(-) create mode 100644 frontend/.env.production diff --git a/frontend/.env.production b/frontend/.env.production new file mode 100644 index 0000000..4c7c46f --- /dev/null +++ b/frontend/.env.production @@ -0,0 +1 @@ +VITE_API_BASE=https://seqcolapi.databio.org diff --git a/frontend/src/main.jsx b/frontend/src/main.jsx index 2268aef..1ad13fe 100644 --- a/frontend/src/main.jsx +++ b/frontend/src/main.jsx @@ -351,6 +351,7 @@ const router = createBrowserRouter([ { path: '/scim/:digest1/:digest2', element: , + errorElement: , loader: (request) => { return fetchComparison( request.params.digest1, diff --git a/frontend/src/pages/ComparisonView.jsx b/frontend/src/pages/ComparisonView.jsx index e2419b4..ddb1257 100644 --- a/frontend/src/pages/ComparisonView.jsx +++ b/frontend/src/pages/ComparisonView.jsx @@ -136,21 +136,21 @@ const SequencesReport = ({ messageArray }) => { // ✅❔❌❔ const coordinateSystemInterpretation = (comparison) => { const lengthsANotB = - comparison.array_elements.a.lengths - - comparison.array_elements.a_and_b.lengths; + comparison.array_elements.a_count.lengths - + comparison.array_elements.a_and_b_count.lengths; const lengthsBNotA = - comparison.array_elements.b.lengths - - comparison.array_elements.a_and_b.lengths; + comparison.array_elements.b_count.lengths - + comparison.array_elements.a_and_b_count.lengths; const namesANotB = - comparison.array_elements.a.names - 
comparison.array_elements.a_and_b.names; + comparison.array_elements.a_count.names - comparison.array_elements.a_and_b_count.names; const namesBNotA = - comparison.array_elements.b.names - comparison.array_elements.a_and_b.names; + comparison.array_elements.b_count.names - comparison.array_elements.a_and_b_count.names; const nlpANotB = - comparison.array_elements.a.name_length_pairs - - comparison.array_elements.a_and_b.name_length_pairs; + comparison.array_elements.a_count.name_length_pairs - + comparison.array_elements.a_and_b_count.name_length_pairs; const nlpBNotA = - comparison.array_elements.b.name_length_pairs - - comparison.array_elements.a_and_b.name_length_pairs; + comparison.array_elements.b_count.name_length_pairs - + comparison.array_elements.a_and_b_count.name_length_pairs; const msgArray = []; // If the name_length_pairs match, then the coordinate systems are identical if (nlpANotB === 0 && nlpBNotA === 0) { @@ -162,7 +162,7 @@ const coordinateSystemInterpretation = (comparison) => { } else if (nlpANotB > 0 && nlpBNotA === 0) { // If B nlp is a subset of A msgArray.push("Collection B's coordinate system is a subset of A's."); - } else if (comparison.array_elements.a_and_b.name_length_pairs !== 0) { + } else if (comparison.array_elements.a_and_b_count.name_length_pairs !== 0) { // If there is some overlap msgArray.push('The coordinate systems are partially overlapping.'); } else { @@ -230,22 +230,22 @@ const ComparisonView = ({ paramComparison }) => { // ✅❔❌ const getInterpretation = (comparison, attribute) => { - const nSequencesA = comparison.array_elements.a[attribute]; - const nSequencesB = comparison.array_elements.b[attribute]; + const nSequencesA = comparison.array_elements.a_count[attribute]; + const nSequencesB = comparison.array_elements.b_count[attribute]; const aNotB = - comparison.array_elements.a[attribute] - - comparison.array_elements.a_and_b[attribute]; + comparison.array_elements.a_count[attribute] - + comparison.array_elements.a_and_b_count[attribute]; const bNotA = - comparison.array_elements.b[attribute] - - comparison.array_elements.a_and_b[attribute]; + comparison.array_elements.b_count[attribute] - + comparison.array_elements.a_and_b_count[attribute]; const orderCheck = comparison.array_elements.a_and_b_same_order[attribute]; let interpTerm = ''; const msgArray = []; if ( - comparison.array_elements.a_and_b[attribute] == nSequencesA && - comparison.array_elements.a_and_b[attribute] == nSequencesB + comparison.array_elements.a_and_b_count[attribute] == nSequencesA && + comparison.array_elements.a_and_b_count[attribute] == nSequencesB ) { msgArray.push(`🟰 The ${attribute} contents are identical.`); if (orderCheck === true) { @@ -256,8 +256,8 @@ const ComparisonView = ({ paramComparison }) => { interpTerm = 'identical_content'; } if ( - comparison.array_elements.a_and_b[attribute] == nSequencesA && - comparison.array_elements.a_and_b[attribute] < nSequencesB + comparison.array_elements.a_and_b_count[attribute] == nSequencesA && + comparison.array_elements.a_and_b_count[attribute] < nSequencesB ) { msgArray.push( `Collection B contains all ${nSequencesA} ${attribute} from collection A, and ${bNotA} additional.`, @@ -265,20 +265,20 @@ const ComparisonView = ({ paramComparison }) => { interpTerm = 'subset'; } if ( - comparison.array_elements.a_and_b[attribute] == nSequencesB && - comparison.array_elements.a_and_b[attribute] < nSequencesA + comparison.array_elements.a_and_b_count[attribute] == nSequencesB && + 
comparison.array_elements.a_and_b_count[attribute] < nSequencesA ) { msgArray.push( `Collection A contains all ${nSequencesB} ${attribute} from collection B, and ${aNotB} additional.`, ); interpTerm = 'subset'; } - if (comparison.array_elements.a_and_b[attribute] === 0) { + if (comparison.array_elements.a_and_b_count[attribute] === 0) { msgArray.push(`The collections' ${attribute} contents are disjoint.`); interpTerm = 'disjoint'; } else if ( - comparison.array_elements.a_and_b[attribute] < nSequencesA && - comparison.array_elements.a_and_b[attribute] < nSequencesB + comparison.array_elements.a_and_b_count[attribute] < nSequencesA && + comparison.array_elements.a_and_b_count[attribute] < nSequencesB ) { msgArray.push( `The collections' ${attribute} contents are partially overlapping; some are shared, and some are unique to each collection.`, @@ -375,7 +375,7 @@ const ComparisonView = ({ paramComparison }) => { (number of elements found in both):
- {Object.entries(comparison.array_elements.a_and_b).map( + {Object.entries(comparison.array_elements.a_and_b_count).map( ([key, value]) => (
+ + {error && ( +
+ Error: {error} +
+ )} + + {(results.length > 0 || loading) && ( +
+
+
+
+
+
{total}
+
Total
+
+
+
{passed}
+
Passed
+
+
+
{failed}
+
Failed
+
+
+
+ {serverUrl} +
+ {summary && ( +
+ {new Date().toLocaleString()} +
+ )} +
+
+
+
+
0 ? (passed / total) * 100 : 0}%`, + transition: 'width 0.3s ease', + }} + /> +
0 ? (failed / total) * 100 : 0}%`, + transition: 'width 0.3s ease', + }} + /> +
+
+
+
+ +
+ {results.map((result, idx) => ( +
+
+
+ + {result.passed ? 'PASS' : 'FAIL'} + + {result.name} +
+ {result.description && ( +
+ {result.description} +
+ )} + {result.error && ( +
+ {result.error} +
+ )} +
+ + {result.duration_ms.toFixed(0)}ms + +
+ ))} + {loading && completed < total && ( +
+ + Running check {completed + 1} of {total}... +
+ )} +
+
+ )} +
+ ); +}; diff --git a/frontend/src/features/digest/DigestPage.jsx b/frontend/src/pages/DigestPage.jsx similarity index 96% rename from frontend/src/features/digest/DigestPage.jsx rename to frontend/src/pages/DigestPage.jsx index 0ea1f59..001a578 100644 --- a/frontend/src/features/digest/DigestPage.jsx +++ b/frontend/src/pages/DigestPage.jsx @@ -1,9 +1,9 @@ import { useState, useRef, useEffect, useCallback } from 'react'; import { useSearchParams, useNavigate } from 'react-router-dom'; import toast from 'react-hot-toast'; -import FastaDropzone from './FastaDropzone'; -import SeqColResult from './SeqColResult'; -import './digest.css'; +import FastaDropzone from '../components/digest/FastaDropzone'; +import SeqColResult from '../components/digest/SeqColResult'; +import '../components/digest/digest.css'; const HISTORY_KEY = 'digest-history'; const MAX_HISTORY = 20; @@ -56,12 +56,12 @@ function loadFromHistory(digest) { function createWorker() { return new Worker( - new URL('./fastaDigestWorker.js', import.meta.url), + new URL('../components/digest/fastaDigestWorker.js', import.meta.url), { type: 'module' } ); } -export default function DigestPage() { +export function DigestPage() { const [searchParams] = useSearchParams(); const navigate = useNavigate(); const [result, setResult] = useState(null); @@ -142,6 +142,13 @@ export default function DigestPage() { } }; + worker.onerror = (event) => { + event.preventDefault(); + setError(event.message || 'Worker crashed unexpectedly'); + setStatus(null); + setProgress(null); + }; + workerRef.current = worker; return worker; }, []); diff --git a/frontend/src/pages/HomePage.jsx b/frontend/src/pages/HomePage.jsx index 7a957ff..85d53ad 100644 --- a/frontend/src/pages/HomePage.jsx +++ b/frontend/src/pages/HomePage.jsx @@ -6,6 +6,11 @@ import { AttributeList } from '../components/ObjectLists'; const HomePage = () => { const loaderData = useLoaderData(); + + if (!Array.isArray(loaderData) || loaderData.length < 3) { + return
Failed to load homepage data.
; + } + const collections = loaderData[0]; const pangenomes = loaderData[1]; const name_length_pairs = loaderData[2]; @@ -86,7 +91,7 @@ const HomePage = () => { -
4. List of name_length_pairs on this server:
+
5. List of name_length_pairs on this server:

The{' '} /list/attributes{' '} diff --git a/frontend/src/pages/PangenomeView.jsx b/frontend/src/pages/PangenomeView.jsx index 2d55013..146382f 100644 --- a/frontend/src/pages/PangenomeView.jsx +++ b/frontend/src/pages/PangenomeView.jsx @@ -11,6 +11,10 @@ const PangenomeView = ({ params }) => { const pangenome = useLoaderData(); const { digest } = useParams(); + if (!Array.isArray(pangenome) || pangenome.length < 3) { + return

Failed to load pangenome data.
; + } + let level1 = pangenome[0]; let level2 = pangenome[1]; let itemwise = pangenome[2]; diff --git a/frontend/src/pages/SCIM.jsx b/frontend/src/pages/SCIM.jsx index 7e8a40a..41e9ee1 100644 --- a/frontend/src/pages/SCIM.jsx +++ b/frontend/src/pages/SCIM.jsx @@ -2,7 +2,7 @@ import { useEffect, useState } from 'react'; import { useSearchParams, useLoaderData } from 'react-router-dom'; import toast from 'react-hot-toast'; -import { API_BASE } from '../utilities.jsx'; +import { API_BASE, encodeToBase64, decodeFromBase64 } from '../utilities.jsx'; import { ComparisonView } from './ComparisonView.jsx'; // Seqcol Comparison Interpretation Module (SCIM) @@ -30,18 +30,24 @@ const SCIM = () => { useEffect(() => { const comparisonFromQuery = searchParams.get('val'); if (comparisonFromQuery) { - // decode base64encoded string - const decodedComparisonFromQuery = atob(comparisonFromQuery); - // prettify the comparison string - const prettyComparison = JSON.stringify( - JSON.parse(decodedComparisonFromQuery), - null, - 2, - ); - setComparisonStr(prettyComparison); - - const parsedComparison = JSON.parse(decodedComparisonFromQuery); - setComparison(parsedComparison); + try { + // decode base64encoded string + const decodedComparisonFromQuery = decodeFromBase64(comparisonFromQuery); + // prettify the comparison string + const prettyComparison = JSON.stringify( + JSON.parse(decodedComparisonFromQuery), + null, + 2, + ); + setComparisonStr(prettyComparison); + + const parsedComparison = JSON.parse(decodedComparisonFromQuery); + setComparison(parsedComparison); + } catch { + toast.error('Invalid comparison URL. The data may be corrupted.'); + setComparison(null); + setComparisonStr(''); + } } }, [searchParams]); @@ -79,7 +85,7 @@ const SCIM = () => { setComparison(parsedComparison); // update the query param to base64 encoded string - const base64encodedComparison = btoa(comparisonStr); + const base64encodedComparison = encodeToBase64(comparisonStr); window.history.pushState( {}, '', @@ -95,9 +101,9 @@ const SCIM = () => { const loadExample = () => { const exampleData = - 'eyJkaWdlc3RzIjp7ImEiOiJYWmxyY0VHaTZtbG9wWjJ1RDhPYkhrUUIxZDBvRHdLayIsImIiOiJRdlQ1dEFRMEI4Vmt4ZC1xRmZ0bHpFazJReWZQdGdPdiJ9LCJhdHRyaWJ1dGVzIjp7ImFfb25seSI6W10sImJfb25seSI6W10sImFfYW5kX2IiOlsibGVuZ3RocyIsIm5hbWVfbGVuZ3RoX3BhaXJzIiwibmFtZXMiLCJzZXF1ZW5jZXMiLCJzb3J0ZWRfc2VxdWVuY2VzIl19LCJhcnJheV9lbGVtZW50cyI6eyJhIjp7Imxlbmd0aHMiOjMsIm5hbWVfbGVuZ3RoX3BhaXJzIjozLCJuYW1lcyI6Mywic2VxdWVuY2VzIjozLCJzb3J0ZWRfc2VxdWVuY2VzIjozfSwiYiI6eyJsZW5ndGhzIjozLCJuYW1lX2xlbmd0aF9wYWlycyI6MywibmFtZXMiOjMsInNlcXVlbmNlcyI6Mywic29ydGVkX3NlcXVlbmNlcyI6M30sImFfYW5kX2IiOnsibGVuZ3RocyI6MywibmFtZV9sZW5ndGhfcGFpcnMiOjAsIm5hbWVzIjowLCJzZXF1ZW5jZXMiOjMsInNvcnRlZF9zZXF1ZW5jZXMiOjN9LCJhX2FuZF9iX3NhbWVfb3JkZXIiOnsibGVuZ3RocyI6dHJ1ZSwibmFtZV9sZW5ndGhfcGFpcnMiOm51bGwsIm5hbWVzIjpudWxsLCJzZXF1ZW5jZXMiOnRydWUsInNvcnRlZF9zZXF1ZW5jZXMiOnRydWV9fX0='; + 
'eyJkaWdlc3RzIjp7ImEiOiJYWmxyY0VHaTZtbG9wWjJ1RDhPYkhrUUIxZDBvRHdLayIsImIiOiJRdlQ1dEFRMEI4Vmt4ZC1xRmZ0bHpFazJReWZQdGdPdiJ9LCJhdHRyaWJ1dGVzIjp7ImFfb25seSI6W10sImJfb25seSI6W10sImFfYW5kX2IiOlsibGVuZ3RocyIsIm5hbWVfbGVuZ3RoX3BhaXJzIiwibmFtZXMiLCJzZXF1ZW5jZXMiLCJzb3J0ZWRfc2VxdWVuY2VzIl19LCJhcnJheV9lbGVtZW50cyI6eyJhX2NvdW50Ijp7Imxlbmd0aHMiOjMsIm5hbWVfbGVuZ3RoX3BhaXJzIjozLCJuYW1lcyI6Mywic2VxdWVuY2VzIjozLCJzb3J0ZWRfc2VxdWVuY2VzIjozfSwiYl9jb3VudCI6eyJsZW5ndGhzIjozLCJuYW1lX2xlbmd0aF9wYWlycyI6MywibmFtZXMiOjMsInNlcXVlbmNlcyI6Mywic29ydGVkX3NlcXVlbmNlcyI6M30sImFfYW5kX2JfY291bnQiOnsibGVuZ3RocyI6MywibmFtZV9sZW5ndGhfcGFpcnMiOjAsIm5hbWVzIjowLCJzZXF1ZW5jZXMiOjMsInNvcnRlZF9zZXF1ZW5jZXMiOjN9LCJhX2FuZF9iX3NhbWVfb3JkZXIiOnsibGVuZ3RocyI6dHJ1ZSwibmFtZV9sZW5ndGhfcGFpcnMiOm51bGwsIm5hbWVzIjpudWxsLCJzZXF1ZW5jZXMiOnRydWUsInNvcnRlZF9zZXF1ZW5jZXMiOnRydWV9fX0='; - const decodedComparison = atob(exampleData); + const decodedComparison = decodeFromBase64(exampleData); const prettyComparison = JSON.stringify( JSON.parse(decodedComparison), null, diff --git a/frontend/src/pages/SCOM.jsx b/frontend/src/pages/SCOM.jsx index e859f96..b27d747 100644 --- a/frontend/src/pages/SCOM.jsx +++ b/frontend/src/pages/SCOM.jsx @@ -1,4 +1,4 @@ -import { useEffect, useState } from 'react'; +import { useCallback, useEffect, useState } from 'react'; import { encodeComparison } from '../utilities.jsx'; import { useLoaderData, useNavigate, useSearchParams } from 'react-router-dom'; import toast from 'react-hot-toast'; @@ -11,7 +11,6 @@ import { } from '../services/fetchData.jsx'; import { MultiMetricHeatmapPlot } from '../components/MultiMetricHeatmapPlot.jsx'; import { StripPlot } from '../components/StripPlot.jsx'; -// import { NetworkGraph } from '../components/NetworkGraph.jsx'; import { useSimilaritiesStore } from '../stores/similarities'; @@ -20,7 +19,7 @@ const SCOM = () => { const navigate = useNavigate(); const [searchParams] = useSearchParams(); const loaderData = useLoaderData(); - const collections = loaderData[0]; + const collections = Array.isArray(loaderData) && loaderData.length >= 1 ? 
loaderData[0] : null; const { selectedCollectionsIndex, @@ -38,19 +37,17 @@ const SCOM = () => { getAllCollections, initializeSelectedCollections, sortBy, - setSortBy, sortAscending, - setSortAscending, - sortSimilarities, + sortByColumn, + resetSort, species, - setSpecies + setSpecies, + error: storeError, + setError: setStoreError, } = useSimilaritiesStore(); const [stripJitter, setStripJitter] = useState('none'); const [stripOrientation, setStripOrientation] = useState('horizontal'); - const [heatmapMetric, setHeatmapMetric] = useState('sequences'); - // const [networkMetric, setNetworkMetric] = useState('sequences'); - // const [networkThreshold, setNetworkThreshold] = useState(0.8); const [relationship, setRelationship] = useState('oneToMany'); const [isLoading, setIsLoading] = useState(false); const [pendingPrefill, setPendingPrefill] = useState(null); @@ -270,15 +267,8 @@ const SCOM = () => { ] } - // const handleSelectCollection = (index) => { - // setSelectedCollectionsIndex((prev) => { - // const newArray = [...prev]; - // newArray[index] = !newArray[index]; - // return newArray; - // }); - // }; - const handleNavigateSCIM = async (similarityRow) => { + setStoreError(null); try { let comparison; if (similarityRow.custom) { @@ -294,8 +284,8 @@ const SCOM = () => { } const encodedComparison = encodeComparison(comparison); navigate(`/scim?val=${encodedComparison}`); - // window.scrollTo(0, 0); } catch (error) { + setStoreError('Comparison could not be made.'); toast.error( Error: Comparison could not be made. @@ -304,29 +294,12 @@ const SCOM = () => { } }; - // const handleRelationshipChange = (newRelationship) => { - // if ( - // newRelationship === 'oneToMany' && - // relationship === 'manyToMany' && - // selectedCollections.length > 1 - // ) { - // setCustomCollections([]); - // setSelectedCollectionsIndex(collections.results.map(() => false)); - // setCustomCount(1); - // } - // setSelectedCollectionsIndex((prev) => - // prev.map((item, index) => - // index < collections.results.length ? false : item, - // ), - // ); - // setStripJitter('none'); - // setRelationship(newRelationship); - // }; - - const handleAddCustomCollection = async (data, name) => { + const handleAddCustomCollection = useCallback(async (data, name) => { + setStoreError(null); try { data = JSON.parse(data); } catch (e) { + setStoreError('Invalid JSON format. Please check your input.'); toast.error( Error: Invalid JSON format. Please check your input. @@ -335,23 +308,11 @@ const SCOM = () => { return; } - // if (relationship === 'manyToMany' && allCollections.includes(name)) { - // toast.error( - // - // Error: Collection with name already exists. Please - // try another name. - // , - // ); - // return; - // } - try { setIsLoading(true); const result = await fetchSimilaritiesJSON(data, species); if (result?.similarities) { - // const customDigest = 'query_seqcol' + (customCount > 1 ? customCount : ''); const customDigest = 'Input Seqcol'; - // console.log(result.similarities) const flattenedSimilarities = result.similarities.flatMap((s) => s.human_readable_names.map((humanReadableName) => ({ selectedDigest: name !== '' ? name : customDigest, @@ -396,6 +357,7 @@ const SCOM = () => { } catch (e) { console.error('SCOM submission error:', e); console.log('Data that was submitted:', data); + setStoreError('Collection is invalid. Please check your input.'); toast.error( Error: Collection is invalid. 
Please check your @@ -404,10 +366,10 @@ const SCOM = () => { ); return; } finally { - setSortBy(null); + resetSort(); setIsLoading(false); } - }; + }, [species, relationship, collections, customCollections, customCount, setCustomCollections, setSelectedCollectionsIndex, setCustomCount, resetSort, setIsLoading, setStoreError]); // Auto-submit prefilled data (wait for collections to be ready) useEffect(() => { @@ -415,7 +377,7 @@ const SCOM = () => { handleAddCustomCollection(JSON.stringify(pendingPrefill.json), pendingPrefill.name || ''); setPendingPrefill(null); } - }, [pendingPrefill, isLoading, collections]); + }, [pendingPrefill, isLoading, collections, handleAddCustomCollection]); useEffect(() => { const fetchAllSimilarities = async () => { @@ -424,41 +386,10 @@ const SCOM = () => { for (let i = 0; i < selectedCollectionsIndex.length; i++) { if (!selectedCollectionsIndex[i]) continue; - // const collection = allCollections[i]; - - - if (i < collections.results.length && relationship === 'manyToMany') { - // // server collection - // try { - // const result = await fetchSimilarities(collection); - // if (result?.similarities) { - // const flattenedSimilarities = result.similarities.map((s) => ({ - // selectedDigest: collection, - // comparedDigest: s.digest, - // comparedAlias: s.human_readable_name, - // lengths: s.similarities.lengths, - // name_length_pairs: s.similarities.name_length_pairs, - // names: s.similarities.names, - // sequences: s.similarities.sequences, - // sorted_sequences: s.similarities.sorted_sequences, - // custom: false, - // raw: null, - // })); - // allSimilarities.push(...flattenedSimilarities); - // } - // } catch (error) { - // console.error( - // `Error fetching similarities for ${collection}:`, - // error, - // ); - // } - } else { - // custom collection - const customIndex = i - collections.results.length; - const customCollection = customCollections[customIndex]; - if (customCollection) { - allSimilarities.push(...customCollection.similarities); - } + const customIndex = i - collections.results.length; + const customCollection = customCollections[customIndex]; + if (customCollection) { + allSimilarities.push(...customCollection.similarities); } } @@ -469,40 +400,19 @@ const SCOM = () => { }, [selectedCollectionsIndex, customCollections]); const handleSortTable = (column) => { - if (sortBy === column) { - setSortAscending(!sortAscending) - sortSimilarities() - } else { - setSortBy(column) - setSortAscending(false) - sortSimilarities() - } + sortByColumn(column); }; + if (!collections) { + return
Failed to load collection data.
; + } + return (

Seqcol Comparison Overview Module (SCOM)

- {/*
    -
  • - handleRelationshipChange('oneToMany')} - > - One-to-Many - -
  • -
  • - handleRelationshipChange('manyToMany')} - > - Many-to-Many - -
  • -
*/}
@@ -525,11 +435,6 @@ const SCOM = () => {
- {/*

- If you would like to view metrics for multiple sequence collections - at once, use the "Many-to-Many" tab. -

*/} -
{ value={customCollectionJSON} placeholder='Paste output from `refget fasta seqcol yourfasta.fa` here.' className='form-control tiny border-0 rounded-0 rounded-bottom z-active' - // style={{ maxHeight: 'calc(200px - 32.333333px)' }} rows='12' />
- {/* {relationship === 'manyToMany' && ( -
-
-
- - Selected Sequence Collections - - - - -
-
    - {allCollections && - allCollections.map((collection, index) => ( -
  • -
    -
    - handleSelectCollection(index)} - checked={selectedCollectionsIndex[index]} - /> - -
    - {index >= collections.results.length ? ( - { - const customIndex = - index - collections.results.length; - setCustomCollections((prev) => - prev.filter((_, i) => i !== customIndex), - ); - setSelectedCollectionsIndex((prev) => - prev.filter((_, i) => i !== index), - ); - toast.success('Custom collection removed.'); - }} - /> - ) : ( - - )} -
    -
  • - ))} -
-
-
- )} */}
+ {storeError && ( +
+
+ + Error: {storeError} +
+ +
+ )} + {(similarities && !isLoading) ? (
@@ -716,7 +550,6 @@ const SCOM = () => { {relationship === 'manyToMany' && ( )} - {/* {relationship === 'manyToMany' && } */} {relationship === 'oneToMany' && ( )} @@ -735,74 +568,9 @@ const SCOM = () => {
Heatmap
- {/* */}
rest)} /> - {/* {relationship === 'manyToMany' && ( - <> -
-
Network Graph
-
- Threshold - - setNetworkThreshold(Number(e.target.value)) - } - className='form-control form-range' - style={{ height: 'inherit' }} - /> - - setNetworkThreshold(Number(e.target.value)) - } - className='form-control' - style={{ maxWidth: '70px' }} - /> -
- -
- rest)} - metric={networkMetric} - threshold={networkThreshold} - /> - - )} */} -
Seqcol Comparison Summary Table

@@ -813,7 +581,6 @@ const SCOM = () => { - {/* */} @@ -830,7 +597,6 @@ const SCOM = () => { className='cursor-pointer' onClick={() => handleNavigateSCIM(row)} > - {/* */} diff --git a/frontend/src/services/fetchData.jsx b/frontend/src/services/fetchData.jsx index 9eac0b5..ac6fbab 100644 --- a/frontend/src/services/fetchData.jsx +++ b/frontend/src/services/fetchData.jsx @@ -1,52 +1,88 @@ import { API_BASE } from '../utilities.jsx'; +export class AppError extends Error { + constructor(message, { status, isNotFound, digest1, digest2 } = {}) { + super(message); + this.name = 'AppError'; + this.status = status ?? null; + this.isNotFound = isNotFound ?? false; + this.digest1 = digest1 ?? null; + this.digest2 = digest2 ?? null; + } +} + +const checkResponse = async (response, url) => { + if (!response.ok) { + let errorDetail = response.statusText; + try { + const errorData = await response.json(); + errorDetail = errorData.detail || errorData.message || errorData.error || errorDetail; + } catch { + try { + errorDetail = await response.text(); + if (errorDetail.length > 200) { + errorDetail = errorDetail.substring(0, 200) + '...'; + } + } catch { + // Fallback to status text if body cannot be read + } + } + throw new Error(`HTTP ${response.status} from ${url}: ${errorDetail}`); + } + return response; +}; + export const fetchServiceInfo = async () => { - const response = await fetch(`${API_BASE}/service-info`); + const url = `${API_BASE}/service-info`; + const response = await fetch(url); + await checkResponse(response, url); return response.json(); }; -export const fetchPangenomeLevels = async ( - digest, - level = '2', - collated = true, -) => { - const url = `${API_BASE}/pangenome/${digest}?level=1`; - const url2 = `${API_BASE}/pangenome/${digest}?level=2`; - const urlItemwise = `${API_BASE}/pangenome/${digest}?collated=false`; - let resps = [ - fetch(url).then((response) => response.json()), - fetch(url2).then((response) => response.json()), - fetch(urlItemwise).then((response) => response.json()), +export const fetchPangenomeLevels = async (digest) => { + const urls = [ + `${API_BASE}/pangenome/${digest}?level=1`, + `${API_BASE}/pangenome/${digest}?level=2`, + `${API_BASE}/pangenome/${digest}?collated=false`, ]; - return Promise.all(resps); + return Promise.all( + urls.map(async (url) => { + const response = await fetch(url); + await checkResponse(response, url); + return response.json(); + }), + ); }; export const fetchSeqColList = async () => { - const url = `${API_BASE}/list/collection?page_size=10&page=0`; - const url2 = `${API_BASE}/list/pangenome?page_size=5`; - const url3 = `${API_BASE}/list/attributes/name_length_pairs?page_size=5`; - let resps = [ - fetch(url).then((response) => response.json()), - fetch(url2).then((response) => response.json()), - fetch(url3).then((response) => response.json()), + const urls = [ + `${API_BASE}/list/collection?page_size=10&page=0`, + `${API_BASE}/list/pangenome?page_size=5`, + `${API_BASE}/list/attributes/name_length_pairs?page_size=5`, ]; - return Promise.all(resps); + + return Promise.all( + urls.map(async (url) => { + const response = await fetch(url); + await checkResponse(response, url); + return response.json(); + }), + ); }; export const fetchAllSeqCols = async () => { - const url = `${API_BASE}/list/collection?page_size=1000&page=0`; - let resps = [fetch(url).then((response) => response.json())]; - return Promise.all(resps); -}; + const urls = [ + `${API_BASE}/list/collection?page_size=1000&page=0`, + ]; -export const fetchSeqColDetails = 
async ( - digest, - level = '2', - collated = true, -) => { - const url = `${API_BASE}/collection/${digest}?level=${level}&collated=${collated}`; - return fetch(url).then((response) => response.json()); + return Promise.all( + urls.map(async (url) => { + const response = await fetch(url); + await checkResponse(response, url); + return response.json(); + }), + ); }; export const fetchCollectionLevels = async (digest) => { @@ -56,20 +92,13 @@ export const fetchCollectionLevels = async (digest) => { `${API_BASE}/collection/${digest}?collated=false`, ]; - const responses = await Promise.all( - urls.map((url) => - fetch(url).then((response) => { - if (!response.ok) { - throw new Error( - `Error fetching data from ${url}: ${response.statusText}`, - ); - } - return response.json(); - }), - ), + return Promise.all( + urls.map(async (url) => { + const response = await fetch(url); + await checkResponse(response, url); + return response.json(); + }), ); - - return responses; }; export const fetchComparison = async (digest1, digest2) => { @@ -77,55 +106,67 @@ export const fetchComparison = async (digest1, digest2) => { const response = await fetch(url); if (!response.ok) { if (response.status === 404) { - const err = new Error('Collection not found'); - err.digest1 = digest1; - err.digest2 = digest2; - err.isNotFound = true; - throw err; + throw new AppError('Collection not found', { + status: 404, + isNotFound: true, + digest1, + digest2, + }); } - throw new Error(`Comparison failed: ${response.status} ${response.statusText}`); + await checkResponse(response, url); } return response.json(); }; export const fetchComparisonJSON = async (data, digest) => { const url = `${API_BASE}/comparison/${digest}`; - return fetch(url, { + const response = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json', }, body: JSON.stringify(data), - }).then((response) => response.json()); + }); + await checkResponse(response, url); + return response.json(); }; export const fetchAttribute = async (attribute, digest) => { - const url = `${API_BASE}/list/collection?${attribute}=${digest}`; - const url2 = `${API_BASE}/attribute/collection/${attribute}/${digest}`; - let resps = [ - fetch(url).then((response) => response.json()), - fetch(url2).then((response) => response.json()), + const urls = [ + `${API_BASE}/list/collection?${attribute}=${digest}`, + `${API_BASE}/attribute/collection/${attribute}/${digest}`, ]; - return Promise.all(resps); + + return Promise.all( + urls.map(async (url) => { + const response = await fetch(url); + await checkResponse(response, url); + return response.json(); + }), + ); }; export const fetchSimilarities = async (digest) => { const url = `${API_BASE}/similarities/${digest}?page_size=60`; - return fetch(url, { + const response = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json', }, - }).then((response) => response.json()); + }); + await checkResponse(response, url); + return response.json(); }; export const fetchSimilaritiesJSON = async (data, species) => { const url = `${API_BASE}/similarities/?species=${species}&page_size=60`; - return fetch(url, { + const response = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json', }, body: JSON.stringify(data), - }).then((response) => response.json()); + }); + await checkResponse(response, url); + return response.json(); }; diff --git a/frontend/src/stores/similarities.js b/frontend/src/stores/similarities.js index 5c854c2..6c70663 100644 --- 
a/frontend/src/stores/similarities.js +++ b/frontend/src/stores/similarities.js @@ -7,33 +7,41 @@ export const useSimilaritiesStore = create((set, get) => ({ customCollectionJSON: '', customCount: 1, similarities: null, + error: null, sortBy: null, sortAscending: false, species: 'human', - setSortBy: (value) => set({ sortBy: value }), - setSortAscending: (value) => set({ sortAscending: value }), setSpecies: (value) => set({ species: value }), + setError: (value) => set({ error: value }), - sortSimilarities: () => { + resetSort: () => set({ sortBy: null, sortAscending: false }), + + sortByColumn: (column) => { const { similarities, sortBy, sortAscending } = get(); - - if (!similarities || !sortBy) return; - - const sampleValue = similarities.find(item => item[sortBy] != null)?.[sortBy]; - + + const newSortBy = column; + const newSortAscending = sortBy === column ? !sortAscending : false; + + if (!similarities) { + set({ sortBy: newSortBy, sortAscending: newSortAscending }); + return; + } + + const sampleValue = similarities.find(item => item[newSortBy] != null)?.[newSortBy]; + const sorted = [...similarities]; - + if (typeof sampleValue === 'number') { - sorted.sort((a, b) => sortAscending ? a[sortBy] - b[sortBy] : b[sortBy] - a[sortBy]); + sorted.sort((a, b) => newSortAscending ? a[newSortBy] - b[newSortBy] : b[newSortBy] - a[newSortBy]); } else { - sorted.sort((a, b) => sortAscending - ? String(a[sortBy]).localeCompare(String(b[sortBy])) - : String(b[sortBy]).localeCompare(String(a[sortBy])) + sorted.sort((a, b) => newSortAscending + ? String(a[newSortBy]).localeCompare(String(b[newSortBy])) + : String(b[newSortBy]).localeCompare(String(a[newSortBy])) ); } - - set({ similarities: sorted }); + + set({ sortBy: newSortBy, sortAscending: newSortAscending, similarities: sorted }); }, setSelectedCollectionsIndex: (value) => { @@ -68,25 +76,26 @@ export const useSimilaritiesStore = create((set, get) => ({ setSimilarities: (value) => { const { sortBy, sortAscending } = get(); - - if (!sortBy) { + + if (!sortBy || !value) { set({ similarities: value }); return; } const sampleValue = value.find(item => item[sortBy] != null)?.[sortBy]; + const sorted = [...value]; + if (typeof sampleValue === 'number') { - set({ similarities: sortAscending - ? value.sort((a, b) => a[sortBy] - b[sortBy]) - : value.sort((a, b) => b[sortBy] - a[sortBy]) - }); + sorted.sort((a, b) => sortAscending ? a[sortBy] - b[sortBy] : b[sortBy] - a[sortBy]); } else { - set({ similarities: sortAscending - ? value.sort((a, b) => a[sortBy].localeCompare(b[sortBy])) - : value.sort((a, b) => b[sortBy].localeCompare(a[sortBy])) - }); + sorted.sort((a, b) => sortAscending + ? 
String(a[sortBy]).localeCompare(String(b[sortBy])) + : String(b[sortBy]).localeCompare(String(a[sortBy])) + ); } + + set({ similarities: sorted }); }, getAllCollections: (collections) => { diff --git a/frontend/src/utilities.jsx b/frontend/src/utilities.jsx index c5f2ecd..d137471 100644 --- a/frontend/src/utilities.jsx +++ b/frontend/src/utilities.jsx @@ -5,13 +5,29 @@ import copyToClipboardIcon from './assets/copy_to_clipboard.svg'; import barcodeIcon from './assets/barcode.svg'; const copyToClipboard = async (text) => { - toast.success('Digest copied!'); - return await navigator.clipboard.writeText(text); + try { + await navigator.clipboard.writeText(text); + toast.success('Digest copied!'); + } catch (error) { + toast.error('Failed to copy to clipboard'); + } }; const snakeToTitle = (str) => str.replace(/_/g, ' ').replace(/\b\w/g, (char) => char.toUpperCase()); +// Unicode-safe base64 encoding +// Handles all Unicode characters including non-ASCII sequences +const encodeToBase64 = (str) => { + return btoa(unescape(encodeURIComponent(str))); +}; + +// Unicode-safe base64 decoding +// Handles all Unicode characters including non-ASCII sequences +const decodeFromBase64 = (encoded) => { + return decodeURIComponent(escape(atob(encoded))); +}; + const encodeComparison = (input) => { let jsonString; @@ -28,7 +44,7 @@ const encodeComparison = (input) => { throw new Error('Input must be an object or valid JSON string'); } - return btoa(jsonString); + return encodeToBase64(jsonString); }; export { @@ -38,4 +54,6 @@ export { copyToClipboardIcon, snakeToTitle, encodeComparison, + encodeToBase64, + decodeFromBase64, }; diff --git a/refget/__init__.py b/refget/__init__.py index 8129a43..e739c06 100644 --- a/refget/__init__.py +++ b/refget/__init__.py @@ -15,6 +15,8 @@ from .const import GTARS_INSTALLED from .utils import canonical_str from .store import RefgetStore, StorageMode, digest_fasta, compute_fai, digest_sequence, SequenceCollection +from .compliance import run_compliance +from .clients import SequenceCollectionClient __all__ = [ "__version__", @@ -27,4 +29,6 @@ "compute_fai", "digest_sequence", "SequenceCollection", + "run_compliance", + "SequenceCollectionClient", ] diff --git a/refget/_version.py b/refget/_version.py index 1f4c4d4..ae6db5f 100644 --- a/refget/_version.py +++ b/refget/_version.py @@ -1 +1 @@ -__version__ = "0.10.1" +__version__ = "0.11.0" diff --git a/refget/compliance.py b/refget/compliance.py new file mode 100644 index 0000000..7020d9e --- /dev/null +++ b/refget/compliance.py @@ -0,0 +1,496 @@ +""" +GA4GH SeqCol API Compliance Suite. + +This is THE canonical compliance suite. It can be run two ways: +1. Via pytest: tests/api/test_compliance.py wraps these checks +2. Via web UI: /compliance/stream endpoint streams results in real-time + +All check functions take an api_root URL and raise AssertionError on failure. +The runner functions execute checks and return structured results. + +Test data is loaded from test_fasta/test_fasta_digests.json and +tests/api/comparison/ fixture files relative to the repository root. 
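+
+Example (a minimal sketch; the server URL is an assumption -- any running
+seqcol server with the demo FASTA data loaded will do):
+
+    from refget.compliance import run_compliance
+    report = run_compliance("http://127.0.0.1:8000")
+    print(f"{report['passed']}/{report['total']} checks passed")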
+""" + +import json +import logging +import time +from dataclasses import dataclass, field, asdict +from datetime import datetime, timezone +from pathlib import Path + +import requests + +_LOGGER = logging.getLogger(__name__) + +COMPLIANCE_TIMEOUT = 3 # seconds per request + +# ============================================================ +# Test data -- loaded from repository fixtures +# ============================================================ + +REPO_ROOT = Path(__file__).parent.parent +_DIGESTS_FILE = REPO_ROOT / "test_fasta" / "test_fasta_digests.json" +_COMPARISON_DIR = REPO_ROOT / "tests" / "api" / "comparison" + +# Load digest test data +with open(_DIGESTS_FILE) as _f: + DIGEST_DATA = json.load(_f) + +# Convert to list of (name, bundle) tuples for iteration +DIGEST_TESTS = [(name, bundle) for name, bundle in DIGEST_DATA.items()] + +# Comparison fixture files (base.fa vs each other file) +COMPARISON_FILES = [ + _COMPARISON_DIR / "compare_base.fa_subset.fa.json", + _COMPARISON_DIR / "compare_base.fa_different_names.fa.json", + _COMPARISON_DIR / "compare_base.fa_different_order.fa.json", + _COMPARISON_DIR / "compare_base.fa_pair_swap.fa.json", + _COMPARISON_DIR / "compare_base.fa_swap_wo_coords.fa.json", +] + +# Load comparison fixtures +COMPARISON_FIXTURES = {} +for _f in COMPARISON_FILES: + with open(_f) as _fp: + COMPARISON_FIXTURES[_f.name] = json.load(_fp) + + +# ============================================================ +# Result types +# ============================================================ + + +@dataclass +class CheckResult: + """Result of a single compliance check.""" + + name: str + passed: bool + duration_ms: float + description: str | None = None + message: str | None = None + error: str | None = None + + +@dataclass +class ComplianceReport: + """Full compliance report for a server.""" + + server_url: str + timestamp: str + total: int = 0 + passed: int = 0 + failed: int = 0 + errors: int = 0 + results: list[dict] = field(default_factory=list) + + def to_dict(self) -> dict: + return asdict(self) + + +def _timed_check(name: str, func, *args, **kwargs) -> CheckResult: + """Run a check function and capture timing and errors.""" + description = (func.__doc__ or "").strip().split("\n")[0] or None + start = time.monotonic() + try: + func(*args, **kwargs) + elapsed = (time.monotonic() - start) * 1000 + return CheckResult(name=name, passed=True, duration_ms=round(elapsed, 2), description=description) + except AssertionError as e: + elapsed = (time.monotonic() - start) * 1000 + return CheckResult( + name=name, passed=False, duration_ms=round(elapsed, 2), description=description, error=str(e) + ) + except requests.exceptions.RequestException as e: + elapsed = (time.monotonic() - start) * 1000 + return CheckResult( + name=name, + passed=False, + duration_ms=round(elapsed, 2), + description=description, + error=f"Connection error: {e}", + ) + except Exception as e: + elapsed = (time.monotonic() - start) * 1000 + return CheckResult( + name=name, + passed=False, + duration_ms=round(elapsed, 2), + description=description, + error=f"Unexpected error: {e}", + ) + + +# ============================================================ +# Structure checks -- validate response format +# ============================================================ + + +def check_service_info(api_root): + """Service-info returns required GA4GH fields and seqcol schema.""" + res = requests.get(f"{api_root}/service-info", timeout=COMPLIANCE_TIMEOUT) + data = res.json() + assert "id" in data, "service-info 
missing 'id' field" + assert "type" in data, "service-info missing 'type' field" + assert "group" in data["type"], "service-info type missing 'group'" + assert "artifact" in data["type"], "service-info type missing 'artifact'" + assert "version" in data["type"], "service-info type missing 'version'" + assert "seqcol" in data, "service-info must have 'seqcol' section" + assert "schema" in data["seqcol"], "seqcol section must include 'schema'" + schema = data["seqcol"]["schema"] + assert "properties" in schema, "schema must have 'properties'" + assert "lengths" in schema["properties"], "schema must define 'lengths'" + assert "names" in schema["properties"], "schema must define 'names'" + assert "sequences" in schema["properties"], "schema must define 'sequences'" + + +def check_list_collections(api_root): + """List collections returns paginated results with total count.""" + res = requests.get(f"{api_root}/list/collection", timeout=COMPLIANCE_TIMEOUT) + data = res.json() + assert "results" in data, "list/collection missing 'results' field" + assert isinstance(data["results"], list), "list/collection 'results' should be a list" + assert "pagination" in data, "list/collection missing 'pagination' field" + assert "page" in data["pagination"], "pagination missing 'page'" + assert "page_size" in data["pagination"], "pagination missing 'page_size'" + assert "total" in data["pagination"], "pagination must include 'total' per GA4GH spec" + assert isinstance(data["pagination"]["total"], int), "pagination 'total' must be an integer" + + +def check_list_attributes(api_root, attribute_name): + """List attributes endpoint returns paginated results.""" + res = requests.get(f"{api_root}/list/attributes/{attribute_name}", timeout=COMPLIANCE_TIMEOUT) + data = res.json() + assert "results" in data, f"list/attributes/{attribute_name} missing 'results' field" + assert isinstance( + data["results"], list + ), f"list/attributes/{attribute_name} 'results' should be a list" + + +def check_openapi_available(api_root): + """OpenAPI endpoint is available (RECOMMENDED by spec Section 3.6).""" + res = requests.get(f"{api_root}/openapi.json", timeout=COMPLIANCE_TIMEOUT) + assert res.status_code == 200, f"OpenAPI endpoint returned status {res.status_code}" + data = res.json() + assert "openapi" in data, "OpenAPI response missing 'openapi' field" + + +# ============================================================ +# Collection checks -- verify content against known test data +# ============================================================ + + +def check_collection_level1(api_root, fa_name, bundle): + """Level 1 response returns digest strings for all attributes.""" + digest = bundle["top_level_digest"] + res = requests.get(f"{api_root}/collection/{digest}?level=1", timeout=COMPLIANCE_TIMEOUT) + assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + data = res.json() + for attr in ["names", "lengths", "sequences"]: + assert isinstance(data[attr], str), ( + f"Level 1 {attr} should be digest string, got {type(data[attr]).__name__}: {data[attr]}" + ) + assert data[attr] == bundle["level1"][attr], ( + f"Level 1 {attr} for {fa_name}: expected {bundle['level1'][attr]}, got {data[attr]}" + ) + assert "sorted_name_length_pairs" in data, "Level 1 missing sorted_name_length_pairs" + + +def check_collection_level2(api_root, fa_name, bundle): + """Level 2 response returns arrays matching expected content.""" + digest = bundle["top_level_digest"] + res = 
requests.get(f"{api_root}/collection/{digest}?level=2", timeout=COMPLIANCE_TIMEOUT) + assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + data = res.json() + for attr in ["names", "lengths", "sequences"]: + assert isinstance(data[attr], list), ( + f"Level 2 {attr} should be array, got {type(data[attr]).__name__}" + ) + assert data[attr] == bundle["level2"][attr], ( + f"Level 2 {attr} for {fa_name}: expected {bundle['level2'][attr]}, got {data[attr]}" + ) + assert "sorted_name_length_pairs" not in data, "Level 2 should not have sorted_name_length_pairs" + + +def check_default_level_returns_level2(api_root, fa_name, bundle): + """Collection without ?level= param returns level 2 arrays (spec default).""" + digest = bundle["top_level_digest"] + res = requests.get(f"{api_root}/collection/{digest}", timeout=COMPLIANCE_TIMEOUT) + assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + data = res.json() + for attr in ["names", "lengths", "sequences"]: + assert isinstance(data[attr], list), ( + f"Default level for {fa_name} {attr} should be array, got {type(data[attr]).__name__}" + ) + + +def check_sorted_name_length_pairs(api_root, fa_name, bundle): + """Level 1 sorted_name_length_pairs digest matches expected value.""" + digest = bundle["top_level_digest"] + res = requests.get(f"{api_root}/collection/{digest}?level=1", timeout=COMPLIANCE_TIMEOUT) + assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + data = res.json() + expected = bundle["sorted_name_length_pairs_digest"] + actual = data.get("sorted_name_length_pairs") + assert actual == expected, ( + f"SNLP for {fa_name}: expected {expected}, got {actual}" + ) + + +# ============================================================ +# Attribute checks -- verify attribute retrieval +# ============================================================ + + +def check_attribute_retrieval(api_root, fa_name, bundle, attr_name): + """Attribute endpoint returns correct array for a known digest.""" + attr_digest = bundle["level1"][attr_name] + expected = bundle["level2"][attr_name] + res = requests.get( + f"{api_root}/attribute/collection/{attr_name}/{attr_digest}", timeout=COMPLIANCE_TIMEOUT + ) + assert res.status_code == 200, ( + f"Attribute {attr_name}/{attr_digest} returned HTTP {res.status_code} (expected 200)" + ) + actual = res.json() + assert actual == expected, ( + f"Attribute {attr_name} for {fa_name}: expected {expected}, got {actual}" + ) + + +def check_transient_attribute_not_served(api_root): + """Transient attributes (sorted_name_length_pairs) return 404 from /attribute.""" + bundle = DIGEST_TESTS[0][1] + digest = bundle["top_level_digest"] + level1 = requests.get(f"{api_root}/collection/{digest}?level=1", timeout=COMPLIANCE_TIMEOUT).json() + snlp_digest = level1["sorted_name_length_pairs"] + res = requests.get( + f"{api_root}/attribute/collection/sorted_name_length_pairs/{snlp_digest}", + timeout=COMPLIANCE_TIMEOUT, + ) + assert res.status_code == 404, "Transient attributes should not be served by /attribute endpoint" + + +# ============================================================ +# List/filter checks -- verify filtering and pagination +# ============================================================ + + +def check_list_filter_by_attribute(api_root, fa_name, bundle, attr_name): + """List collections filtered by attribute digest returns the expected collection.""" + attr_digest = 
bundle["level1"][attr_name] + top_digest = bundle["top_level_digest"] + res = requests.get( + f"{api_root}/list/collection?{attr_name}={attr_digest}", timeout=COMPLIANCE_TIMEOUT + ) + assert res.status_code == 200, f"List filter returned HTTP {res.status_code}" + data = res.json() + assert "results" in data, "Filtered list missing 'results'" + assert top_digest in data["results"], ( + f"Collection {top_digest} not in results when filtering by {attr_name}={attr_digest} for {fa_name}. " + f"Got {len(data['results'])} results: {data['results'][:5]}" + ) + + +def check_list_multi_attribute_filter_and(api_root): + """Multiple filter attributes use AND logic (spec Section 3.4).""" + bundle = DIGEST_TESTS[0][1] + names_digest = bundle["level1"]["names"] + lengths_digest = bundle["level1"]["lengths"] + data = requests.get( + f"{api_root}/list/collection?names={names_digest}&lengths={lengths_digest}", + timeout=COMPLIANCE_TIMEOUT, + ).json() + assert bundle["top_level_digest"] in data["results"], ( + "AND filter should return base.fa collection" + ) + + +# ============================================================ +# Comparison checks -- verify comparison endpoint +# ============================================================ + + +def check_comparison(api_root, fixture_name, expected): + """GET comparison returns correct diff structure matching fixture data.""" + url = f"{api_root}/comparison/{expected['digests']['a']}/{expected['digests']['b']}" + res = requests.get(url, timeout=COMPLIANCE_TIMEOUT) + assert res.status_code == 200, f"Comparison returned HTTP {res.status_code} for {fixture_name}" + import refget + + actual = res.json() + assert refget.canonical_str(actual) == refget.canonical_str(expected), ( + f"Comparison mismatch for {fixture_name}.\n" + f" Expected attributes: {expected.get('attributes')}\n" + f" Got attributes: {actual.get('attributes')}" + ) + + +def check_comparison_structure(api_root): + """Comparison response has all required fields (digests, attributes, array_elements).""" + digest_a = DIGEST_TESTS[0][1]["top_level_digest"] + digest_b = DIGEST_TESTS[1][1]["top_level_digest"] + data = requests.get( + f"{api_root}/comparison/{digest_a}/{digest_b}", timeout=COMPLIANCE_TIMEOUT + ).json() + assert "digests" in data and "a" in data["digests"] and "b" in data["digests"] + assert "attributes" in data + assert "a_only" in data["attributes"] + assert "b_only" in data["attributes"] + assert "a_and_b" in data["attributes"] + assert "array_elements" in data + assert "a_count" in data["array_elements"] + assert "b_count" in data["array_elements"] + assert "a_and_b_count" in data["array_elements"] + assert "a_and_b_same_order" in data["array_elements"] + + +def check_comparison_same_order_values(api_root): + """Identical comparison: a_and_b_same_order values are all true.""" + digest = DIGEST_TESTS[0][1]["top_level_digest"] + data = requests.get( + f"{api_root}/comparison/{digest}/{digest}", timeout=COMPLIANCE_TIMEOUT + ).json() + same_order = data["array_elements"]["a_and_b_same_order"] + for attr, val in same_order.items(): + assert val is True or val is False or val is None, ( + f"a_and_b_same_order[{attr}] must be bool or null, got {type(val)}" + ) + assert val is True, f"Identical comparison: a_and_b_same_order[{attr}] should be true" + + +def check_comparison_post(api_root, fixture_name, expected): + """POST comparison with local seqcol body returns correct diff.""" + import refget + + digest_b = expected["digests"]["b"] + client = 
refget.SequenceCollectionClient(urls=[api_root]) + local_collection = client.get_collection(digest_b) + + digest_a = expected["digests"]["a"] + res = requests.post( + f"{api_root}/comparison/{digest_a}", + json=local_collection, + timeout=COMPLIANCE_TIMEOUT, + ) + assert res.status_code == 200, ( + f"Comparison POST returned HTTP {res.status_code} for {fixture_name}" + ) + data = res.json() + assert data["digests"]["a"] == expected["digests"]["a"], ( + f"POST digest a: expected {expected['digests']['a']}, got {data['digests']['a']}" + ) + assert data["attributes"] == expected["attributes"], ( + f"POST attributes for {fixture_name}: expected {expected['attributes']}, got {data['attributes']}" + ) + assert data["array_elements"] == expected["array_elements"], ( + f"POST array_elements for {fixture_name}: expected {expected['array_elements']}, got {data['array_elements']}" + ) + + +# ============================================================ +# Check registry -- builds the full compliance suite +# ============================================================ + + +def build_checks(api_root: str) -> list[tuple[str, callable, list]]: + """Build the complete list of compliance checks. + + Returns list of (name, function, args) tuples. + """ + checks = [] + + # Structure checks + checks.append(("service_info", check_service_info, [api_root])) + checks.append(("list_collections", check_list_collections, [api_root])) + for attr in ["lengths", "names", "sequences"]: + checks.append((f"list_attributes_{attr}", check_list_attributes, [api_root, attr])) + checks.append(("openapi_available", check_openapi_available, [api_root])) + + # Collection content checks (per FASTA file) + for fa_name, bundle in DIGEST_TESTS: + tag = fa_name.replace(".fa", "") + checks.append((f"collection_level1_{tag}", check_collection_level1, [api_root, fa_name, bundle])) + checks.append((f"collection_level2_{tag}", check_collection_level2, [api_root, fa_name, bundle])) + checks.append((f"default_level2_{tag}", check_default_level_returns_level2, [api_root, fa_name, bundle])) + checks.append((f"snlp_digest_{tag}", check_sorted_name_length_pairs, [api_root, fa_name, bundle])) + + # Attribute retrieval checks (per FASTA, per attribute) + for fa_name, bundle in DIGEST_TESTS: + tag = fa_name.replace(".fa", "") + for attr in ["lengths", "names", "sequences"]: + checks.append(( + f"attribute_{attr}_{tag}", + check_attribute_retrieval, + [api_root, fa_name, bundle, attr], + )) + + # Attribute filtering checks + checks.append(("transient_attribute_not_served", check_transient_attribute_not_served, [api_root])) + checks.append(("multi_attribute_filter_and", check_list_multi_attribute_filter_and, [api_root])) + + # List filter checks (base.fa, filter by each attribute) + base_name, base_bundle = DIGEST_TESTS[0] + for attr in ["lengths", "names", "sequences"]: + checks.append(( + f"list_filter_{attr}", + check_list_filter_by_attribute, + [api_root, base_name, base_bundle, attr], + )) + + # Comparison checks + checks.append(("comparison_structure", check_comparison_structure, [api_root])) + checks.append(("comparison_same_order", check_comparison_same_order_values, [api_root])) + + for fixture_name, expected in COMPARISON_FIXTURES.items(): + tag = fixture_name.replace("compare_", "").replace(".json", "") + checks.append((f"comparison_{tag}", check_comparison, [api_root, fixture_name, expected])) + checks.append((f"comparison_post_{tag}", check_comparison_post, [api_root, fixture_name, expected])) + + return checks + + +# 
============================================================
+# Runners -- batch and streaming
+# ============================================================
+
+
+def run_compliance(api_root: str) -> dict:
+    """Run all compliance checks and return a report dict."""
+    api_root = api_root.rstrip("/")
+    report = ComplianceReport(
+        server_url=api_root,
+        timestamp=datetime.now(timezone.utc).isoformat(),
+    )
+
+    for name, func, args in build_checks(api_root):
+        result = _timed_check(name, func, *args)
+        report.results.append(asdict(result))
+        report.total += 1
+        if result.passed:
+            report.passed += 1
+        else:
+            report.failed += 1
+
+    return report.to_dict()
+
+
+def run_compliance_stream(api_root: str):
+    """Generator that yields each check result as a JSON string for SSE streaming."""
+    api_root = api_root.rstrip("/")
+    checks = build_checks(api_root)
+
+    yield json.dumps({"type": "start", "total": len(checks), "server_url": api_root})
+
+    passed = 0
+    failed = 0
+    for name, func, args in checks:
+        result = _timed_check(name, func, *args)
+        if result.passed:
+            passed += 1
+        else:
+            failed += 1
+        yield json.dumps({"type": "result", **asdict(result)})
+
+    yield json.dumps({"type": "done", "passed": passed, "failed": failed, "total": len(checks)})
diff --git a/refget/router.py b/refget/router.py
index 82e768d..eeb76ef 100644
--- a/refget/router.py
+++ b/refget/router.py
@@ -21,6 +21,7 @@
 import logging
 
 from fastapi import APIRouter, Response, HTTPException, Request, Depends, Query
+from fastapi.responses import StreamingResponse
 
 from .models import Similarities, PaginationResult, PaginatedDigestList
 from .agents import RefgetDBAgent
@@ -45,6 +46,7 @@ def create_refget_router(
     collections: bool = True,
     pangenomes: bool = False,
     fasta_drs: bool = False,
+    compliance: bool = True,
     refget_store_url: str = None,
 ) -> APIRouter:
     """
@@ -85,6 +87,9 @@ def create_refget_router(
     if fasta_drs:
         _LOGGER.info("Adding FASTA DRS endpoints...")
         refget_router.include_router(fasta_drs_router, prefix="/fasta")
+    if compliance:
+        _LOGGER.info("Adding compliance endpoints...")
+        refget_router.include_router(compliance_router)
 
     return refget_router
 
@@ -562,3 +567,65 @@ async def get_fasta_index(
         }
     except ValueError:
         raise HTTPException(status_code=404, detail="Object not found")
+
+
+compliance_router = APIRouter()
+
+
+@compliance_router.get(
+    "/compliance/run",
+    summary="Run compliance checks against a seqcol server",
+    tags=["Compliance"],
+)
+def run_compliance_endpoint(
+    request: Request,
+    target_url: str | None = Query(None, description="Target server URL to test (defaults to self)"),
+):
+    """
+    Run the GA4GH SeqCol compliance suite against a server.
+
+    Runs both structure checks (service-info, list, pagination, collection
+    structure) and content checks; the content checks expect the demo test
+    data to be loaded on the target server.
+
+    If no target_url is provided, tests run against this server.
+    """
+    from .compliance import run_compliance
+
+    if target_url is None:
+        scheme = request.headers.get("x-forwarded-proto", request.url.scheme)
+        host = request.headers.get("host", request.url.netloc)
+        target_url = f"{scheme}://{host}"
+
+    return run_compliance(target_url)
+
+
+@compliance_router.get(
+    "/compliance/stream",
+    summary="Stream compliance checks via Server-Sent Events",
+    tags=["Compliance"],
+)
+def stream_compliance_endpoint(
+    request: Request,
+    target_url: str | None = Query(None, description="Target server URL to test (defaults to self)"),
+):
+    """
+    Stream compliance check results in real-time via Server-Sent Events.
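+
+    A minimal client sketch (the base URL is an assumption):
+
+        import requests
+        with requests.get("http://127.0.0.1:8000/compliance/stream", stream=True) as r:
+            for line in r.iter_lines():
+                if line.startswith(b"data: "):
+                    print(line[6:].decode())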
+ + Each event contains a JSON object with type "start", "result", or "done". + """ + from .compliance import run_compliance_stream + + if target_url is None: + scheme = request.headers.get("x-forwarded-proto", request.url.scheme) + host = request.headers.get("host", request.url.netloc) + target_url = f"{scheme}://{host}" + + def event_stream(): + for data in run_compliance_stream(target_url): + yield f"data: {data}\n\n" + + return StreamingResponse( + event_stream(), + media_type="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}, + ) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 146a887..1d1c6e3 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,5 +1,5 @@ jsonschema -gtars>=0.6.0 +gtars>=0.7.0 pyyaml requests sqlmodel diff --git a/seqcolapi/_version.py b/seqcolapi/_version.py deleted file mode 100644 index 3e2f46a..0000000 --- a/seqcolapi/_version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.9.0" diff --git a/seqcolapi/const.py b/seqcolapi/const.py index 07fbe54..ccf3895 100644 --- a/seqcolapi/const.py +++ b/seqcolapi/const.py @@ -1,13 +1,12 @@ import os -from refget._version import __version__ as refget_pkg_version +from refget._version import __version__ as refget_version +from gtars import __version__ as gtars_version from platform import python_version -from ._version import __version__ as seqcolapi_version - ALL_VERSIONS = { - "seqcolapi_version": seqcolapi_version, - "refget_pkg_version": refget_pkg_version, + "refget_version": refget_version, + "gtars_version": gtars_version, "python_version": python_version(), "seqcol_spec_version": "1.0.0", } diff --git a/seqcolapi/main.py b/seqcolapi/main.py index 70d6fd6..b13cc72 100644 --- a/seqcolapi/main.py +++ b/seqcolapi/main.py @@ -69,7 +69,7 @@ async def lifespan_loader(app): app = FastAPI( title="Sequence Collections API", description="An API providing metadata such as names, lengths, and other values for collections of reference sequences", - version=ALL_VERSIONS["seqcolapi_version"], + version=ALL_VERSIONS["refget_version"], lifespan=lifespan_loader, ) diff --git a/test_fasta/different_order.rgsi b/test_fasta/different_order.rgsi new file mode 100644 index 0000000..3cf702e --- /dev/null +++ b/test_fasta/different_order.rgsi @@ -0,0 +1,8 @@ +##seqcol_digest=Tpdsg75D4GKCGEHtIiDSL9Zx-DSuX5V8 +##names_digest=dOAOfPGkf3wAf3CUsbjVTKhY9Wq2DL6f +##sequences_digest=7t6Ulz6OeUWu6FBxntbvFKOl8w3icl2h +##lengths_digest=x5qpE4FtMkvlwpKIzvHs3a02Nex5tthp +#name length alphabet sha512t24u md5 description +chr1 4 dna2bit YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 +chr2 4 dna2bit AcLxtBuKEPk_7PGE_H4dGElwZHCujwH6 92c6a56c9e9459d8a42b96f7884710bc +chrX 8 dna2bit iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 diff --git a/test_fasta/sample_fhr.json b/test_fasta/sample_fhr.json new file mode 100644 index 0000000..098bb0e --- /dev/null +++ b/test_fasta/sample_fhr.json @@ -0,0 +1,14 @@ +{ + "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1.0, + "genome": "Test organism", + "version": "v1.0", + "taxon": { + "name": "Test organism", + "uri": "https://identifiers.org/taxonomy:12345" + }, + "masking": "soft-masked", + "genomeSynonym": ["test_v1"], + "dateCreated": "2025-01-01", + "license": "CC0-1.0" +} diff --git a/test_fasta/subset.rgsi b/test_fasta/subset.rgsi new file mode 100644 index 0000000..e767fc7 --- /dev/null +++ 
b/test_fasta/subset.rgsi @@ -0,0 +1,7 @@ +##seqcol_digest=sv7GIP1K0qcskIKF3iaBmQpaum21vH74 +##names_digest=iyNUhtfR0TALytlmxK1Zx1_q3frkZyAd +##sequences_digest=3ZP38SZcoc9wN7jsRyNSP9mQ1a3TUoUF +##lengths_digest=7-_HdxYiRf-AJLBKOTaJUdxXrUkIXs6T +#name length alphabet sha512t24u md5 description +chrX 8 dna2bit iYtREV555dUFKg2_agSJW6suquUyPpMw 5f63cfaa3ef61f88c9635fb9d18ec945 +chr1 4 dna2bit YBbVX0dLKG1ieEDCiMmkrTZFt_Z5Vdaj 31fc6ca291a32fb9df82b85e5f077e31 diff --git a/tests/api/conftest.py b/tests/api/conftest.py index 4bf9fe5..9ebaaca 100644 --- a/tests/api/conftest.py +++ b/tests/api/conftest.py @@ -1,42 +1,6 @@ import pytest from pathlib import Path -from tests.conftest import DEMO_FILES - -# from tests.conftest import pytest_addoption, api_root, pytest_configure, pytest_collection_modifyitems, check_server_is_running -from tests.conftest import API_TEST_DIR - -COLLECTION_TESTS = [ - (DEMO_FILES[0], f"{API_TEST_DIR}/collection/base_collection.json"), - (DEMO_FILES[1], f"{API_TEST_DIR}/collection/different_names_collection.json"), - (DEMO_FILES[2], f"{API_TEST_DIR}/collection/different_order_collection.json"), - (DEMO_FILES[3], f"{API_TEST_DIR}/collection/pair_swap_collection.json"), - (DEMO_FILES[4], f"{API_TEST_DIR}/collection/subset_collection.json"), - (DEMO_FILES[5], f"{API_TEST_DIR}/collection/swap_wo_coords_collection.json"), -] - -COMPARISON_TESTS = [ - f"{API_TEST_DIR}/comparison/compare_base.fa_subset.fa.json", # subset - f"{API_TEST_DIR}/comparison/compare_base.fa_different_names.fa.json", # same sequences, different names - f"{API_TEST_DIR}/comparison/compare_base.fa_different_order.fa.json", # same sequences, name order switch, but equivalent coordinate system - f"{API_TEST_DIR}/comparison/compare_base.fa_pair_swap.fa.json", # swapped name-length-pairs - f"{API_TEST_DIR}/comparison/compare_base.fa_swap_wo_coords.fa.json", # swapped name-length-pairs, but no coord system change -] - - -ATTRIBUTE_TESTS = [ - ("lengths", "7-_HdxYiRf-AJLBKOTaJUdxXrUkIXs6T", [8, 4]), - ("names", "Fw1r9eRxfOZD98KKrhlYQNEdSRHoVxAG", ["chrX", "chr1", "chr2"]), -] - -ATTRIBUTE_LIST_TESTS = [ - ( - "lengths", - "cGRMZIb3AVgkcAfNv39RN7hnT5Chk7RX", - f"{API_TEST_DIR}/attribute/cGRM.json", - ) -] - @pytest.fixture(scope="session") def test_data_root(): diff --git a/tests/api/test_compliance.py b/tests/api/test_compliance.py index 442d6f4..0e8652d 100644 --- a/tests/api/test_compliance.py +++ b/tests/api/test_compliance.py @@ -1,299 +1,115 @@ -# Compliance suite for the GA4GH SeqCol API v1.0.0 +# Pytest wrapper for the GA4GH SeqCol compliance suite. # -# Endpoints tested: -# - GET /service-info -# - GET /collection/:digest (level 1 and level 2) -# - GET /comparison/:digest1/:digest2 -# - POST /comparison/:digest -# - GET /attribute/collection/:attr/:digest -# - GET /list/collection (with pagination and filtering) -# - GET /list/attributes/:attr +# The canonical compliance checks live in refget/compliance.py. +# This file parametrizes them for pytest execution. # -# Also validates: -# - Level 1 returns digest strings, level 2 returns arrays -# - Transient attributes (sorted_name_length_pairs) in level 1 only -# - Pagination structure (results + pagination fields) +# Run against an external server: +# pytest tests/api --api-root https://seqcolapi.databio.org # -# Tests fall into two categories: -# 1. Content tests (collection, comparison, attribute): compare full responses to known fixtures -# 2. 
Structure tests (service-info, list endpoints): validate response structure only, since values vary by server +# Run via integration test server: +# ./scripts/test-integration.sh -import json import pytest -import requests -import refget - -# Collection endpoints -from tests.api.conftest import ( - COLLECTION_TESTS, - COMPARISON_TESTS, - ATTRIBUTE_TESTS, - ATTRIBUTE_LIST_TESTS, +from refget.compliance import ( + DIGEST_TESTS, + COMPARISON_FIXTURES, + check_service_info, + check_list_collections, + check_list_attributes, + check_openapi_available, + check_collection_level1, + check_collection_level2, + check_default_level_returns_level2, + check_sorted_name_length_pairs, + check_attribute_retrieval, + check_transient_attribute_not_served, + check_list_filter_by_attribute, + check_list_multi_attribute_filter_and, + check_comparison, + check_comparison_structure, + check_comparison_same_order_values, + check_comparison_post, ) -from tests.conftest import DIGEST_TESTS - -demo_file = "demo0.fa" -response_file = "tests/demo0_collection.json" - -print("Testing Compliance") - - -def read_url(url): - import requests - import yaml - - try: - response = requests.get(url, timeout=1) - except requests.exceptions.ConnectionError: - print(f"Connection error: {url}") - raise e - data = response.content - return yaml.safe_load(data) - - -def check_collection(api_root, demo_file, response_file, data_root): - - # Need schema to make sure we eliminate inherent attributes correctly - # schema_path = "https://schema.databio.org/refget/SeqColArraySetInherent.yaml" - # schema = read_url(schema_path) - # inherent_attrs = schema["inherent"] - - inherent_attrs = ["names", "sequences"] - print(f"Loading fasta file at '{data_root}/{demo_file}'") - digest = refget.fasta_to_digest(f"{data_root}/{demo_file}", inherent_attrs=inherent_attrs) - print(f"Checking digest: {digest}") - res = requests.get(f"{api_root}/collection/{digest}") - - client = refget.SequenceCollectionClient(urls=[api_root]) - - srv_response = client.get_collection(digest, level=1) - print("Server response:", srv_response) - try: - server_answer = json.loads(res.content) - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - - with open(response_file) as fp: - correct_answer = json.load(fp) - - assert ( - server_answer["sequences"] == correct_answer["sequences"] - ), f"Collection endpoint failed: sequence mismatch for {demo_file}" - assert ( - server_answer["names"] == correct_answer["names"] - ), f"Collection endpoint failed: names mismatch for {demo_file}" - assert ( - server_answer["lengths"] == correct_answer["lengths"] - ), f"Collection endpoint failed: lengths mismatch for {demo_file}" - - -def check_comparison(api_root, response_file): - with open(response_file) as fp: - correct_answer = json.load(fp) - - url = ( - f"{api_root}/comparison/{correct_answer['digests']['a']}/{correct_answer['digests']['b']}" - ) - res = requests.get(url) - try: - server_answer = json.loads(res.content) - print("Server answer:", refget.canonical_str(server_answer)) - print("Correct answer:", refget.canonical_str(correct_answer)) - assert refget.canonical_str(server_answer) == refget.canonical_str( - correct_answer - ), f"Comparison endpoint failed: {url}. 
File: {response_file}" - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - assert False, f"Comparison endpoint failed: {url}" - - -def check_attribute(api_root, attribute_type, attribute, correct_value): - url = f"{api_root}/attribute/collection/{attribute_type}/{attribute}" - res = requests.get(url) - try: - server_answer = json.loads(res.content) - assert ( - server_answer == correct_value - ), f"Attribute endpoint failed: {url}. Answer: {correct_value}" - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - assert False, f"Attribute endpoint failed: {url}" - -def check_list_collections_by_attribute(api_root, attribute_type, attribute, response_file): - with open(response_file) as fp: - correct_answer = json.load(fp) - - url = f"{api_root}/list/collection?{attribute_type}={attribute}" - res = requests.get(url) - try: - server_answer = json.loads(res.content) - print("Server answer:", server_answer) - for digest in correct_answer["results"]: - print("Checking digest:", digest) - assert ( - digest in server_answer["results"] - ), f"Attribute endpoint failed: {url}. Missing: {digest}" - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - assert False, f"Attribute endpoint failed: {url}" - - -def check_service_info(api_root): - url = f"{api_root}/service-info" - res = requests.get(url) - try: - server_answer = json.loads(res.content) - # Check required GA4GH service-info fields exist - assert "id" in server_answer, "service-info missing 'id' field" - assert "type" in server_answer, "service-info missing 'type' field" - assert "group" in server_answer["type"], "service-info type missing 'group'" - assert "artifact" in server_answer["type"], "service-info type missing 'artifact'" - assert "version" in server_answer["type"], "service-info type missing 'version'" - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - assert False, f"Service-info endpoint failed: {url}" - - -def check_list_collections(api_root): - url = f"{api_root}/list/collection" - res = requests.get(url) - try: - server_answer = json.loads(res.content) - assert "results" in server_answer, "list/collection missing 'results' field" - assert isinstance( - server_answer["results"], list - ), "list/collection 'results' should be a list" - assert "pagination" in server_answer, "list/collection missing 'pagination' field" - assert "page" in server_answer["pagination"], "pagination missing 'page'" - assert "page_size" in server_answer["pagination"], "pagination missing 'page_size'" - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - assert False, f"List collections endpoint failed: {url}" - - -def check_list_attributes(api_root, attribute_name): - url = f"{api_root}/list/attributes/{attribute_name}" - res = requests.get(url) - try: - server_answer = json.loads(res.content) - assert ( - "results" in server_answer - ), f"list/attributes/{attribute_name} missing 'results' field" - assert isinstance( - server_answer["results"], list - ), f"list/attributes/{attribute_name} 'results' should be a list" - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - assert False, f"List attributes endpoint failed: {url}" +@pytest.mark.require_service +class TestAPI: + """GA4GH SeqCol compliance tests. 
Expects demo data loaded on the server.""" -def check_collection_structure(api_root, digest): - # Level 1: inherent attributes should be digest strings - level1 = requests.get(f"{api_root}/collection/{digest}?level=1").json() - for attr in ["names", "lengths", "sequences"]: - assert isinstance(level1[attr], str), f"Level 1 {attr} should be digest string" + # ---- Structure checks ---- - # Level 1 should include transient attribute - assert "sorted_name_length_pairs" in level1, "Level 1 missing sorted_name_length_pairs" + def test_service_info(self, api_root): + check_service_info(api_root) - # Level 2: inherent attributes should be arrays - level2 = requests.get(f"{api_root}/collection/{digest}?level=2").json() - for attr in ["names", "lengths", "sequences"]: - assert isinstance(level2[attr], list), f"Level 2 {attr} should be array" + def test_list_collections(self, api_root): + check_list_collections(api_root) - # Level 2 should NOT include transient attribute - assert ( - "sorted_name_length_pairs" not in level2 - ), "Level 2 should not have sorted_name_length_pairs" + @pytest.mark.parametrize("attribute_name", ["lengths", "names", "sequences"]) + def test_list_attributes(self, api_root, attribute_name): + check_list_attributes(api_root, attribute_name) + @pytest.mark.recommended + def test_openapi_available(self, api_root): + check_openapi_available(api_root) -def check_comparison_post(api_root, response_file, test_data_root): - with open(response_file) as fp: - correct_answer = json.load(fp) + # ---- Collection content checks ---- - # Get the local collection to POST - digest_b = correct_answer["digests"]["b"] - client = refget.SequenceCollectionClient(urls=[api_root]) - local_collection = client.get_collection(digest_b) + @pytest.mark.parametrize("fa_name, bundle", DIGEST_TESTS) + def test_collection_level1(self, api_root, fa_name, bundle): + check_collection_level1(api_root, fa_name, bundle) - # POST to compare with collection A on server - digest_a = correct_answer["digests"]["a"] - url = f"{api_root}/comparison/{digest_a}" - res = requests.post(url, json=local_collection) - try: - server_answer = json.loads(res.content) - # POST endpoint returns "POSTed seqcol" for digest b since it doesn't know the digest - # So we compare everything except the digests.b field - assert ( - server_answer["digests"]["a"] == correct_answer["digests"]["a"] - ), f"Comparison POST: digest a mismatch" - assert ( - server_answer["attributes"] == correct_answer["attributes"] - ), f"Comparison POST: attributes mismatch" - assert ( - server_answer["array_elements"] == correct_answer["array_elements"] - ), f"Comparison POST: array_elements mismatch" - except json.decoder.JSONDecodeError: - print(f"Url: {url}") - assert False, f"Comparison POST endpoint failed: {url}" + @pytest.mark.parametrize("fa_name, bundle", DIGEST_TESTS) + def test_collection_level2(self, api_root, fa_name, bundle): + check_collection_level2(api_root, fa_name, bundle) + @pytest.mark.parametrize("fa_name, bundle", DIGEST_TESTS) + def test_default_level_returns_level2(self, api_root, fa_name, bundle): + check_default_level_returns_level2(api_root, fa_name, bundle) -@pytest.mark.require_service -class TestAPI: - print("Testing Compliance") + @pytest.mark.parametrize("fa_name, bundle", DIGEST_TESTS) + def test_sorted_name_length_pairs(self, api_root, fa_name, bundle): + check_sorted_name_length_pairs(api_root, fa_name, bundle) - @pytest.mark.parametrize("test_values", COLLECTION_TESTS) - def test_collection_endpoint(self, api_root, 
test_values, test_data_root): - print("Testing collection endpoint") - check_collection(api_root, *test_values, test_data_root) + # ---- Attribute checks ---- - @pytest.mark.parametrize("response_file", COMPARISON_TESTS) - def test_comparison_endpoint(self, api_root, response_file): - print("Testing comparison endpoint") - check_comparison(api_root, response_file) + @pytest.mark.parametrize("fa_name, bundle", DIGEST_TESTS) + @pytest.mark.parametrize("attr_name", ["lengths", "names", "sequences"]) + def test_attribute_retrieval(self, api_root, fa_name, bundle, attr_name): + check_attribute_retrieval(api_root, fa_name, bundle, attr_name) - @pytest.mark.parametrize("test_values", ATTRIBUTE_TESTS) - def test_attribute_endpoint(self, api_root, test_values): - check_attribute(api_root, *test_values) + def test_transient_attribute_not_served(self, api_root): + check_transient_attribute_not_served(api_root) - @pytest.mark.parametrize("test_values", ATTRIBUTE_LIST_TESTS) - def test_attribute_list_endpoint(self, api_root, test_values): - check_list_collections_by_attribute(api_root, *test_values) + # ---- List/filter checks ---- - def test_service_info_endpoint(self, api_root): - check_service_info(api_root) + @pytest.mark.parametrize("attr_name", ["lengths", "names", "sequences"]) + def test_list_filter_by_attribute(self, api_root, attr_name): + fa_name, bundle = DIGEST_TESTS[0] + check_list_filter_by_attribute(api_root, fa_name, bundle, attr_name) - def test_list_collections_endpoint(self, api_root): - check_list_collections(api_root) + def test_multi_attribute_filter_and(self, api_root): + check_list_multi_attribute_filter_and(api_root) - @pytest.mark.parametrize("attribute_name", ["lengths", "names", "sequences"]) - def test_list_attributes_endpoint(self, api_root, attribute_name): - check_list_attributes(api_root, attribute_name) + # ---- Comparison checks ---- - @pytest.mark.parametrize("response_file", COMPARISON_TESTS) - def test_comparison_post_endpoint(self, api_root, response_file, test_data_root): - check_comparison_post(api_root, response_file, test_data_root) + def test_comparison_structure(self, api_root): + check_comparison_structure(api_root) - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_collection_structure(self, api_root, fa_file, fa_digest_bundle): - digest = fa_digest_bundle["top_level_digest"] - check_collection_structure(api_root, digest) + def test_comparison_same_order_values(self, api_root): + check_comparison_same_order_values(api_root) - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_collections(self, api_root, fa_file, fa_digest_bundle): - client = refget.SequenceCollectionClient(urls=[api_root]) - digest = fa_digest_bundle["top_level_digest"] - srv_response = client.get_collection(digest, level=1) - print("Server response:", srv_response) + @pytest.mark.parametrize( + "fixture_name, expected", + list(COMPARISON_FIXTURES.items()), + ids=list(COMPARISON_FIXTURES.keys()), + ) + def test_comparison(self, api_root, fixture_name, expected): + check_comparison(api_root, fixture_name, expected) - @pytest.mark.snlp - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_sorted_name_length_pairs(self, api_root, fa_file, fa_digest_bundle): - client = refget.SequenceCollectionClient(urls=[api_root]) - digest = fa_digest_bundle["top_level_digest"] - srv_response = client.get_collection(digest, level=1) - assert ( - srv_response["sorted_name_length_pairs"] - == 
fa_digest_bundle["sorted_name_length_pairs_digest"] - ), f"Collection endpoint failed: sorted_name_length_pairs mismatch for {demo_file}" + @pytest.mark.parametrize( + "fixture_name, expected", + list(COMPARISON_FIXTURES.items()), + ids=list(COMPARISON_FIXTURES.keys()), + ) + def test_comparison_post(self, api_root, fixture_name, expected): + check_comparison_post(api_root, fixture_name, expected) diff --git a/tests/conftest.py b/tests/conftest.py index 093814d..536bc8c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -64,11 +64,18 @@ def invoke(*args): TEST_DATA_DIR = Path(__file__).parent.parent / "test_fasta" BASE_FASTA = TEST_DATA_DIR / "base.fa" + + +@pytest.fixture(scope="session") +def test_data_root(): + """Provides the absolute path to the test_fasta directory.""" + return TEST_DATA_DIR DIFFERENT_NAMES_FASTA = TEST_DATA_DIR / "different_names.fa" DIFFERENT_ORDER_FASTA = TEST_DATA_DIR / "different_order.fa" PAIR_SWAP_FASTA = TEST_DATA_DIR / "pair_swap.fa" SUBSET_FASTA = TEST_DATA_DIR / "subset.fa" SWAP_WO_COORDS_FASTA = TEST_DATA_DIR / "swap_wo_coords.fa" +SAMPLE_FHR_JSON = TEST_DATA_DIR / "sample_fhr.json" # ============================================================ @@ -246,6 +253,8 @@ def pytest_configure(config): config.addinivalue_line("markers", "requires_network: mark test as requiring network access") config.addinivalue_line("markers", "requires_db: mark test as requiring database access") config.addinivalue_line("markers", "slow: mark test as slow running") + config.addinivalue_line("markers", "recommended: mark test as RECOMMENDED (not REQUIRED) by GA4GH spec") + config.addinivalue_line("markers", "require_service: mark test as requiring a running seqcol service") def pytest_collection_modifyitems(config, items): @@ -273,3 +282,13 @@ def pytest_collection_modifyitems(config, items): for item in items: if "requires_db" in item.keywords: item.add_marker(skip_db) + + # Skip require_service tests if no api_root or test_server available + api_root = config.getoption("api_root") + if api_root is None: + skip_service = pytest.mark.skip(reason="No --api-root provided and not running via integration test_server") + for item in items: + if "require_service" in item.keywords: + # Only skip if this is the base TestAPI class, not a subclass with test_server + if "TestAPI" in item.nodeid and "TestComplianceViaIntegration" not in item.nodeid: + item.add_marker(skip_service) diff --git a/tests/integration/test_compliance_integration.py b/tests/integration/test_compliance_integration.py deleted file mode 100644 index 76773d1..0000000 --- a/tests/integration/test_compliance_integration.py +++ /dev/null @@ -1,260 +0,0 @@ -""" -Compliance tests running against the integration test server. - -These tests verify the API responses match expected fixtures, -using the ephemeral Docker PostgreSQL + test server infrastructure. 
-""" - -import json -import pytest -import requests -from pathlib import Path - -from tests.conftest import DIGEST_TESTS - - -class TestComplianceStructure: - """Test response structure matches GA4GH spec.""" - - def test_service_info_structure(self, test_server): - """Service-info has required GA4GH fields""" - res = requests.get(f"{test_server}/service-info") - assert res.status_code == 200 - data = res.json() - # GA4GH service-info required fields - assert "id" in data - assert "type" in data - assert "group" in data["type"] - assert "artifact" in data["type"] - assert "version" in data["type"] - - def test_service_info_seqcol_schema(self, test_server): - """Service-info MUST include seqcol.schema (GA4GH spec requirement)""" - res = requests.get(f"{test_server}/service-info") - assert res.status_code == 200 - data = res.json() - # Spec: service-info MUST return the JSON Schema implemented by the server - assert "seqcol" in data, "service-info must have 'seqcol' section" - assert "schema" in data["seqcol"], "seqcol section must include 'schema'" - schema = data["seqcol"]["schema"] - # Schema should define the required attributes - assert "properties" in schema, "schema must have 'properties'" - assert "lengths" in schema["properties"], "schema must define 'lengths'" - assert "names" in schema["properties"], "schema must define 'names'" - assert "sequences" in schema["properties"], "schema must define 'sequences'" - - def test_list_collections_structure(self, test_server): - """List collections has pagination structure per GA4GH paging guide""" - res = requests.get(f"{test_server}/list/collection") - assert res.status_code == 200 - data = res.json() - assert "results" in data - assert isinstance(data["results"], list) - assert "pagination" in data - assert "page" in data["pagination"] - assert "page_size" in data["pagination"] - assert "total" in data["pagination"], "pagination must include 'total' per GA4GH spec" - - def test_list_collections_filter_by_attribute(self, test_server): - """List collections filtered by attribute digest (REQUIRED by spec)""" - # Use base.fa's names digest to filter - names_digest = DIGEST_TESTS[0][1]["level1"]["names"] - res = requests.get(f"{test_server}/list/collection?names={names_digest}") - assert res.status_code == 200 - data = res.json() - assert "results" in data - # Should return only collections with this exact names digest - # base.fa has this names digest - assert DIGEST_TESTS[0][1]["top_level_digest"] in data["results"] - - -class TestAttributeEndpoint: - """Test /attribute/collection/:attr/:digest endpoint (REQUIRED by spec).""" - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_attribute_lengths(self, test_server, fa_file, fa_digest_bundle): - """Retrieve lengths attribute by its digest""" - lengths_digest = fa_digest_bundle["level1"]["lengths"] - expected_lengths = fa_digest_bundle["level2"]["lengths"] - res = requests.get(f"{test_server}/attribute/collection/lengths/{lengths_digest}") - assert res.status_code == 200 - data = res.json() - assert data == expected_lengths - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_attribute_names(self, test_server, fa_file, fa_digest_bundle): - """Retrieve names attribute by its digest""" - names_digest = fa_digest_bundle["level1"]["names"] - expected_names = fa_digest_bundle["level2"]["names"] - res = requests.get(f"{test_server}/attribute/collection/names/{names_digest}") - assert res.status_code == 200 - data = res.json() - assert data == 
expected_names - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_attribute_sequences(self, test_server, fa_file, fa_digest_bundle): - """Retrieve sequences attribute by its digest""" - sequences_digest = fa_digest_bundle["level1"]["sequences"] - expected_sequences = fa_digest_bundle["level2"]["sequences"] - res = requests.get(f"{test_server}/attribute/collection/sequences/{sequences_digest}") - assert res.status_code == 200 - data = res.json() - assert data == expected_sequences - - def test_attribute_not_found(self, test_server): - """Non-existent attribute digest returns 404""" - res = requests.get(f"{test_server}/attribute/collection/names/nonexistent_digest_12345") - assert res.status_code == 404 - - -class TestCollectionLevels: - """Test collection level 1 vs level 2 response formats.""" - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_default_level_returns_level2(self, test_server, fa_file, fa_digest_bundle): - """Collection without ?level= param returns level 2 (spec default)""" - digest = fa_digest_bundle["top_level_digest"] - res = requests.get(f"{test_server}/collection/{digest}") - assert res.status_code == 200 - data = res.json() - # Level 2 returns arrays, not digest strings - for attr in ["names", "lengths", "sequences"]: - assert isinstance(data[attr], list), f"Default should return level 2 (arrays)" - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_level1_returns_digests(self, test_server, fa_file, fa_digest_bundle): - """Level 1 returns digest strings for attributes""" - digest = fa_digest_bundle["top_level_digest"] - res = requests.get(f"{test_server}/collection/{digest}?level=1") - assert res.status_code == 200 - data = res.json() - for attr in ["names", "lengths", "sequences"]: - assert isinstance(data[attr], str), f"Level 1 {attr} should be digest string" - # Transient attribute present in level 1 - assert "sorted_name_length_pairs" in data - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_level2_returns_arrays(self, test_server, fa_file, fa_digest_bundle): - """Level 2 returns arrays for attributes""" - digest = fa_digest_bundle["top_level_digest"] - res = requests.get(f"{test_server}/collection/{digest}?level=2") - assert res.status_code == 200 - data = res.json() - for attr in ["names", "lengths", "sequences"]: - assert isinstance(data[attr], list), f"Level 2 {attr} should be array" - # Transient attribute NOT in level 2 - assert "sorted_name_length_pairs" not in data - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_sorted_name_length_pairs_digest(self, test_server, fa_file, fa_digest_bundle): - """Level 1 sorted_name_length_pairs matches expected digest""" - digest = fa_digest_bundle["top_level_digest"] - res = requests.get(f"{test_server}/collection/{digest}?level=1") - assert res.status_code == 200 - data = res.json() - assert ( - data["sorted_name_length_pairs"] == fa_digest_bundle["sorted_name_length_pairs_digest"] - ) - - -class TestComparison: - """Test comparison endpoint responses.""" - - def test_compare_identical(self, test_server): - """Comparing collection to itself returns expected structure""" - # Use base.fa digest - digest = DIGEST_TESTS[0][1]["top_level_digest"] - res = requests.get(f"{test_server}/comparison/{digest}/{digest}") - assert res.status_code == 200 - data = res.json() - assert "digests" in data - assert data["digests"]["a"] == digest - assert data["digests"]["b"] == digest 
- assert "attributes" in data - assert "array_elements" in data - - def test_compare_different(self, test_server): - """Comparing different collections returns diff structure""" - digest_a = DIGEST_TESTS[0][1]["top_level_digest"] # base.fa - digest_b = DIGEST_TESTS[1][1]["top_level_digest"] # different_names.fa - res = requests.get(f"{test_server}/comparison/{digest_a}/{digest_b}") - assert res.status_code == 200 - data = res.json() - assert data["digests"]["a"] == digest_a - assert data["digests"]["b"] == digest_b - assert "a_and_b" in data["attributes"] - - def test_compare_full_structure(self, test_server): - """Comparison returns complete structure per spec""" - digest_a = DIGEST_TESTS[0][1]["top_level_digest"] # base.fa - digest_b = DIGEST_TESTS[1][1]["top_level_digest"] # different_names.fa - res = requests.get(f"{test_server}/comparison/{digest_a}/{digest_b}") - assert res.status_code == 200 - data = res.json() - # Verify digests structure - assert "digests" in data - assert "a" in data["digests"] - assert "b" in data["digests"] - # Verify attributes structure - assert "attributes" in data - assert "a_only" in data["attributes"] - assert "b_only" in data["attributes"] - assert "a_and_b" in data["attributes"] - # Verify array_elements structure - assert "array_elements" in data - assert "a_count" in data["array_elements"] - assert "b_count" in data["array_elements"] - assert "a_and_b_count" in data["array_elements"] - assert "a_and_b_same_order" in data["array_elements"] - - def test_compare_post_with_seqcol_body(self, test_server): - """POST comparison with local seqcol in body (RECOMMENDED by spec)""" - digest_a = DIGEST_TESTS[0][1]["top_level_digest"] # base.fa on server - # POST the level 2 representation of different_names.fa - seqcol_b = DIGEST_TESTS[1][1]["level2"] - res = requests.post( - f"{test_server}/comparison/{digest_a}", - json=seqcol_b, - ) - assert res.status_code == 200 - data = res.json() - assert "digests" in data - assert data["digests"]["a"] == digest_a - # b digest may be computed or null per spec - assert "attributes" in data - assert "array_elements" in data - - def test_compare_with_fixtures(self, test_server): - """Comparison results match fixture files""" - # Test base.fa vs different_names.fa comparison - with open("tests/api/comparison/compare_base.fa_different_names.fa.json") as f: - expected = json.load(f) - - res = requests.get( - f"{test_server}/comparison/{expected['digests']['a']}/{expected['digests']['b']}" - ) - assert res.status_code == 200 - data = res.json() - assert data["digests"] == expected["digests"] - assert data["attributes"] == expected["attributes"] - assert data["array_elements"] == expected["array_elements"] - - -class TestCollectionContent: - """Test collection content matches fixtures.""" - - @pytest.mark.parametrize("fa_file, fa_digest_bundle", DIGEST_TESTS) - def test_collection_content(self, test_server, fa_file, fa_digest_bundle): - """Collection arrays match expected values from digests file""" - digest = fa_digest_bundle["top_level_digest"] - expected = fa_digest_bundle["level2"] - res = requests.get(f"{test_server}/collection/{digest}?level=2") - assert res.status_code == 200 - data = res.json() - - # Verify lengths match - assert data["lengths"] == expected["lengths"] - # Verify names match - assert data["names"] == expected["names"] - # Verify sequence digests match - assert data["sequences"] == expected["sequences"] diff --git a/tests/integration/test_run_compliance.py b/tests/integration/test_run_compliance.py new file 
mode 100644 index 0000000..d240024 --- /dev/null +++ b/tests/integration/test_run_compliance.py @@ -0,0 +1,18 @@ +"""Run the standalone compliance suite against the integration test server.""" + +import pytest +from tests.api.test_compliance import TestAPI + + +@pytest.mark.require_service +class TestComplianceViaIntegration(TestAPI): + """Run compliance tests against integration test server. + + Inherits all tests from TestAPI but provides api_root from + the integration test_server fixture instead of --api-root CLI option. + """ + + @pytest.fixture + def api_root(self, test_server): + """Map test_server fixture to api_root for compliance tests.""" + return test_server diff --git a/tests/test_cli/test_store_commands.py b/tests/test_cli/test_store_commands.py index a1a4522..120a53d 100644 --- a/tests/test_cli/test_store_commands.py +++ b/tests/test_cli/test_store_commands.py @@ -13,6 +13,7 @@ BASE_FASTA, DIFFERENT_NAMES_FASTA, DIFFERENT_ORDER_FASTA, + SAMPLE_FHR_JSON, TEST_FASTA_DIGESTS, assert_json_output, ) @@ -480,3 +481,152 @@ def test_add_to_nonexistent_store(self, cli, tmp_path): result = cli("store", "add", str(BASE_FASTA), "--path", str(nonexistent)) assert result.exit_code != 0 + + +def _setup_store_with_fasta(cli, tmp_path): + """Initialize a store, add BASE_FASTA, and return (store_path, digest).""" + store_path = tmp_path / "store" + cli("store", "init", "--path", str(store_path)) + add_result = cli("store", "add", str(BASE_FASTA), "--path", str(store_path)) + digest = json.loads(add_result.stdout)["digest"] + return store_path, digest + + +class TestStoreMetadata: + """Tests for: refget store metadata / metadata-set""" + + def test_metadata_no_fhr_set(self, cli, tmp_path): + """Error when no FHR metadata exists for a collection.""" + store_path, digest = _setup_store_with_fasta(cli, tmp_path) + + result = cli("store", "metadata", digest, "--path", str(store_path)) + + assert result.exit_code != 0 + assert "No FHR metadata" in result.stdout + + def test_metadata_set_from_json_file(self, cli, tmp_path): + """Happy path: set FHR metadata from a JSON file.""" + store_path, digest = _setup_store_with_fasta(cli, tmp_path) + + result = cli( + "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), + "--path", str(store_path), + ) + + assert result.exit_code == 0 + assert "Set FHR metadata for collection" in result.stdout + + def test_metadata_read_after_set(self, cli, tmp_path): + """Round-trip: set metadata then read it back.""" + store_path, digest = _setup_store_with_fasta(cli, tmp_path) + + cli( + "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), + "--path", str(store_path), + ) + + result = cli("store", "metadata", digest, "--path", str(store_path)) + + assert result.exit_code == 0 + data = json.loads(result.stdout) + assert data["genome"] == "Test organism" + assert data["version"] == "v1.0" + assert data["masking"] == "soft-masked" + assert "test_v1" in data["genomeSynonym"] + + def test_metadata_output_is_valid_json(self, cli, tmp_path): + """Output is valid JSON with camelCase keys per FHR spec.""" + store_path, digest = _setup_store_with_fasta(cli, tmp_path) + + cli( + "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), + "--path", str(store_path), + ) + + result = cli("store", "metadata", digest, "--path", str(store_path)) + + assert result.exit_code == 0 + data = json.loads(result.stdout) + + # Verify camelCase keys from the FHR spec + assert "schemaVersion" in data + assert "genomeSynonym" in data + assert "dateCreated" in data + + # Verify no snake_case keys 
leaked through + raw = result.stdout + assert "schema_version" not in raw + assert "genome_synonym" not in raw + assert "date_created" not in raw + + def test_metadata_set_nonexistent_file(self, cli, tmp_path): + """Error when JSON file does not exist.""" + store_path, digest = _setup_store_with_fasta(cli, tmp_path) + + result = cli( + "store", "metadata-set", digest, "/nonexistent/fhr.json", + "--path", str(store_path), + ) + + assert result.exit_code != 0 + + def test_metadata_nonexistent_digest(self, cli, tmp_path): + """Error when reading metadata for a nonexistent digest.""" + store_path = tmp_path / "store" + cli("store", "init", "--path", str(store_path)) + + result = cli( + "store", "metadata", "nonexistent_digest_123", + "--path", str(store_path), + ) + + assert result.exit_code != 0 + + def test_metadata_set_then_overwrite(self, cli, tmp_path): + """Overwriting metadata replaces the previous values.""" + store_path, digest = _setup_store_with_fasta(cli, tmp_path) + + # Set original metadata + cli( + "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), + "--path", str(store_path), + ) + + # Create updated FHR JSON + updated_fhr = tmp_path / "updated_fhr.json" + updated_fhr.write_text(json.dumps({ + "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1.0, + "genome": "Updated organism", + "version": "v2.0", + })) + + # Overwrite + cli( + "store", "metadata-set", digest, str(updated_fhr), + "--path", str(store_path), + ) + + result = cli("store", "metadata", digest, "--path", str(store_path)) + + assert result.exit_code == 0 + data = json.loads(result.stdout) + assert data["genome"] == "Updated organism" + + def test_metadata_removed_with_collection(self, cli, tmp_path): + """Metadata sidecar is cleaned up when the collection is removed.""" + store_path, digest = _setup_store_with_fasta(cli, tmp_path) + + # Set metadata + cli( + "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), + "--path", str(store_path), + ) + + # Remove the collection + cli("store", "remove", digest, "--path", str(store_path)) + + # Metadata should be gone + result = cli("store", "metadata", digest, "--path", str(store_path)) + + assert result.exit_code != 0 diff --git a/tests/test_cli/test_store_pull.py b/tests/test_cli/test_store_pull.py new file mode 100644 index 0000000..c90cd72 --- /dev/null +++ b/tests/test_cli/test_store_pull.py @@ -0,0 +1,400 @@ +# tests/test_cli/test_store_pull.py + +"""Tests for refget store pull CLI command. + +Note: The HTTP server fixtures use subprocess instead of threading because +gtars' open_remote (Rust/PyO3) holds the GIL during HTTP requests, which +would deadlock a Python-thread-based HTTP server. 
+""" + +import json +import os +import signal +import socket +import subprocess +import sys +import time +from pathlib import Path + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from conftest import ( + BASE_FASTA, + DIFFERENT_NAMES_FASTA, +) + +# Skip entire module if gtars is not installed +pytest.importorskip("gtars") + + +def _find_free_port() -> int: + """Find a free port on localhost.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("", 0)) + return s.getsockname()[1] + + +def _start_http_server(directory: str, port: int) -> subprocess.Popen: + """Start an HTTP server as a subprocess serving the given directory.""" + proc = subprocess.Popen( + [sys.executable, "-m", "http.server", str(port), "--directory", directory], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + # Wait for server to be ready + max_wait = 5.0 + start_time = time.time() + while time.time() - start_time < max_wait: + try: + with socket.create_connection(("127.0.0.1", port), timeout=0.1): + break + except (ConnectionRefusedError, OSError): + time.sleep(0.1) + else: + proc.terminate() + raise RuntimeError(f"HTTP server failed to start on port {port}") + return proc + + +def _stop_http_server(proc: subprocess.Popen) -> None: + """Stop an HTTP server subprocess.""" + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + + +@pytest.fixture +def remote_store_server(cli, tmp_path): + """Set up a local store, serve it over HTTP, yield (url, digest, source_store_path).""" + source_store = tmp_path / "source_store" + cli("store", "init", "--path", str(source_store)) + add_result = cli("store", "add", str(BASE_FASTA), "--path", str(source_store)) + assert add_result.exit_code == 0, f"Failed to add FASTA: {add_result.stdout}" + digest = json.loads(add_result.stdout)["digest"] + + port = _find_free_port() + proc = _start_http_server(str(source_store), port) + + yield f"http://127.0.0.1:{port}", digest, source_store + + _stop_http_server(proc) + + +@pytest.fixture +def multi_remote_store_server(cli, tmp_path): + """Set up a local store with multiple FASTAs, serve over HTTP.""" + source_store = tmp_path / "multi_source_store" + cli("store", "init", "--path", str(source_store)) + + add_result1 = cli("store", "add", str(BASE_FASTA), "--path", str(source_store)) + assert add_result1.exit_code == 0 + digest1 = json.loads(add_result1.stdout)["digest"] + + add_result2 = cli("store", "add", str(DIFFERENT_NAMES_FASTA), "--path", str(source_store)) + assert add_result2.exit_code == 0 + digest2 = json.loads(add_result2.stdout)["digest"] + + port = _find_free_port() + proc = _start_http_server(str(source_store), port) + + yield f"http://127.0.0.1:{port}", digest1, digest2, source_store + + _stop_http_server(proc) + + +@pytest.fixture +def local_store(cli, tmp_path): + """Initialize an empty local store for pulling into.""" + store_path = tmp_path / "local_store" + result = cli("store", "init", "--path", str(store_path)) + assert result.exit_code == 0 + return store_path + + +class TestStorePullBasic: + """Core pull functionality tests.""" + + def test_pull_single_digest(self, cli, tmp_path, remote_store_server): + """Pull a known digest from the remote store server.""" + server_url, digest, _ = remote_store_server + local_store = tmp_path / "pull_store" + cli("store", "init", "--path", str(local_store)) + + result = cli("store", "pull", digest, "--server", server_url, "--path", str(local_store)) 
+ + assert result.exit_code == 0, f"Pull failed: {result.stdout}" + data = json.loads(result.stdout) + assert data["status"] == "pulled" + assert data["digest"] == digest + + def test_pull_creates_local_cache(self, cli, tmp_path, remote_store_server): + """After pulling, the .remote_cache directory is created.""" + server_url, digest, _ = remote_store_server + local_store = tmp_path / "cache_store" + cli("store", "init", "--path", str(local_store)) + + result = cli("store", "pull", digest, "--server", server_url, "--path", str(local_store)) + + assert result.exit_code == 0 + cache_dir = local_store / ".remote_cache" + assert cache_dir.exists() + + def test_pull_quiet_flag(self, cli, tmp_path, remote_store_server): + """Pull with --quiet suppresses progress output.""" + server_url, digest, _ = remote_store_server + local_store = tmp_path / "quiet_store" + cli("store", "init", "--path", str(local_store)) + + result = cli( + "store", "pull", digest, "--server", server_url, "--path", str(local_store), "--quiet" + ) + + assert result.exit_code == 0 + data = json.loads(result.stdout) + assert data["status"] == "pulled" + + +class TestStorePullEager: + """Eager sequence fetching tests.""" + + def test_pull_eager_fetches_sequences(self, cli, tmp_path, remote_store_server): + """Pull with --eager pre-fetches all sequences.""" + server_url, digest, _ = remote_store_server + local_store = tmp_path / "eager_store" + cli("store", "init", "--path", str(local_store)) + + result = cli( + "store", "pull", digest, "--server", server_url, + "--path", str(local_store), "--eager" + ) + + assert result.exit_code == 0, f"Eager pull failed: {result.stdout}" + data = json.loads(result.stdout) + assert data["eager"] is True + assert data["sequences_fetched"] > 0 + + def test_pull_default_is_lazy(self, cli, tmp_path, remote_store_server): + """Pull without --eager uses lazy mode.""" + server_url, digest, _ = remote_store_server + local_store = tmp_path / "lazy_store" + cli("store", "init", "--path", str(local_store)) + + result = cli("store", "pull", digest, "--server", server_url, "--path", str(local_store)) + + assert result.exit_code == 0 + data = json.loads(result.stdout) + assert data["eager"] is False + assert "sequences_fetched" not in data + + +class TestStorePullBatch: + """Batch pull via --file tests.""" + + def test_pull_from_file(self, cli, tmp_path, multi_remote_store_server): + """Pull multiple digests from a file.""" + server_url, digest1, digest2, _ = multi_remote_store_server + local_store = tmp_path / "batch_store" + cli("store", "init", "--path", str(local_store)) + + digest_file = tmp_path / "digests.txt" + digest_file.write_text(f"{digest1}\n{digest2}\n") + + result = cli( + "store", "pull", "--file", str(digest_file), + "--server", server_url, "--path", str(local_store) + ) + + assert result.exit_code == 0, f"Batch pull failed: {result.stdout}" + data = json.loads(result.stdout) + assert "results" in data + assert len(data["results"]) == 2 + + def test_pull_file_with_blank_lines(self, cli, tmp_path, remote_store_server): + """File with blank lines and whitespace is handled gracefully.""" + server_url, digest, _ = remote_store_server + local_store = tmp_path / "blank_store" + cli("store", "init", "--path", str(local_store)) + + digest_file = tmp_path / "digests_blanks.txt" + digest_file.write_text(f"\n \n{digest}\n\n \n") + + result = cli( + "store", "pull", "--file", str(digest_file), + "--server", server_url, "--path", str(local_store) + ) + + assert result.exit_code == 0 + data = 
json.loads(result.stdout) + # Single digest after stripping blanks, so no "results" wrapper + assert data["digest"] == digest + assert data["status"] == "pulled" + + def test_pull_file_not_found(self, cli, tmp_path): + """Passing a nonexistent file to --file returns error.""" + local_store = tmp_path / "nofile_store" + cli("store", "init", "--path", str(local_store)) + + result = cli( + "store", "pull", "--file", "/nonexistent/digests.txt", + "--server", "http://127.0.0.1:1", "--path", str(local_store) + ) + + assert result.exit_code != 0 + + def test_pull_empty_file(self, cli, tmp_path, remote_store_server): + """Empty file returns error about no digests.""" + server_url, _, _ = remote_store_server + local_store = tmp_path / "empty_file_store" + cli("store", "init", "--path", str(local_store)) + + digest_file = tmp_path / "empty.txt" + digest_file.write_text("") + + result = cli( + "store", "pull", "--file", str(digest_file), + "--server", server_url, "--path", str(local_store) + ) + + assert result.exit_code != 0 + + +class TestStorePullAlreadyLocal: + """Skip already-cached collections.""" + + def test_pull_already_local(self, cli, tmp_path, remote_store_server): + """Pulling a digest that exists locally returns already_local status.""" + server_url, digest, _ = remote_store_server + local_store = tmp_path / "already_store" + cli("store", "init", "--path", str(local_store)) + + # Add the same FASTA to local store + cli("store", "add", str(BASE_FASTA), "--path", str(local_store)) + + # Try to pull -- should detect it is already local + result = cli("store", "pull", digest, "--server", server_url, "--path", str(local_store)) + + assert result.exit_code == 0 + data = json.loads(result.stdout) + assert data["status"] == "already_local" + + +class TestStorePullErrors: + """Error case tests.""" + + def test_pull_nonexistent_digest(self, cli, tmp_path, remote_store_server): + """Pull a digest that does not exist on the remote.""" + server_url, _, _ = remote_store_server + local_store = tmp_path / "nonexist_store" + cli("store", "init", "--path", str(local_store)) + + result = cli( + "store", "pull", "NONEXISTENT_DIGEST_12345678901234", + "--server", server_url, "--path", str(local_store) + ) + + assert result.exit_code != 0 + data = json.loads(result.stdout) + assert data["status"] == "not_found" + + def test_pull_unreachable_server(self, cli, tmp_path): + """Pull from an unreachable URL returns error.""" + local_store = tmp_path / "unreach_store" + cli("store", "init", "--path", str(local_store)) + + result = cli( + "store", "pull", "some_digest_abc123", + "--server", "http://127.0.0.1:1", "--path", str(local_store) + ) + + assert result.exit_code != 0 + + def test_pull_no_digest_or_file(self, cli, tmp_path): + """Pull with neither digest nor --file returns error.""" + local_store = tmp_path / "noarg_store" + cli("store", "init", "--path", str(local_store)) + + result = cli( + "store", "pull", + "--server", "http://127.0.0.1:1", "--path", str(local_store) + ) + + assert result.exit_code != 0 + + def test_pull_both_digest_and_file(self, cli, tmp_path): + """Pull with both digest and --file returns error.""" + local_store = tmp_path / "both_store" + cli("store", "init", "--path", str(local_store)) + + digest_file = tmp_path / "digests.txt" + digest_file.write_text("some_digest\n") + + result = cli( + "store", "pull", "some_digest", + "--file", str(digest_file), + "--server", "http://127.0.0.1:1", "--path", str(local_store) + ) + + assert result.exit_code != 0 + + def 
test_pull_no_server_configured(self, cli, tmp_path, monkeypatch): + """Pull without --server and no configured remotes returns error.""" + local_store = tmp_path / "noserver_store" + cli("store", "init", "--path", str(local_store)) + + # Patch _find_remote_urls to return empty list + monkeypatch.setattr( + "refget.cli.store._find_remote_urls", + lambda server_override=None: [] + ) + + result = cli( + "store", "pull", "some_digest", + "--path", str(local_store) + ) + + assert result.exit_code != 0 + + +class TestStorePullMultipleRemotes: + """Fallback across multiple remotes.""" + + def test_pull_tries_next_remote_on_failure( + self, cli, tmp_path, remote_store_server, monkeypatch + ): + """When first remote lacks the digest, tries the next one.""" + server_url, digest, _ = remote_store_server + + # Set up an empty store served over HTTP (first remote) + empty_store = tmp_path / "empty_remote" + cli("store", "init", "--path", str(empty_store)) + + port = _find_free_port() + empty_proc = _start_http_server(str(empty_store), port) + empty_url = f"http://127.0.0.1:{port}" + + try: + local_store = tmp_path / "multi_remote_store" + cli("store", "init", "--path", str(local_store)) + + # Patch to return empty server first, then the populated one + monkeypatch.setattr( + "refget.cli.store._find_remote_urls", + lambda server_override=None: [empty_url, server_url] + ) + + result = cli("store", "pull", digest, "--path", str(local_store), "--quiet") + + assert result.exit_code == 0, f"Multi-remote pull failed: {result.stdout}" + # Extract JSON from output (error messages from failed remotes may precede it) + stdout = result.stdout + json_start = stdout.rfind("{") + assert json_start >= 0, f"No JSON found in output: {stdout}" + data = json.loads(stdout[json_start:]) + assert data["status"] == "pulled" + assert data["source"] == server_url + finally: + _stop_http_server(empty_proc) From 286fbabb388ac4a7ce4441443e00cf01cb64bc64 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 05:24:38 +0000 Subject: [PATCH 10/31] Bump rollup from 4.35.0 to 4.59.0 in /frontend Bumps [rollup](https://github.com/rollup/rollup) from 4.35.0 to 4.59.0. - [Release notes](https://github.com/rollup/rollup/releases) - [Changelog](https://github.com/rollup/rollup/blob/master/CHANGELOG.md) - [Commits](https://github.com/rollup/rollup/compare/v4.35.0...v4.59.0) --- updated-dependencies: - dependency-name: rollup dependency-version: 4.59.0 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- frontend/package-lock.json | 274 +++++++++++++++++++++++-------------- 1 file changed, 175 insertions(+), 99 deletions(-) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 7a78b74..ddfcfe8 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -981,9 +981,9 @@ } }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.35.0.tgz", - "integrity": "sha512-uYQ2WfPaqz5QtVgMxfN6NpLD+no0MYHDBywl7itPYd3K5TjjSghNKmX8ic9S8NU8w81NVhJv/XojcHptRly7qQ==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", + "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", "cpu": [ "arm" ], @@ -995,9 +995,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.35.0.tgz", - "integrity": "sha512-FtKddj9XZudurLhdJnBl9fl6BwCJ3ky8riCXjEw3/UIbjmIY58ppWwPEvU3fNu+W7FUsAsB1CdH+7EQE6CXAPA==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", + "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", "cpu": [ "arm64" ], @@ -1009,9 +1009,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.35.0.tgz", - "integrity": "sha512-Uk+GjOJR6CY844/q6r5DR/6lkPFOw0hjfOIzVx22THJXMxktXG6CbejseJFznU8vHcEBLpiXKY3/6xc+cBm65Q==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", + "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", "cpu": [ "arm64" ], @@ -1023,9 +1023,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.35.0.tgz", - "integrity": "sha512-3IrHjfAS6Vkp+5bISNQnPogRAW5GAV1n+bNCrDwXmfMHbPl5EhTmWtfmwlJxFRUCBZ+tZ/OxDyU08aF6NI/N5Q==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", + "integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", "cpu": [ "x64" ], @@ -1037,9 +1037,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.35.0.tgz", - "integrity": "sha512-sxjoD/6F9cDLSELuLNnY0fOrM9WA0KrM0vWm57XhrIMf5FGiN8D0l7fn+bpUeBSU7dCgPV2oX4zHAsAXyHFGcQ==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", + "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", "cpu": [ "arm64" ], @@ -1051,9 +1051,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.35.0.tgz", - "integrity": "sha512-2mpHCeRuD1u/2kruUiHSsnjWtHjqVbzhBkNVQ1aVD63CcexKVcQGwJ2g5VphOd84GvxfSvnnlEyBtQCE5hxVVw==", + "version": "4.59.0", 
+ "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", + "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", "cpu": [ "x64" ], @@ -1065,9 +1065,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.35.0.tgz", - "integrity": "sha512-mrA0v3QMy6ZSvEuLs0dMxcO2LnaCONs1Z73GUDBHWbY8tFFocM6yl7YyMu7rz4zS81NDSqhrUuolyZXGi8TEqg==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", + "integrity": "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", "cpu": [ "arm" ], @@ -1079,9 +1079,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.35.0.tgz", - "integrity": "sha512-DnYhhzcvTAKNexIql8pFajr0PiDGrIsBYPRvCKlA5ixSS3uwo/CWNZxB09jhIapEIg945KOzcYEAGGSmTSpk7A==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", + "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", "cpu": [ "arm" ], @@ -1093,9 +1093,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.35.0.tgz", - "integrity": "sha512-uagpnH2M2g2b5iLsCTZ35CL1FgyuzzJQ8L9VtlJ+FckBXroTwNOaD0z0/UF+k5K3aNQjbm8LIVpxykUOQt1m/A==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", + "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", "cpu": [ "arm64" ], @@ -1107,9 +1107,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.35.0.tgz", - "integrity": "sha512-XQxVOCd6VJeHQA/7YcqyV0/88N6ysSVzRjJ9I9UA/xXpEsjvAgDTgH3wQYz5bmr7SPtVK2TsP2fQ2N9L4ukoUg==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", + "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", "cpu": [ "arm64" ], @@ -1120,10 +1120,10 @@ "linux" ] }, - "node_modules/@rollup/rollup-linux-loongarch64-gnu": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.35.0.tgz", - "integrity": "sha512-5pMT5PzfgwcXEwOaSrqVsz/LvjDZt+vQ8RT/70yhPU06PTuq8WaHhfT1LW+cdD7mW6i/J5/XIkX/1tCAkh1W6g==", + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", + "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", "cpu": [ "loong64" ], @@ -1134,10 +1134,38 @@ "linux" ] }, - "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { - "version": "4.35.0", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.35.0.tgz", - "integrity": "sha512-c+zkcvbhbXF98f4CtEIP1EBA/lCic5xB0lToneZYvMeKu5Kamq3O8gqrxiYYLzlZH6E3Aq+TSW86E4ay8iD8EA==", + "node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", + "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", + "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", + "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", "cpu": [ "ppc64" ], @@ -1149,9 +1177,23 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.35.0.tgz", - "integrity": "sha512-s91fuAHdOwH/Tad2tzTtPX7UZyytHIRR6V4+2IGlV0Cej5rkG0R61SX4l4y9sh0JBibMiploZx3oHKPnQBKe4g==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", + "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", + "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", "cpu": [ "riscv64" ], @@ -1163,9 +1205,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.35.0.tgz", - "integrity": "sha512-hQRkPQPLYJZYGP+Hj4fR9dDBMIM7zrzJDWFEMPdTnTy95Ljnv0/4w/ixFw3pTBMEuuEuoqtBINYND4M7ujcuQw==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", + "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", "cpu": [ "s390x" ], @@ -1177,9 +1219,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.35.0.tgz", - "integrity": "sha512-Pim1T8rXOri+0HmV4CdKSGrqcBWX0d1HoPnQ0uw0bdp1aP5SdQVNBy8LjYncvnLgu3fnnCt17xjWGd4cqh8/hA==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", + "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", 
"cpu": [ "x64" ], @@ -1191,9 +1233,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.35.0.tgz", - "integrity": "sha512-QysqXzYiDvQWfUiTm8XmJNO2zm9yC9P/2Gkrwg2dH9cxotQzunBHYr6jk4SujCTqnfGxduOmQcI7c2ryuW8XVg==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", + "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", "cpu": [ "x64" ], @@ -1204,10 +1246,38 @@ "linux" ] }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", + "integrity": "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", + "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ] + }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.35.0.tgz", - "integrity": "sha512-OUOlGqPkVJCdJETKOCEf1mw848ZyJ5w50/rZ/3IBQVdLfR5jk/6Sr5m3iO2tdPgwo0x7VcncYuOvMhBWZq8ayg==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", + "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", "cpu": [ "arm64" ], @@ -1219,9 +1289,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.35.0.tgz", - "integrity": "sha512-2/lsgejMrtwQe44glq7AFFHLfJBPafpsTa6JvP2NGef/ifOa4KBoglVf7AKN7EV9o32evBPRqfg96fEHzWo5kw==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", + "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", "cpu": [ "ia32" ], @@ -1232,10 +1302,24 @@ "win32" ] }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", + "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.35.0.tgz", - "integrity": "sha512-PIQeY5XDkrOysbQblSW7v3l1MDZzkTEzAfTPkj5VAu3FW8fS4ynyLg2sINp0fp3SjZ8xkRYpLqoKcYqAkhU1dw==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", + "integrity": 
"sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", "cpu": [ "x64" ], @@ -1288,10 +1372,9 @@ } }, "node_modules/@types/estree": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", - "integrity": "sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==", - "dev": true, + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", "license": "MIT" }, "node_modules/@types/geojson": { @@ -4424,13 +4507,13 @@ "license": "Unlicense" }, "node_modules/rollup": { - "version": "4.35.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.35.0.tgz", - "integrity": "sha512-kg6oI4g+vc41vePJyO6dHt/yl0Rz3Thv0kJeVQ3D1kS3E5XSuKbPc29G4IpT/Kv1KQwgHVcN+HtyS+HYLNSvQg==", + "version": "4.59.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz", + "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", "dev": true, "license": "MIT", "dependencies": { - "@types/estree": "1.0.6" + "@types/estree": "1.0.8" }, "bin": { "rollup": "dist/bin/rollup" @@ -4440,25 +4523,31 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.35.0", - "@rollup/rollup-android-arm64": "4.35.0", - "@rollup/rollup-darwin-arm64": "4.35.0", - "@rollup/rollup-darwin-x64": "4.35.0", - "@rollup/rollup-freebsd-arm64": "4.35.0", - "@rollup/rollup-freebsd-x64": "4.35.0", - "@rollup/rollup-linux-arm-gnueabihf": "4.35.0", - "@rollup/rollup-linux-arm-musleabihf": "4.35.0", - "@rollup/rollup-linux-arm64-gnu": "4.35.0", - "@rollup/rollup-linux-arm64-musl": "4.35.0", - "@rollup/rollup-linux-loongarch64-gnu": "4.35.0", - "@rollup/rollup-linux-powerpc64le-gnu": "4.35.0", - "@rollup/rollup-linux-riscv64-gnu": "4.35.0", - "@rollup/rollup-linux-s390x-gnu": "4.35.0", - "@rollup/rollup-linux-x64-gnu": "4.35.0", - "@rollup/rollup-linux-x64-musl": "4.35.0", - "@rollup/rollup-win32-arm64-msvc": "4.35.0", - "@rollup/rollup-win32-ia32-msvc": "4.35.0", - "@rollup/rollup-win32-x64-msvc": "4.35.0", + "@rollup/rollup-android-arm-eabi": "4.59.0", + "@rollup/rollup-android-arm64": "4.59.0", + "@rollup/rollup-darwin-arm64": "4.59.0", + "@rollup/rollup-darwin-x64": "4.59.0", + "@rollup/rollup-freebsd-arm64": "4.59.0", + "@rollup/rollup-freebsd-x64": "4.59.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", + "@rollup/rollup-linux-arm-musleabihf": "4.59.0", + "@rollup/rollup-linux-arm64-gnu": "4.59.0", + "@rollup/rollup-linux-arm64-musl": "4.59.0", + "@rollup/rollup-linux-loong64-gnu": "4.59.0", + "@rollup/rollup-linux-loong64-musl": "4.59.0", + "@rollup/rollup-linux-ppc64-gnu": "4.59.0", + "@rollup/rollup-linux-ppc64-musl": "4.59.0", + "@rollup/rollup-linux-riscv64-gnu": "4.59.0", + "@rollup/rollup-linux-riscv64-musl": "4.59.0", + "@rollup/rollup-linux-s390x-gnu": "4.59.0", + "@rollup/rollup-linux-x64-gnu": "4.59.0", + "@rollup/rollup-linux-x64-musl": "4.59.0", + "@rollup/rollup-openbsd-x64": "4.59.0", + "@rollup/rollup-openharmony-arm64": "4.59.0", + "@rollup/rollup-win32-arm64-msvc": "4.59.0", + "@rollup/rollup-win32-ia32-msvc": "4.59.0", + "@rollup/rollup-win32-x64-gnu": "4.59.0", + "@rollup/rollup-win32-x64-msvc": "4.59.0", "fsevents": "~2.3.2" } }, @@ -5128,13 +5217,6 @@ "vega-util": "^2.1.0" } }, - "node_modules/vega-expression/node_modules/@types/estree": { - 
"version": "1.0.8", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", - "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "license": "MIT", - "peer": true - }, "node_modules/vega-force": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/vega-force/-/vega-force-5.1.0.tgz", @@ -5219,12 +5301,6 @@ "vega-util": "^2.1.0" } }, - "node_modules/vega-interpreter/node_modules/vega-util": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/vega-util/-/vega-util-2.1.0.tgz", - "integrity": "sha512-PGfp0m0QCufDmcxKJCWQy4Ov23FoF8DSXmoJwSezi3itQaa2hbxK0+xwsTMP2vy4PR16Pu25HMzgMwXVW1+33w==", - "license": "BSD-3-Clause" - }, "node_modules/vega-label": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/vega-label/-/vega-label-2.1.0.tgz", From 599cc5b62f3e9fb40c81d946909c2f96e19ef9fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 28 Feb 2026 07:51:10 +0000 Subject: [PATCH 11/31] Bump minimatch from 3.1.2 to 3.1.5 in /frontend Bumps [minimatch](https://github.com/isaacs/minimatch) from 3.1.2 to 3.1.5. - [Changelog](https://github.com/isaacs/minimatch/blob/main/changelog.md) - [Commits](https://github.com/isaacs/minimatch/compare/v3.1.2...v3.1.5) --- updated-dependencies: - dependency-name: minimatch dependency-version: 3.1.5 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- frontend/package-lock.json | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 7a78b74..e89c7a5 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -3868,10 +3868,11 @@ } }, "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", "dev": true, + "license": "ISC", "dependencies": { "brace-expansion": "^1.1.7" }, @@ -5219,12 +5220,6 @@ "vega-util": "^2.1.0" } }, - "node_modules/vega-interpreter/node_modules/vega-util": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/vega-util/-/vega-util-2.1.0.tgz", - "integrity": "sha512-PGfp0m0QCufDmcxKJCWQy4Ov23FoF8DSXmoJwSezi3itQaa2hbxK0+xwsTMP2vy4PR16Pu25HMzgMwXVW1+33w==", - "license": "BSD-3-Clause" - }, "node_modules/vega-label": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/vega-label/-/vega-label-2.1.0.tgz", From 3e83b3a35d295256852ea789f5e38c75c3f7b966 Mon Sep 17 00:00:00 2001 From: nsheff Date: Sat, 28 Feb 2026 08:48:02 -0500 Subject: [PATCH 12/31] clean up actions --- .github/dependabot.yml | 19 ++++++++ .github/workflows/claude-code-review.yml | 57 ++++-------------------- 2 files changed, 28 insertions(+), 48 deletions(-) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f46f5c9 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,19 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + target-branch: "dev" + + - package-ecosystem: "npm" + directory: "/frontend" + schedule: + interval: "weekly" + target-branch: "dev" + + 
- package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + target-branch: "dev" diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index 474e2ba..32c3840 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -1,30 +1,22 @@ name: Claude Code Review on: - pull_request: - types: [opened, ready_for_review] - # Optional: Only run on specific file changes - # paths: - # - "src/**/*.ts" - # - "src/**/*.tsx" - # - "src/**/*.js" - # - "src/**/*.jsx" + workflow_dispatch: + inputs: + pr_number: + description: 'PR number to review' + required: true + type: number jobs: claude-review: - # Optional: Filter by PR author - # if: | - # github.event.pull_request.user.login == 'external-contributor' || - # github.event.pull_request.user.login == 'new-developer' || - # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' - runs-on: ubuntu-latest permissions: contents: read pull-requests: read issues: read id-token: write - + steps: - name: Checkout repository uses: actions/checkout@v4 @@ -36,43 +28,12 @@ jobs: uses: anthropics/claude-code-action@beta with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} - - # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4) - # model: "claude-opus-4-20250514" - - # Direct prompt for automated review (no @claude mention needed) direct_prompt: | - Please review this pull request and provide feedback on: + Please review pull request #${{ inputs.pr_number }} and provide feedback on: - Code quality and best practices - Potential bugs or issues - Performance considerations - Security concerns - Test coverage - - Be constructive and helpful in your feedback. - - # Optional: Use sticky comments to make Claude reuse the same comment on subsequent pushes to the same PR - # use_sticky_comment: true - - # Optional: Customize review based on file types - # direct_prompt: | - # Review this PR focusing on: - # - For TypeScript files: Type safety and proper interface usage - # - For API endpoints: Security, input validation, and error handling - # - For React components: Performance, accessibility, and best practices - # - For tests: Coverage, edge cases, and test quality - - # Optional: Different prompts for different authors - # direct_prompt: | - # ${{ github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' && - # 'Welcome! Please review this PR from a first-time contributor. Be encouraging and provide detailed explanations for any suggestions.' || - # 'Please provide a thorough code review focusing on our coding standards and best practices.' }} - - # Optional: Add specific tools for running tests or linting - # allowed_tools: "Bash(npm run test),Bash(npm run lint),Bash(npm run typecheck)" - - # Optional: Skip review for certain conditions - # if: | - # !contains(github.event.pull_request.title, '[skip-review]') && - # !contains(github.event.pull_request.title, '[WIP]') + Be constructive and helpful in your feedback. 
From c15333511401b095968d97d07ed135e2a7b10390 Mon Sep 17 00:00:00 2001 From: nsheff Date: Sat, 28 Feb 2026 08:49:12 -0500 Subject: [PATCH 13/31] first pass at an r pkg --- refget-r/NAMESPACE | 1 + refget-r/R/RefgetGenome-class.R | 4 +- refget-r/install_and_test.sh | 41 ++ refget-r/tests/testthat/test-RefgetGenome.R | 4 +- refget-r/tests/testthat/test-constructors.R | 100 +++++ refget-r/tests/testthat/test-edge-cases.R | 291 +++++++++++++++ refget-r/vignettes/getting-started.Rmd | 185 +++++++++ refget-r/vignettes/reference.Rmd | 393 ++++++++++++++++++++ 8 files changed, 1015 insertions(+), 4 deletions(-) create mode 100755 refget-r/install_and_test.sh create mode 100644 refget-r/tests/testthat/test-constructors.R create mode 100644 refget-r/tests/testthat/test-edge-cases.R create mode 100644 refget-r/vignettes/getting-started.Rmd create mode 100644 refget-r/vignettes/reference.Rmd diff --git a/refget-r/NAMESPACE b/refget-r/NAMESPACE index 349f1c9..0c172ce 100644 --- a/refget-r/NAMESPACE +++ b/refget-r/NAMESPACE @@ -37,5 +37,6 @@ exportClasses(RefgetGenome) # Imports import(methods) importFrom(GenomeInfoDb, Seqinfo) +importFrom(GenomeInfoDb, seqinfo) importFrom(GenomeInfoDb, seqnames) importFrom(GenomeInfoDb, seqlengths) diff --git a/refget-r/R/RefgetGenome-class.R b/refget-r/R/RefgetGenome-class.R index 3118f24..2a45560 100644 --- a/refget-r/R/RefgetGenome-class.R +++ b/refget-r/R/RefgetGenome-class.R @@ -115,8 +115,8 @@ RefgetGenome.from_directory <- function(path, digest = NULL, namespace = NULL, a #' @export RefgetGenome.from_fasta <- function(fasta_path) { store <- gtars::refget_store() - digest <- gtars::add_fasta(store, fasta_path) - RefgetGenome(store, digest = digest) + result <- gtars::add_fasta(store, fasta_path) + RefgetGenome(store, digest = result$digest) } #' Create RefgetGenome from a remote store diff --git a/refget-r/install_and_test.sh b/refget-r/install_and_test.sh new file mode 100755 index 0000000..a3e3179 --- /dev/null +++ b/refget-r/install_and_test.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Install and test BiocRefgetStore +# Usage: bash install_and_test.sh [install|test|both] +# Default: both + +set -e + +PKG_DIR="$(cd "$(dirname "$0")" && pwd)" +ACTION="${1:-both}" + +R_CMD="bulker exec databio/nsheff -- R" +RSCRIPT_CMD="bulker exec databio/nsheff -- Rscript" + +install_pkg() { + echo "=== Installing BiocRefgetStore ===" + $R_CMD CMD INSTALL --no-multiarch "$PKG_DIR" + echo "=== Installation complete ===" +} + +run_tests() { + echo "=== Running tests ===" + $RSCRIPT_CMD -e "testthat::test_local('$PKG_DIR')" + echo "=== Tests complete ===" +} + +case "$ACTION" in + install) + install_pkg + ;; + test) + run_tests + ;; + both) + install_pkg + run_tests + ;; + *) + echo "Usage: bash install_and_test.sh [install|test|both]" + exit 1 + ;; +esac diff --git a/refget-r/tests/testthat/test-RefgetGenome.R b/refget-r/tests/testthat/test-RefgetGenome.R index 1dc7b83..aceeb6c 100644 --- a/refget-r/tests/testthat/test-RefgetGenome.R +++ b/refget-r/tests/testthat/test-RefgetGenome.R @@ -107,10 +107,10 @@ test_that("RefgetGenome from_directory works", { writeLines(c(">seq1", "ACGT"), fasta_file) on.exit(unlink(fasta_file), add = TRUE) - digest <- gtars::add_fasta(store, fasta_file) + result <- gtars::add_fasta(store, fasta_file) # Load from directory - genome <- RefgetGenome.from_directory(store_dir, digest = digest) + genome <- RefgetGenome.from_directory(store_dir, digest = result$digest) expect_s4_class(genome, "RefgetGenome") expect_equal(names(genome), "seq1") }) diff --git 
a/refget-r/tests/testthat/test-constructors.R b/refget-r/tests/testthat/test-constructors.R new file mode 100644 index 0000000..227f02b --- /dev/null +++ b/refget-r/tests/testthat/test-constructors.R @@ -0,0 +1,100 @@ +# Test constructor edge cases + +test_that("RefgetGenome with invalid digest errors", { + skip_if_not_installed("gtars") + + store <- gtars::refget_store() + + # A digest that doesn't exist in the store + expect_error( + RefgetGenome(store, digest = "nonexistent_digest_abc123"), + "not found" + ) +}) + +test_that("RefgetGenome.from_fasta with nonexistent file errors", { + skip_if_not_installed("gtars") + + expect_error( + RefgetGenome.from_fasta("/tmp/does_not_exist_xyz.fa") + ) +}) + +test_that("RefgetGenome.from_directory with nonexistent path errors", { + skip_if_not_installed("gtars") + + expect_error( + RefgetGenome.from_directory("/tmp/no_such_store_dir_xyz", digest = "abc") + ) +}) + +test_that("RefgetGenome with only namespace (missing alias) errors", { + skip_if_not_installed("gtars") + + store <- gtars::refget_store() + + expect_error( + RefgetGenome(store, namespace = "refseq"), + "Must provide either" + ) +}) + +test_that("RefgetGenome with only alias (missing namespace) errors", { + skip_if_not_installed("gtars") + + store <- gtars::refget_store() + + expect_error( + RefgetGenome(store, alias = "hg38"), + "Must provide either" + ) +}) + +test_that("RefgetGenome with neither digest nor alias errors", { + skip_if_not_installed("gtars") + + store <- gtars::refget_store() + + expect_error( + RefgetGenome(store), + "Must provide either" + ) +}) + +test_that("RefgetGenome.from_fasta returns correct class", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">seq1", "ACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + expect_s4_class(genome, "RefgetGenome") + + # Verify the digest was set + d <- collection_digest(genome) + expect_type(d, "character") + expect_true(nchar(d) > 0) +}) + +test_that("RefgetGenome.from_directory roundtrip works", { + skip_if_not_installed("gtars") + + # Create on-disk store and add FASTA + store_dir <- tempfile() + dir.create(store_dir) + on.exit(unlink(store_dir, recursive = TRUE)) + + store <- gtars::refget_store_on_disk(store_dir) + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "AAAA", ">chr2", "CCCC"), fasta_file) + on.exit(unlink(fasta_file), add = TRUE) + + result <- gtars::add_fasta(store, fasta_file) + + # Reload from directory + genome <- RefgetGenome.from_directory(store_dir, digest = result$digest) + expect_s4_class(genome, "RefgetGenome") + expect_equal(length(genome), 2) + expect_equal(sort(names(genome)), c("chr1", "chr2")) +}) diff --git a/refget-r/tests/testthat/test-edge-cases.R b/refget-r/tests/testthat/test-edge-cases.R new file mode 100644 index 0000000..31a3f37 --- /dev/null +++ b/refget-r/tests/testthat/test-edge-cases.R @@ -0,0 +1,291 @@ +# Test edge cases and gaps in current coverage + +# -- Single-sequence FASTA ------------------------------------------------ + +test_that("RefgetGenome works with single-sequence FASTA", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">only_seq", "ACGTACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + expect_equal(length(genome), 1) + expect_equal(names(genome), "only_seq") + expect_equal(seqlengths(genome)[["only_seq"]], 8) +}) + +# -- Sequences with N/ambiguous bases 
------------------------------------- + +test_that("getSeq handles sequences with N bases", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chrN", "ACNNNGTACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + seq <- getSeq(genome, "chrN", as.character = TRUE) + expect_equal(seq, "ACNNNGTACGT") + + # Substring containing Ns + sub <- getSeq(genome, "chrN", start = 2, end = 6, as.character = TRUE) + expect_equal(sub, "CNNNG") +}) + +# -- Partial coordinates (only start or only end) ------------------------- + +test_that("getSeq with only start=NA returns full sequence", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGTACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + # Both NA -> full sequence + seq <- getSeq(genome, "chr1", start = NA, end = NA, as.character = TRUE) + expect_equal(seq, "ACGTACGT") +}) + +# -- Coordinate boundary conditions --------------------------------------- + +test_that("getSeq works at sequence boundaries", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGTACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + # start=1, end=seqlength (full range) + full <- getSeq(genome, "chr1", start = 1, end = 8, as.character = TRUE) + expect_equal(full, "ACGTACGT") + + # First base only + first <- getSeq(genome, "chr1", start = 1, end = 1, as.character = TRUE) + expect_equal(first, "A") + + # Last base only + last <- getSeq(genome, "chr1", start = 8, end = 8, as.character = TRUE) + expect_equal(last, "T") +}) + +# -- extractRegions with single region ------------------------------------ + +test_that("extractRegions works with a single-row data.frame", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGTACGTACGTACGTACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + regions <- data.frame( + chrom = "chr1", + start = 1, + end = 4, + stringsAsFactors = FALSE + ) + seqs <- extractRegions(genome, regions, as.character = TRUE) + + expect_length(seqs, 1) + expect_equal(names(seqs), "chr1:1-4") +}) + +# -- extractRegions error on missing columns ------------------------------ + +test_that("extractRegions errors on missing required columns", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + # Missing 'end' column + bad_df <- data.frame(chrom = "chr1", start = 1) + expect_error(extractRegions(genome, bad_df), "must have columns") + + # Wrong column names + bad_df2 <- data.frame(chromosome = "chr1", begin = 1, finish = 4) + expect_error(extractRegions(genome, bad_df2), "must have columns") +}) + +# -- exportChromosomes with nonexistent name ------------------------------ + +test_that("exportChromosomes with nonexistent chromosome", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + output <- tempfile(fileext = ".fa") + on.exit(unlink(output), add = TRUE) + + # Requesting a nonexistent chromosome should error or produce empty output + 
expect_error(exportChromosomes(genome, names = "chrX", output_path = output)) +}) + +# -- show() method -------------------------------------------------------- + +test_that("show() produces expected output", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c( + ">chr1", "ACGTACGT", + ">chr2", "GGCCGGCC" + ), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + out <- capture.output(show(genome)) + expect_true(any(grepl("RefgetGenome with 2 sequences", out))) + expect_true(any(grepl("collection_digest:", out))) + expect_true(any(grepl("seqnames:", out))) +}) + +test_that("show() truncates when >5 sequences", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + lines <- unlist(lapply(paste0("seq", 1:7), function(nm) { + c(paste0(">", nm), "ACGT") + })) + writeLines(lines, fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + out <- capture.output(show(genome)) + expect_true(any(grepl("more\\)", out))) +}) + +# -- length(), names(), seqnames() on multi-sequence genome --------------- + +test_that("length, names, seqnames work on multi-sequence genome", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c( + ">chr1", "AAAA", + ">chr2", "CCCC", + ">chr3", "GGGG" + ), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + expect_equal(length(genome), 3) + expect_type(names(genome), "character") + expect_length(names(genome), 3) + + sn <- seqnames(genome) + expect_length(sn, 3) + expect_true(all(c("chr1", "chr2", "chr3") %in% as.character(sn))) +}) + +# -- coordinate_system() returns a string --------------------------------- + +test_that("coordinate_system returns a character string", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGTACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + cs <- coordinate_system(genome) + expect_type(cs, "character") + expect_length(cs, 1) + expect_true(nchar(cs) > 0) +}) + +# -- store() returns the underlying RefgetStore --------------------------- + +test_that("store() returns the underlying RefgetStore", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + s <- store(genome) + expect_false(is.null(s)) + # The store should be usable with gtars functions + expect_true(inherits(s, "RefgetStore") || is(s, "RefgetStore")) +}) + +# -- getSeq as.character flag with Biostrings available ------------------- + +test_that("getSeq as.character=TRUE returns character even with Biostrings", { + skip_if_not_installed("gtars") + skip_if_not_installed("Biostrings") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c(">chr1", "ACGTACGT"), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + # as.character=TRUE should force character output + seq <- getSeq(genome, "chr1", as.character = TRUE) + expect_type(seq, "character") + expect_equal(seq, "ACGTACGT") + + # as.character=FALSE should return DNAString + seq2 <- getSeq(genome, "chr1", as.character = FALSE) + expect_s4_class(seq2, "DNAString") +}) + +test_that("getSeq vectorized as.character=TRUE returns character vector", { + skip_if_not_installed("gtars") + 
skip_if_not_installed("Biostrings") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c( + ">chr1", "ACGTACGT", + ">chr2", "GGCCGGCC" + ), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + seqs <- getSeq(genome, c("chr1", "chr2"), as.character = TRUE) + expect_type(seqs, "character") + expect_length(seqs, 2) + + seqs2 <- getSeq(genome, c("chr1", "chr2"), as.character = FALSE) + expect_s4_class(seqs2, "DNAStringSet") +}) + +# -- seqinfo returns Seqinfo object --------------------------------------- + +test_that("seqinfo returns a Seqinfo object", { + skip_if_not_installed("gtars") + + fasta_file <- tempfile(fileext = ".fa") + writeLines(c( + ">chr1", "ACGTACGT", + ">chr2", "GGCC" + ), fasta_file) + on.exit(unlink(fasta_file)) + + genome <- RefgetGenome.from_fasta(fasta_file) + + si <- seqinfo(genome) + expect_s4_class(si, "Seqinfo") + expect_true("chr1" %in% GenomeInfoDb::seqnames(si)) +}) diff --git a/refget-r/vignettes/getting-started.Rmd b/refget-r/vignettes/getting-started.Rmd new file mode 100644 index 0000000..bdea166 --- /dev/null +++ b/refget-r/vignettes/getting-started.Rmd @@ -0,0 +1,185 @@ +--- +title: "Getting Started with BiocRefgetStore" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Getting Started with BiocRefgetStore} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +BiocRefgetStore provides a BSgenome-compatible interface to reference genomes +backed by GA4GH refget stores. Instead of managing FASTA files, you connect +to a refget store (local or remote) and access sequences by digest. + +This tutorial uses the **2023 Human Pangenome Reference** -- a remote refget +store containing 47 haplotype-resolved assemblies hosted on S3. Sequences are +downloaded on-demand and cached locally, so you don't need to download the +entire dataset upfront. + +## Installation + +BiocRefgetStore depends on `gtars`, a Rust-backed R package. Because `gtars` +lives in a Rust monorepo with sibling crates, it must be installed from a local +clone of the full repository (not directly from GitHub): + +```{r install, eval=FALSE} +# 1. Clone the gtars monorepo (if you haven't already) +# git clone https://github.com/databio/gtars.git + +# 2. Install gtars from the local clone (requires Rust toolchain) +install.packages("path/to/gtars/gtars-r", repos = NULL, type = "source") + +# 3. Install BiocRefgetStore +remotes::install_github("refgenie/refget", subdir = "refget-r", ref = "r") +# Or from local source: +# install.packages("path/to/refget/refget-r", repos = NULL, type = "source") +``` + +`gtars` is not on CRAN and `remotes::install_github()` won't work for it +because the R package depends on sibling Rust crates via relative paths. +You need the full monorepo checkout so those paths resolve correctly. + +## Connect to a remote pangenome store + +Load a genome from the Human Pangenome Reference store. 
The store metadata +(~1.5 MB) is fetched on first use; individual sequences are downloaded +on-demand and cached locally: + +```{r remote-store, eval=FALSE} +library(BiocRefgetStore) + +# 2023 Human Pangenome Reference (47 haplotype-resolved assemblies) +pangenome_url <- "https://refgenie.s3.us-east-1.amazonaws.com/pangenome_refget_store" + +# One assembly from the pangenome (HG03540.pri.mat.f1_v2) +genome <- RefgetGenome.from_remote( + cache_path = "~/.cache/refget/pangenome", + remote_url = pangenome_url, + digest = "0aHV7I-94paL9Z1H4LNlqsW3WxJhlou5" +) +genome +#> RefgetGenome with 750 sequences +#> collection_digest: 0aHV7I-94paL9Z1H4LNlqsW3WxJhlou5 +#> seqnames: JAGYVX010000001.1, JAGYVX010000002.1, ... (745 more) +``` + +Subsequent calls reuse the local cache -- no re-downloading. + +## Basic sequence access + +Extract a full sequence or a region by coordinates: + +```{r getseq, eval=FALSE} +# Full sequence (returns DNAString if Biostrings is installed) +seq <- genome[["JAGYVX010000001.1"]] + +# Region by coordinates (1-based, inclusive) +region <- getSeq(genome, "JAGYVX010000001.1", start = 1000, end = 2000) + +# Force character output +region_chr <- getSeq(genome, "JAGYVX010000001.1", start = 1000, end = 2000, + as.character = TRUE) +``` + +Negative-strand extraction applies the reverse complement: + +```{r strand, eval=FALSE} +rc <- getSeq(genome, "JAGYVX010000001.1", start = 1000, end = 2000, strand = "-") +``` + +## Multiple regions at once + +Pass vectors of names, starts, and ends: + +```{r vectorized, eval=FALSE} +seqs <- getSeq( + genome, + names = c("JAGYVX010000001.1", "JAGYVX010000002.1", "JAGYVX010000003.1"), + start = c(100, 200, 300), + end = c(199, 299, 399) +) +``` + +If you have a GRanges object, pass it directly: + +```{r granges, eval=FALSE} +library(GenomicRanges) +gr <- GRanges(c("JAGYVX010000001.1:100-199:+", "JAGYVX010000002.1:200-299:-")) +seqs <- getSeq(genome, gr) +``` + +## Bulk extraction from a data.frame + +`extractRegions` accepts a data.frame with `chrom`, `start`, `end` columns: + +```{r extract-regions, eval=FALSE} +regions <- data.frame( + chrom = c("JAGYVX010000001.1", "JAGYVX010000001.1", "JAGYVX010000002.1"), + start = c(100, 5000, 200), + end = c(199, 5099, 299) +) +seqs <- extractRegions(genome, regions, as.character = TRUE) +``` + +## Export sequences to FASTA + +Write extracted regions or full sequences to a FASTA file: + +```{r export, eval=FALSE} +# Regions to FASTA +extractToFasta(genome, regions, "extracted_regions.fa") + +# Specific sequences +exportChromosomes(genome, c("JAGYVX010000001.1", "JAGYVX010000002.1"), "subset.fa") + +# All sequences +exportChromosomes(genome, output_path = "full_assembly.fa") +``` + +## Genome metadata + +Inspect sequences and their properties: + +```{r metadata, eval=FALSE} +seqnames(genome) # sequence names +seqlengths(genome) # named integer vector of lengths +seqinfo(genome) # full Seqinfo object +length(genome) # number of sequences +collection_digest(genome) # seqcol digest +coordinate_system(genome) # sorted_name_length_pairs digest +sequence_digests(genome) # per-sequence SHA512t24u digests +``` + +## Working with local FASTA files + +You can also create a genome directly from a local FASTA file. 
This builds +an in-memory refget store, computes sequence digests, and creates a `Seqinfo` +object automatically: + +```{r from-fasta, eval=FALSE} +genome <- RefgetGenome.from_fasta("genome.fa") +genome +``` + +## Persistent on-disk store + +For large genomes you access repeatedly, use an on-disk store so sequences +are indexed once and reused across sessions: + +```{r on-disk, eval=FALSE} +# First time: create the store from FASTA +store <- gtars::refget_store_on_disk("~/.local/share/refget/hg38") +result <- gtars::add_fasta(store, "hg38.fa") +# Save the digest: result$digest + +# Later: reload without re-parsing +genome <- RefgetGenome.from_directory( + "~/.local/share/refget/hg38", + digest = "saved_digest_string" +) +``` + +## Next steps + +See the [Reference](reference.html) vignette for complete documentation of +every function and method in the package. diff --git a/refget-r/vignettes/reference.Rmd b/refget-r/vignettes/reference.Rmd new file mode 100644 index 0000000..c6238c4 --- /dev/null +++ b/refget-r/vignettes/reference.Rmd @@ -0,0 +1,393 @@ +--- +title: "BiocRefgetStore Reference" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{BiocRefgetStore Reference} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +Complete reference for every exported function, method, and class in +BiocRefgetStore. + +## Constructors + +### RefgetGenome.from_fasta + +```r +RefgetGenome.from_fasta(fasta_path) +``` + +Create a `RefgetGenome` from a FASTA file. Builds an in-memory refget store, +computes digests, and indexes all sequences. + +- **fasta_path** — Path to a FASTA file (`.fa`, `.fasta`, `.fa.gz`). +- **Returns** — A `RefgetGenome` object. + +```{r from-fasta, eval=FALSE} +genome <- RefgetGenome.from_fasta("hg38.fa") +``` + +### RefgetGenome.from_directory + +```r +RefgetGenome.from_directory(path, digest = NULL, namespace = NULL, alias = NULL) +``` + +Load a `RefgetGenome` from a persisted on-disk refget store directory. + +- **path** — Path to a directory created by `gtars::refget_store_on_disk()`. +- **digest** — Collection digest string (provide this OR namespace + alias). +- **namespace** — Alias namespace (e.g., `"refseq"`). +- **alias** — Alias name (e.g., `"GRCh38"`). +- **Returns** — A `RefgetGenome` object. + +```{r from-directory, eval=FALSE} +genome <- RefgetGenome.from_directory("~/.refget/hg38", digest = "abc123...") +``` + +### RefgetGenome.from_remote + +```r +RefgetGenome.from_remote(cache_path, remote_url, digest = NULL, namespace = NULL, alias = NULL) +``` + +Create a `RefgetGenome` backed by a remote refget store with local caching. + +- **cache_path** — Local directory for caching downloaded data. +- **remote_url** — URL of the remote refget store. +- **digest** / **namespace** / **alias** — Same as `RefgetGenome.from_directory`. +- **Returns** — A `RefgetGenome` object. + +```{r from-remote, eval=FALSE} +genome <- RefgetGenome.from_remote( + cache_path = "~/.cache/refget", + remote_url = "https://refget.databio.org/store", + namespace = "refseq", alias = "GRCh38" +) +``` + +### RefgetGenome (low-level) + +```r +RefgetGenome(store, digest = NULL, namespace = NULL, alias = NULL) +``` + +Construct a `RefgetGenome` from an existing `gtars::RefgetStore` object. +Requires either `digest` or both `namespace` and `alias`. + +- **store** — A gtars `RefgetStore` object. +- **digest** / **namespace** / **alias** — Collection identifier. +- **Returns** — A `RefgetGenome` object. 
+ +```{r constructor, eval=FALSE} +store <- gtars::refget_store_open_local("/path/to/store") +genome <- RefgetGenome(store, namespace = "refseq", alias = "GRCh38") +``` + +--- + +## Sequence Access + +### getSeq + +```r +getSeq(x, names, start = NA, end = NA, strand = "+", as.character = FALSE, ...) +``` + +Extract sequences from a `RefgetGenome`. BSgenome-compatible interface. + +- **x** — A `RefgetGenome` object. +- **names** — Character vector of sequence names, or a `GRanges` object. +- **start** — Integer start position(s), 1-based inclusive. `NA` for full sequence. +- **end** — Integer end position(s), 1-based inclusive. `NA` for full sequence. +- **strand** — `"+"` (default) or `"-"` for reverse complement. +- **as.character** — If `TRUE`, return character instead of DNAString/DNAStringSet. +- **Returns** — Single sequence: `DNAString` (or character). Multiple: `DNAStringSet` (or character vector). Named as `"seqname:start-end"` for regions. + +```{r getseq, eval=FALSE} +# Full chromosome +getSeq(genome, "chr1") + +# Region +getSeq(genome, "chr1", start = 100, end = 200) + +# Reverse complement +getSeq(genome, "chr1", start = 100, end = 200, strand = "-") + +# Multiple regions +getSeq(genome, c("chr1", "chr2"), c(100, 500), c(200, 600)) + +# From GRanges +getSeq(genome, GRanges("chr1:100-200:-")) +``` + +### `[[` (bracket extraction) + +```r +genome[["chr1"]] +``` + +Extract a full sequence by name. Returns `DNAString` if Biostrings is +installed, otherwise a character string. + +- **i** — Sequence name (character). +- **Returns** — `DNAString` or character string. +- **Errors** — If the sequence name is not found in the collection. + +--- + +## Metadata Accessors + +### seqinfo + +```r +seqinfo(x) +``` + +Returns the `Seqinfo` object containing sequence names and lengths. + +- **Returns** — A `GenomeInfoDb::Seqinfo` object. + +### seqnames + +```r +seqnames(x) +``` + +Returns the sequence names. + +- **Returns** — Character vector (via `Seqinfo`). + +### seqlengths + +```r +seqlengths(x) +``` + +Returns named integer vector of sequence lengths. + +- **Returns** — Named integer vector. + +```{r seqlengths, eval=FALSE} +seqlengths(genome) +#> chr1 chr2 chr3 +#> 248956 242193 198295 +``` + +### length + +```r +length(x) +``` + +Returns the number of sequences in the genome. + +- **Returns** — Integer scalar. + +### names + +```r +names(x) +``` + +Returns the sequence names as a character vector. + +- **Returns** — Character vector. + +### collection_digest + +```r +collection_digest(genome) +``` + +Returns the GA4GH seqcol digest identifying this sequence collection. + +- **genome** — A `RefgetGenome` object. +- **Returns** — Character string. + +### coordinate_system + +```r +coordinate_system(genome) +``` + +Returns the `sorted_name_length_pairs` digest. Two genomes with the same +`coordinate_system()` share the same coordinate system and are compatible for +coordinate-based operations (e.g., lifting over annotations). + +- **genome** — A `RefgetGenome` object. +- **Returns** — Character string. + +### sequence_digests + +```r +sequence_digests(genome) +``` + +Returns a named character vector of per-sequence SHA512t24u digests. + +- **genome** — A `RefgetGenome` object. +- **Returns** — Named character vector (names are sequence names, values are digests). + +```{r seq-digests, eval=FALSE} +sequence_digests(genome) +#> chr1 chr2 +#> "SQ.2648ae1bacce4ec4b6cf337..." "SQ.f932a39b4c70..." 
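+
+# The return value is a plain named character vector, so a single digest
+# can be pulled out with standard subsetting:
+sequence_digests(genome)[["chr1"]]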
+``` + +### store + +```r +store(genome) +``` + +Returns the underlying `gtars::RefgetStore` object. Useful for calling gtars +functions directly. + +- **genome** — A `RefgetGenome` object. +- **Returns** — A gtars `RefgetStore` object. + +--- + +## Bulk Extraction + +### extractRegions + +```r +extractRegions(genome, regions, as.character = FALSE) +``` + +Extract multiple genomic regions efficiently using BED-based extraction. + +- **genome** — A `RefgetGenome` object. +- **regions** — A `GRanges` object or a `data.frame` with columns `chrom`, `start`, `end` (1-based inclusive coordinates). +- **as.character** — If `TRUE`, return character vector instead of `DNAStringSet`. +- **Returns** — `DNAStringSet` or named character vector. Named as `"chrom:start-end"`. + +```{r extract-regions, eval=FALSE} +regions <- data.frame( + chrom = c("chr1", "chr1", "chr2"), + start = c(100, 5000, 200), + end = c(199, 5099, 299) +) +seqs <- extractRegions(genome, regions) +``` + +### extractToFasta + +```r +extractToFasta(genome, regions, output_path) +``` + +Write extracted regions directly to a FASTA file. + +- **genome** — A `RefgetGenome` object. +- **regions** — A `GRanges` object or data.frame (same as `extractRegions`). +- **output_path** — Path for the output FASTA file. +- **Returns** — Invisibly returns `output_path`. + +```{r extract-to-fasta, eval=FALSE} +extractToFasta(genome, regions, "output.fa") +``` + +### exportChromosomes + +```r +exportChromosomes(genome, names = NULL, output_path, line_width = 80L) +``` + +Export complete chromosomes to a FASTA file. + +- **genome** — A `RefgetGenome` object. +- **names** — Character vector of chromosome names to export, or `NULL` for all. +- **output_path** — Path for the output FASTA file. +- **line_width** — Bases per line in output (default: 80). +- **Returns** — Invisibly returns `output_path`. + +```{r export-chroms, eval=FALSE} +# Specific chromosomes +exportChromosomes(genome, c("chr1", "chr22"), "subset.fa") + +# All chromosomes +exportChromosomes(genome, output_path = "full.fa") +``` + +--- + +## Conversion Utilities + +### as_DNAString + +```r +as_DNAString(seq_string) +``` + +Convert a character string to a Biostrings `DNAString` object. + +- **seq_string** — Character string containing a DNA sequence. +- **Returns** — A `DNAString` object. +- **Errors** — If Biostrings is not installed. + +```{r as-dnastring, eval=FALSE} +dna <- as_DNAString("ACGTACGT") +``` + +### as_DNAStringSet + +```r +as_DNAStringSet(seq_strings, names = NULL) +``` + +Convert a character vector to a Biostrings `DNAStringSet` object. + +- **seq_strings** — Character vector of DNA sequences. +- **names** — Optional names for the sequences. +- **Returns** — A `DNAStringSet` object. +- **Errors** — If Biostrings is not installed. + +```{r as-dnastringset, eval=FALSE} +seqs <- as_DNAStringSet(c("ACGT", "GGCC"), names = c("seq1", "seq2")) +``` + +--- + +## Working with the Underlying Store + +The `store()` accessor gives you access to the full `gtars::RefgetStore` API +for operations not directly exposed by BiocRefgetStore. 
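+
+The returned object can also be handed back to the low-level `RefgetGenome()`
+constructor to open a second collection held in the same store. A minimal
+sketch (the digest here is a placeholder, not a real collection):
+
+```{r store-reuse, eval=FALSE}
+# Materialize another collection backed by the same underlying store
+other_genome <- RefgetGenome(store(genome), digest = "another_collection_digest")
+```
+
+A few of the gtars calls you can make on the store directly: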
+ +```{r store-advanced, eval=FALSE} +s <- store(genome) + +# List all aliases in the store +gtars::get_aliases(s) + +# Compare two sequence collections +gtars::compare_seqcols(s, digest_a, digest_b) + +# Get FHR (FASTA Header Record) metadata +gtars::get_fhr(s, collection_digest(genome)) + +# Access level 2 data (raw attribute arrays) +level2 <- gtars::get_level2(s, collection_digest(genome)) +level2$names # sequence names +level2$lengths # sequence lengths +level2$sequences # sequence digests +``` + +### show + +```r +show(object) +``` + +Display method for `RefgetGenome`. Prints the number of sequences, collection +digest, and first few sequence names. + +```{r show, eval=FALSE} +genome +#> RefgetGenome with 24 sequences +#> collection_digest: abc123... +#> seqnames: chr1, chr2, chr3, chr4, chr5 ... (19 more) +``` From 190fd6557d560a0e1e485672d146fe0cd16c55d3 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 2 Mar 2026 13:44:05 -0500 Subject: [PATCH 14/31] add py alias docstring and tests --- refget/store.py | 8 ++ tests/local/test_aliases.py | 162 ++++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 tests/local/test_aliases.py diff --git a/refget/store.py b/refget/store.py index 042b90c..30379e5 100644 --- a/refget/store.py +++ b/refget/store.py @@ -3,6 +3,14 @@ This module re-exports the Rust-based gtars.refget components for local sequence collection storage and FASTA processing. + +RefgetStore also provides namespace-based alias management: + Sequence aliases: add_sequence_alias, get_sequence_by_alias, + get_aliases_for_sequence, list_sequence_alias_namespaces, + list_sequence_aliases, remove_sequence_alias, load_sequence_aliases + Collection aliases: add_collection_alias, get_collection_by_alias, + get_aliases_for_collection, list_collection_alias_namespaces, + list_collection_aliases, remove_collection_alias, load_collection_aliases """ from .const import GTARS_INSTALLED diff --git a/tests/local/test_aliases.py b/tests/local/test_aliases.py new file mode 100644 index 0000000..8d077c4 --- /dev/null +++ b/tests/local/test_aliases.py @@ -0,0 +1,162 @@ +"""Tests for RefgetStore alias functionality.""" + +import os +import tempfile + +import pytest + +from refget.store import RefgetStore + +try: + from gtars.refget import RefgetStore as _check + + _RUST_BINDINGS_AVAILABLE = True +except ImportError: + _RUST_BINDINGS_AVAILABLE = False + +FASTA_PATH = "test_fasta/base.fa" + + +@pytest.fixture +def store(): + """Create an in-memory RefgetStore with base.fa loaded.""" + s = RefgetStore.in_memory() + s.disable_encoding() + s.add_sequence_collection_from_fasta(FASTA_PATH) + return s + + +@pytest.fixture +def seq_digest(store): + """Return the sha512t24u digest of the first sequence in the store.""" + return store.list_sequences()[0].sha512t24u + + +@pytest.fixture +def col_digest(store): + """Return the digest of the first collection in the store.""" + return store.list_collections()[0].digest + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +class TestSequenceAliases: + def test_add_and_retrieve(self, store, seq_digest): + store.add_sequence_alias("chromosomes", "chr1", seq_digest) + result = store.get_sequence_by_alias("chromosomes", "chr1") + assert result is not None + assert result.metadata.sha512t24u == seq_digest + + def test_list_namespaces(self, store, seq_digest): + store.add_sequence_alias("ucsc", "chrX", seq_digest) + namespaces = store.list_sequence_alias_namespaces() + assert "ucsc" in namespaces + + 
def test_list_aliases_in_namespace(self, store, seq_digest): + store.add_sequence_alias("ucsc", "chr1", seq_digest) + store.add_sequence_alias("ucsc", "chr2", seq_digest) + aliases = store.list_sequence_aliases("ucsc") + assert "chr1" in aliases + assert "chr2" in aliases + + def test_reverse_lookup(self, store, seq_digest): + store.add_sequence_alias("ucsc", "chr1", seq_digest) + result = store.get_aliases_for_sequence(seq_digest) + assert ("ucsc", "chr1") in result + + def test_remove_alias(self, store, seq_digest): + store.add_sequence_alias("ucsc", "chr1", seq_digest) + removed = store.remove_sequence_alias("ucsc", "chr1") + assert removed is True + result = store.get_sequence_by_alias("ucsc", "chr1") + assert result is None + + def test_remove_nonexistent_returns_false(self, store): + removed = store.remove_sequence_alias("fake", "fake") + assert removed is False + + def test_get_nonexistent_returns_none(self, store): + result = store.get_sequence_by_alias("fake_ns", "fake_alias") + assert result is None + + def test_multiple_namespaces_same_digest(self, store, seq_digest): + store.add_sequence_alias("ucsc", "chr1", seq_digest) + store.add_sequence_alias("ensembl", "1", seq_digest) + aliases = store.get_aliases_for_sequence(seq_digest) + namespaces = {ns for ns, _ in aliases} + assert "ucsc" in namespaces + assert "ensembl" in namespaces + + def test_load_from_tsv(self, store, seq_digest): + with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as f: + f.write(f"chr1\t{seq_digest}\n") + f.write(f"chr2\t{seq_digest}\n") + tsv_path = f.name + try: + count = store.load_sequence_aliases("from_file", tsv_path) + assert count == 2 + result = store.get_sequence_by_alias("from_file", "chr1") + assert result is not None + finally: + os.unlink(tsv_path) + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +class TestCollectionAliases: + def test_add_and_retrieve(self, store, col_digest): + store.add_collection_alias("genomes", "hg38", col_digest) + result = store.get_collection_by_alias("genomes", "hg38") + assert result is not None + assert result.digest == col_digest + + def test_list_namespaces(self, store, col_digest): + store.add_collection_alias("genomes", "hg38", col_digest) + namespaces = store.list_collection_alias_namespaces() + assert "genomes" in namespaces + + def test_list_aliases_in_namespace(self, store, col_digest): + store.add_collection_alias("genomes", "hg38", col_digest) + store.add_collection_alias("genomes", "GRCh38", col_digest) + aliases = store.list_collection_aliases("genomes") + assert "hg38" in aliases + assert "GRCh38" in aliases + + def test_reverse_lookup(self, store, col_digest): + store.add_collection_alias("genomes", "hg38", col_digest) + result = store.get_aliases_for_collection(col_digest) + assert ("genomes", "hg38") in result + + def test_remove_alias(self, store, col_digest): + store.add_collection_alias("genomes", "hg38", col_digest) + removed = store.remove_collection_alias("genomes", "hg38") + assert removed is True + result = store.get_collection_by_alias("genomes", "hg38") + assert result is None + + def test_remove_nonexistent_returns_false(self, store): + removed = store.remove_collection_alias("fake", "fake") + assert removed is False + + def test_get_nonexistent_returns_none(self, store): + result = store.get_collection_by_alias("fake_ns", "fake_alias") + assert result is None + + def test_multiple_namespaces_same_digest(self, store, col_digest): + store.add_collection_alias("ucsc", "hg38", 
col_digest) + store.add_collection_alias("ncbi", "GRCh38", col_digest) + aliases = store.get_aliases_for_collection(col_digest) + namespaces = {ns for ns, _ in aliases} + assert "ucsc" in namespaces + assert "ncbi" in namespaces + + def test_load_from_tsv(self, store, col_digest): + with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as f: + f.write(f"hg38\t{col_digest}\n") + f.write(f"GRCh38\t{col_digest}\n") + tsv_path = f.name + try: + count = store.load_collection_aliases("from_file", tsv_path) + assert count == 2 + result = store.get_collection_by_alias("from_file", "hg38") + assert result is not None + finally: + os.unlink(tsv_path) From 142ccc195c8075de5e2442f72ff7ac8680d6fc07 Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 2 Mar 2026 18:01:41 -0500 Subject: [PATCH 15/31] clean up for new gtars updates --- data_loaders/demo_remote_store.py | 17 +- .../riva_pangenome_analysis/README.md | 4 +- examples/remote_store.py | 11 +- refget/cli/fasta.py | 110 ++++++- refget/cli/seqcol.py | 120 ++++--- refget/cli/store.py | 140 ++------ refget/clients.py | 12 +- tests/local/test_aliases.py | 179 ++++------ tests/local/test_remove_collection.py | 38 +++ tests/local/test_store_seqcol_features.py | 101 ++++++ tests/test_cli/test_fasta_commands.py | 307 +++++------------- 11 files changed, 499 insertions(+), 540 deletions(-) create mode 100644 tests/local/test_remove_collection.py create mode 100644 tests/local/test_store_seqcol_features.py diff --git a/data_loaders/demo_remote_store.py b/data_loaders/demo_remote_store.py index 137fe52..af575e5 100644 --- a/data_loaders/demo_remote_store.py +++ b/data_loaders/demo_remote_store.py @@ -39,7 +39,7 @@ def main(): print(f"\n1. Loading remote store from:\n {REMOTE_URL}") print(f" Cache directory: {CACHE_DIR}\n") - store = RefgetStore.load_remote(cache_path=str(CACHE_DIR), remote_url=REMOTE_URL) + store = RefgetStore.open_remote(cache_path=str(CACHE_DIR), remote_url=REMOTE_URL) print(f" Loaded! {len(store)} sequences available (metadata only)") @@ -51,9 +51,8 @@ def main(): # 3. List sequences (first 5) print(f"\n3. Listing sequences (first 5 of {len(store)}):") - records = store.sequence_records() - for i, rec in enumerate(records[:5]): - m = rec.metadata + records = store.list_sequences() + for i, m in enumerate(records[:5]): print(f" {i+1}. {m.name[:50]}...") print(f" sha512t24u: {m.sha512t24u}") print(f" length: {m.length:,} bp") @@ -61,7 +60,7 @@ def main(): # 4. Fetch a sequence by ID (downloads sequence data on first access) seq_digest = "du4GiRD_OcmdmCn_RmImyb71YZ4XoCdk" print(f"\n4. Get sequence record by ID (fetches from remote):") - record = store.get_sequence_by_id(seq_digest) + record = store.get_sequence(seq_digest) if record: print(f" Name: {record.metadata.name}") print(f" Length: {record.metadata.length:,} bp") @@ -107,7 +106,7 @@ def main(): print(f" Collection: {EXAMPLE_COLLECTION}") print(f" Sequence: {EXAMPLE_SEQ_NAME[:50]}...") - record = store.get_sequence_by_collection_and_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) + record = store.get_sequence_by_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) if record: print(f" Found! 
Length: {record.metadata.length:,} bp") print(f" Digest: {record.metadata.sha512t24u}") @@ -149,9 +148,9 @@ def main(): print(f"\nCache directory: {CACHE_DIR}") print(f"Temp files: {temp_dir}") print("\nKey features demonstrated:") - print(" - load_remote(): Load store from URL, fetch sequences on-demand") - print(" - get_sequence_by_id(): Lookup by SHA-512/24u or MD5 digest") - print(" - get_sequence_by_collection_and_name(): Lookup by sequence name") + print(" - open_remote(): Load store from URL, fetch sequences on-demand") + print(" - get_sequence(): Lookup by SHA-512/24u or MD5 digest") + print(" - get_sequence_by_name(): Lookup by collection digest + sequence name") print(" - substrings_from_regions(): Batch retrieval from BED file") print(" - export_fasta_by_digests(): Export sequences by digest") print(" - export_fasta_from_regions(): Export BED regions to FASTA") diff --git a/data_loaders/riva_pangenome_analysis/README.md b/data_loaders/riva_pangenome_analysis/README.md index 9ecce89..44d108a 100644 --- a/data_loaders/riva_pangenome_analysis/README.md +++ b/data_loaders/riva_pangenome_analysis/README.md @@ -38,7 +38,7 @@ cm = store.get_collection_metadata("s0nMiOFHPsIBrm2bd3PkzWXKLKWQZq70") EXAMPLE_COLLECTION = "0aHV7I-94paL9Z1H4LNlqsW3WxJhlou5" EXAMPLE_SEQ_NAME = "JAGYVX010000006.1 unmasked:primary_assembly HG03540.pri.mat.f1_v2:JAGYVX010000006.1:1:96320881:1" -record = store.get_sequence_by_collection_and_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) +record = store.get_sequence_by_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) ## Upload to S3 @@ -74,7 +74,7 @@ seq s1 seq.decode() store.get_collection_metadata(col1.digest) -col1_loaded.is_loaded() +store.is_collection_loaded(col1.digest) ``` diff --git a/examples/remote_store.py b/examples/remote_store.py index 6db0447..aa7d904 100644 --- a/examples/remote_store.py +++ b/examples/remote_store.py @@ -29,7 +29,7 @@ # The store metadata (~1.5 MB) is fetched; sequences are loaded on-demand. # %% -store = RefgetStore.load_remote(cache_path=str(CACHE_DIR), remote_url=REMOTE_URL) +store = RefgetStore.open_remote(cache_path=str(CACHE_DIR), remote_url=REMOTE_URL) print(f"Loaded {len(store)} sequences from {REMOTE_URL}") @@ -45,9 +45,8 @@ # ## 3. List Sequences # %% -records = store.sequence_records() -for i, rec in enumerate(records[:5]): - m = rec.metadata +records = store.list_sequences() +for i, m in enumerate(records[:5]): print(f"{i+1}. {m.name[:60]}...") print(f" sha512t24u: {m.sha512t24u}, length: {m.length:,} bp") @@ -58,7 +57,7 @@ # %% seq_digest = "du4GiRD_OcmdmCn_RmImyb71YZ4XoCdk" -record = store.get_sequence_by_id(seq_digest) +record = store.get_sequence(seq_digest) if record: print(f"Name: {record.metadata.name}") print(f"Length: {record.metadata.length:,} bp") @@ -99,7 +98,7 @@ # Look up sequences by collection digest + sequence name. 
# %% -record = store.get_sequence_by_collection_and_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) +record = store.get_sequence_by_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) if record: print(f"Collection: {EXAMPLE_COLLECTION}") print(f"Sequence: {EXAMPLE_SEQ_NAME[:50]}...") diff --git a/refget/cli/fasta.py b/refget/cli/fasta.py index 364c0cc..5c32442 100644 --- a/refget/cli/fasta.py +++ b/refget/cli/fasta.py @@ -25,7 +25,6 @@ EXIT_FILE_NOT_FOUND, EXIT_FAILURE, EXIT_SUCCESS, - not_implemented, print_error, print_json, print_success, @@ -67,6 +66,8 @@ def index( - genome.fa.fai (FASTA index, samtools-compatible) - genome.seqcol.json (Sequence collection JSON) - genome.chrom.sizes (Chromosome sizes) + - genome.rgsi (RefgetStore sequence index) + - genome.rgci (RefgetStore collection index) Prints the seqcol digest to stdout. """ @@ -137,7 +138,36 @@ def index( with open(chrom_sizes_path, "w") as f: f.write(chrom_sizes_content) - files_created = [str(fai_path), str(seqcol_path), str(chrom_sizes_path)] + # Write RGSI file + stem = base_name + for ext in [".fa.gz", ".fasta.gz", ".fa", ".fasta"]: + if stem.endswith(ext): + stem = stem[: -len(ext)] + break + rgsi_path = out_dir / f"{stem}.rgsi" + sc.write_rgsi(str(rgsi_path)) + + # Write RGCI file + rgci_path = out_dir / f"{stem}.rgci" + with open(rgci_path, "w") as f: + meta = sc.metadata + f.write( + "#digest\tn_sequences\tnames_digest\tsequences_digest" + "\tlengths_digest\tname_length_pairs_digest" + "\tsorted_name_length_pairs_digest\tsorted_sequences_digest\n" + ) + f.write( + f"{meta.digest}\t{meta.n_sequences}\t{meta.names_digest}" + f"\t{meta.sequences_digest}\t{meta.lengths_digest}" + f"\t{meta.name_length_pairs_digest or ''}" + f"\t{meta.sorted_name_length_pairs_digest or ''}" + f"\t{meta.sorted_sequences_digest or ''}\n" + ) + + files_created = [ + str(fai_path), str(seqcol_path), str(chrom_sizes_path), + str(rgsi_path), str(rgci_path), + ] if json_output: print_json( @@ -364,11 +394,34 @@ def rgsi( """ Compute .rgsi (RefgetStore sequence index) from a FASTA file. - The .rgsi is a binary index file used by RefgetStore for efficient - on-disk sequence storage and retrieval. It maps sequence digests to - byte offsets. + The .rgsi is a TSV index file containing collection-level digest headers + and per-sequence metadata (name, length, alphabet, digests). Used by + RefgetStore for efficient collection storage and as a FASTA digest cache. """ - not_implemented("fasta rgsi") + from gtars.refget import digest_fasta + + try: + # Determine output path + if output is None: + # Replace FASTA extensions with .rgsi + stem = file.name + for ext in [".fa.gz", ".fasta.gz", ".fa", ".fasta"]: + if stem.endswith(ext): + stem = stem[: -len(ext)] + break + output = file.parent / f"{stem}.rgsi" + + # Digest the FASTA file + with suppress_stdout(): + sc = digest_fasta(str(file)) + + # Write RGSI file using gtars binding + sc.write_rgsi(str(output)) + + print_success(f"Wrote RGSI index to {output}") + raise typer.Exit(EXIT_SUCCESS) + except OSError as e: + print_error(f"Error processing FASTA file: {e}", EXIT_FAILURE) @app.command() @@ -389,10 +442,49 @@ def rgci( """ Compute .rgci (RefgetStore collection index) from a FASTA file. - The .rgci is a binary index file used by RefgetStore to store - collection metadata. + The .rgci is a TSV index file listing collection metadata (digest, + sequence count, and level 1 digests). Used by RefgetStore as a + master index of all collections. 
""" - not_implemented("fasta rgci") + from gtars.refget import digest_fasta + + try: + # Determine output path + if output is None: + stem = file.name + for ext in [".fa.gz", ".fasta.gz", ".fa", ".fasta"]: + if stem.endswith(ext): + stem = stem[: -len(ext)] + break + output = file.parent / f"{stem}.rgci" + + # Digest the FASTA file + with suppress_stdout(): + sc = digest_fasta(str(file)) + + meta = sc.metadata + + # Write RGCI file (matches store.rs write_collections_rgci format) + with open(output, "w") as f: + # Header + f.write( + "#digest\tn_sequences\tnames_digest\tsequences_digest" + "\tlengths_digest\tname_length_pairs_digest" + "\tsorted_name_length_pairs_digest\tsorted_sequences_digest\n" + ) + # Single collection row + f.write( + f"{meta.digest}\t{meta.n_sequences}\t{meta.names_digest}" + f"\t{meta.sequences_digest}\t{meta.lengths_digest}" + f"\t{meta.name_length_pairs_digest or ''}" + f"\t{meta.sorted_name_length_pairs_digest or ''}" + f"\t{meta.sorted_sequences_digest or ''}\n" + ) + + print_success(f"Wrote RGCI index to {output}") + raise typer.Exit(EXIT_SUCCESS) + except OSError as e: + print_error(f"Error processing FASTA file: {e}", EXIT_FAILURE) @app.command() diff --git a/refget/cli/seqcol.py b/refget/cli/seqcol.py index 0bc3881..69dc486 100644 --- a/refget/cli/seqcol.py +++ b/refget/cli/seqcol.py @@ -67,43 +67,14 @@ def _collection_to_seqcol_dict(store, digest: str, level: int = 2) -> Optional[d Returns: Seqcol dict in API format, or None if collection not found. """ - from refget.utils import canonical_str - from refget.digests import sha512t24u_digest - - names = [] - lengths = [] - sequences = [] - - for coll in store.iter_collections(): - if coll.digest == digest: - for seq in coll.sequences: - m = seq.metadata - names.append(m.name) - lengths.append(m.length) - sequences.append("SQ." + m.sha512t24u) - break - else: - # Collection not found in iteration - return None - - if not names: + try: + if level == 1: + return store.get_collection_level1(digest) + else: + return store.get_collection_level2(digest) + except Exception: return None - if level == 1: - # Return digests of arrays instead of arrays themselves - return { - "names": sha512t24u_digest(canonical_str(names)), - "lengths": sha512t24u_digest(canonical_str(lengths)), - "sequences": sha512t24u_digest(canonical_str(sequences)), - } - else: - # Level 2: return full arrays - return { - "names": names, - "lengths": lengths, - "sequences": sequences, - } - def _get_local_seqcol(digest: str, level: int = 2) -> Optional[dict]: """ @@ -123,28 +94,15 @@ def _get_local_seqcol(digest: str, level: int = 2) -> Optional[dict]: return None store_path = get_store_path() - rgstore_path = store_path / "rgstore.json" # Check if store exists - if not store_path.exists() or not rgstore_path.exists(): + if not RefgetStore.store_exists(str(store_path)): return None try: store = RefgetStore.open_local(str(store_path)) store.set_quiet(True) - - # Check if collection exists - collection_digests = {meta.digest for meta in store.list_collections()} - if digest not in collection_digests: - return None - - # Load the collection (triggers lazy loading if needed) - if not store.is_collection_loaded(digest): - store.get_collection(digest) - - # Convert to seqcol dict format return _collection_to_seqcol_dict(store, digest, level) - except Exception: # Any error (store corruption, etc.) 
- fall back to remote return None @@ -419,6 +377,44 @@ def list_collections( raise typer.Exit(EXIT_SUCCESS) +def _search_local_store(filters: dict) -> Optional[list]: + """Search the local RefgetStore for collections matching attribute filters.""" + try: + from refget.store import RefgetStore + except ImportError: + return None + + store_path = get_store_path() + + if not RefgetStore.store_exists(str(store_path)): + return None + + try: + store = RefgetStore.open_local(str(store_path)) + store.set_quiet(True) + + # Search each filter; results must match ALL filters (intersection) + result_sets = [] + for attr_name, attr_digest in filters.items(): + matches = store.find_collections_by_attribute(attr_name, attr_digest) + result_sets.append(set(matches)) + + if not result_sets: + return None + + # Intersection of all filter results + matching = result_sets[0] + for s in result_sets[1:]: + matching &= s + + if not matching: + return None + + return [{"digest": d} for d in sorted(matching)] + except Exception: + return None + + @app.command() def search( names: Optional[str] = typer.Option( @@ -442,6 +438,16 @@ def search( "-s", help="Server URL override", ), + local: bool = typer.Option( + False, + "--local", + help="Search only the local store (skip remote)", + ), + no_local: bool = typer.Option( + False, + "--no-local", + help="Skip local store and search remote only", + ), ) -> None: """ Find collections that share an attribute. @@ -449,6 +455,9 @@ def search( The attribute digest is the digest of an attribute array (e.g., the names array digest from level 1 output). + By default, searches the local store first, then falls back to remote. + Use --local to search only locally, or --no-local to skip local search. + Example workflow: # Get names digest from level 1 names_digest=$(refget fasta seqcol genome.fa --level 1 | jq -r '.names') @@ -471,6 +480,19 @@ def search( ) return + # Try local store first (unless --no-local) + if not no_local: + local_results = _search_local_store(filters) + if local_results is not None: + print_json(local_results) + raise typer.Exit(EXIT_SUCCESS) + + if local: + # --local flag set but no results found locally + print_error("No matching collections found in local store", EXIT_FAILURE) + return + + # Fall back to remote server client = _get_client(server) try: diff --git a/refget/cli/store.py b/refget/cli/store.py index 9334630..fc04971 100644 --- a/refget/cli/store.py +++ b/refget/cli/store.py @@ -100,11 +100,8 @@ def _load_store(path: Optional[Path], must_exist: bool = True, remote: Optional[ if must_exist: if not store_path.exists(): print_error(f"Store not found at {store_path}", EXIT_FILE_NOT_FOUND) - # Check if rgstore.json exists - if not, it's an empty store that needs on_disk - # The store uses rgstore.json as its manifest file - rgstore_path = store_path / "rgstore.json" - if not rgstore_path.exists(): - # Empty store - use on_disk which handles initialization + if not RefgetStore.store_exists(str(store_path)): + # Empty directory - use on_disk which handles initialization return RefgetStore.on_disk(str(store_path)) return RefgetStore.open_local(str(store_path)) else: @@ -394,39 +391,13 @@ def get( print(seq_data) else: # Collection retrieval mode (default) - # Check if collection exists - if digest not in _get_collection_digests(store): + try: + result = store.get_collection_level2(digest) + except Exception: print_error(f"Collection not found: {digest}", EXIT_FAILURE) return - # Ensure collection is loaded - _ensure_collection_loaded(store, digest) - 
- # Get collection data - names = [] - lengths = [] - sequences = [] - - for coll in store.iter_collections(): - if coll.digest == digest: - for seq in coll.sequences: - m = seq.metadata - names.append(m.name) - lengths.append(m.length) - sequences.append("SQ." + m.sha512t24u) - break - - if not names: - print_error(f"Collection not found: {digest}", EXIT_FAILURE) - return - - print_json( - { - "names": names, - "lengths": lengths, - "sequences": sequences, - } - ) + print_json(result) raise typer.Exit(EXIT_SUCCESS) @@ -562,7 +533,7 @@ def pull( # Check local store first local_collections: set = set() - if store_path.exists() and (store_path / "rgstore.json").exists(): + if RefgetStore.store_exists(str(store_path)): try: local_store = RefgetStore.open_local(str(store_path)) local_collections = _get_collection_digests(local_store) @@ -765,24 +736,15 @@ def fai( """ store = _load_store(path, remote=remote) - # Ensure collection is loaded - _ensure_collection_loaded(store, digest) + try: + lvl2 = store.get_collection_level2(digest) + except Exception: + print_error(f"Collection not found: {digest}", EXIT_FAILURE) + return lines = [] - - # Find the collection and get its sequences - for coll in store.iter_collections(): - if coll.digest == digest: - for seq in coll.sequences: - m = seq.metadata - # FAI format: name, length, offset, linebases, linewidth - # Since we don't have a specific FASTA file, offset is 0 - # Using default line width of 80 - lines.append(f"{m.name}\t{m.length}\t0\t80\t81") - break - - if not lines: - print_error(f"Collection not found: {digest}", EXIT_FAILURE) + for name, length in zip(lvl2["names"], lvl2["lengths"]): + lines.append(f"{name}\t{length}\t0\t80\t81") fai_content = "\n".join(lines) if lines: @@ -828,21 +790,15 @@ def chrom_sizes( """ store = _load_store(path, remote=remote) - # Ensure collection is loaded - _ensure_collection_loaded(store, digest) + try: + lvl2 = store.get_collection_level2(digest) + except Exception: + print_error(f"Collection not found: {digest}", EXIT_FAILURE) + return lines = [] - - # Find the collection and get its sequences - for coll in store.iter_collections(): - if coll.digest == digest: - for seq in coll.sequences: - m = seq.metadata - lines.append(f"{m.name}\t{m.length}") - break - - if not lines: - print_error(f"Collection not found: {digest}", EXIT_FAILURE) + for name, length in zip(lvl2["names"], lvl2["lengths"]): + lines.append(f"{name}\t{length}") sizes_content = "\n".join(lines) if lines: @@ -911,52 +867,6 @@ def stats( raise typer.Exit(EXIT_SUCCESS) -def _remove_collection_from_store(store_path: Path, digest: str) -> bool: - """ - Remove a collection from the store by manipulating store files. - - gtars RefgetStore doesn't provide a remove_collection method, so we - implement it by modifying the collections index file directly. - - Args: - store_path: Path to the store directory - digest: Collection digest to remove - - Returns: - True if removed, False if not found - """ - # Validate digest to prevent path traversal - if "/" in digest or "\\" in digest or ".." 
in digest: - return False - - # Remove from collections index (TSV file) - collections_idx = store_path / "collections.rgci" - if collections_idx.exists(): - lines = collections_idx.read_text().splitlines() - new_lines = [] - found = False - for line in lines: - if line.startswith("#") or not line.strip(): - new_lines.append(line) - elif line.startswith(digest + "\t"): - found = True # Skip this line (remove it) - else: - new_lines.append(line) - if found: - collections_idx.write_text("\n".join(new_lines) + "\n" if new_lines else "") - - # Remove the collection's .rgsi file - collection_file = store_path / "collections" / f"{digest}.rgsi" - if collection_file.exists(): - collection_file.unlink() - - # Remove the FHR metadata sidecar file (if it exists) - fhr_file = store_path / "collections" / f"{digest}.fhr.json" - fhr_file.unlink(missing_ok=True) - - return True - - @app.command() def remove( digest: str = typer.Argument( @@ -978,15 +888,11 @@ def remove( with other collections. """ store = _load_store(path) - store_path = _get_store_path(path) - # Check if collection exists - if digest not in _get_collection_digests(store): + removed = store.remove_collection(digest) + if not removed: print_error(f"Collection not found: {digest}", EXIT_FAILURE) - # Remove the collection by manipulating store files - _remove_collection_from_store(store_path, digest) - print_json( { "digest": digest, diff --git a/refget/clients.py b/refget/clients.py index 0e0030b..977eb68 100644 --- a/refget/clients.py +++ b/refget/clients.py @@ -199,9 +199,9 @@ def download_fasta_to_store( ImportError: If gtars/RefgetStore is not available Example: - >>> from refget.store import RefgetStore, StorageMode + >>> from refget.store import RefgetStore >>> from refget.clients import SequenceCollectionClient - >>> store = RefgetStore(StorageMode.Encoded) + >>> store = RefgetStore.in_memory() >>> client = SequenceCollectionClient() >>> collection_digest = client.download_fasta_to_store("abc123", store) >>> # Now you can retrieve sequences by digest from the local store @@ -440,7 +440,7 @@ def get_refget_store(self, cache_dir: str) -> "RefgetStore": except ImportError: raise ImportError("gtars is required: pip install gtars") - return RefgetStore.load_remote(cache_dir, url) + return RefgetStore.open_remote(cache_dir, url) class PangenomeClient(RefgetClient): @@ -597,8 +597,8 @@ def download_to_store( ImportError: If gtars/RefgetStore is not available Example: - >>> from refget.store import RefgetStore, StorageMode - >>> store = RefgetStore(StorageMode.Encoded) + >>> from refget.store import RefgetStore + >>> store = RefgetStore.in_memory() >>> client = FastaDrsClient() >>> collection_digest = client.download_to_store("abc123", store) """ @@ -627,7 +627,7 @@ def download_to_store( _LOGGER.info(f"Downloaded FASTA to {downloaded_path}") # Import into store - store.import_fasta(downloaded_path) + store.add_sequence_collection_from_fasta(downloaded_path) _LOGGER.info(f"Imported FASTA into RefgetStore: {digest}") return digest diff --git a/tests/local/test_aliases.py b/tests/local/test_aliases.py index 8d077c4..251cea4 100644 --- a/tests/local/test_aliases.py +++ b/tests/local/test_aliases.py @@ -1,4 +1,4 @@ -"""Tests for RefgetStore alias functionality.""" +"""Smoke tests for RefgetStore alias functionality via Python bindings.""" import os import tempfile @@ -28,135 +28,72 @@ def store(): @pytest.fixture def seq_digest(store): - """Return the sha512t24u digest of the first sequence in the store.""" return 
store.list_sequences()[0].sha512t24u @pytest.fixture def col_digest(store): - """Return the digest of the first collection in the store.""" return store.list_collections()[0].digest @pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") -class TestSequenceAliases: - def test_add_and_retrieve(self, store, seq_digest): - store.add_sequence_alias("chromosomes", "chr1", seq_digest) - result = store.get_sequence_by_alias("chromosomes", "chr1") - assert result is not None - assert result.metadata.sha512t24u == seq_digest - - def test_list_namespaces(self, store, seq_digest): - store.add_sequence_alias("ucsc", "chrX", seq_digest) - namespaces = store.list_sequence_alias_namespaces() - assert "ucsc" in namespaces - - def test_list_aliases_in_namespace(self, store, seq_digest): - store.add_sequence_alias("ucsc", "chr1", seq_digest) - store.add_sequence_alias("ucsc", "chr2", seq_digest) - aliases = store.list_sequence_aliases("ucsc") - assert "chr1" in aliases - assert "chr2" in aliases - - def test_reverse_lookup(self, store, seq_digest): - store.add_sequence_alias("ucsc", "chr1", seq_digest) - result = store.get_aliases_for_sequence(seq_digest) - assert ("ucsc", "chr1") in result - - def test_remove_alias(self, store, seq_digest): - store.add_sequence_alias("ucsc", "chr1", seq_digest) - removed = store.remove_sequence_alias("ucsc", "chr1") - assert removed is True - result = store.get_sequence_by_alias("ucsc", "chr1") - assert result is None - - def test_remove_nonexistent_returns_false(self, store): - removed = store.remove_sequence_alias("fake", "fake") - assert removed is False - - def test_get_nonexistent_returns_none(self, store): - result = store.get_sequence_by_alias("fake_ns", "fake_alias") - assert result is None - - def test_multiple_namespaces_same_digest(self, store, seq_digest): - store.add_sequence_alias("ucsc", "chr1", seq_digest) - store.add_sequence_alias("ensembl", "1", seq_digest) - aliases = store.get_aliases_for_sequence(seq_digest) - namespaces = {ns for ns, _ in aliases} - assert "ucsc" in namespaces - assert "ensembl" in namespaces - - def test_load_from_tsv(self, store, seq_digest): - with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as f: - f.write(f"chr1\t{seq_digest}\n") - f.write(f"chr2\t{seq_digest}\n") - tsv_path = f.name - try: - count = store.load_sequence_aliases("from_file", tsv_path) - assert count == 2 - result = store.get_sequence_by_alias("from_file", "chr1") - assert result is not None - finally: - os.unlink(tsv_path) +def test_sequence_alias_round_trip(store, seq_digest): + """Add, retrieve, and remove a sequence alias; verify None for missing aliases.""" + # Not found returns None + assert store.get_sequence_by_alias("ucsc", "chr1") is None + + # Add and retrieve + store.add_sequence_alias("ucsc", "chr1", seq_digest) + result = store.get_sequence_by_alias("ucsc", "chr1") + assert result is not None + assert result.metadata.sha512t24u == seq_digest + + # Remove + assert store.remove_sequence_alias("ucsc", "chr1") is True + assert store.get_sequence_by_alias("ucsc", "chr1") is None + assert store.remove_sequence_alias("ucsc", "chr1") is False + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +def test_collection_alias_round_trip(store, col_digest): + """Add, retrieve, and remove a collection alias; verify None for missing aliases.""" + assert store.get_collection_by_alias("genomes", "hg38") is None + + store.add_collection_alias("genomes", "hg38", col_digest) + result = 
store.get_collection_by_alias("genomes", "hg38") + assert result is not None + assert result.digest == col_digest + + assert store.remove_collection_alias("genomes", "hg38") is True + assert store.get_collection_by_alias("genomes", "hg38") is None + assert store.remove_collection_alias("genomes", "hg38") is False + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +def test_load_sequence_aliases_from_tsv(store, seq_digest): + """Load aliases from TSV; verify count return and post-load lookup.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as f: + f.write(f"chr1\t{seq_digest}\n") + f.write(f"chr2\t{seq_digest}\n") + tsv_path = f.name + try: + count = store.load_sequence_aliases("from_file", tsv_path) + assert count == 2 + assert store.get_sequence_by_alias("from_file", "chr1") is not None + finally: + os.unlink(tsv_path) @pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") -class TestCollectionAliases: - def test_add_and_retrieve(self, store, col_digest): - store.add_collection_alias("genomes", "hg38", col_digest) - result = store.get_collection_by_alias("genomes", "hg38") - assert result is not None - assert result.digest == col_digest - - def test_list_namespaces(self, store, col_digest): - store.add_collection_alias("genomes", "hg38", col_digest) - namespaces = store.list_collection_alias_namespaces() - assert "genomes" in namespaces - - def test_list_aliases_in_namespace(self, store, col_digest): - store.add_collection_alias("genomes", "hg38", col_digest) - store.add_collection_alias("genomes", "GRCh38", col_digest) - aliases = store.list_collection_aliases("genomes") - assert "hg38" in aliases - assert "GRCh38" in aliases - - def test_reverse_lookup(self, store, col_digest): - store.add_collection_alias("genomes", "hg38", col_digest) - result = store.get_aliases_for_collection(col_digest) - assert ("genomes", "hg38") in result - - def test_remove_alias(self, store, col_digest): - store.add_collection_alias("genomes", "hg38", col_digest) - removed = store.remove_collection_alias("genomes", "hg38") - assert removed is True - result = store.get_collection_by_alias("genomes", "hg38") - assert result is None - - def test_remove_nonexistent_returns_false(self, store): - removed = store.remove_collection_alias("fake", "fake") - assert removed is False - - def test_get_nonexistent_returns_none(self, store): - result = store.get_collection_by_alias("fake_ns", "fake_alias") - assert result is None - - def test_multiple_namespaces_same_digest(self, store, col_digest): - store.add_collection_alias("ucsc", "hg38", col_digest) - store.add_collection_alias("ncbi", "GRCh38", col_digest) - aliases = store.get_aliases_for_collection(col_digest) - namespaces = {ns for ns, _ in aliases} - assert "ucsc" in namespaces - assert "ncbi" in namespaces - - def test_load_from_tsv(self, store, col_digest): - with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as f: - f.write(f"hg38\t{col_digest}\n") - f.write(f"GRCh38\t{col_digest}\n") - tsv_path = f.name - try: - count = store.load_collection_aliases("from_file", tsv_path) - assert count == 2 - result = store.get_collection_by_alias("from_file", "hg38") - assert result is not None - finally: - os.unlink(tsv_path) +def test_load_collection_aliases_from_tsv(store, col_digest): + """Load aliases from TSV; verify count return and post-load lookup.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as f: + 
f.write(f"hg38\t{col_digest}\n") + f.write(f"GRCh38\t{col_digest}\n") + tsv_path = f.name + try: + count = store.load_collection_aliases("from_file", tsv_path) + assert count == 2 + assert store.get_collection_by_alias("from_file", "hg38") is not None + finally: + os.unlink(tsv_path) diff --git a/tests/local/test_remove_collection.py b/tests/local/test_remove_collection.py new file mode 100644 index 0000000..88fb208 --- /dev/null +++ b/tests/local/test_remove_collection.py @@ -0,0 +1,38 @@ +"""Smoke test for RefgetStore.remove_collection() Python binding.""" + +import os +import tempfile + +import pytest + +from refget.store import RefgetStore + +try: + from gtars.refget import RefgetStore as _check + + _RUST_BINDINGS_AVAILABLE = True +except ImportError: + _RUST_BINDINGS_AVAILABLE = False + +FASTA_PATH = "test_fasta/base.fa" + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +def test_remove_collection_round_trip(): + """Add a collection, remove it with orphan cleanup, verify store is empty.""" + store = RefgetStore.in_memory() + store.set_quiet(True) + store.add_sequence_collection_from_fasta(FASTA_PATH) + + assert len(store.list_collections()) == 1 + assert len(store.list_sequences()) > 0 + + digest = store.list_collections()[0].digest + + # Nonexistent returns False + assert store.remove_collection("nonexistent") is False + + # Real removal with orphan cleanup + assert store.remove_collection(digest, remove_orphan_sequences=True) is True + assert len(store.list_collections()) == 0 + assert len(store.list_sequences()) == 0 diff --git a/tests/local/test_store_seqcol_features.py b/tests/local/test_store_seqcol_features.py new file mode 100644 index 0000000..3779eb1 --- /dev/null +++ b/tests/local/test_store_seqcol_features.py @@ -0,0 +1,101 @@ +""" +Tests for RefgetStore seqcol features: level1/level2, compare, find_collections_by_attribute. 
+ +Only tests that verify Python-specific behavior beyond what Rust tests cover: +- Rust/Python parity for compare() +- Multi-collection attribute search +- Basic level1/level2 smoke test +""" + +import json +import pytest +from pathlib import Path + +try: + from refget.store import RefgetStore + + _RUST_BINDINGS_AVAILABLE = True +except ImportError: + _RUST_BINDINGS_AVAILABLE = False + +TEST_FASTA_DIR = Path("test_fasta") +BASE_FASTA = TEST_FASTA_DIR / "base.fa" +DIFFERENT_NAMES_FASTA = TEST_FASTA_DIR / "different_names.fa" + +with open(TEST_FASTA_DIR / "test_fasta_digests.json") as fp: + TEST_DIGESTS = json.load(fp) + +BASE_DIGEST = TEST_DIGESTS["base.fa"]["top_level_digest"] +BASE_LEVEL1 = TEST_DIGESTS["base.fa"]["level1"] +BASE_LEVEL2 = TEST_DIGESTS["base.fa"]["level2"] +DIFFERENT_NAMES_DIGEST = TEST_DIGESTS["different_names.fa"]["top_level_digest"] + + +@pytest.fixture +def store_with_base(): + """Create an in-memory store with base.fa loaded.""" + store = RefgetStore.in_memory() + store.add_sequence_collection_from_fasta(str(BASE_FASTA)) + return store + + +@pytest.fixture +def store_with_two(): + """Create an in-memory store with base.fa and different_names.fa loaded.""" + store = RefgetStore.in_memory() + store.add_sequence_collection_from_fasta(str(BASE_FASTA)) + store.add_sequence_collection_from_fasta(str(DIFFERENT_NAMES_FASTA)) + return store + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +def test_level1_and_level2_smoke(store_with_base): + """Level1 returns digests, level2 returns arrays, both have required keys.""" + lvl1 = store_with_base.get_collection_level1(BASE_DIGEST) + lvl2 = store_with_base.get_collection_level2(BASE_DIGEST) + + for key in ("names", "lengths", "sequences"): + assert key in lvl1 + assert key in lvl2 + # Level1 values are digest strings, level2 values are lists + assert isinstance(lvl1[key], str) + assert isinstance(lvl2[key], list) + + # Verify level2 matches expected values + assert sorted(lvl2["names"]) == sorted(BASE_LEVEL2["names"]) + assert sorted(lvl2["lengths"]) == sorted(BASE_LEVEL2["lengths"]) + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +def test_compare_matches_python_implementation(store_with_two): + """Verify store.compare() (Rust) agrees with compare_seqcols() (Python) on core attributes.""" + from refget.utils import compare_seqcols + + lvl2_a = store_with_two.get_collection_level2(BASE_DIGEST) + lvl2_b = store_with_two.get_collection_level2(DIFFERENT_NAMES_DIGEST) + + python_result = compare_seqcols(lvl2_a, lvl2_b) + rust_result = store_with_two.compare(BASE_DIGEST, DIFFERENT_NAMES_DIGEST) + + core_attrs = {"names", "lengths", "sequences"} + assert core_attrs <= set(python_result["attributes"]["a_and_b"]) + assert core_attrs <= set(rust_result["attributes"]["a_and_b"]) + + for attr in core_attrs: + assert ( + rust_result["array_elements"]["a_and_b_count"][attr] + == python_result["array_elements"]["a_and_b_count"][attr] + ) + assert ( + rust_result["array_elements"]["a_and_b_same_order"][attr] + == python_result["array_elements"]["a_and_b_same_order"][attr] + ) + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +def test_shared_attribute_returns_multiple(store_with_two): + """base.fa and different_names.fa share lengths; searching by lengths returns both.""" + lengths_digest = BASE_LEVEL1["lengths"] + results = store_with_two.find_collections_by_attribute("lengths", lengths_digest) + assert BASE_DIGEST in results + 
assert DIFFERENT_NAMES_DIGEST in results diff --git a/tests/test_cli/test_fasta_commands.py b/tests/test_cli/test_fasta_commands.py index df5c698..2f3ea6e 100644 --- a/tests/test_cli/test_fasta_commands.py +++ b/tests/test_cli/test_fasta_commands.py @@ -3,7 +3,7 @@ """ Tests for refget fasta CLI commands. -These test the CLI wrapper behavior: output formatting, exit codes, argument parsing. +These test CLI-specific behavior: output formatting, exit codes, argument parsing. """ import pytest @@ -26,20 +26,14 @@ class TestFastaDigest: """Tests for: refget fasta digest """ - def test_outputs_json(self, cli, sample_fasta): - """Output is valid JSON with digest.""" - result = cli("fasta", "digest", str(sample_fasta)) - - data = assert_json_output(result, ["digest"]) - assert_valid_digest(data["digest"]) - - def test_digest_with_file_key(self, cli, sample_fasta): - """Output may include file path.""" - result = cli("fasta", "digest", str(sample_fasta)) + def test_known_digest(self, cli): + """Verify digest matches expected value for known file.""" + result = cli("fasta", "digest", str(BASE_FASTA)) assert result.exit_code == 0 data = json.loads(result.stdout) - assert "digest" in data + expected_digest = TEST_FASTA_DIGESTS["base.fa"]["top_level_digest"] + assert data["digest"] == expected_digest def test_gzipped_file(self, cli, sample_fasta_gz): """Handles gzipped files seamlessly.""" @@ -52,34 +46,13 @@ def test_gzipped_file(self, cli, sample_fasta_gz): def test_file_not_found_exit_code(self, cli): """Returns non-zero exit code for missing file.""" result = cli("fasta", "digest", "/nonexistent/file.fa") - - assert result.exit_code != 0 - # Error message goes to stderr (correct Unix behavior) - assert "not found" in result.stderr.lower() or "error" in result.stderr.lower() - - def test_missing_argument(self, cli): - """Returns non-zero exit for missing argument.""" - result = cli("fasta", "digest") - assert result.exit_code != 0 - def test_known_digest(self, cli): - """Verify digest matches expected value for known file.""" - result = cli("fasta", "digest", str(BASE_FASTA)) - - assert result.exit_code == 0 - data = json.loads(result.stdout) - expected_digest = TEST_FASTA_DIGESTS["base.fa"]["top_level_digest"] - assert data["digest"] == expected_digest - def test_different_files_different_digests(self, cli): """Different files produce different digests.""" result1 = cli("fasta", "digest", str(BASE_FASTA)) result2 = cli("fasta", "digest", str(DIFFERENT_NAMES_FASTA)) - assert result1.exit_code == 0 - assert result2.exit_code == 0 - digest1 = json.loads(result1.stdout)["digest"] digest2 = json.loads(result2.stdout)["digest"] assert digest1 != digest2 @@ -88,38 +61,6 @@ def test_different_files_different_digests(self, cli): class TestFastaSeqcol: """Tests for: refget fasta seqcol """ - def test_outputs_seqcol_json(self, cli, sample_fasta): - """Output is valid seqcol JSON.""" - result = cli("fasta", "seqcol", str(sample_fasta)) - - data = assert_json_output(result, ["names", "lengths", "sequences"]) - assert isinstance(data["names"], list) - assert isinstance(data["lengths"], list) - assert isinstance(data["sequences"], list) - - def test_seqcol_array_lengths_match(self, cli, sample_fasta): - """All seqcol arrays have same length.""" - result = cli("fasta", "seqcol", str(sample_fasta)) - - assert result.exit_code == 0 - data = json.loads(result.stdout) - n_seqs = len(data["names"]) - assert len(data["lengths"]) == n_seqs - assert len(data["sequences"]) == n_seqs - - def test_output_to_file(self, cli, 
sample_fasta, tmp_path): - """Writes to file with -o option.""" - output = tmp_path / "out.seqcol.json" - result = cli("fasta", "seqcol", str(sample_fasta), "-o", str(output)) - - assert result.exit_code == 0 - assert output.exists() - - data = json.loads(output.read_text()) - assert "names" in data - assert "lengths" in data - assert "sequences" in data - def test_known_seqcol(self, cli): """Verify seqcol matches expected values for known file.""" result = cli("fasta", "seqcol", str(BASE_FASTA)) @@ -131,41 +72,20 @@ def test_known_seqcol(self, cli): assert data["names"] == expected["names"] assert data["lengths"] == expected["lengths"] - def test_gzipped_file(self, cli, sample_fasta_gz): - """Handles gzipped FASTA files.""" - result = cli("fasta", "seqcol", str(sample_fasta_gz)) + def test_output_to_file(self, cli, sample_fasta, tmp_path): + """Writes to file with -o option.""" + output = tmp_path / "out.seqcol.json" + result = cli("fasta", "seqcol", str(sample_fasta), "-o", str(output)) assert result.exit_code == 0 - data = json.loads(result.stdout) + assert output.exists() + data = json.loads(output.read_text()) assert "names" in data class TestFastaFai: """Tests for: refget fasta fai """ - def test_outputs_fai_format(self, cli, sample_fasta, tmp_path): - """Outputs valid FAI format.""" - output = tmp_path / "test.fa.fai" - result = cli("fasta", "fai", str(sample_fasta), "-o", str(output)) - - assert result.exit_code == 0 - assert output.exists() - - # FAI format: name\tlength\toffset\tline_bases\tline_width - lines = output.read_text().strip().split("\n") - assert len(lines) > 0 - for line in lines: - parts = line.split("\t") - assert len(parts) >= 2 # At least name and length - - def test_fai_to_stdout(self, cli, sample_fasta): - """Outputs FAI to stdout when no -o specified.""" - result = cli("fasta", "fai", str(sample_fasta)) - - assert result.exit_code == 0 - lines = result.stdout.strip().split("\n") - assert len(lines) > 0 - def test_fai_sequence_count(self, cli, multi_seq_fasta, tmp_path): """FAI has one line per sequence.""" output = tmp_path / "test.fa.fai" @@ -179,42 +99,13 @@ def test_fai_sequence_count(self, cli, multi_seq_fasta, tmp_path): class TestFastaChromSizes: """Tests for: refget fasta chrom-sizes """ - def test_outputs_chrom_sizes(self, cli, sample_fasta, tmp_path): - """Outputs valid chrom.sizes format.""" - output = tmp_path / "test.chrom.sizes" - result = cli("fasta", "chrom-sizes", str(sample_fasta), "-o", str(output)) - - assert result.exit_code == 0 - assert output.exists() - - # Format: name\tlength - lines = output.read_text().strip().split("\n") - for line in lines: - parts = line.split("\t") - assert len(parts) == 2 - assert parts[1].isdigit() - - def test_chrom_sizes_to_stdout(self, cli, sample_fasta): - """Outputs chrom.sizes to stdout when no -o specified.""" - result = cli("fasta", "chrom-sizes", str(sample_fasta)) - - assert result.exit_code == 0 - lines = result.stdout.strip().split("\n") - assert len(lines) > 0 - for line in lines: - parts = line.split("\t") - assert len(parts) == 2 - def test_chrom_sizes_values(self, cli): """Verify chrom.sizes values for known file.""" result = cli("fasta", "chrom-sizes", str(BASE_FASTA)) assert result.exit_code == 0 - lines = result.stdout.strip().split("\n") - - # base.fa has chrX(8), chr1(4), chr2(4) sizes = {} - for line in lines: + for line in result.stdout.strip().split("\n"): name, length = line.split("\t") sizes[name] = int(length) @@ -226,62 +117,32 @@ def test_chrom_sizes_values(self, cli): class 
TestFastaIndex: """Tests for: refget fasta index """ - def test_creates_fai_file(self, cli, sample_fasta): - """Creates .fai file.""" - result = cli("fasta", "index", str(sample_fasta)) - - assert result.exit_code == 0 - fai_path = Path(str(sample_fasta) + ".fai") - assert fai_path.exists() - - def test_creates_seqcol_file(self, cli, sample_fasta): - """Creates .seqcol.json file.""" - result = cli("fasta", "index", str(sample_fasta)) + def test_index_creates_all_files(self, cli, sample_fasta): + """Index with --json lists all 5 created files.""" + result = cli("fasta", "index", str(sample_fasta), "--json") assert result.exit_code == 0 - seqcol_path = sample_fasta.parent / f"{sample_fasta.stem}.seqcol.json" - assert seqcol_path.exists() - - data = json.loads(seqcol_path.read_text()) - assert "names" in data - - def test_creates_chrom_sizes_file(self, cli, sample_fasta): - """Creates .chrom.sizes file.""" - result = cli("fasta", "index", str(sample_fasta)) - - assert result.exit_code == 0 - sizes_path = sample_fasta.parent / f"{sample_fasta.stem}.chrom.sizes" - assert sizes_path.exists() - - def test_index_summary_output(self, cli, sample_fasta): - """Index command provides summary output.""" - result = cli("fasta", "index", str(sample_fasta)) - - assert result.exit_code == 0 - # Should indicate files created - assert len(result.stdout) > 0 + data = json.loads(result.stdout) + assert len(data["files_created"]) == 5 + extensions = [Path(f).suffix for f in data["files_created"]] + assert ".fai" in extensions + assert ".json" in extensions + assert ".rgsi" in extensions + assert ".rgci" in extensions class TestFastaStats: """Tests for: refget fasta stats """ - def test_outputs_stats_json(self, cli, sample_fasta): - """Outputs statistics in JSON format.""" - result = cli("fasta", "stats", str(sample_fasta), "--json") - - data = assert_json_output(result, ["sequences", "total_length"]) - assert isinstance(data["sequences"], int) - assert data["sequences"] > 0 - - def test_stats_values(self, cli, sample_fasta): - """Stats values are correct.""" - result = cli("fasta", "stats", str(sample_fasta), "--json") + def test_stats_known_file(self, cli): + """Stats for known test file.""" + result = cli("fasta", "stats", str(BASE_FASTA), "--json") assert result.exit_code == 0 data = json.loads(result.stdout) - # sample_fasta has 2 sequences, each 8 bases - assert data["sequences"] == 2 + # base.fa: chrX(8), chr1(4), chr2(4) = 16 total + assert data["sequences"] == 3 assert data["total_length"] == 16 def test_stats_plain_output(self, cli, sample_fasta): @@ -289,20 +150,8 @@ def test_stats_plain_output(self, cli, sample_fasta): result = cli("fasta", "stats", str(sample_fasta)) assert result.exit_code == 0 - # Should have some output assert len(result.stdout.strip()) > 0 - def test_stats_known_file(self, cli): - """Stats for known test file.""" - result = cli("fasta", "stats", str(BASE_FASTA), "--json") - - assert result.exit_code == 0 - data = json.loads(result.stdout) - - # base.fa: chrX(8), chr1(4), chr2(4) = 16 total - assert data["sequences"] == 3 - assert data["total_length"] == 16 - class TestFastaValidate: """Tests for: refget fasta validate """ @@ -310,57 +159,73 @@ class TestFastaValidate: def test_valid_fasta(self, cli, sample_fasta): """Valid FASTA passes validation.""" result = cli("fasta", "validate", str(sample_fasta)) - assert result.exit_code == 0 def test_invalid_fasta_exits_nonzero(self, cli, tmp_path): """Invalid FASTA fails validation.""" invalid = tmp_path / "invalid.fa" 
invalid.write_text("This is not a valid FASTA file\nNo headers here\n") - result = cli("fasta", "validate", str(invalid)) - - # Should fail with non-zero exit code assert result.exit_code != 0 -class TestFastaErrorHandling: - """Test error handling for fasta commands.""" +class TestFastaRgsi: + """Tests for: refget fasta rgsi """ - def test_nonexistent_file(self, cli): - """Graceful error for nonexistent file.""" - result = cli("fasta", "digest", "/path/to/nonexistent.fa") + def test_rgsi_format_and_content(self, cli, sample_fasta): + """Creates .rgsi with correct headers, columns, and sequence data.""" + result = cli("fasta", "rgsi", str(sample_fasta)) - assert result.exit_code != 0 - # Should have informative error message - assert ( - len(result.stdout) > 0 or len(result.stderr if hasattr(result, "stderr") else "") > 0 - ) - - def test_empty_fasta(self, cli, tmp_path): - """Handle empty FASTA file.""" - empty = tmp_path / "empty.fa" - empty.write_text("") - - result = cli("fasta", "stats", str(empty), "--json") - - # May succeed with 0 sequences or fail gracefully - if result.exit_code == 0: - data = json.loads(result.stdout) - assert data["sequences"] == 0 - - def test_permission_denied(self, cli, tmp_path): - """Handle permission denied.""" - # This test may be skipped on systems where we can't change permissions - protected = tmp_path / "protected.fa" - protected.write_text(">chr1\nACGT\n") - - import os - import stat - - try: - os.chmod(protected, 0o000) - result = cli("fasta", "digest", str(protected)) - assert result.exit_code != 0 - finally: - os.chmod(protected, stat.S_IRUSR | stat.S_IWUSR) + assert result.exit_code == 0 + rgsi_path = sample_fasta.parent / f"{sample_fasta.stem}.rgsi" + assert rgsi_path.exists() + + content = rgsi_path.read_text() + assert "##seqcol_digest=" in content + assert "#name\tlength\talphabet\tsha512t24u\tmd5\tdescription" in content + + data_lines = [l for l in content.strip().split("\n") if not l.startswith("#")] + assert len(data_lines) == 2 # sample_fasta has 2 sequences + + # Verify first sequence + cols = data_lines[0].split("\t") + assert len(cols) == 6 + assert cols[0] == "chr1" + assert cols[1] == "8" + + def test_rgsi_custom_output(self, cli, sample_fasta, tmp_path): + """Writes to a custom output path with -o.""" + custom_output = tmp_path / "custom.rgsi" + result = cli("fasta", "rgsi", str(sample_fasta), "-o", str(custom_output)) + + assert result.exit_code == 0 + assert custom_output.exists() + + +class TestFastaRgci: + """Tests for: refget fasta rgci """ + + def test_rgci_format_and_digest(self, cli, sample_fasta): + """Creates .rgci with correct columns, and digest matches fasta digest.""" + # Get expected digest + digest_result = cli("fasta", "digest", str(sample_fasta)) + expected_digest = json.loads(digest_result.stdout)["digest"] + + # Generate RGCI + result = cli("fasta", "rgci", str(sample_fasta)) + assert result.exit_code == 0 + + rgci_path = sample_fasta.parent / f"{sample_fasta.stem}.rgci" + content = rgci_path.read_text() + lines = content.strip().split("\n") + + # Header has 8 columns + header_cols = lines[0].lstrip("#").split("\t") + assert len(header_cols) == 8 + assert header_cols[0] == "digest" + + # Data row: correct column count, digest matches, n_sequences correct + data_cols = lines[1].split("\t") + assert len(data_cols) == 8 + assert data_cols[0] == expected_digest + assert data_cols[1] == "2" # sample_fasta has 2 sequences From 49529744370dbdd75e16e329049bd42c17f84d6e Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 3 
Mar 2026 14:27:17 -0500 Subject: [PATCH 16/31] Add inventory_genomes.py for brickyard FASTA inventory Stdlib-only script that walks the brickyard directory tree, extracts metadata (accession, group, source, build), cross-references PEP, and outputs refgenomes_inventory.csv. --- changelog.md | 64 +++++++ .../ref-genome-analysis/inventory_genomes.py | 179 ++++++++++++++++++ refget/cli/store.py | 1 + tests/conftest.py | 2 +- tests/test_cli/test_store_commands.py | 2 +- 5 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 changelog.md create mode 100644 data_loaders/ref-genome-analysis/inventory_genomes.py diff --git a/changelog.md b/changelog.md new file mode 100644 index 0000000..65c3702 --- /dev/null +++ b/changelog.md @@ -0,0 +1,64 @@ +# Changelog + +All notable changes to the refget package will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.11.0] - 2026-02-28 + +This is a major release with significant restructuring, new features, and improved tooling. + +### Added + +- **CLI overhaul**: New `refget` CLI built with Typer, including subcommands for `store`, `seqcol`, `fasta`, `config`, and `admin` +- **Local store**: `refget store pull` command to pull sequence collections from remote servers to a local store +- **FASTA digesting**: `refget fasta digest` CLI command for computing sequence collection digests from FASTA files +- **Sequence collection similarities**: `calc_similarities` and `calc_similarities_from_json` functions with Jaccard similarity metrics and API endpoint +- **FASTA DRS objects**: `FastaDrsObject` model for serving FASTA files via DRS endpoints +- **Comparison interpreter**: Local sequence collection comparison interpretation module (SCIM) +- **Species filtering**: Filter similarities endpoint by species +- **Human-readable names**: `human_readable_name` field on `SequenceCollection` model +- **Pydantic API models**: Structured response models for API endpoints (fixes #33) +- **Swagger documentation**: API query parameter documentation +- **Frontend features**: Strip plots, one-to-many comparison view, FASTA digest tool, species selector, SCIM integration, dynamic version display +- **Compliance testing**: Comprehensive API compliance test suite +- **Integration test framework**: New integration test infrastructure with ephemeral databases +- **CLI test suite**: Extensive CLI tests covering store, seqcol, fasta, config, admin, and help commands +- **Service info**: `/service-info` endpoints for fasta_drs and refget_store features +- **Attribute listing**: `/list/attributes` endpoint per GA4GH paging guide +- **Bulk query**: Preload and bulk query support for sequence collections +- **R package**: First pass at `refget-r` R bindings (experimental) + +### Changed + +- **Switched to gtars**: Replaced pyfaidx and henge with gtars for FASTA parsing and digest computation +- **Major code restructure**: Consolidated schemas, reorganized modules, reduced code duplication +- **Improved error messages**: Better dependency error messages (fixes #49), clearer import errors +- **Performance optimizations**: Faster level 2 retrieval using `get_many`, optimized similarity calculations +- **Updated GA4GH compliance**: Aligned with latest refget sequence collections specification +- **Schema consolidation**: Single unified schema replacing multiple schema files +- **Collated attribute validation**: Validation 
for collated attributes in sequence collections +- **Frontend overhaul**: Updated comparison view, heatmap aliases, loading states, error handling + +### Removed + +- **Henge dependency**: Removed henge and biopython requirements +- **Legacy code**: Removed old flags code, duplicate functions, unused yacman imports + +### Fixed + +- `from_PySequenceCollection` construction and associated tests +- Circular dependency import issues in utilities +- Level 1 model representation +- Comparison links +- Cancel handling in frontend +- Various linting and type hint improvements + +### Security + +- Bumped frontend dependencies: vite, minimatch, rollup, esbuild, js-yaml, vega + +## [0.10.1] - 2025-06-01 + +Previous release. See git history for details. diff --git a/data_loaders/ref-genome-analysis/inventory_genomes.py b/data_loaders/ref-genome-analysis/inventory_genomes.py new file mode 100644 index 0000000..d741601 --- /dev/null +++ b/data_loaders/ref-genome-analysis/inventory_genomes.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +""" +Inventory all FASTA files in the brickyard refgenomes directory. + +Walks the brickyard directory tree, extracts structured metadata from paths +and filenames, cross-references against the PEP project, and produces a +master CSV inventory. + +Zero non-stdlib dependencies. + +Usage: + python inventory_genomes.py + python inventory_genomes.py --dry-run --no-pep + python inventory_genomes.py --root /tmp/mock_brickyard --dry-run --no-pep +""" + +import argparse +import csv +import json +import os +import os.path +import pathlib +import re +import sys +import urllib.error +import urllib.request + +BRICKYARD_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +PEP_URL = "https://pephub-api.databio.org/api/v1/projects/donaldcampbelljr/human_mouse_fasta_brickyard/samples?tag=default" +OUTPUT_FILE = os.path.join(BRICKYARD_ROOT, "refgenomes_inventory.csv") +FASTA_EXTENSIONS = {".fa", ".fa.gz", ".fna", ".fna.gz", ".fasta", ".fasta.gz"} +ACCESSION_PATTERN = re.compile(r"(GC[AF]_\d+\.\d+)") + + +def fetch_pep_samples(): + """Fetch PEP samples from the PEPHub API. + + Returns a dict mapping absolute fasta path to sample_name. + Falls back to an empty dict if the API is unreachable. + """ + try: + with urllib.request.urlopen(PEP_URL) as response: + data = json.loads(response.read().decode("utf-8")) + lookup = {} + for item in data.get("items", []): + fasta_path = item.get("fasta", "") + sample_name = item.get("sample_name", "") + if fasta_path: + lookup[fasta_path] = sample_name + print(f"Fetched {len(lookup)} PEP samples.", file=sys.stderr) + return lookup + except urllib.error.URLError as e: + print(f"Warning: Could not fetch PEP samples: {e}", file=sys.stderr) + return {} + + +def walk_fasta_files(root): + """Walk the directory tree and yield absolute paths of FASTA files.""" + for dirpath, _dirnames, filenames in os.walk(root): + for name in filenames: + if any(name.endswith(ext) for ext in FASTA_EXTENSIONS): + yield os.path.join(dirpath, name) + + +def extract_metadata(filepath, root): + """Extract structured metadata from a FASTA file path. + + Returns a dict with: path, filename, accession, group, source, build. 
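+
+    Example (hypothetical path): with root=/data/refgenomes and
+    filepath=/data/refgenomes/human/ncbi/GRCh38/GCA_000001405.15_genomic.fna.gz,
+    the relative parts are ("human", "ncbi", "GRCh38", filename), so
+    group="human", source="ncbi", build="GRCh38", and the accession pattern
+    captures "GCA_000001405.15" from the filename.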
+ """ + filename = os.path.basename(filepath) + match = ACCESSION_PATTERN.search(filename) + accession = match.group(1) if match else "" + + rel = os.path.relpath(filepath, root) + parts = pathlib.PurePosixPath(rel).parts + # parts[0] = group, parts[1] = source, parts[2] = build (or subdir), parts[-1] = filename + group = parts[0] if len(parts) > 1 else "" + source = parts[1] if len(parts) > 2 else "" + build = parts[2] if len(parts) > 3 else "" + + return { + "path": filepath, + "filename": filename, + "accession": accession, + "group": group, + "source": source, + "build": build, + } + + +def add_pep_info(record, pep_lookup): + """Add PEP sample name to a record if it exists in the lookup.""" + record["pep_sample_name"] = pep_lookup.get(record["path"], "") + + +def write_inventory(records, output_path): + """Write the inventory records to a CSV file.""" + fieldnames = ["path", "filename", "accession", "group", "source", "build", "pep_sample_name"] + with open(output_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(records) + print(f"Wrote {len(records)} records to {output_path}", file=sys.stderr) + + +def main(): + parser = argparse.ArgumentParser( + description="Inventory FASTA files in the brickyard refgenomes directory." + ) + parser.add_argument( + "--root", + default=BRICKYARD_ROOT, + help=f"Root directory to scan (default: {BRICKYARD_ROOT})", + ) + parser.add_argument( + "--output", + default=None, + help=f"Output CSV path (default: /refgenomes_inventory.csv)", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print the first 10 rows to stdout instead of writing CSV.", + ) + parser.add_argument( + "--no-pep", + action="store_true", + help="Skip PEP fetching (useful for offline HPC nodes).", + ) + args = parser.parse_args() + + root = args.root + output_path = args.output if args.output else os.path.join(root, "refgenomes_inventory.csv") + + # Step 1: Fetch PEP samples + if args.no_pep: + pep_lookup = {} + print("Skipping PEP fetch (--no-pep).", file=sys.stderr) + else: + pep_lookup = fetch_pep_samples() + + # Step 2: Walk and collect FASTA files + print(f"Scanning {root} ...", file=sys.stderr) + records = [] + for filepath in walk_fasta_files(root): + record = extract_metadata(filepath, root) + add_pep_info(record, pep_lookup) + records.append(record) + + # Step 3: Sort for deterministic output + records.sort(key=lambda r: r["path"]) + + # Step 4: Output + if args.dry_run: + fieldnames = ["path", "filename", "accession", "group", "source", "build", "pep_sample_name"] + writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames) + writer.writeheader() + for row in records[:10]: + writer.writerow(row) + else: + write_inventory(records, output_path) + + # Step 5: Summary stats + total = len(records) + with_accession = sum(1 for r in records if r["accession"]) + in_pep = sum(1 for r in records if r["pep_sample_name"]) + unique_groups = len({r["group"] for r in records if r["group"]}) + unique_sources = len({r["source"] for r in records if r["source"]}) + + print(f"\nSummary:", file=sys.stderr) + print(f" Total FASTA files: {total}", file=sys.stderr) + print(f" Files with accessions: {with_accession}", file=sys.stderr) + print(f" Files in PEP: {in_pep}", file=sys.stderr) + print(f" Unique groups: {unique_groups}", file=sys.stderr) + print(f" Unique sources: {unique_sources}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/refget/cli/store.py b/refget/cli/store.py index 
9334630..6b97082 100644 --- a/refget/cli/store.py +++ b/refget/cli/store.py @@ -488,6 +488,7 @@ def pull( ), remote: Optional[str] = typer.Option( None, + "--server", "--remote", "-r", help="Remote store URL (default: try configured remote_stores)", diff --git a/tests/conftest.py b/tests/conftest.py index 536bc8c..1d26369 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,7 +38,7 @@ @pytest.fixture def runner(): """Typer CLI test runner.""" - return CliRunner() + return CliRunner(mix_stderr=False) @pytest.fixture diff --git a/tests/test_cli/test_store_commands.py b/tests/test_cli/test_store_commands.py index 120a53d..5544151 100644 --- a/tests/test_cli/test_store_commands.py +++ b/tests/test_cli/test_store_commands.py @@ -502,7 +502,7 @@ def test_metadata_no_fhr_set(self, cli, tmp_path): result = cli("store", "metadata", digest, "--path", str(store_path)) assert result.exit_code != 0 - assert "No FHR metadata" in result.stdout + assert "No FHR metadata" in result.stderr def test_metadata_set_from_json_file(self, cli, tmp_path): """Happy path: set FHR metadata from a JSON file.""" From c49e55f4c6c3002537d8e53bca4e2971fbf424bd Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 3 Mar 2026 15:50:01 -0500 Subject: [PATCH 17/31] parallel encoding for loading fasta --- changelog.md | 64 --------- .../ref-genome-analysis/build_refgetstore.py | 129 ++++++++++++++++++ .../riva_pangenome_analysis/README.md | 28 +++- refget/cli/store.py | 10 +- 4 files changed, 162 insertions(+), 69 deletions(-) delete mode 100644 changelog.md create mode 100644 data_loaders/ref-genome-analysis/build_refgetstore.py diff --git a/changelog.md b/changelog.md deleted file mode 100644 index 65c3702..0000000 --- a/changelog.md +++ /dev/null @@ -1,64 +0,0 @@ -# Changelog - -All notable changes to the refget package will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [0.11.0] - 2026-02-28 - -This is a major release with significant restructuring, new features, and improved tooling. 
- -### Added - -- **CLI overhaul**: New `refget` CLI built with Typer, including subcommands for `store`, `seqcol`, `fasta`, `config`, and `admin` -- **Local store**: `refget store pull` command to pull sequence collections from remote servers to a local store -- **FASTA digesting**: `refget fasta digest` CLI command for computing sequence collection digests from FASTA files -- **Sequence collection similarities**: `calc_similarities` and `calc_similarities_from_json` functions with Jaccard similarity metrics and API endpoint -- **FASTA DRS objects**: `FastaDrsObject` model for serving FASTA files via DRS endpoints -- **Comparison interpreter**: Local sequence collection comparison interpretation module (SCIM) -- **Species filtering**: Filter similarities endpoint by species -- **Human-readable names**: `human_readable_name` field on `SequenceCollection` model -- **Pydantic API models**: Structured response models for API endpoints (fixes #33) -- **Swagger documentation**: API query parameter documentation -- **Frontend features**: Strip plots, one-to-many comparison view, FASTA digest tool, species selector, SCIM integration, dynamic version display -- **Compliance testing**: Comprehensive API compliance test suite -- **Integration test framework**: New integration test infrastructure with ephemeral databases -- **CLI test suite**: Extensive CLI tests covering store, seqcol, fasta, config, admin, and help commands -- **Service info**: `/service-info` endpoints for fasta_drs and refget_store features -- **Attribute listing**: `/list/attributes` endpoint per GA4GH paging guide -- **Bulk query**: Preload and bulk query support for sequence collections -- **R package**: First pass at `refget-r` R bindings (experimental) - -### Changed - -- **Switched to gtars**: Replaced pyfaidx and henge with gtars for FASTA parsing and digest computation -- **Major code restructure**: Consolidated schemas, reorganized modules, reduced code duplication -- **Improved error messages**: Better dependency error messages (fixes #49), clearer import errors -- **Performance optimizations**: Faster level 2 retrieval using `get_many`, optimized similarity calculations -- **Updated GA4GH compliance**: Aligned with latest refget sequence collections specification -- **Schema consolidation**: Single unified schema replacing multiple schema files -- **Collated attribute validation**: Validation for collated attributes in sequence collections -- **Frontend overhaul**: Updated comparison view, heatmap aliases, loading states, error handling - -### Removed - -- **Henge dependency**: Removed henge and biopython requirements -- **Legacy code**: Removed old flags code, duplicate functions, unused yacman imports - -### Fixed - -- `from_PySequenceCollection` construction and associated tests -- Circular dependency import issues in utilities -- Level 1 model representation -- Comparison links -- Cancel handling in frontend -- Various linting and type hint improvements - -### Security - -- Bumped frontend dependencies: vite, minimatch, rollup, esbuild, js-yaml, vega - -## [0.10.1] - 2025-06-01 - -Previous release. See git history for details. diff --git a/data_loaders/ref-genome-analysis/build_refgetstore.py b/data_loaders/ref-genome-analysis/build_refgetstore.py new file mode 100644 index 0000000..7f3e09a --- /dev/null +++ b/data_loaders/ref-genome-analysis/build_refgetstore.py @@ -0,0 +1,129 @@ +""" +Build a RefgetStore from the refgenomes inventory CSV. 
+ +Reads refgenomes_inventory.csv and populates a RefgetStore with all FASTA +files. No alias registration -- that is a separate, deliberate step. + +Usage: + python build_refgetstore.py [--inventory PATH] [--store-path PATH] [--output PATH] [--limit N] +""" + +import argparse +import csv +import sys +import time + +from refget.store import RefgetStore + +STORE_PATH = "/project/shefflab/brickyard/refget_store" +INVENTORY_CSV = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refgenomes_inventory.csv" +OUTPUT_CSV = "digest_map.csv" + + +def parse_args(): + parser = argparse.ArgumentParser(description="Build RefgetStore from inventory CSV") + parser.add_argument("--inventory", default=INVENTORY_CSV, help="Input inventory CSV") + parser.add_argument("--store-path", default=STORE_PATH, help="RefgetStore path") + parser.add_argument("--output", default=OUTPUT_CSV, help="Output digest map CSV") + parser.add_argument("--limit", type=int, default=None, help="Process only first N rows (for testing)") + return parser.parse_args() + + +def read_inventory(csv_path): + """Read inventory CSV and return list of row dicts.""" + rows = [] + with open(csv_path, newline="") as f: + reader = csv.DictReader(f) + if reader.fieldnames is None: + print(f"ERROR: {csv_path} appears to be empty", file=sys.stderr) + sys.exit(1) + if "path" not in reader.fieldnames: + print(f"ERROR: {csv_path} missing required 'path' column", file=sys.stderr) + sys.exit(1) + for row in reader: + rows.append(row) + return rows + + +def write_digest_map(output_path, results): + """Write results to digest_map.csv.""" + fieldnames = ["path", "filename", "digest", "n_sequences", "was_new", "error"] + with open(output_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(results) + + +def main(): + args = parse_args() + + inventory = read_inventory(args.inventory) + if args.limit: + inventory = inventory[:args.limit] + print(f"Limited to first {args.limit} records") + total = len(inventory) + print(f"Processing {total} records from {args.inventory}") + + store = RefgetStore.on_disk(args.store_path) + store.set_quiet(True) + print(f"Store initialized at {args.store_path}") + + results = [] + n_success = 0 + n_failed = 0 + n_new = 0 + t_start = time.time() + + for i, row in enumerate(inventory, 1): + fasta_path = row["path"] + filename = row.get("filename", "") + + t0 = time.time() + print(f"[{i}/{total}] {filename}...", end=" ", flush=True) + + try: + meta, was_new = store.add_sequence_collection_from_fasta(fasta_path) + elapsed = time.time() - t0 + status = "NEW" if was_new else "exists" + if was_new: + n_new += 1 + print(f"{meta.digest} ({meta.n_sequences} seqs, {status}, {elapsed:.1f}s)") + n_success += 1 + results.append({ + "path": fasta_path, + "filename": filename, + "digest": meta.digest, + "n_sequences": meta.n_sequences, + "was_new": was_new, + "error": "", + }) + except Exception as e: + error_msg = f"{type(e).__name__}: {e}" + print(f"FAILED: {error_msg}") + n_failed += 1 + results.append({ + "path": fasta_path, + "filename": filename, + "digest": "", + "n_sequences": 0, + "was_new": False, + "error": error_msg, + }) + + write_digest_map(args.output, results) + + total_time = time.time() - t_start + print(f"\nDone in {total_time:.1f}s. 
{n_success}/{total} succeeded, {n_new} new, {n_failed} failed.") + print(f"Digest map written to {args.output}") + print(f"\nStore stats: {store.stats()}") + + if n_failed > 0: + print(f"\nFailed files:") + for r in results: + if r["error"]: + print(f" {r['filename']}: {r['error']}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/riva_pangenome_analysis/README.md b/data_loaders/riva_pangenome_analysis/README.md index 9ecce89..3acd622 100644 --- a/data_loaders/riva_pangenome_analysis/README.md +++ b/data_loaders/riva_pangenome_analysis/README.md @@ -1,5 +1,23 @@ # RIVA Pangenome RefgetStore +## Prep + + +```sh +# Build gtars +cd ~/code/gtars +git checkout refgetstore +git pull +cd gtars-python +python -m pip install -e . + +# Next, install local refget: +cd ~/code/refget +git checkout dev +git pull +python -m pip install -e . +``` + ## Build the store ```python @@ -27,7 +45,7 @@ import os from pathlib import Path from refget.store import RefgetStore -store_dir = Path(os.path.expandvars("$BRICKYARD/datasets_downloaded/pangenome_fasta/refget_store2")) +store_dir = Path(os.path.expandvars("$BRICKYARD/datasets_downloaded/pangenome_fasta/refget_store")) store = RefgetStore.on_disk(str(store_dir)) @@ -35,10 +53,12 @@ store.list_collections() cm = store.get_collection_metadata("s0nMiOFHPsIBrm2bd3PkzWXKLKWQZq70") -EXAMPLE_COLLECTION = "0aHV7I-94paL9Z1H4LNlqsW3WxJhlou5" -EXAMPLE_SEQ_NAME = "JAGYVX010000006.1 unmasked:primary_assembly HG03540.pri.mat.f1_v2:JAGYVX010000006.1:1:96320881:1" +EXAMPLE_COLLECTION = "L5fggdWYz5tCr4v8XbPYoOwv79Sqmf1W" +EXAMPLE_SEQ_NAME = "JAGYVI010000261.1" + + +record = store.get_sequence_by_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) -record = store.get_sequence_by_collection_and_name(EXAMPLE_COLLECTION, EXAMPLE_SEQ_NAME) ## Upload to S3 diff --git a/refget/cli/store.py b/refget/cli/store.py index 6b97082..d2eb603 100644 --- a/refget/cli/store.py +++ b/refget/cli/store.py @@ -190,6 +190,12 @@ def add( "-q", help="Suppress progress output", ), + threads: Optional[int] = typer.Option( + None, + "--threads", + "-t", + help="Number of threads for parallel encoding (default: all CPUs)", + ), ) -> None: """ Import a FASTA file to the local store. 
@@ -220,7 +226,9 @@ def add( store.set_encoding_mode(StorageMode.Encoded) # Add the FASTA file - returns (metadata, was_new) with all info we need - metadata, was_new = store.add_sequence_collection_from_fasta(str(fasta.resolve())) + metadata, was_new = store.add_sequence_collection_from_fasta( + str(fasta.resolve()), threads=threads + ) print_json( { From ba6f35f2a3af69561d7e19195b466737760c8c27 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 3 Mar 2026 21:43:18 -0500 Subject: [PATCH 18/31] add explorer, store builders --- data_loaders/load_demo_seqcols.py | 2 +- .../process-all-genomes.sbatch | 15 + .../ref-genome-analysis/verify_refgetstore.py | 444 ++++++++++++++++++ frontend/src/components/CliSnippet.jsx | 136 ++++++ frontend/src/components/StoreNav.jsx | 222 +++++++++ frontend/src/main.jsx | 38 ++ frontend/src/pages/StoreAliases.jsx | 198 ++++++++ frontend/src/pages/StoreCollection.jsx | 279 +++++++++++ frontend/src/pages/StoreExplorer.jsx | 135 ++++++ frontend/src/pages/StoreOverview.jsx | 309 ++++++++++++ frontend/src/pages/StoreSequences.jsx | 337 +++++++++++++ frontend/src/services/fetchData.jsx | 28 +- frontend/src/services/storeService.js | 217 +++++++++ frontend/src/stores/explorerStore.js | 113 +++++ 14 files changed, 2462 insertions(+), 11 deletions(-) create mode 100644 data_loaders/ref-genome-analysis/process-all-genomes.sbatch create mode 100644 data_loaders/ref-genome-analysis/verify_refgetstore.py create mode 100644 frontend/src/components/CliSnippet.jsx create mode 100644 frontend/src/components/StoreNav.jsx create mode 100644 frontend/src/pages/StoreAliases.jsx create mode 100644 frontend/src/pages/StoreCollection.jsx create mode 100644 frontend/src/pages/StoreExplorer.jsx create mode 100644 frontend/src/pages/StoreOverview.jsx create mode 100644 frontend/src/pages/StoreSequences.jsx create mode 100644 frontend/src/services/storeService.js create mode 100644 frontend/src/stores/explorerStore.js diff --git a/data_loaders/load_demo_seqcols.py b/data_loaders/load_demo_seqcols.py index 21c9499..cb49246 100644 --- a/data_loaders/load_demo_seqcols.py +++ b/data_loaders/load_demo_seqcols.py @@ -19,7 +19,7 @@ DEMO_FASTA = json.load(open("test_fasta/test_fasta_digests.json")) # Storage locations from environment (if set, will upload; otherwise use demo defaults with skip_upload) -ENV_STORAGE = json.loads(os.environ.get("FASTA_STORAGE_LOCATIONS", "[]")) +ENV_STORAGE = json.loads(os.environ.get("FASTA_STORAGE_LOCATIONS") or "[]") if ENV_STORAGE: DEMO_STORAGE = ENV_STORAGE SKIP_UPLOAD = False diff --git a/data_loaders/ref-genome-analysis/process-all-genomes.sbatch b/data_loaders/ref-genome-analysis/process-all-genomes.sbatch new file mode 100644 index 0000000..28f3de7 --- /dev/null +++ b/data_loaders/ref-genome-analysis/process-all-genomes.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=refgetstore +#SBATCH --output=refgetstore_%j.log +#SBATCH --error=refgetstore_%j.log +#SBATCH --partition=standard +#SBATCH --time=24:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=4 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis + +python build_refgetstore.py --store-path /project/shefflab/brickyard/refget_store diff --git a/data_loaders/ref-genome-analysis/verify_refgetstore.py b/data_loaders/ref-genome-analysis/verify_refgetstore.py new file mode 100644 index 0000000..fc53a59 --- /dev/null +++ b/data_loaders/ref-genome-analysis/verify_refgetstore.py @@ 
-0,0 +1,444 @@ +#!/usr/bin/env python3 +""" +Verification script for the brickyard RefgetStore. + +Runs automated checks against the store at STORE_PATH and produces a +structured pass/fail report. Designed to work with a partial store +(not all files loaded yet) and without aliases (alias registration +has not been done yet). + +Usage: + python verify_refgetstore.py + python verify_refgetstore.py --store-path /alt/path --limit 5 + +Expected results (update after first successful run): +- collections: ~XXX unique (out of ~1,147 input FASTAs processed so far) +- sequences: ~XXX unique +- roundtrip digest match: PASS for at least one collection +""" + +import argparse +import csv +import json +import os +import subprocess +import sys +import tempfile +import time + +STORE_PATH = "/project/shefflab/brickyard/refget_store" +INVENTORY_CSV = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refgenomes_inventory.csv" +DIGEST_MAP_CSV = "/home/nsheff/Dropbox/workspaces/refgenie/repos/refget/data_loaders/ref-genome-analysis/digest_map.csv" + +results = [] + + +def check(name, passed, detail=""): + """Record and print a check result.""" + status = "PASS" if passed else "FAIL" + results.append({"name": name, "status": status, "detail": detail}) + print(f"[{status}] {name}" + (f" -- {detail}" if detail else "")) + + +def parse_args(): + parser = argparse.ArgumentParser(description="Verify brickyard RefgetStore") + parser.add_argument("--store-path", default=STORE_PATH, help="RefgetStore path") + parser.add_argument("--inventory", default=INVENTORY_CSV, help="Inventory CSV path") + parser.add_argument("--digest-map", default=DIGEST_MAP_CSV, help="Digest map CSV path") + parser.add_argument( + "--limit", + type=int, + default=3, + help="Number of collections to test for round-trip export (default: 3)", + ) + parser.add_argument( + "--skip-roundtrip", + action="store_true", + help="Skip round-trip FASTA export checks (slow for large genomes)", + ) + return parser.parse_args() + + +# ── Check 1: Store opens and stats are valid ─────────────────────────── + + +def check_store_opens(store_path): + """Open the store and verify basic stats.""" + try: + from refget.store import RefgetStore + + store = RefgetStore.open_local(store_path) + check("store_opens", True, f"path={store_path}") + except Exception as e: + check("store_opens", False, f"path={store_path}, error={e}") + return None + + # Count collections and sequences + try: + collections = list(store.list_collections()) + n_collections = len(collections) + except Exception as e: + check("list_collections", False, f"error={e}") + n_collections = 0 + + try: + sequences = list(store.list_sequences()) + n_sequences = len(sequences) + except Exception as e: + check("list_sequences", False, f"error={e}") + n_sequences = 0 + + check("collections_nonzero", n_collections > 0, f"collections={n_collections}") + check("sequences_nonzero", n_sequences > 0, f"sequences={n_sequences}") + + # Stats object + try: + stats = store.stats() + check("stats_callable", True, f"stats={stats}") + except Exception as e: + check("stats_callable", False, f"error={e}") + + return store + + +# ── Check 2: Digest map coverage ────────────────────────────────────── + + +def check_digest_map(store, digest_map_path): + """Verify that digests in the digest map are present in the store.""" + if not os.path.exists(digest_map_path): + check("digest_map_exists", False, f"not found: {digest_map_path}") + return + + check("digest_map_exists", True, f"path={digest_map_path}") + + # 
Read digest map + rows = [] + with open(digest_map_path, newline="") as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(row) + + total = len(rows) + with_digest = [r for r in rows if r.get("digest")] + with_error = [r for r in rows if r.get("error")] + + check( + "digest_map_stats", + len(with_digest) > 0, + f"total_rows={total}, with_digest={len(with_digest)}, with_error={len(with_error)}", + ) + + # Get store collection digests for comparison + store_digests = {meta.digest for meta in store.list_collections()} + + # Check how many digest_map digests are in the store + matched = 0 + missing = [] + for row in with_digest: + d = row["digest"] + if d in store_digests: + matched += 1 + else: + missing.append(d[:16] + "...") + + check( + "digest_map_coverage", + matched == len(with_digest), + f"in_store={matched}/{len(with_digest)}" + + (f", missing_sample={missing[:5]}" if missing else ""), + ) + + +# ── Check 3: Collection level2 data integrity ───────────────────────── + + +def check_level2_integrity(store, n_to_check=3): + """Verify level2 data for a sample of collections.""" + collections = list(store.list_collections()) + if not collections: + check("level2_integrity", False, "no collections to check") + return + + sample = collections[:n_to_check] + all_ok = True + details = [] + + for meta in sample: + digest = meta.digest + try: + level2 = store.get_collection_level2(digest) + names = level2.get("names", []) + lengths = level2.get("lengths", []) + sequences = level2.get("sequences", []) + + arrays_ok = ( + len(names) == len(lengths) == len(sequences) and len(names) > 0 + ) + lengths_ok = all(l > 0 for l in lengths) if lengths else False + + if not arrays_ok or not lengths_ok: + all_ok = False + details.append( + f"{digest[:16]}: names={len(names)} lengths={len(lengths)} " + f"sequences={len(sequences)} lengths_positive={lengths_ok}" + ) + else: + details.append( + f"{digest[:16]}: {len(names)} seqs, OK" + ) + except Exception as e: + all_ok = False + details.append(f"{digest[:16]}: ERROR {e}") + + check( + "level2_arrays_valid", + all_ok, + f"checked={len(sample)}, results=[{'; '.join(details)}]", + ) + + +# ── Check 4: Round-trip FASTA export and digest comparison ───────────── + + +def check_roundtrip_export(store, store_path, digest_map_path, inventory_path, limit=3): + """Export FASTAs from the store and compare digests to originals.""" + try: + from gtars.refget import digest_fasta + except ImportError: + check("roundtrip_export", False, "gtars.refget.digest_fasta not available") + return + + # Build a mapping from digest -> original path using digest_map + inventory + digest_to_original = {} + + if os.path.exists(digest_map_path) and os.path.exists(inventory_path): + # Read inventory to get path -> accession mapping (for reference) + inv_lookup = {} + with open(inventory_path, newline="") as f: + for row in csv.DictReader(f): + inv_lookup[row["path"]] = row + + # Read digest_map to get digest -> path mapping + with open(digest_map_path, newline="") as f: + for row in csv.DictReader(f): + if row.get("digest") and row.get("path"): + # Only keep the first mapping per digest (avoid duplicates) + if row["digest"] not in digest_to_original: + digest_to_original[row["digest"]] = row["path"] + + if not digest_to_original: + check("roundtrip_export", False, "no digest-to-path mappings found") + return + + # Pick a sample of collections that have original files + collections = list(store.list_collections()) + test_pairs = [] + for meta in collections: + if meta.digest 
in digest_to_original: + original_path = digest_to_original[meta.digest] + if os.path.exists(original_path): + test_pairs.append((meta.digest, original_path)) + if len(test_pairs) >= limit: + break + + if not test_pairs: + check("roundtrip_export", False, "no original FASTA files accessible for comparison") + return + + all_match = True + details = [] + + for digest, original_path in test_pairs: + fd, tmp_path = tempfile.mkstemp(suffix=".fa") + os.close(fd) + try: + store.export_fasta(digest, tmp_path, None, 80) + + exported_sc = digest_fasta(tmp_path) + original_sc = digest_fasta(original_path) + + match = exported_sc.digest == original_sc.digest + if not match: + all_match = False + basename = os.path.basename(original_path) + details.append( + f"{basename}: {'MATCH' if match else 'MISMATCH'} " + f"(exported={exported_sc.digest[:16]}... " + f"original={original_sc.digest[:16]}...)" + ) + except Exception as e: + all_match = False + basename = os.path.basename(original_path) + details.append(f"{basename}: ERROR {e}") + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + + check( + "roundtrip_digest_match", + all_match, + f"tested={len(test_pairs)}, results=[{'; '.join(details)}]", + ) + + +# ── Check 5: CLI stats command works ────────────────────────────────── + + +def check_cli_stats(store_path): + """Verify the CLI stats command runs against the store.""" + try: + result = subprocess.run( + ["refget", "store", "stats", "--path", store_path], + capture_output=True, + text=True, + timeout=60, + ) + if result.returncode == 0: + check("cli_stats_runs", True, f"stdout={result.stdout.strip()[:200]}") + else: + check( + "cli_stats_runs", + False, + f"returncode={result.returncode}, stderr={result.stderr.strip()[:200]}", + ) + except FileNotFoundError: + check("cli_stats_runs", False, "refget CLI not found in PATH") + except subprocess.TimeoutExpired: + check("cli_stats_runs", False, "timed out after 60s") + except Exception as e: + check("cli_stats_runs", False, f"error={e}") + + +# ── Check 6: Inventory cross-reference ──────────────────────────────── + + +def check_inventory_crossref(store, inventory_path, digest_map_path): + """Cross-check inventory against digest_map to verify completeness.""" + if not os.path.exists(inventory_path): + check("inventory_exists", False, f"not found: {inventory_path}") + return + if not os.path.exists(digest_map_path): + check("inventory_crossref", False, f"digest_map not found: {digest_map_path}") + return + + # Count inventory rows + with open(inventory_path, newline="") as f: + inv_rows = list(csv.DictReader(f)) + + # Count digest_map rows + with open(digest_map_path, newline="") as f: + dm_rows = list(csv.DictReader(f)) + + inv_paths = {r["path"] for r in inv_rows} + dm_paths = {r["path"] for r in dm_rows} + + # How many inventory files have been processed? 
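+    # Set arithmetic on absolute path strings; assumes the inventory and the
+    # digest map record the same unnormalized path for a given file.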
+ processed = inv_paths & dm_paths + unprocessed = inv_paths - dm_paths + + check( + "inventory_processing_coverage", + True, # Always pass -- partial is expected + f"inventory={len(inv_rows)}, digest_map={len(dm_rows)}, " + f"processed={len(processed)}, unprocessed={len(unprocessed)}", + ) + + # Check error rate in digest_map + errors = [r for r in dm_rows if r.get("error")] + check( + "digest_map_error_rate", + len(errors) == 0, + f"errors={len(errors)}/{len(dm_rows)}" + + (f", samples={[r['filename'] + ': ' + r['error'] for r in errors[:3]]}" if errors else ""), + ) + + +# ── Summary and report ──────────────────────────────────────────────── + + +def print_summary(store_path): + """Print summary and write JSON report.""" + print("\n" + "=" * 60) + print("VERIFICATION SUMMARY") + print("=" * 60) + passed = sum(1 for r in results if r["status"] == "PASS") + failed = sum(1 for r in results if r["status"] == "FAIL") + print(f"Passed: {passed}") + print(f"Failed: {failed}") + print(f"Total: {passed + failed}") + + if failed > 0: + print("\nFailed checks:") + for r in results: + if r["status"] == "FAIL": + print(f" - {r['name']}: {r['detail']}") + + # Write JSON report next to the store + report_dir = os.path.dirname(os.path.abspath(__file__)) + report_path = os.path.join(report_dir, "verification_report.json") + with open(report_path, "w") as f: + json.dump( + {"results": results, "passed": passed, "failed": failed}, + f, + indent=2, + ) + print(f"\nJSON report: {report_path}") + + return failed + + +def main(): + args = parse_args() + store_path = args.store_path + + print(f"Verifying RefgetStore at: {store_path}") + print(f"Inventory CSV: {args.inventory}") + print(f"Digest map CSV: {args.digest_map}") + print("=" * 60) + + t_start = time.time() + + # Check 1: Store opens and stats + print("\n── Check 1: Store opens and stats ──") + store = check_store_opens(store_path) + if store is None: + print("\nStore failed to open. Cannot continue.") + print_summary(store_path) + sys.exit(1) + + # Check 2: Digest map coverage + print("\n── Check 2: Digest map coverage ──") + check_digest_map(store, args.digest_map) + + # Check 3: Level2 data integrity + print("\n── Check 3: Collection level2 data integrity ──") + check_level2_integrity(store, n_to_check=min(args.limit, 5)) + + # Check 4: Round-trip FASTA export + if args.skip_roundtrip: + print("\n── Check 4: Round-trip export (SKIPPED) ──") + check("roundtrip_digest_match", True, "skipped via --skip-roundtrip") + else: + print("\n── Check 4: Round-trip FASTA export ──") + check_roundtrip_export( + store, store_path, args.digest_map, args.inventory, limit=args.limit + ) + + # Check 5: CLI stats command + print("\n── Check 5: CLI stats command ──") + check_cli_stats(store_path) + + # Check 6: Inventory cross-reference + print("\n── Check 6: Inventory cross-reference ──") + check_inventory_crossref(store, args.inventory, args.digest_map) + + elapsed = time.time() - t_start + print(f"\nVerification completed in {elapsed:.1f}s") + + failed = print_summary(store_path) + sys.exit(1 if failed > 0 else 0) + + +if __name__ == "__main__": + main() diff --git a/frontend/src/components/CliSnippet.jsx b/frontend/src/components/CliSnippet.jsx new file mode 100644 index 0000000..61e5ec5 --- /dev/null +++ b/frontend/src/components/CliSnippet.jsx @@ -0,0 +1,136 @@ +import { useState } from 'react'; + +/** + * A copyable CLI command snippet. + * Shows a monospace command with a copy button. 
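+ * Props:
+ *   command: shell command string to display and copy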
+ */ +const CliCommand = ({ command }) => { + const [copied, setCopied] = useState(false); + + const handleCopy = () => { + navigator.clipboard.writeText(command).then(() => { + setCopied(true); + setTimeout(() => setCopied(false), 1500); + }); + }; + + return ( +
+
{command}
+ +
+ ); +}; + +/** + * A collapsible panel of CLI commands for a given context. + * Props: + * commands: [{label, command}] + */ +const CliSnippet = ({ commands }) => { + const [open, setOpen] = useState(false); + + if (!commands || commands.length === 0) return null; + + return ( +
+ + {open && ( +
+ {commands.map(({ label, command }, i) => ( +
+ {label && {label}} + +
+ ))} + + Install: pip install refget + +
+ )} +
+ ); +}; + +/** + * A small icon button for table rows that opens a modal with CLI/Python snippets. + * Props: + * snippets: [{ label, cli, python }] + * title: modal title + */ +const RowCodeButton = ({ snippets, title = 'Code' }) => { + const [show, setShow] = useState(false); + const [tab, setTab] = useState('cli'); + + return ( + <> + + {show && ( + <> +
setShow(false)} /> +
setShow(false)}> +
e.stopPropagation()}> +
+
+
+ + {title} +
+
+
+
    +
  • + +
  • +
  • + +
  • +
+ {snippets.map((snippet, i) => ( +
+ {snippet.label && {snippet.label}} + +
+ ))} +
+
+
+
+ + )} + + ); +}; + +export { CliSnippet, CliCommand, RowCodeButton }; diff --git a/frontend/src/components/StoreNav.jsx b/frontend/src/components/StoreNav.jsx new file mode 100644 index 0000000..f24d50f --- /dev/null +++ b/frontend/src/components/StoreNav.jsx @@ -0,0 +1,222 @@ +import { useState } from 'react'; +import { Link } from 'react-router-dom'; +import { useExplorerStore } from '../stores/explorerStore.js'; +import { CliCommand } from './CliSnippet.jsx'; + +const StoreNav = ({ active, storeUrlParam, collectionDigest }) => { + const [showCode, setShowCode] = useState(false); + const [codeTab, setCodeTab] = useState('cli'); + const { storeUrl } = useExplorerStore(); + + const remote = storeUrl || new URLSearchParams(storeUrlParam).get('url') || ''; + + const items = [ + { key: 'overview', label: 'Overview', path: '/explore/store', icon: 'bi-house' }, + { key: 'sequences', label: 'Sequences', path: '/explore/store/sequences', icon: 'bi-list-ol' }, + { key: 'aliases', label: 'Aliases', path: '/explore/store/aliases', icon: 'bi-tag' }, + ]; + + const snippetGroups = [ + { + heading: 'Setup', + snippets: [ + { + label: 'Subscribe to this remote store', + cli: `refget config add store \\ + ${remote}`, + python: `import refget + +refget.config.add("store", "${remote}")`, + }, + ], + }, + { + heading: 'Browse', + snippets: [ + { + label: 'List collections', + cli: `refget store list \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +store.list()`, + }, + { + label: 'List sequences', + cli: `refget store list --sequences \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +store.list(sequences=True)`, + }, + { + label: 'Store statistics', + cli: `refget store stats \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +print(store)`, + }, + ], + }, + ]; + + if (collectionDigest) { + snippetGroups.push({ + heading: 'Collection', + snippets: [ + { + label: 'Get collection metadata', + cli: `refget store get \\ + ${collectionDigest} \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +store.get("${collectionDigest}")`, + }, + { + label: 'Pull collection to local cache', + cli: `refget store pull \\ + ${collectionDigest} \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +store.pull("${collectionDigest}")`, + }, + { + label: 'Export as FASTA', + cli: `refget store export \\ + ${collectionDigest} \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +store.export("${collectionDigest}")`, + }, + { + label: 'Generate .fai index', + cli: `refget store fai \\ + ${collectionDigest} \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +store.fai("${collectionDigest}")`, + }, + { + label: 'Generate chrom.sizes', + cli: `refget store chrom-sizes \\ + ${collectionDigest} \\ + --remote ${remote}`, + python: `import refget + +store = refget.RefgetStore("${remote}") +store.chrom_sizes("${collectionDigest}")`, + }, + ], + }); + } + + return ( +
+
+

+ + RefgetStore Explorer +

+
+ + + + Change Store + +
+
+ + {/* Code Snippets Modal */} + {showCode && ( + <> +
setShowCode(false)} /> +
setShowCode(false)}> +
e.stopPropagation()}> +
+
+
+ + Code Snippets +
+
+
+
    +
  • + +
  • +
  • + +
  • +
+ + {snippetGroups.map((group, gi) => ( +
+
{group.heading}
+ {group.snippets.map((snippet, i) => ( +
+ {snippet.label} + +
+ ))} +
+ ))} +
+ + Install: pip install refget + +
+
+
+
+ + )} + +
    + {items.map((item) => ( +
  • + + + {item.label} + +
  • + ))} +
+
+ ); +}; + +export { StoreNav }; diff --git a/frontend/src/main.jsx b/frontend/src/main.jsx index 4a65f6b..5a6f613 100644 --- a/frontend/src/main.jsx +++ b/frontend/src/main.jsx @@ -22,6 +22,11 @@ import { HPRCGenomes } from './pages/HPRCGenomes.jsx'; import { HumanReferencesView } from './pages/HumanReferences.jsx'; import { DigestPage } from './pages/DigestPage.jsx'; import { CompliancePage } from './pages/CompliancePage.jsx'; +import { StoreExplorer } from './pages/StoreExplorer.jsx'; +import { StoreOverview } from './pages/StoreOverview.jsx'; +import { StoreSequences } from './pages/StoreSequences.jsx'; +import { StoreCollection } from './pages/StoreCollection.jsx'; +import { StoreAliases } from './pages/StoreAliases.jsx'; import { fetchServiceInfo, @@ -127,6 +132,14 @@ const Nav = () => { Compliance +
  • + navigate('/explore')} + className={`nav-link cursor-pointer ${location.startsWith('explore') ? 'fw-medium text-black' : 'fw-light'}`} + > + Explore Store + +
  • , loader: (request) => fetchPangenomeLevels(request.params.digest), }, + { + path: '/explore', + element: , + errorElement: , + }, + { + path: '/explore/store', + element: , + errorElement: , + }, + { + path: '/explore/store/sequences', + element: , + errorElement: , + }, + { + path: '/explore/store/collection/:digest', + element: , + errorElement: , + }, + { + path: '/explore/store/aliases', + element: , + errorElement: , + }, ], }, ]); diff --git a/frontend/src/pages/StoreAliases.jsx b/frontend/src/pages/StoreAliases.jsx new file mode 100644 index 0000000..beec357 --- /dev/null +++ b/frontend/src/pages/StoreAliases.jsx @@ -0,0 +1,198 @@ +import { useState, useEffect } from 'react'; +import { Link, useSearchParams } from 'react-router-dom'; +import { useExplorerStore } from '../stores/explorerStore.js'; +import { StoreNav } from '../components/StoreNav.jsx'; + +const AliasNamespacePanel = ({ type, storeUrlParam, availableNamespaces }) => { + const { loadAliases } = useExplorerStore(); + const [namespace, setNamespace] = useState(''); + const [aliases, setAliases] = useState(null); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + const [filter, setFilter] = useState(''); + + const handleLoad = async (e) => { + e?.preventDefault(); + if (!namespace.trim()) return; + setLoading(true); + setError(null); + try { + const data = await loadAliases(type, namespace.trim()); + if (!data) { + setError(`Namespace "${namespace}" not found.`); + setAliases(null); + } else { + setAliases(data); + } + } catch (err) { + setError(err.message); + } finally { + setLoading(false); + } + }; + + const handleNamespaceClick = (ns) => { + setNamespace(ns); + setFilter(''); + setError(null); + setLoading(true); + loadAliases(type, ns) + .then((data) => { + if (!data) { + setError(`Namespace "${ns}" not found.`); + setAliases(null); + } else { + setAliases(data); + } + }) + .catch((err) => setError(err.message)) + .finally(() => setLoading(false)); + }; + + const filtered = aliases + ? aliases.filter( + (a) => + !filter || + a.alias.toLowerCase().includes(filter.toLowerCase()) || + a.digest.toLowerCase().includes(filter.toLowerCase()), + ) + : null; + + const linkPrefix = + type === 'sequences' + ? null // sequences don't have a detail page in the explorer + : `/explore/store/collection/`; + + return ( +
    +
    +
    + + {type} aliases +
    +
    +
    + {availableNamespaces && availableNamespaces.length > 0 ? ( +
    + Namespaces: + {availableNamespaces.map((ns) => ( + + ))} + {loading && } +
    + ) : ( +

    + + No {type} alias namespaces found in this store. +

    + )} + + {error && ( +
    {error}
    + )} + + {filtered && ( + <> +
    + + {filtered.length} aliases in "{namespace}" + + setFilter(e.target.value)} + /> +
    +
    +
    + + + + + + + + {filtered.map((a, i) => ( + + + + + ))} + +
    AliasDigest
    {a.alias} + {linkPrefix ? ( + + {a.digest} + + ) : ( + a.digest + )} +
    +

    + + )} +
    +
    + ); +}; + +const StoreAliases = () => { + const [searchParams] = useSearchParams(); + const { storeUrl, metadata, loading, loadStore } = useExplorerStore(); + + const urlParam = searchParams.get('url'); + const storeUrlParam = `?url=${encodeURIComponent(storeUrl || urlParam)}`; + + useEffect(() => { + if (urlParam && !metadata && !loading) { + loadStore(urlParam).catch(() => {}); + } + }, [urlParam]); // eslint-disable-line react-hooks/exhaustive-deps + + if (!metadata && !loading) { + return ( +
    + No store loaded.{' '} + Go back to enter a store URL. +
    + ); + } + + if (loading) { + return ( +
    +
    +
    + ); + } + + return ( +
    + + +

    + Aliases map human-readable names to digests. Select a namespace to + browse its alias mappings. +

    + + + +
    + ); +}; + +export { StoreAliases }; diff --git a/frontend/src/pages/StoreCollection.jsx b/frontend/src/pages/StoreCollection.jsx new file mode 100644 index 0000000..4b9ea21 --- /dev/null +++ b/frontend/src/pages/StoreCollection.jsx @@ -0,0 +1,279 @@ +import { useState, useEffect } from 'react'; +import { Link, useParams, useSearchParams } from 'react-router-dom'; +import { useExplorerStore } from '../stores/explorerStore.js'; +import { StoreNav } from '../components/StoreNav.jsx'; +import { CliCommand } from '../components/CliSnippet.jsx'; + +const StoreCollection = () => { + const { digest } = useParams(); + const [searchParams] = useSearchParams(); + const { storeUrl, metadata, loadStore, loadCollection, loadFhrMetadata, loading } = + useExplorerStore(); + const [collection, setCollection] = useState(null); + const [fhr, setFhr] = useState(undefined); + const [error, setError] = useState(null); + const [loadingCol, setLoadingCol] = useState(true); + const [selectedSeq, setSelectedSeq] = useState(null); + const [seqCodeTab, setSeqCodeTab] = useState('cli'); + + const urlParam = searchParams.get('url'); + const storeUrlParam = `?url=${encodeURIComponent(storeUrl || urlParam)}`; + + useEffect(() => { + const load = async () => { + try { + // Ensure store is loaded + if (!metadata && urlParam) { + await loadStore(urlParam); + } + const col = await loadCollection(digest); + setCollection(col); + const fhrData = await loadFhrMetadata(digest); + setFhr(fhrData); + } catch (err) { + setError(err.message); + } finally { + setLoadingCol(false); + } + }; + load(); + }, [digest, urlParam]); // eslint-disable-line react-hooks/exhaustive-deps + + if (!metadata && !loading && !loadingCol) { + return ( +
    + No store loaded.{' '} + Go back to enter a store URL. +
    + ); + } + + if (loading || loadingCol) { + return ( +
    +
    +

    Loading collection...

    +
    + ); + } + + if (error) { + return ( +
    + +
    {error}
    +
    + ); + } + + const { metadata: colMeta, sequences } = collection; + const totalBases = sequences.reduce((sum, s) => sum + s.length, 0); + const alphabetCounts = {}; + sequences.forEach((s) => { + alphabetCounts[s.alphabet] = (alphabetCounts[s.alphabet] || 0) + 1; + }); + + return ( +
    + + +
    {digest}
    + + {/* Summary stats */} +
    +
    +
    +
    + Sequences + {sequences.length.toLocaleString()} +
    +
    +
    +
    +
    +
    + Total bases + {totalBases.toLocaleString()} +
    +
    +
    + {Object.keys(alphabetCounts).length > 0 && ( +
    +
    +
    + Alphabets + + + {Object.entries(alphabetCounts).map(([alph, count]) => ( + + + + + ))} + +
    {alph}{count.toLocaleString()}
    +
    +
    +
    + )} +
    + + {/* Collection metadata from ## headers */} + {Object.keys(colMeta).length > 0 && ( +
    +
    +
    Collection Metadata
    +
    +
    + + + {Object.entries(colMeta).map(([key, value]) => ( + + + + + ))} + +
    {key}{value}
    +
    +
    + )} + + {/* FHR metadata */} + {fhr ? ( +
    +
    +
    + + FHR Metadata +
    +
    +
    +
    +              {JSON.stringify(fhr, null, 2)}
    +            
    +
    +
    + ) : fhr === null ? ( +

    + + No FHR metadata sidecar found for this collection. +

    + ) : null} + + {/* Sequence table */} +
    +
    +
    Sequences in this collection
    +
    +
    +
    + + + + + + + + + + {sequences.map((seq, i) => ( + setSelectedSeq(seq)} + > + + + + + ))} + +
    NameLengthSHA-512/24u
    {seq.name} + {seq.length.toLocaleString()} + {seq.sha512t24u}
    +
    +
    +
    + + {/* Sequence detail modal */} + {selectedSeq && ( + <> +
    setSelectedSeq(null)} /> +
    setSelectedSeq(null)}> +
    e.stopPropagation()}> +
    +
    +
    {selectedSeq.name}
    +
    +
    + + + + + + + + + + + + + + + + + + + {selectedSeq.description && ( + + + + + )} + +
    Length{selectedSeq.length.toLocaleString()}
    Alphabet{selectedSeq.alphabet}
    SHA-512/24u{selectedSeq.sha512t24u}
    MD5{selectedSeq.md5}
    Description{selectedSeq.description}
    + +
    Code
    +
      +
    • + +
    • +
    • + +
    • +
    + Get sequence + +
    +
    +
    +
    + + )} +
    + ); +}; + +export { StoreCollection }; diff --git a/frontend/src/pages/StoreExplorer.jsx b/frontend/src/pages/StoreExplorer.jsx new file mode 100644 index 0000000..810ef30 --- /dev/null +++ b/frontend/src/pages/StoreExplorer.jsx @@ -0,0 +1,135 @@ +import { useState, useEffect } from 'react'; +import { useNavigate, useSearchParams } from 'react-router-dom'; +import { useExplorerStore } from '../stores/explorerStore.js'; + +const RECENT_STORES_KEY = 'refget-explorer-recent-stores'; +const MAX_RECENT = 5; + +const getRecentStores = () => { + try { + return JSON.parse(localStorage.getItem(RECENT_STORES_KEY)) || []; + } catch { + return []; + } +}; + +const saveRecentStore = (url) => { + const recent = getRecentStores().filter((u) => u !== url); + recent.unshift(url); + localStorage.setItem( + RECENT_STORES_KEY, + JSON.stringify(recent.slice(0, MAX_RECENT)), + ); +}; + +const StoreExplorer = () => { + const [searchParams] = useSearchParams(); + const navigate = useNavigate(); + const { loadStore, loading, error, storeUrl } = useExplorerStore(); + const [url, setUrl] = useState(searchParams.get('url') || ''); + const [localError, setLocalError] = useState(null); + const recentStores = getRecentStores(); + + // Auto-load if URL param provided + useEffect(() => { + const paramUrl = searchParams.get('url'); + if (paramUrl && paramUrl !== storeUrl) { + handleExplore(paramUrl); + } + }, []); // eslint-disable-line react-hooks/exhaustive-deps + + const handleExplore = async (targetUrl) => { + const trimmed = (targetUrl || url).trim(); + if (!trimmed) return; + setLocalError(null); + try { + await loadStore(trimmed); + saveRecentStore(trimmed); + navigate(`/explore/store?url=${encodeURIComponent(trimmed)}`); + } catch (err) { + setLocalError(err.message); + } + }; + + const handleSubmit = (e) => { + e.preventDefault(); + handleExplore(); + }; + + return ( +
    +

    + + RefgetStore Explorer +

    +

    + Browse the contents of any RefgetStore — sequences, collections, aliases, + and metadata. Enter the URL of a store hosted on any HTTP server. +

    + +
    +
    + setUrl(e.target.value)} + required + /> + +
    + + + {(localError || error) && ( +
    + Failed to load store: {localError || error} +

    + Make sure the URL points to a valid RefgetStore directory with an{' '} + rgstore.json file. The server must allow cross-origin + requests (CORS). +

    +
    + )} + + {recentStores.length > 0 && ( +
    +
    Recent stores
    +
    + {recentStores.map((recentUrl) => ( + + ))} +
    +
    + )} +
    + ); +}; + +export { StoreExplorer }; diff --git a/frontend/src/pages/StoreOverview.jsx b/frontend/src/pages/StoreOverview.jsx new file mode 100644 index 0000000..fb17740 --- /dev/null +++ b/frontend/src/pages/StoreOverview.jsx @@ -0,0 +1,309 @@ +import { useState, useEffect } from 'react'; +import { Link, useNavigate, useSearchParams } from 'react-router-dom'; +import { useExplorerStore } from '../stores/explorerStore.js'; +import { StoreNav } from '../components/StoreNav.jsx'; +import { RowCodeButton } from '../components/CliSnippet.jsx'; + +const StoreOverview = () => { + const [searchParams] = useSearchParams(); + const navigate = useNavigate(); + const { storeUrl, metadata, sequenceIndex, collections, loading, loadStore, loadSequenceIndex } = + useExplorerStore(); + const [seqLoading, setSeqLoading] = useState(false); + + const urlParam = searchParams.get('url'); + + // If we have a URL param but no loaded store, load it + useEffect(() => { + const init = async () => { + if (urlParam && !metadata && !loading) { + await loadStore(urlParam).catch(() => {}); + } + }; + init(); + }, [urlParam]); // eslint-disable-line react-hooks/exhaustive-deps + + // Auto-load sequence index (fetchSequenceIndex handles size check internally) + useEffect(() => { + if (metadata && !sequenceIndex && !seqLoading) { + setSeqLoading(true); + loadSequenceIndex() + .catch(() => {}) + .finally(() => setSeqLoading(false)); + } + }, [metadata]); // eslint-disable-line react-hooks/exhaustive-deps + + if (!metadata && !loading) { + return ( +
    + No store loaded.{' '} + Go back to enter a store URL. +
    + ); + } + + if (loading) { + return ( +
    +
    +

    Loading store...

    +
    + ); + } + + const totalBases = sequenceIndex + ? sequenceIndex.reduce((sum, s) => sum + s.length, 0) + : 0; + + const alphabetCounts = {}; + if (sequenceIndex) { + sequenceIndex.forEach((s) => { + alphabetCounts[s.alphabet] = (alphabetCounts[s.alphabet] || 0) + 1; + }); + } + + const storeUrlParam = `?url=${encodeURIComponent(storeUrl || urlParam)}`; + + return ( +
    + + +
    + {/* Store info card */} +
    +
    +
    +
    + + Store Info +
    +
    +
    + + + + + + + + + + + + + + + {metadata.created_at && ( + + + + + )} + +
    URL + {storeUrl || urlParam} +
    Version{metadata.version}
    Storage Mode + + {metadata.mode} + +
    Created{new Date(metadata.created_at).toLocaleString()}
    +
    +
    +
    + + {/* Sequences summary card */} +
    +
    +
    +
    + + Sequences +
    + + Browse all + +
    +
    + {sequenceIndex ? ( + + + + + + + + + + + + + + + +
    Total sequences{sequenceIndex.length.toLocaleString()}
    Total bases{totalBases.toLocaleString()}
    Alphabets + {Object.entries(alphabetCounts).map(([alph, count]) => ( + + {alph}: {count} + + ))} +
    + ) : seqLoading ? ( +
    + + Loading sequence index... +
    + ) : ( +

    + Sequence index not available. +

    + )} +
    +
    +
    +
    + + {/* Collections */} +
    +
    +
    + + Collections +
    +
    +
    + {collections && collections.length > 0 ? ( +
    + + + + + + + + + + {collections.map((col) => ( + + + + + + ))} + +
    DigestSequences
    + + {col.digest} + + {col.n_sequences} + +
    +
    + ) : ( +

    + No collection index (collections.rgci) found. Individual + collections can still be viewed if you know the digest. +

    + )} +
    +
    + + {/* Aliases section */} +
    +
    +
    + + Aliases +
    + + Browse aliases + +
    +
    + {(metadata.sequence_alias_namespaces?.length > 0 || metadata.collection_alias_namespaces?.length > 0) ? ( + + + {metadata.sequence_alias_namespaces?.length > 0 && ( + + + + + )} + {metadata.collection_alias_namespaces?.length > 0 && ( + + + + + )} + +
    Sequence namespaces + {metadata.sequence_alias_namespaces.map((ns) => ( + + {ns} + + ))} +
    Collection namespaces + {metadata.collection_alias_namespaces.map((ns) => ( + + {ns} + + ))} +
    + ) : ( +

    + No alias namespace information available. +

    + )} +
    +
    + +
    + ); +}; + +export { StoreOverview }; diff --git a/frontend/src/pages/StoreSequences.jsx b/frontend/src/pages/StoreSequences.jsx new file mode 100644 index 0000000..da0fca5 --- /dev/null +++ b/frontend/src/pages/StoreSequences.jsx @@ -0,0 +1,337 @@ +import { useState, useMemo, useEffect } from 'react'; +import { Link, useSearchParams } from 'react-router-dom'; +import { useExplorerStore } from '../stores/explorerStore.js'; +import { StoreNav } from '../components/StoreNav.jsx'; +import { CliCommand } from '../components/CliSnippet.jsx'; + +const PAGE_SIZE = 50; + +const formatBytes = (bytes) => { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +}; + +const StoreSequences = () => { + const [searchParams] = useSearchParams(); + const { + storeUrl, sequenceIndex, sequenceIndexPartial, sequenceIndexTotalSize, + metadata, loading, loadStore, loadSequenceIndex, + } = useExplorerStore(); + const [filter, setFilter] = useState(''); + const [sortCol, setSortCol] = useState(null); + const [sortAsc, setSortAsc] = useState(true); + const [page, setPage] = useState(0); + const [seqLoading, setSeqLoading] = useState(false); + const [seqError, setSeqError] = useState(null); + const [selectedSeq, setSelectedSeq] = useState(null); + const [seqCodeTab, setSeqCodeTab] = useState('cli'); + + const urlParam = searchParams.get('url'); + const storeUrlParam = `?url=${encodeURIComponent(storeUrl || urlParam)}`; + + // Auto-load on mount — fetchSequenceIndex handles the size check internally + useEffect(() => { + const init = async () => { + if (urlParam && !metadata && !loading) { + await loadStore(urlParam).catch(() => {}); + } + if (!sequenceIndex && !seqLoading) { + setSeqLoading(true); + try { + await loadSequenceIndex(); + } catch (err) { + setSeqError(err.message); + } finally { + setSeqLoading(false); + } + } + }; + init(); + }, [urlParam, metadata]); // eslint-disable-line react-hooks/exhaustive-deps + + const handleLoadMore = async (maxBytes) => { + setSeqLoading(true); + setSeqError(null); + try { + await loadSequenceIndex(maxBytes ? { maxBytes } : {}); + } catch (err) { + setSeqError(err.message); + } finally { + setSeqLoading(false); + } + }; + + const filtered = useMemo(() => { + if (!sequenceIndex) return []; + const term = filter.toLowerCase(); + return sequenceIndex.filter( + (s) => + !term || + s.name?.toLowerCase().includes(term) || + s.sha512t24u?.toLowerCase().includes(term) || + s.md5?.toLowerCase().includes(term) || + s.description?.toLowerCase().includes(term), + ); + }, [sequenceIndex, filter]); + + const sorted = useMemo(() => { + if (!sortCol) return filtered; + return [...filtered].sort((a, b) => { + const va = a[sortCol]; + const vb = b[sortCol]; + if (typeof va === 'number' && typeof vb === 'number') { + return sortAsc ? va - vb : vb - va; + } + return sortAsc + ? String(va).localeCompare(String(vb)) + : String(vb).localeCompare(String(va)); + }); + }, [filtered, sortCol, sortAsc]); + + const totalPages = Math.ceil(sorted.length / PAGE_SIZE); + const paged = sorted.slice(page * PAGE_SIZE, (page + 1) * PAGE_SIZE); + + const handleSort = (col) => { + if (sortCol === col) { + setSortAsc(!sortAsc); + } else { + setSortCol(col); + setSortAsc(true); + } + setPage(0); + }; + + const SortIcon = ({ col }) => { + if (sortCol !== col) return null; + return ; + }; + + if (!metadata && !loading) { + return ( +
    + No store loaded.{' '} + Go back to enter a store URL. +
    + ); + } + + if (loading || seqLoading) { + return ( +
    +
    +

    + {seqLoading ? 'Loading sequence index...' : 'Loading store...'} +

    +
    + ); + } + + if (seqError) { + return ( +
    + +
    {seqError}
    +
    + ); + } + + if (!sequenceIndex) { + return ( +
    + +
    + No sequence index (sequences.rgsi) found in this store. +
    +
    + ); + } + + const columns = [ + { key: 'name', label: 'Name' }, + { key: 'length', label: 'Length' }, + { key: 'sha512t24u', label: 'SHA-512/24u' }, + ]; + + return ( +
    + + + {/* Partial load banner */} + {sequenceIndexPartial && ( +
    + + + Sequence index is {formatBytes(sequenceIndexTotalSize)} — showing first{' '} + {sequenceIndex.length.toLocaleString()} sequences. + Sorting and filtering apply only to loaded data. + + +
    + )} + +
    + + {filtered.length.toLocaleString()} sequences + {filter && ` (filtered from ${sequenceIndex.length.toLocaleString()})`} + {sequenceIndexPartial && ' (partial)'} + + { + setFilter(e.target.value); + setPage(0); + }} + /> +
    + +
    + + + + {columns.map((col) => ( + + ))} + + + + {paged.map((seq, i) => ( + setSelectedSeq(seq)} + > + + + + + ))} + +
    handleSort(col.key)} + style={{ cursor: 'pointer' }} + className={col.key === 'length' ? 'text-end' : ''} + > + {col.label} + +
    {seq.name} + {seq.length.toLocaleString()} + {seq.sha512t24u}
    +
    + + {/* Sequence detail modal */} + {selectedSeq && ( + <> +
    setSelectedSeq(null)} /> +
    setSelectedSeq(null)}> +
    e.stopPropagation()}> +
    +
    +
    {selectedSeq.name}
    +
    +
    + + + + + + + + + + + + + + + + + + + {selectedSeq.description && ( + + + + + )} + +
    Length{selectedSeq.length.toLocaleString()}
    Alphabet{selectedSeq.alphabet}
    SHA-512/24u{selectedSeq.sha512t24u}
    MD5{selectedSeq.md5}
    Description{selectedSeq.description}
    + +
    Code
    +
      +
    • + +
    • +
    • + +
    • +
    + Get sequence + +
    +
    +
    +
    + + )} + + {totalPages > 1 && ( + + )} +
    + ); +}; + +export { StoreSequences }; diff --git a/frontend/src/services/fetchData.jsx b/frontend/src/services/fetchData.jsx index ac6fbab..f911b93 100644 --- a/frontend/src/services/fetchData.jsx +++ b/frontend/src/services/fetchData.jsx @@ -56,19 +56,27 @@ export const fetchPangenomeLevels = async (digest) => { }; export const fetchSeqColList = async () => { - const urls = [ - `${API_BASE}/list/collection?page_size=10&page=0`, - `${API_BASE}/list/pangenome?page_size=5`, - `${API_BASE}/list/attributes/name_length_pairs?page_size=5`, - ]; + const fetchRequired = async (url) => { + const response = await fetch(url); + await checkResponse(response, url); + return response.json(); + }; - return Promise.all( - urls.map(async (url) => { + const fetchOptional = async (url) => { + try { const response = await fetch(url); - await checkResponse(response, url); + if (!response.ok) return null; return response.json(); - }), - ); + } catch { + return null; + } + }; + + return Promise.all([ + fetchRequired(`${API_BASE}/list/collection?page_size=10&page=0`), + fetchOptional(`${API_BASE}/list/pangenome?page_size=5`), + fetchRequired(`${API_BASE}/list/attributes/name_length_pairs?page_size=5`), + ]); }; export const fetchAllSeqCols = async () => { diff --git a/frontend/src/services/storeService.js b/frontend/src/services/storeService.js new file mode 100644 index 0000000..4ac46b2 --- /dev/null +++ b/frontend/src/services/storeService.js @@ -0,0 +1,217 @@ +/** + * Service for fetching and parsing RefgetStore static files. + * A RefgetStore is a directory of static TSV/JSON files — no backend needed. + */ + +// Ensure URL ends without trailing slash +const normalizeUrl = (url) => url.replace(/\/+$/, ''); + +/** + * Parse TSV text into array of objects. + * Handles # comment header lines and ## metadata headers. + * Returns { metadata: {key: value}, rows: [{col: val}] } + */ +const parseTsv = (text) => { + const lines = text.split('\n').filter((l) => l.length > 0); + const metadata = {}; + let headerCols = null; + const rows = []; + + for (const line of lines) { + if (line.startsWith('##')) { + // Metadata header: ##key=value + const eq = line.indexOf('='); + if (eq > 2) { + metadata[line.substring(2, eq)] = line.substring(eq + 1); + } + } else if (line.startsWith('#')) { + // Column header + headerCols = line.substring(1).split('\t'); + } else if (headerCols) { + const fields = line.split('\t'); + const row = {}; + headerCols.forEach((col, i) => { + row[col] = fields[i] ?? ''; + }); + rows.push(row); + } + } + + return { metadata, rows }; +}; + +/** + * Parse a two-column TSV (alias files have no header). + * Returns [{alias, digest}] + */ +const parseAliasTsv = (text) => { + return text + .split('\n') + .filter((l) => l.length > 0 && !l.startsWith('#')) + .map((line) => { + const [alias, digest] = line.split('\t'); + return { alias, digest }; + }); +}; + +/** + * Parse collections.rgci — a TSV with #header row. + * Columns: digest, n_sequences, names_digest, sequences_digest, lengths_digest, + * name_length_pairs_digest, sorted_name_length_pairs_digest, sorted_sequences_digest + */ +const parseRgci = (text) => { + const { rows } = parseTsv(text); + return rows.map((r) => ({ + ...r, + n_sequences: r.n_sequences ? 
parseInt(r.n_sequences, 10) : 0, + })); +}; + +/** Fetch with error handling */ +const fetchFile = async (url) => { + const response = await fetch(url); + if (!response.ok) { + if (response.status === 404 || response.status === 403) return null; + throw new Error(`HTTP ${response.status} fetching ${url}`); + } + return response; +}; + +/** GET rgstore.json → parsed JSON */ +export const fetchStoreMetadata = async (baseUrl) => { + const url = `${normalizeUrl(baseUrl)}/rgstore.json`; + const response = await fetchFile(url); + if (!response) throw new Error('rgstore.json not found at this URL'); + return response.json(); +}; + +/** Size threshold for auto-loading sequence index (10 MB) */ +const AUTO_LOAD_THRESHOLD = 10 * 1024 * 1024; +/** Default partial load size (2 MB) */ +const PARTIAL_LOAD_SIZE = 2 * 1024 * 1024; + +const parseSequenceRows = (text) => { + const { rows } = parseTsv(text); + return rows.map((r) => ({ + ...r, + length: r.length ? parseInt(r.length, 10) : 0, + })); +}; + +/** + * Check the size of sequences.rgsi via HEAD request. + * Returns { url, size } or null if not found. + */ +export const checkSequenceIndexSize = async (baseUrl) => { + const url = `${normalizeUrl(baseUrl)}/sequences.rgsi`; + try { + const response = await fetch(url, { method: 'HEAD' }); + if (!response.ok) return null; + const size = parseInt(response.headers.get('content-length') || '0', 10); + return { url, size }; + } catch { + return null; + } +}; + +/** + * Fetch sequences.rgsi — auto-loads if small, otherwise requires explicit call. + * Returns { rows, partial, totalSize } + * partial: true if only a prefix was loaded + * totalSize: file size in bytes + */ +export const fetchSequenceIndex = async (baseUrl, { maxBytes } = {}) => { + const url = `${normalizeUrl(baseUrl)}/sequences.rgsi`; + + // Check file size first + let totalSize = 0; + try { + const head = await fetch(url, { method: 'HEAD' }); + if (!head.ok) { + if (head.status === 404 || head.status === 403) throw new Error('sequences.rgsi not found'); + throw new Error(`HTTP ${head.status} fetching ${url}`); + } + totalSize = parseInt(head.headers.get('content-length') || '0', 10); + } catch (err) { + if (err.message.includes('not found')) throw err; + // HEAD failed (CORS?), fall back to full fetch + const response = await fetchFile(url); + if (!response) throw new Error('sequences.rgsi not found'); + const text = await response.text(); + return { rows: parseSequenceRows(text), partial: false, totalSize: text.length }; + } + + const limit = maxBytes || (totalSize <= AUTO_LOAD_THRESHOLD ? totalSize : PARTIAL_LOAD_SIZE); + const loadFull = limit >= totalSize; + + if (loadFull) { + const response = await fetchFile(url); + if (!response) throw new Error('sequences.rgsi not found'); + const text = await response.text(); + return { rows: parseSequenceRows(text), partial: false, totalSize }; + } + + // Partial load via Range header + const response = await fetch(url, { + headers: { Range: `bytes=0-${limit - 1}` }, + }); + if (!response.ok && response.status !== 206) { + // Server doesn't support Range — fall back to full fetch + const fullResponse = await fetchFile(url); + if (!fullResponse) throw new Error('sequences.rgsi not found'); + const text = await fullResponse.text(); + return { rows: parseSequenceRows(text), partial: false, totalSize }; + } + const text = await response.text(); + // Discard last partial line + const lastNewline = text.lastIndexOf('\n'); + const cleanText = lastNewline > 0 ? 
text.substring(0, lastNewline) : text; + return { rows: parseSequenceRows(cleanText), partial: true, totalSize }; +}; + +/** GET collections.rgci → array of collection summaries */ +export const fetchCollectionIndex = async (baseUrl) => { + const url = `${normalizeUrl(baseUrl)}/collections.rgci`; + const response = await fetchFile(url); + if (!response) return null; // No collection index available + const text = await response.text(); + return parseRgci(text); +}; + +/** GET collections/{digest}.rgsi → {metadata, sequences} */ +export const fetchCollection = async (baseUrl, digest) => { + const base = normalizeUrl(baseUrl); + // Try .rgsi first (format spec default), then .rgci + let response = await fetchFile(`${base}/collections/${digest}.rgsi`); + if (!response) { + response = await fetchFile(`${base}/collections/${digest}.rgci`); + } + if (!response) + throw new Error(`Collection ${digest} not found`); + const text = await response.text(); + const { metadata, rows } = parseTsv(text); + return { + metadata, + sequences: rows.map((r) => ({ + ...r, + length: r.length ? parseInt(r.length, 10) : 0, + })), + }; +}; + +/** GET aliases/{type}/{namespace}.tsv → [{alias, digest}] */ +export const fetchAliases = async (baseUrl, type, namespace) => { + const url = `${normalizeUrl(baseUrl)}/aliases/${type}/${namespace}.tsv`; + const response = await fetchFile(url); + if (!response) return null; + const text = await response.text(); + return parseAliasTsv(text); +}; + +/** GET collections/{digest}.fhr.json → parsed JSON or null */ +export const fetchFhrMetadata = async (baseUrl, digest) => { + const url = `${normalizeUrl(baseUrl)}/collections/${digest}.fhr.json`; + const response = await fetchFile(url); + if (!response) return null; + return response.json(); +}; diff --git a/frontend/src/stores/explorerStore.js b/frontend/src/stores/explorerStore.js new file mode 100644 index 0000000..9edd6c1 --- /dev/null +++ b/frontend/src/stores/explorerStore.js @@ -0,0 +1,113 @@ +import { create } from 'zustand'; +import { + fetchStoreMetadata, + fetchSequenceIndex, + fetchCollectionIndex, + fetchCollection, + fetchAliases, + fetchFhrMetadata, +} from '../services/storeService.js'; + +export const useExplorerStore = create((set, get) => ({ + storeUrl: null, + metadata: null, + sequenceIndex: null, // array of sequence rows (or null if not loaded) + sequenceIndexPartial: false, // true if only a prefix was loaded + sequenceIndexTotalSize: 0, // total file size in bytes + collections: null, + loadedCollections: {}, + aliases: {}, + fhrMetadata: {}, + loading: false, + error: null, + + setStoreUrl: (url) => set({ storeUrl: url }), + + /** Fetch store metadata + collection index (sequence index is lazy-loaded) */ + loadStore: async (url) => { + set({ loading: true, error: null, storeUrl: url }); + try { + const metadata = await fetchStoreMetadata(url); + set({ metadata }); + + const collections = await fetchCollectionIndex(url).catch(() => null); + + set({ + sequenceIndex: null, + sequenceIndexPartial: false, + sequenceIndexTotalSize: 0, + collections, + loading: false, + loadedCollections: {}, + aliases: {}, + fhrMetadata: {}, + }); + } catch (err) { + set({ loading: false, error: err.message }); + throw err; + } + }, + + /** Fetch and cache the sequence index (lazy — only when needed). + * Options: { maxBytes } to limit partial load size. 
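+   * Example (illustrative): await loadSequenceIndex({ maxBytes: 5 * 1024 * 1024 })
+   * fetches roughly the first 5 MB of sequences.rgsi via an HTTP Range request.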
*/ + loadSequenceIndex: async (options) => { + const { storeUrl, sequenceIndex } = get(); + // If already fully loaded, return cached + if (sequenceIndex && !get().sequenceIndexPartial) return sequenceIndex; + + const { rows, partial, totalSize } = await fetchSequenceIndex(storeUrl, options); + set({ + sequenceIndex: rows, + sequenceIndexPartial: partial, + sequenceIndexTotalSize: totalSize, + }); + return rows; + }, + + /** Fetch and cache a single collection */ + loadCollection: async (digest) => { + const { storeUrl, loadedCollections } = get(); + if (loadedCollections[digest]) return loadedCollections[digest]; + + const data = await fetchCollection(storeUrl, digest); + set({ loadedCollections: { ...get().loadedCollections, [digest]: data } }); + return data; + }, + + /** Fetch and cache aliases for a type/namespace */ + loadAliases: async (type, namespace) => { + const { storeUrl, aliases } = get(); + const key = `${type}/${namespace}`; + if (aliases[key]) return aliases[key]; + + const data = await fetchAliases(storeUrl, type, namespace); + set({ aliases: { ...get().aliases, [key]: data } }); + return data; + }, + + /** Fetch and cache FHR metadata for a collection */ + loadFhrMetadata: async (digest) => { + const { storeUrl, fhrMetadata } = get(); + if (fhrMetadata[digest] !== undefined) return fhrMetadata[digest]; + + const data = await fetchFhrMetadata(storeUrl, digest); + set({ fhrMetadata: { ...get().fhrMetadata, [digest]: data } }); + return data; + }, + + /** Reset store state */ + reset: () => + set({ + storeUrl: null, + metadata: null, + sequenceIndex: null, + sequenceIndexPartial: false, + sequenceIndexTotalSize: 0, + collections: null, + loadedCollections: {}, + aliases: {}, + fhrMetadata: {}, + loading: false, + error: null, + }), +})); From 6e8f97879fc012112b039fbb9dd267cc77d07caf Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 13 Mar 2026 07:29:28 -0400 Subject: [PATCH 19/31] Modernize build system, expand store CLI, and add store-backed backend - Migrate from setup.py to pyproject.toml with hatchling - Replace Black with Ruff, update GH Actions, use trusted publishing - Remove legacy requirements/ files and codecov workflow - Add SeqColBackend protocol and RefgetStoreBackend (database-free API serving) - Expand store CLI subcommands - Update seqcolapi configuration and compliance checks - Add tests for backend, store crate, and updated CLI commands --- .github/workflows/black.yml | 10 +- .github/workflows/python-publish.yml | 9 +- .github/workflows/run-codecov.yml | 21 - .github/workflows/run-pytest.yml | 11 +- MANIFEST.in | 3 - examples/remote_store.py | 2 +- pyproject.toml | 71 +++- refget/__init__.py | 18 +- refget/agents.py | 101 +++-- refget/backend.py | 144 +++++++ refget/cli/admin.py | 6 +- refget/cli/config.py | 20 +- refget/cli/config_manager.py | 2 +- refget/cli/fasta.py | 9 +- refget/cli/main.py | 8 +- refget/cli/seqcol.py | 2 +- refget/cli/store.py | 364 ++++++++++++++++-- refget/clients.py | 11 +- refget/compliance.py | 106 +++-- refget/const.py | 2 +- refget/digests.py | 5 +- refget/examples.py | 2 +- refget/models.py | 28 +- refget/router.py | 106 ++--- refget/store.py | 4 +- refget/utils.py | 10 +- requirements/requirements-all.txt | 7 - requirements/requirements-dev.txt | 1 - requirements/requirements-docs.txt | 2 - requirements/requirements-seqcolapi.txt | 6 - requirements/requirements-test.txt | 3 - seqcolapi/__main__.py | 1 + seqcolapi/const.py | 5 +- seqcolapi/examples.py | 2 +- seqcolapi/main.py | 65 +++- setup.py | 56 --- tests/api/conftest.py | 
3 +- tests/api/test_compliance.py | 25 +- tests/conftest.py | 17 +- tests/integration/conftest.py | 9 +- .../integration/test_cli_admin_integration.py | 2 - .../test_cli_seqcol_integration.py | 1 - tests/integration/test_run_compliance.py | 1 + tests/local/test_aliases.py | 4 +- tests/local/test_backend.py | 216 +++++++++++ tests/local/test_digest_functions.py | 13 +- tests/local/test_local_models.py | 5 +- tests/local/test_local_models_gtars.py | 22 +- tests/local/test_refget_clients.py | 2 +- tests/local/test_remove_collection.py | 11 +- tests/local/test_store_seqcol_features.py | 3 +- tests/test_cli/test_admin_commands.py | 3 - tests/test_cli/test_config_commands.py | 7 +- tests/test_cli/test_fasta_commands.py | 26 +- tests/test_cli/test_help.py | 2 - tests/test_cli/test_seqcol_commands.py | 26 +- tests/test_cli/test_store_commands.py | 111 ++++-- tests/test_cli/test_store_crate.py | 305 +++++++++++++++ tests/test_cli/test_store_pull.py | 105 +++-- tests/test_cli_integration/test_workflows.py | 24 +- 60 files changed, 1667 insertions(+), 499 deletions(-) delete mode 100644 .github/workflows/run-codecov.yml delete mode 100644 MANIFEST.in create mode 100644 refget/backend.py delete mode 100644 requirements/requirements-all.txt delete mode 100644 requirements/requirements-dev.txt delete mode 100644 requirements/requirements-docs.txt delete mode 100644 requirements/requirements-seqcolapi.txt delete mode 100644 requirements/requirements-test.txt delete mode 100644 setup.py create mode 100644 tests/local/test_backend.py create mode 100644 tests/test_cli/test_store_crate.py diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 8b48ddf..f329b68 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -6,6 +6,10 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: psf/black@stable + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install ruff + - run: ruff check . + - run: ruff format --check . 
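The database-free backend described in the commit message above can be exercised directly. A minimal sketch, assuming an existing on-disk store (the path and digests are placeholders; method names follow refget/backend.py in this patch):

    from refget import RefgetStore, RefgetStoreBackend

    # open an existing local store (hypothetical path)
    store = RefgetStore.on_disk("/data/refget_store")
    backend = RefgetStoreBackend(store)

    print(backend.capabilities())                  # backend_type, n_collections, ...
    level2 = backend.get_collection("<digest>", level=2)
    page = backend.list_collections(page=0, page_size=100)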
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index e54ad87..b8cb119 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,4 +1,4 @@ -# This workflows will upload a Python Package using Twine when a release is created +# This workflow uploads a Python Package using trusted publishing when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package @@ -23,10 +23,9 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish + pip install build + - name: Build package run: | - python setup.py sdist bdist_wheel + python -m build - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 - diff --git a/.github/workflows/run-codecov.yml b/.github/workflows/run-codecov.yml deleted file mode 100644 index de9e8f6..0000000 --- a/.github/workflows/run-codecov.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Run codecov - -on: - pull_request: - branches: [master] - -jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: ["3.13"] - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@v2 - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 - with: - file: ./coverage.xml - name: py-${{ matrix.python-version }}-${{ matrix.os }} diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 446f4dc..637e616 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -9,7 +9,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.10", "3.13"] + python-version: ["3.10", "3.14"] os: [ubuntu-latest] steps: @@ -20,13 +20,10 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install test dependencies - run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi - - - name: Install package + - name: Install package with test extras env: PYO3_USE_ABI3_FORWARD_COMPATIBILITY: 1 - run: python -m pip install . + run: python -m pip install ".[test]" - name: Run pytest tests - run: pytest -x -vv --cov=./ --cov-report=xml \ No newline at end of file + run: pytest -x -vv --cov=./ --cov-report=xml diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 9c9f250..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include requirements/* -include README.md -include refget/schemas/* \ No newline at end of file diff --git a/examples/remote_store.py b/examples/remote_store.py index aa7d904..09c8f79 100644 --- a/examples/remote_store.py +++ b/examples/remote_store.py @@ -47,7 +47,7 @@ # %% records = store.list_sequences() for i, m in enumerate(records[:5]): - print(f"{i+1}. {m.name[:60]}...") + print(f"{i + 1}. 
{m.name[:60]}...") print(f" sha512t24u: {m.sha512t24u}, length: {m.length:,} bp") # %% [markdown] diff --git a/pyproject.toml b/pyproject.toml index 458f68e..47a1411 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,72 @@ -[tool.black] +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "refget" +dynamic = ["version"] +description = "GA4GH refget - reference sequence and sequence collection tools" +readme = "README.md" +license = "BSD-2-Clause" +requires-python = ">=3.10" +authors = [ + { name = "Nathan Sheffield", email = "nathan@code.databio.org" }, + { name = "Michal Stolarczyk" }, +] +keywords = ["genome", "assembly", "bioinformatics", "reference", "sequence"] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dependencies = [ + "gtars>=0.7.0", + "jsonschema", + "pyyaml", + "requests", + "sqlmodel", + "tomli_w", + "typer>=0.9.0", +] + +[project.scripts] +refget = "refget.cli:main" + +[project.optional-dependencies] +test = ["pytest", "pytest-cov>=6.0.0"] +seqcolapi = [ + "fastapi", + "psycopg2-binary", + "sqlmodel", + "uvicorn>=0.30.0", + "ubiquerg>=0.6.1", +] + +[project.urls] +Homepage = "https://github.com/refgenie/refget" + +[tool.hatch.version] +path = "refget/_version.py" + +[tool.ruff] line-length = 99 -target-version = ['py38', 'py311'] -include = '\.pyi?$' +exclude = [ + "array_overlap.py", + "create_compliance_answers.py", + "data_loaders", + "interactive_tests.py", +] + +[tool.ruff.lint] +select = ["E", "F", "I"] +ignore = ["F403", "F405", "E501"] + +[tool.ruff.lint.isort] +known-first-party = ["refget"] [tool.pytest.ini_options] testpaths = ["tests/local"] diff --git a/refget/__init__.py b/refget/__init__.py index e739c06..196ab19 100644 --- a/refget/__init__.py +++ b/refget/__init__.py @@ -11,12 +11,20 @@ """ from ._version import __version__ -from .exceptions import InvalidSeqColError +from .backend import RefgetStoreBackend, SeqColBackend +from .clients import SequenceCollectionClient +from .compliance import run_compliance from .const import GTARS_INSTALLED +from .exceptions import InvalidSeqColError +from .store import ( + RefgetStore, + SequenceCollection, + StorageMode, + compute_fai, + digest_fasta, + digest_sequence, +) from .utils import canonical_str -from .store import RefgetStore, StorageMode, digest_fasta, compute_fai, digest_sequence, SequenceCollection -from .compliance import run_compliance -from .clients import SequenceCollectionClient __all__ = [ "__version__", @@ -30,5 +38,7 @@ "digest_sequence", "SequenceCollection", "run_compliance", + "SeqColBackend", + "RefgetStoreBackend", "SequenceCollectionClient", ] diff --git a/refget/agents.py b/refget/agents.py index e98c837..e40ad1d 100644 --- a/refget/agents.py +++ b/refget/agents.py @@ -2,42 +2,43 @@ import json import os -import requests - from typing import TYPE_CHECKING -from sqlmodel import create_engine, select, Session, delete, func, SQLModel + +import requests +from sqlmodel import Session, SQLModel, create_engine, delete, func, select if TYPE_CHECKING: import peppy -from sqlalchemy.orm import selectinload +from typing import List, Optional + from sqlalchemy import URL from sqlalchemy.engine import Engine as SqlalchemyDatabaseEngine -from typing import Optional, List +from 
sqlalchemy.orm import selectinload +from .const import _LOGGER, DEFAULT_INHERENT_ATTRS, SEQCOL_SCHEMA_PATH from .models import ( - Sequence, - SequenceCollection, - Pangenome, - NamesAttr, - LengthsAttr, - SequencesAttr, - SortedSequencesAttr, - NameLengthPairsAttr, + AccessMethod, + AccessURL, CollectionNamesAttr, + FastaDrsObject, HumanReadableNames, + LengthsAttr, + NameLengthPairsAttr, + NamesAttr, PaginationResult, + Pangenome, ResultsSequenceCollections, - FastaDrsObject, - AccessMethod, - AccessURL, + Sequence, + SequenceCollection, + SequencesAttr, + SortedSequencesAttr, ) from .utils import ( - compare_seqcols, build_pangenome_model, calc_jaccard_similarities, + compare_seqcols, fasta_to_seqcol_dict, ) -from .const import _LOGGER, DEFAULT_INHERENT_ATTRS, SEQCOL_SCHEMA_PATH ATTR_TYPE_MAP = { "sequences": SequencesAttr, @@ -304,7 +305,6 @@ def add(self, seqcol: SequenceCollection, update: bool = False) -> SequenceColle for name_model in seqcol.human_readable_names: if name_model.human_readable_name not in existing_names: - new_name = HumanReadableNames( human_readable_name=name_model.human_readable_name, digest=existing.digest, @@ -659,7 +659,6 @@ def list(self, attribute_type: str, offset: int = 0, limit: int = 50) -> dict: } def search(self, attribute_type: str, digest: str, offset: int = 0, limit: int = 50) -> dict: - Attribute = ATTR_TYPE_MAP[attribute_type] with Session(self.engine) as session: list_stmt = ( select(SequenceCollection) @@ -825,11 +824,53 @@ def __init__( self.__attribute = AttributeAgent(self.engine) self.__fasta_drs = FastaDrsAgent(self.engine, fasta_drs_url_prefix) + # ========================================================================= + # SeqColBackend protocol methods + # ========================================================================= + + def get_collection(self, digest: str, level: int = 2) -> dict: + format_map = {1: "level1", 2: "level2"} + return self.seqcol.get(digest, return_format=format_map.get(level, "level2")) + + def get_collection_attribute(self, digest: str, attribute: str) -> list: + return self.seqcol.get(digest, attribute=attribute) + + def get_collection_itemwise(self, digest: str, limit: int | None = None) -> list[dict]: + return self.seqcol.get(digest, return_format="itemwise", itemwise_limit=limit) + + def get_attribute(self, attribute_name: str, attribute_digest: str) -> list: + return self.attribute.get(attribute_name, attribute_digest) + def compare_digests(self, digestA: str, digestB: str) -> dict: A = self.seqcol.get(digestA, return_format="level2") B = self.seqcol.get(digestB, return_format="level2") return compare_seqcols(A, B) + def compare_digest_with_level2(self, digest: str, level2_b: dict) -> dict: + return self.compare_1_digest(digest, level2_b) + + def list_collections( + self, page: int = 0, page_size: int = 100, filters: dict | None = None + ) -> dict: + if filters: + return self.seqcol.search_by_attributes( + filters, limit=page_size, offset=page * page_size + ) + return self.seqcol.list_by_offset(limit=page_size, offset=page * page_size) + + def collection_count(self) -> int: + result = self.seqcol.list_by_offset(limit=1, offset=0) + return result["pagination"]["total"] + + def capabilities(self) -> dict: + return { + "backend_type": "database", + "n_collections": self.collection_count(), + "has_sequence_data": True, # database always has sequences + "collection_alias_namespaces": [], + "sequence_alias_namespaces": [], + } + def calc_similarities(self, digestA: str, digestB: str) -> dict: """ Calculates 
the Jaccard similarity between two sequence collections. @@ -910,20 +951,12 @@ def truncate(self) -> int: with Session(self.engine) as session: statement = delete(SequenceCollection) result1 = session.exec(statement) - statement = delete(Pangenome) - result = session.exec(statement) - statement = delete(NamesAttr) - result = session.exec(statement) - statement = delete(LengthsAttr) - result = session.exec(statement) - statement = delete(SequencesAttr) - result = session.exec(statement) - # statement = delete(SortedNameLengthPairsAttr) - # result = session.exec(statement) - statement = delete(NameLengthPairsAttr) - result = session.exec(statement) - statement = delete(SortedSequencesAttr) - result = session.exec(statement) + session.exec(delete(Pangenome)) + session.exec(delete(NamesAttr)) + session.exec(delete(LengthsAttr)) + session.exec(delete(SequencesAttr)) + session.exec(delete(NameLengthPairsAttr)) + session.exec(delete(SortedSequencesAttr)) session.commit() return result1.rowcount diff --git a/refget/backend.py b/refget/backend.py new file mode 100644 index 0000000..9408230 --- /dev/null +++ b/refget/backend.py @@ -0,0 +1,144 @@ +""" +SeqColBackend protocol and RefgetStoreBackend implementation. + +The SeqColBackend protocol defines the interface for serving seqcol API endpoints. +Two implementations: +- RefgetDBAgent (PostgreSQL) — full features including similarities, pangenomes, DRS +- RefgetStoreBackend (RefgetStore) — core seqcol operations, no database required +""" + +from __future__ import annotations + +from typing import Protocol, runtime_checkable + +from .utils import compare_seqcols + + +@runtime_checkable +class SeqColBackend(Protocol): + """Backend protocol for serving seqcol API endpoints.""" + + def get_collection(self, digest: str, level: int = 2) -> dict: + """Get a collection at level 1 or 2. Raises ValueError if not found.""" + ... + + def get_collection_attribute(self, digest: str, attribute: str) -> list: + """Get a single attribute array from a collection. Raises ValueError if not found.""" + ... + + def get_collection_itemwise(self, digest: str, limit: int | None = None) -> list[dict]: + """Get collection in itemwise format. Raises ValueError if not found.""" + ... + + def get_attribute(self, attribute_name: str, attribute_digest: str) -> list: + """Get an attribute by its own digest. Raises KeyError if not found.""" + ... + + def compare_digests(self, digest_a: str, digest_b: str) -> dict: + """Compare two collections by digest. Raises ValueError if not found.""" + ... + + def compare_digest_with_level2(self, digest: str, level2_b: dict) -> dict: + """Compare a stored collection with a POSTed level2 dict. Raises ValueError if not found.""" + ... + + def list_collections( + self, page: int = 0, page_size: int = 100, filters: dict | None = None + ) -> dict: + """List collections with pagination and optional attribute filters. + Returns {"results": [...], "pagination": {...}}""" + ... + + def collection_count(self) -> int: + """Total number of collections.""" + ... + + def capabilities(self) -> dict: + """Return backend capabilities for service-info.""" + ... + + +class RefgetStoreBackend: + """SeqColBackend backed by a RefgetStore (no database).""" + + def __init__(self, store): + """ + Args: + store: A RefgetStore or ReadonlyRefgetStore instance from gtars. 
+ """ + self._store = store + + def get_collection(self, digest: str, level: int = 2) -> dict: + try: + if level == 1: + result = self._store.get_collection_level1(digest) + else: + result = self._store.get_collection_level2(digest) + except (OSError, IOError): + raise ValueError(f"Collection '{digest}' not found") + if result is None: + raise ValueError(f"Collection '{digest}' not found") + return result + + def get_collection_attribute(self, digest: str, attribute: str) -> list: + level2 = self.get_collection(digest, level=2) + if attribute not in level2: + raise ValueError(f"Attribute '{attribute}' not found") + return level2[attribute] + + def get_collection_itemwise(self, digest: str, limit: int | None = None) -> list[dict]: + level2 = self.get_collection(digest, level=2) + # Transpose: {"names": [a,b], "lengths": [1,2]} -> [{"names": a, "lengths": 1}, ...] + keys = list(level2.keys()) + n = len(level2[keys[0]]) + if limit: + n = min(n, limit) + return [{k: level2[k][i] for k in keys} for i in range(n)] + + def get_attribute(self, attribute_name: str, attribute_digest: str) -> list: + result = self._store.get_attribute(attribute_name, attribute_digest) + if result is None: + raise KeyError(f"Attribute {attribute_name}/{attribute_digest} not found") + return result + + def compare_digests(self, digest_a: str, digest_b: str) -> dict: + try: + result = self._store.compare(digest_a, digest_b) + except (OSError, IOError): + raise ValueError("Collection not found") + if result is None: + raise ValueError("Collection not found") + return result + + def compare_digest_with_level2(self, digest: str, level2_b: dict) -> dict: + """Compare a stored collection with a POSTed level2 dict. + + The store does not have a native compare_with_level2, so we retrieve + level2 for the stored collection and use the Python compare utility. 
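+
+        Illustrative call (digest and attribute values are placeholders):
+
+            backend.compare_digest_with_level2(
+                "<stored-digest>",
+                {"names": ["chr1"], "lengths": [248956422], "sequences": ["<seq-digest>"]},
+            )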
+ """ + level2_a = self.get_collection(digest, level=2) + return compare_seqcols(level2_a, level2_b) + + def list_collections( + self, page: int = 0, page_size: int = 100, filters: dict | None = None + ) -> dict: + if filters: + raise ValueError("Filtering by attribute is not supported by RefgetStore backend") + return self._store.list_collections(page=page, page_size=page_size) + + def collection_count(self) -> int: + result = self._store.list_collections(page=0, page_size=1) + return result["pagination"]["total"] + + def capabilities(self) -> dict: + stats = self._store.stats() + n_collections = int(stats.get("n_collections", 0)) + n_sequences = int(stats.get("n_sequences", 0)) + return { + "backend_type": "refget_store", + "n_collections": n_collections, + "n_sequences": n_sequences, + "has_sequence_data": n_sequences > 0, + "collection_alias_namespaces": self._store.list_collection_alias_namespaces(), + "sequence_alias_namespaces": self._store.list_sequence_alias_namespaces(), + } diff --git a/refget/cli/admin.py b/refget/cli/admin.py index 1787c82..9e6b067 100644 --- a/refget/cli/admin.py +++ b/refget/cli/admin.py @@ -15,20 +15,18 @@ import json import os from pathlib import Path -from typing import Optional, List, Dict, Any +from typing import Any, Dict, List, Optional import typer from refget.cli.output import ( EXIT_CONFIG_ERROR, - EXIT_FILE_NOT_FOUND, EXIT_FAILURE, - EXIT_SUCCESS, + EXIT_FILE_NOT_FOUND, print_error, print_info, print_json, print_success, - print_warning, ) # Heavy imports (sqlmodel) are done lazily inside functions that need them diff --git a/refget/cli/config.py b/refget/cli/config.py index 6f5756f..7614ad5 100644 --- a/refget/cli/config.py +++ b/refget/cli/config.py @@ -14,14 +14,6 @@ import typer -from refget.cli.output import ( - EXIT_CONFIG_ERROR, - EXIT_FAILURE, - EXIT_SUCCESS, - print_error, - print_json, - print_success, -) from refget.cli.config_manager import ( DEFAULTS, get_config_path, @@ -30,6 +22,14 @@ save_config, set_value, ) +from refget.cli.output import ( + EXIT_CONFIG_ERROR, + EXIT_FAILURE, + EXIT_SUCCESS, + print_error, + print_json, + print_success, +) app = typer.Typer( name="config", @@ -220,7 +220,7 @@ def add( if resource_type not in RESOURCE_TYPE_MAP: valid_types = ", ".join(RESOURCE_TYPE_MAP.keys()) print_error( - f"Invalid resource type '{resource_type}'.\n" f"Valid types: {valid_types}", + f"Invalid resource type '{resource_type}'.\nValid types: {valid_types}", EXIT_CONFIG_ERROR, ) return # Unreachable, but clarifies control flow @@ -314,7 +314,7 @@ def remove( if resource_type not in RESOURCE_TYPE_MAP: valid_types = ", ".join(RESOURCE_TYPE_MAP.keys()) print_error( - f"Invalid resource type '{resource_type}'.\n" f"Valid types: {valid_types}", + f"Invalid resource type '{resource_type}'.\nValid types: {valid_types}", EXIT_CONFIG_ERROR, ) return # Unreachable, but clarifies control flow diff --git a/refget/cli/config_manager.py b/refget/cli/config_manager.py index aff7c8b..1eb968f 100644 --- a/refget/cli/config_manager.py +++ b/refget/cli/config_manager.py @@ -105,7 +105,7 @@ def save_config(config: Dict[str, Any]) -> None: """ if tomli_w is None: raise ImportError( - "tomli_w is required to save configuration.\n" "Install with: pip install tomli-w" + "tomli_w is required to save configuration.\nInstall with: pip install tomli-w" ) config_path = get_config_path() diff --git a/refget/cli/fasta.py b/refget/cli/fasta.py index 5c32442..d5f3313 100644 --- a/refget/cli/fasta.py +++ b/refget/cli/fasta.py @@ -22,8 +22,8 @@ import typer from 
refget.cli.output import ( - EXIT_FILE_NOT_FOUND, EXIT_FAILURE, + EXIT_FILE_NOT_FOUND, EXIT_SUCCESS, print_error, print_json, @@ -165,8 +165,11 @@ def index( ) files_created = [ - str(fai_path), str(seqcol_path), str(chrom_sizes_path), - str(rgsi_path), str(rgci_path), + str(fai_path), + str(seqcol_path), + str(chrom_sizes_path), + str(rgsi_path), + str(rgci_path), ] if json_output: diff --git a/refget/cli/main.py b/refget/cli/main.py index 61bb2a2..39129e8 100644 --- a/refget/cli/main.py +++ b/refget/cli/main.py @@ -4,16 +4,16 @@ This module defines the main CLI app and registers all command groups. """ -import typer from typing import Optional -from refget._version import __version__ +import typer +from refget._version import __version__ +from refget.cli.admin import app as admin_app from refget.cli.config import app as config_app from refget.cli.fasta import app as fasta_app -from refget.cli.store import app as store_app from refget.cli.seqcol import app as seqcol_app -from refget.cli.admin import app as admin_app +from refget.cli.store import app as store_app app = typer.Typer( name="refget", diff --git a/refget/cli/seqcol.py b/refget/cli/seqcol.py index 69dc486..14cc4c0 100644 --- a/refget/cli/seqcol.py +++ b/refget/cli/seqcol.py @@ -118,8 +118,8 @@ def _compute_snlp_digest(seqcol_dict: dict) -> str: Returns: The snlp digest (coordinate system identifier) """ - from refget.utils import build_sorted_name_length_pairs, canonical_str from refget.digests import sha512t24u_digest + from refget.utils import build_sorted_name_length_pairs, canonical_str snlp_digests = build_sorted_name_length_pairs(seqcol_dict) return sha512t24u_digest(canonical_str(snlp_digests)) diff --git a/refget/cli/store.py b/refget/cli/store.py index 904e1ce..0fe33e3 100644 --- a/refget/cli/store.py +++ b/refget/cli/store.py @@ -27,8 +27,8 @@ from refget.cli.config_manager import get_remote_stores, get_seqcol_servers, get_store_path from refget.cli.output import ( - EXIT_FILE_NOT_FOUND, EXIT_FAILURE, + EXIT_FILE_NOT_FOUND, EXIT_SUCCESS, check_dependency, print_error, @@ -71,7 +71,7 @@ def _get_store_path(path: Optional[Path]) -> Path: def _get_collection_digests(store) -> set: """Get the set of collection digest strings from a store.""" - return {meta.digest for meta in store.list_collections()} + return {meta.digest for meta in store.list_collections()["results"]} def _load_store(path: Optional[Path], must_exist: bool = True, remote: Optional[str] = None): @@ -150,7 +150,7 @@ def init( store_path.parent.mkdir(parents=True, exist_ok=True) # Initialize the store (creates index files) - store = RefgetStore.on_disk(str(store_path)) + RefgetStore.on_disk(str(store_path)) print_json( { @@ -187,12 +187,6 @@ def add( "-q", help="Suppress progress output", ), - threads: Optional[int] = typer.Option( - None, - "--threads", - "-t", - help="Number of threads for parallel encoding (default: 1)", - ), ) -> None: """ Import a FASTA file to the local store. 
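A minimal sketch (not part of the patch) of the paginated shape that store.list_collections() is assumed to return, inferred from the call sites updated above; the keys, keyword arguments, and the .digest attribute are assumptions drawn from those call sites rather than from gtars documentation:

    from refget.store import RefgetStore

    store = RefgetStore.in_memory()
    store.add_sequence_collection_from_fasta("test_fasta/base.fa")  # illustrative path

    page = store.list_collections(page=0, page_size=100)
    for meta in page["results"]:        # per-collection metadata objects
        print(meta.digest)              # top-level collection digest
    print(page["pagination"]["total"])  # total count, as used by `refget store stats`
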
@@ -223,9 +217,7 @@ def add( store.set_encoding_mode(StorageMode.Encoded) # Add the FASTA file - returns (metadata, was_new) with all info we need - metadata, was_new = store.add_sequence_collection_from_fasta( - str(fasta.resolve()), threads=threads - ) + metadata, was_new = store.add_sequence_collection_from_fasta(str(fasta.resolve())) print_json( { @@ -283,7 +275,7 @@ def list_items( print_json({"sequences": items}) else: collections = [] - for meta in store.list_collections(): + for meta in store.list_collections()["results"]: collections.append( { "digest": meta.digest, @@ -353,7 +345,9 @@ def get( store = _load_store(path, remote=remote) if sequence: - # Sequence retrieval mode + # Sequence retrieval mode — load sequence data + store.load_all_collections() + store.load_all_sequences() seq_data = None if name is not None: @@ -676,8 +670,9 @@ def export( """ store = _load_store(path, remote=remote) - # Ensure collection is loaded (required for export) + # Ensure collection and sequence data are loaded (required for export) _ensure_collection_loaded(store, digest) + store.load_all_sequences() def _do_export(output_path: str) -> None: """Perform the actual export to a file path.""" @@ -870,7 +865,7 @@ def stats( stats_dict["collections"] = int(stats_dict["collections"]) else: # Fallback: count collections ourselves - stats_dict["collections"] = len(store.list_collections()) + stats_dict["collections"] = store.list_collections()["pagination"]["total"] print_json(stats_dict) raise typer.Exit(EXIT_SUCCESS) @@ -914,9 +909,7 @@ def remove( @app.command() def metadata( digest: str = typer.Argument(help="Collection digest"), - path: Optional[Path] = typer.Option( - None, "--path", "-p", help="Store path" - ), + path: Optional[Path] = typer.Option(None, "--path", "-p", help="Store path"), ): """Show FHR metadata for a collection.""" store = _load_store(path) @@ -933,12 +926,339 @@ def metadata( def metadata_set( digest: str = typer.Argument(help="Collection digest"), file: Path = typer.Argument(help="Path to FHR JSON file"), - path: Optional[Path] = typer.Option( - None, "--path", "-p", help="Store path" - ), + path: Optional[Path] = typer.Option(None, "--path", "-p", help="Store path"), ): """Set FHR metadata for a collection from a JSON file.""" store = _load_store(path) store.load_fhr_metadata(digest, str(file)) print(f"Set FHR metadata for collection {digest}") raise typer.Exit(EXIT_SUCCESS) + + +@app.command("crate") +def crate( + path: Optional[Path] = typer.Option( + None, + "--path", + "-p", + help="Store path (default: from config)", + ), + name: str = typer.Option( + ..., + "--name", + "-n", + help="Name for the RO-Crate root dataset", + ), + description: Optional[str] = typer.Option( + None, + "--description", + "-d", + help="Description of the store", + ), + author: Optional[str] = typer.Option( + None, + "--author", + "-a", + help='Author in "Name " format, e.g. "Jane Doe "', + ), + license: Optional[str] = typer.Option( + None, + "--license", + "-l", + help="License URL, e.g. https://creativecommons.org/publicdomain/zero/1.0/", + ), + output: Optional[Path] = typer.Option( + None, + "--output", + "-o", + help="Output path (default: /ro-crate-metadata.json)", + ), +) -> None: + """Generate an RO-Crate metadata file for a RefgetStore. + + Creates a ro-crate-metadata.json describing the store as a FAIR + research object, including structure, provenance, and statistics. 
+ + Examples: + refget store crate --path /store --name "My genomes" --author "J Doe " + refget store crate -p /store -n "Store" -l https://creativecommons.org/publicdomain/zero/1.0/ + """ + import json + import re + from datetime import datetime, timezone + + from refget._version import __version__ + + store = _load_store(path) + store_path = _get_store_path(path) + + # Gather stats + stats_obj = store.stats() + stats_dict = {} + if hasattr(stats_obj, "__iter__"): + for key, value in stats_obj.items(): + stats_dict[key] = value + elif hasattr(stats_obj, "__dict__"): + stats_dict = vars(stats_obj) + + storage_mode = stats_dict.get("storage_mode", "Unknown") + seq_count = int(stats_dict.get("n_sequences", stats_dict.get("sequences", 0))) + + # Count collections + try: + coll_count = store.list_collections()["pagination"]["total"] + except Exception: + coll_count = 0 + + # Build the @graph + now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + + graph = [ + # Metadata descriptor + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate/1.2"}, + {"@id": "https://w3id.org/ga4gh/refget/refgetstore-crate/0.1"}, + ], + "about": {"@id": "./"}, + }, + ] + + # Root dataset + root = { + "@id": "./", + "@type": "Dataset", + "name": name, + "datePublished": today, + "conformsTo": {"@id": "https://w3id.org/ga4gh/refget/refgetstore-crate/0.1"}, + "hasPart": [ + {"@id": "rgstore.json"}, + {"@id": "sequences.rgsi"}, + {"@id": "sequences/"}, + {"@id": "collections/"}, + ], + "additionalProperty": [ + {"@id": "#prop-storageMode"}, + {"@id": "#prop-sequenceCount"}, + {"@id": "#prop-collectionCount"}, + {"@id": "#prop-refgetDigestAlgorithm"}, + ], + } + if description: + root["description"] = description + if license: + root["license"] = {"@id": license} + if author: + # Parse "Name " format + match = re.match(r"^(.+?)\s*<(.+?)>\s*$", author) + if match: + author_name = match.group(1).strip() + author_url = match.group(2).strip() + root["author"] = {"@id": author_url} + else: + author_name = author.strip() + author_url = None + root["author"] = {"@id": f"#author-{author_name.replace(' ', '-').lower()}"} + + # Add aliases/ if it exists + aliases_path = store_path / "aliases" + if aliases_path.exists() and aliases_path.is_dir(): + root["hasPart"].append({"@id": "aliases/"}) + + graph.append(root) + + # Data entities + graph.extend([ + { + "@id": "rgstore.json", + "@type": "File", + "name": "Store configuration", + "description": "Operational configuration for RefgetStore: path templates, storage mode, format version.", + "encodingFormat": "application/json", + }, + { + "@id": "sequences.rgsi", + "@type": "File", + "name": "Master sequence index", + "description": "Tab-separated index of all sequences in the store with names, lengths, alphabets, and GA4GH digests.", + "encodingFormat": "text/tab-separated-values", + }, + { + "@id": "sequences/", + "@type": "Dataset", + "name": "Sequence data", + "description": "Content-addressable sequence files organized by digest prefix.", + }, + { + "@id": "collections/", + "@type": "Dataset", + "name": "Sequence collections", + "description": "GA4GH sequence collection metadata. 
Each .rgsi file defines a collection with its member sequences and digests.", + }, + ]) + + if aliases_path.exists() and aliases_path.is_dir(): + graph.append({ + "@id": "aliases/", + "@type": "Dataset", + "name": "Alias namespaces", + "description": "Human-readable name mappings for sequences and collections.", + }) + + # PropertyValue entities + graph.extend([ + { + "@id": "#prop-storageMode", + "@type": "PropertyValue", + "propertyID": "storageMode", + "name": "Storage Mode", + "value": storage_mode, + }, + { + "@id": "#prop-sequenceCount", + "@type": "PropertyValue", + "propertyID": "sequenceCount", + "name": "Sequence Count", + "value": seq_count, + }, + { + "@id": "#prop-collectionCount", + "@type": "PropertyValue", + "propertyID": "collectionCount", + "name": "Collection Count", + "value": coll_count, + }, + { + "@id": "#prop-refgetDigestAlgorithm", + "@type": "PropertyValue", + "propertyID": "refgetDigestAlgorithm", + "name": "Refget Digest Algorithm", + "value": "sha512t24u", + }, + ]) + + # CreateAction provenance + graph.extend([ + { + "@id": "#crate-creation", + "@type": "CreateAction", + "name": "Generate RO-Crate metadata for RefgetStore", + "endTime": now, + "instrument": {"@id": "#refget-software"}, + "result": {"@id": "./"}, + }, + { + "@id": "#refget-software", + "@type": "SoftwareApplication", + "name": "refget", + "version": __version__, + "url": "https://github.com/refgenie/refget", + "description": "Python package implementing GA4GH refget standards for sequences and sequence collections.", + }, + ]) + + # Add agent to CreateAction if author provided + if author: + graph[-2]["agent"] = root["author"] + + # Profile entity + graph.append({ + "@id": "https://w3id.org/ga4gh/refget/refgetstore-crate/0.1", + "@type": ["CreativeWork", "Profile"], + "name": "RefgetStore RO-Crate Profile", + "version": "0.1", + "description": "Profile for RO-Crates containing GA4GH RefgetStore sequence databases.", + }) + + # Author entity + if author: + match = re.match(r"^(.+?)\s*<(.+?)>\s*$", author) + if match: + graph.append({ + "@id": author_url, + "@type": "Person", + "name": author_name, + }) + else: + graph.append({ + "@id": root["author"]["@id"], + "@type": "Person", + "name": author_name, + }) + + # License entity + if license: + graph.append({ + "@id": license, + "@type": "CreativeWork", + "name": license.rstrip("/").split("/")[-1] or "License", + }) + + crate = { + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": graph, + } + + # Write output + output_path = output or (store_path / "ro-crate-metadata.json") + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps(crate, indent=2) + "\n") + + print_json({ + "output": str(output_path), + "status": "created", + "entities": len(graph), + }) + raise typer.Exit(EXIT_SUCCESS) + + +@app.command("serve") +def serve( + path: Optional[Path] = typer.Option(None, "--path", "-p", help="Local store path"), + remote: Optional[str] = typer.Option( + None, "--remote", "-r", help="Remote store URL (e.g. s3://bucket/store/)" + ), + port: int = typer.Option(8000, "--port", help="Port to serve on"), + host: str = typer.Option("0.0.0.0", "--host", help="Host to bind to"), +): + """Serve a seqcol API backed by a RefgetStore (no database required). 
+ + Examples: + refget store serve --path /path/to/store --port 8000 + refget store serve --remote s3://bucket/store/ --port 8000 + """ + try: + import uvicorn + except ImportError: + print_error("uvicorn is required: pip install uvicorn", EXIT_FAILURE) + + from refget.backend import RefgetStoreBackend + + if remote: + store = _load_store(path=None, remote=remote) + elif path: + store = _load_store(path) + else: + store = _load_store(None) + + backend = RefgetStoreBackend(store.into_readonly()) + + from fastapi import FastAPI + + from refget.router import create_refget_router + + app = FastAPI(title="Sequence Collections API (Store-backed)") + app.state.backend = backend + router = create_refget_router( + sequences=False, + pangenomes=False, + refget_store_url=remote, + ) + app.include_router(router) + + typer.echo(f"Serving store-backed seqcol API on {host}:{port}") + uvicorn.run(app, host=host, port=port) + raise typer.Exit(EXIT_SUCCESS) diff --git a/refget/clients.py b/refget/clients.py index 977eb68..6ad9196 100644 --- a/refget/clients.py +++ b/refget/clients.py @@ -1,8 +1,13 @@ +from __future__ import annotations + import logging import re +from typing import TYPE_CHECKING, Optional + import requests -from typing import Optional +if TYPE_CHECKING: + from .store import RefgetStore _LOGGER = logging.getLogger(__name__) @@ -602,12 +607,12 @@ def download_to_store( >>> client = FastaDrsClient() >>> collection_digest = client.download_to_store("abc123", store) """ - import tempfile import os + import tempfile # Verify store is available try: - from .store import RefgetStore as RefgetStoreClass + from .store import RefgetStore as RefgetStoreClass # noqa: F401 except ImportError: raise ImportError("gtars is required for download_to_store functionality") diff --git a/refget/compliance.py b/refget/compliance.py index 7020d9e..eaa64f7 100644 --- a/refget/compliance.py +++ b/refget/compliance.py @@ -15,7 +15,7 @@ import json import logging import time -from dataclasses import dataclass, field, asdict +from dataclasses import asdict, dataclass, field from datetime import datetime, timezone from pathlib import Path @@ -96,11 +96,17 @@ def _timed_check(name: str, func, *args, **kwargs) -> CheckResult: try: func(*args, **kwargs) elapsed = (time.monotonic() - start) * 1000 - return CheckResult(name=name, passed=True, duration_ms=round(elapsed, 2), description=description) + return CheckResult( + name=name, passed=True, duration_ms=round(elapsed, 2), description=description + ) except AssertionError as e: elapsed = (time.monotonic() - start) * 1000 return CheckResult( - name=name, passed=False, duration_ms=round(elapsed, 2), description=description, error=str(e) + name=name, + passed=False, + duration_ms=round(elapsed, 2), + description=description, + error=str(e), ) except requests.exceptions.RequestException as e: elapsed = (time.monotonic() - start) * 1000 @@ -163,9 +169,9 @@ def check_list_attributes(api_root, attribute_name): res = requests.get(f"{api_root}/list/attributes/{attribute_name}", timeout=COMPLIANCE_TIMEOUT) data = res.json() assert "results" in data, f"list/attributes/{attribute_name} missing 'results' field" - assert isinstance( - data["results"], list - ), f"list/attributes/{attribute_name} 'results' should be a list" + assert isinstance(data["results"], list), ( + f"list/attributes/{attribute_name} 'results' should be a list" + ) def check_openapi_available(api_root): @@ -185,7 +191,9 @@ def check_collection_level1(api_root, fa_name, bundle): """Level 1 response returns digest 
strings for all attributes.""" digest = bundle["top_level_digest"] res = requests.get(f"{api_root}/collection/{digest}?level=1", timeout=COMPLIANCE_TIMEOUT) - assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + assert res.status_code == 200, ( + f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + ) data = res.json() for attr in ["names", "lengths", "sequences"]: assert isinstance(data[attr], str), ( @@ -201,7 +209,9 @@ def check_collection_level2(api_root, fa_name, bundle): """Level 2 response returns arrays matching expected content.""" digest = bundle["top_level_digest"] res = requests.get(f"{api_root}/collection/{digest}?level=2", timeout=COMPLIANCE_TIMEOUT) - assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + assert res.status_code == 200, ( + f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + ) data = res.json() for attr in ["names", "lengths", "sequences"]: assert isinstance(data[attr], list), ( @@ -210,14 +220,18 @@ def check_collection_level2(api_root, fa_name, bundle): assert data[attr] == bundle["level2"][attr], ( f"Level 2 {attr} for {fa_name}: expected {bundle['level2'][attr]}, got {data[attr]}" ) - assert "sorted_name_length_pairs" not in data, "Level 2 should not have sorted_name_length_pairs" + assert "sorted_name_length_pairs" not in data, ( + "Level 2 should not have sorted_name_length_pairs" + ) def check_default_level_returns_level2(api_root, fa_name, bundle): """Collection without ?level= param returns level 2 arrays (spec default).""" digest = bundle["top_level_digest"] res = requests.get(f"{api_root}/collection/{digest}", timeout=COMPLIANCE_TIMEOUT) - assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + assert res.status_code == 200, ( + f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + ) data = res.json() for attr in ["names", "lengths", "sequences"]: assert isinstance(data[attr], list), ( @@ -229,13 +243,13 @@ def check_sorted_name_length_pairs(api_root, fa_name, bundle): """Level 1 sorted_name_length_pairs digest matches expected value.""" digest = bundle["top_level_digest"] res = requests.get(f"{api_root}/collection/{digest}?level=1", timeout=COMPLIANCE_TIMEOUT) - assert res.status_code == 200, f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + assert res.status_code == 200, ( + f"Collection {digest} returned HTTP {res.status_code} (expected 200)" + ) data = res.json() expected = bundle["sorted_name_length_pairs_digest"] actual = data.get("sorted_name_length_pairs") - assert actual == expected, ( - f"SNLP for {fa_name}: expected {expected}, got {actual}" - ) + assert actual == expected, f"SNLP for {fa_name}: expected {expected}, got {actual}" # ============================================================ @@ -263,13 +277,17 @@ def check_transient_attribute_not_served(api_root): """Transient attributes (sorted_name_length_pairs) return 404 from /attribute.""" bundle = DIGEST_TESTS[0][1] digest = bundle["top_level_digest"] - level1 = requests.get(f"{api_root}/collection/{digest}?level=1", timeout=COMPLIANCE_TIMEOUT).json() + level1 = requests.get( + f"{api_root}/collection/{digest}?level=1", timeout=COMPLIANCE_TIMEOUT + ).json() snlp_digest = level1["sorted_name_length_pairs"] res = requests.get( f"{api_root}/attribute/collection/sorted_name_length_pairs/{snlp_digest}", timeout=COMPLIANCE_TIMEOUT, ) - assert res.status_code == 404, 
"Transient attributes should not be served by /attribute endpoint" + assert res.status_code == 404, ( + "Transient attributes should not be served by /attribute endpoint" + ) # ============================================================ @@ -411,33 +429,53 @@ def build_checks(api_root: str) -> list[tuple[str, callable, list]]: # Collection content checks (per FASTA file) for fa_name, bundle in DIGEST_TESTS: tag = fa_name.replace(".fa", "") - checks.append((f"collection_level1_{tag}", check_collection_level1, [api_root, fa_name, bundle])) - checks.append((f"collection_level2_{tag}", check_collection_level2, [api_root, fa_name, bundle])) - checks.append((f"default_level2_{tag}", check_default_level_returns_level2, [api_root, fa_name, bundle])) - checks.append((f"snlp_digest_{tag}", check_sorted_name_length_pairs, [api_root, fa_name, bundle])) + checks.append( + (f"collection_level1_{tag}", check_collection_level1, [api_root, fa_name, bundle]) + ) + checks.append( + (f"collection_level2_{tag}", check_collection_level2, [api_root, fa_name, bundle]) + ) + checks.append( + ( + f"default_level2_{tag}", + check_default_level_returns_level2, + [api_root, fa_name, bundle], + ) + ) + checks.append( + (f"snlp_digest_{tag}", check_sorted_name_length_pairs, [api_root, fa_name, bundle]) + ) # Attribute retrieval checks (per FASTA, per attribute) for fa_name, bundle in DIGEST_TESTS: tag = fa_name.replace(".fa", "") for attr in ["lengths", "names", "sequences"]: - checks.append(( - f"attribute_{attr}_{tag}", - check_attribute_retrieval, - [api_root, fa_name, bundle, attr], - )) + checks.append( + ( + f"attribute_{attr}_{tag}", + check_attribute_retrieval, + [api_root, fa_name, bundle, attr], + ) + ) # Attribute filtering checks - checks.append(("transient_attribute_not_served", check_transient_attribute_not_served, [api_root])) - checks.append(("multi_attribute_filter_and", check_list_multi_attribute_filter_and, [api_root])) + checks.append( + ("transient_attribute_not_served", check_transient_attribute_not_served, [api_root]) + ) + checks.append( + ("multi_attribute_filter_and", check_list_multi_attribute_filter_and, [api_root]) + ) # List filter checks (base.fa, filter by each attribute) base_name, base_bundle = DIGEST_TESTS[0] for attr in ["lengths", "names", "sequences"]: - checks.append(( - f"list_filter_{attr}", - check_list_filter_by_attribute, - [api_root, base_name, base_bundle, attr], - )) + checks.append( + ( + f"list_filter_{attr}", + check_list_filter_by_attribute, + [api_root, base_name, base_bundle, attr], + ) + ) # Comparison checks checks.append(("comparison_structure", check_comparison_structure, [api_root])) @@ -446,7 +484,9 @@ def build_checks(api_root: str) -> list[tuple[str, callable, list]]: for fixture_name, expected in COMPARISON_FIXTURES.items(): tag = fixture_name.replace("compare_", "").replace(".json", "") checks.append((f"comparison_{tag}", check_comparison, [api_root, fixture_name, expected])) - checks.append((f"comparison_post_{tag}", check_comparison_post, [api_root, fixture_name, expected])) + checks.append( + (f"comparison_post_{tag}", check_comparison_post, [api_root, fixture_name, expected]) + ) return checks diff --git a/refget/const.py b/refget/const.py index 68104fe..66f3175 100644 --- a/refget/const.py +++ b/refget/const.py @@ -1,5 +1,5 @@ -import os import logging +import os _LOGGER = logging.getLogger(__name__) diff --git a/refget/digests.py b/refget/digests.py index b72ba0c..6ffa265 100644 --- a/refget/digests.py +++ b/refget/digests.py @@ -4,9 +4,8 @@ When gtars 
is not available, falls back to pure Python implementations (slower). """ -import hashlib import base64 - +import hashlib from typing import Callable, Union from .const import GTARS_INSTALLED @@ -34,7 +33,7 @@ def py_md5_digest(seq) -> str: # Default exports - use gtars if available, else Python fallback if GTARS_INSTALLED: - from gtars.refget import sha512t24u_digest, md5_digest + from gtars.refget import md5_digest, sha512t24u_digest else: sha512t24u_digest = py_sha512t24u_digest md5_digest = py_md5_digest diff --git a/refget/examples.py b/refget/examples.py index 94ac812..064c30b 100644 --- a/refget/examples.py +++ b/refget/examples.py @@ -1,7 +1,7 @@ # Models # Used for documentation examples in OpenAPI -from fastapi import Path, Body +from fastapi import Body, Path example_digest = Path( ..., diff --git a/refget/models.py b/refget/models.py index 5476e86..b6e4e87 100644 --- a/refget/models.py +++ b/refget/models.py @@ -2,12 +2,11 @@ import logging from copy import copy from datetime import datetime, timezone -from sqlalchemy.types import TypeDecorator -from sqlmodel import Field, SQLModel, Column, Relationship -from sqlmodel import JSON -from typing import List, Optional, Dict, Any, Literal, TYPE_CHECKING -from pydantic import BaseModel, field_validator, field_serializer +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional +from pydantic import BaseModel, field_serializer, field_validator +from sqlalchemy.types import TypeDecorator +from sqlmodel import JSON, Column, Field, Relationship, SQLModel from .digests import sha512t24u_digest @@ -38,19 +37,18 @@ def _serialize_item(self, item): return item -from .const import ( +from .const import ( # noqa: E402 DEFAULT_INHERENT_ATTRS, - DEFAULT_PASSTHRU_ATTRS, - SEQCOL_SCHEMA_PATH, GTARS_INSTALLED, + SEQCOL_SCHEMA_PATH, ) -from .exceptions import InvalidSeqColError -from .utils import ( - canonical_str, +from .exceptions import InvalidSeqColError # noqa: E402 +from .utils import ( # noqa: E402 build_name_length_pairs, - seqcol_dict_to_level1_dict, - level1_dict_to_seqcol_digest, + canonical_str, fasta_to_seqcol_dict, + level1_dict_to_seqcol_digest, + seqcol_dict_to_level1_dict, ) _LOGGER = logging.getLogger(__name__) @@ -71,9 +69,9 @@ def create_fasta_drs_object(fasta_file: str, digest: str = None) -> "FastaDrsObj Raises: ImportError: If gtars is not installed (required for FASTA processing) """ - import os import hashlib - from datetime import datetime, timezone + import os + from datetime import datetime if not GTARS_INSTALLED: raise ImportError( diff --git a/refget/router.py b/refget/router.py index eeb76ef..a2af04d 100644 --- a/refget/router.py +++ b/refget/router.py @@ -7,25 +7,26 @@ by the main app. 
To use, first import it, then attach it to the app, -then create a dbagent object to connect to the database, -and attach it to the app state like this: +then create a backend object and attach it to the app state like this: from refget.router import create_refget_router from refget.agents import RefgetDBAgent router = create_refget_router(sequences=False, collections=True, pangenomes=False) app.include_router(router, prefix="/seqcol") -app.state.dbagent = RefgetDBAgent() +dbagent = RefgetDBAgent() +app.state.backend = dbagent # RefgetDBAgent satisfies SeqColBackend +app.state.dbagent = dbagent # For DB-only endpoints (similarities, pangenomes, DRS) """ import logging -from fastapi import APIRouter, Response, HTTPException, Request, Depends, Query +from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response from fastapi.responses import StreamingResponse -from .models import Similarities, PaginationResult, PaginatedDigestList -from .agents import RefgetDBAgent +from .backend import SeqColBackend from .examples import * +from .models import PaginatedDigestList, PaginationResult, Similarities _LOGGER = logging.getLogger(__name__) @@ -36,9 +37,17 @@ _ROUTER_CONFIG: dict = {} -# dbagent is a RefgetDBAgent, which handles connection to the POSTGRES database -async def get_dbagent(request: Request) -> RefgetDBAgent: - return request.app.state.dbagent +async def get_backend(request: Request) -> SeqColBackend: + """Get the SeqColBackend from the app state.""" + return request.app.state.backend + + +async def get_dbagent(request: Request): + """Get the RefgetDBAgent for DB-only endpoints. Returns None if not configured.""" + dbagent = getattr(request.app.state, "dbagent", None) + if dbagent is None: + raise HTTPException(status_code=501, detail="This endpoint requires database backend") + return dbagent def create_refget_router( @@ -103,10 +112,10 @@ def create_refget_router( tags=["Retrieving data"], ) async def sequence( - dbagent=Depends(get_dbagent), sequence_digest: str = example_sequence, start: int | None = Query(None, description="Start position (0-based, inclusive)"), end: int | None = Query(None, description="End position (0-based, exclusive)"), + dbagent=Depends(get_dbagent), ): return Response(content=dbagent.seq.get(sequence_digest, start, end), media_type="text/plain") @@ -116,7 +125,7 @@ async def sequence( summary="Retrieve metadata for a sequence", tags=["Retrieving data"], ) -async def seq_metadata(dbagent=Depends(get_dbagent), sequence_digest: str = example_sequence): +async def seq_metadata(sequence_digest: str = example_sequence, dbagent=Depends(get_dbagent)): raise HTTPException(status_code=501, detail="Metadata retrieval not yet implemented.") @@ -129,13 +138,15 @@ async def seq_metadata(dbagent=Depends(get_dbagent), sequence_digest: str = exam tags=["Retrieving data"], ) async def collection( - dbagent=Depends(get_dbagent), collection_digest: str = example_collection_digest, level: int | None = Query(None, description="Recursion depth (1 or 2)", ge=1, le=2), collated: bool = Query(True, description="Return collated format (arrays) vs itemwise"), - attribute: str | None = Query(None, description="Return only this attribute (e.g., 'names', 'lengths')"), + attribute: str | None = Query( + None, description="Return only this attribute (e.g., 'names', 'lengths')" + ), + backend=Depends(get_backend), ): - if level == None: + if level is None: level = 2 if level > 2: raise HTTPException( @@ -144,16 +155,10 @@ async def collection( ) try: if not collated: - return 
dbagent.seqcol.get( - collection_digest, return_format="itemwise", itemwise_limit=10000 - ) + return backend.get_collection_itemwise(collection_digest, limit=10000) if attribute: - return dbagent.seqcol.get(collection_digest, attribute=attribute) - if level == 1: - return dbagent.seqcol.get(collection_digest, return_format="level1") - if level == 2: - return dbagent.seqcol.get(collection_digest, return_format="level2") - return {"error": "Invalid level specified."} + return backend.get_collection_attribute(collection_digest, attribute) + return backend.get_collection(collection_digest, level=level) except ValueError as e: raise HTTPException( status_code=404, @@ -167,18 +172,18 @@ async def collection( tags=["Retrieving data"], ) async def attribute( - dbagent=Depends(get_dbagent), attribute_name: str = "names", attribute_digest: str = example_attribute_digest, + backend=Depends(get_backend), ): try: - return dbagent.attribute.get(attribute_name, attribute_digest) - except KeyError as e: + return backend.get_attribute(attribute_name, attribute_digest) + except KeyError: raise HTTPException( status_code=404, detail="Error: attribute not found. Check the attribute and try again.", ) - except AttributeError as e: + except AttributeError: raise HTTPException( status_code=404, detail="Digest not found. Check the digest and try again.", @@ -191,15 +196,15 @@ async def attribute( tags=["Comparing sequence collections"], ) async def compare_2_digests( - dbagent=Depends(get_dbagent), collection_digest1: str = example_digest_hg38, collection_digest2: str = example_digest_hg38_primary, + backend=Depends(get_backend), ): _LOGGER.info("Comparing two digests...") result = {} result["digests"] = {"a": collection_digest1, "b": collection_digest2} try: - result.update(dbagent.compare_digests(collection_digest1, collection_digest2)) + result.update(backend.compare_digests(collection_digest1, collection_digest2)) except ValueError as e: _LOGGER.debug(e) raise HTTPException( @@ -319,9 +324,9 @@ async def calc_similarities_from_json( tags=["Comparing sequence collections"], ) async def compare_1_digest( - dbagent=Depends(get_dbagent), collection_digest1: str = example_digest_hg38, seqcolB: dict = example_hg38_sc, + backend=Depends(get_backend), ): _LOGGER.info("Comparing one digests and one POSTed seqcol...") _LOGGER.info(f"digest1: {collection_digest1}") @@ -329,7 +334,7 @@ async def compare_1_digest( result = {} result["digests"] = {"a": collection_digest1, "b": "POSTed seqcol"} try: - result.update(dbagent.compare_1_digest(collection_digest1, seqcolB)) + result.update(backend.compare_digest_with_level2(collection_digest1, seqcolB)) except ValueError as e: _LOGGER.debug(e) raise HTTPException( @@ -346,7 +351,6 @@ async def compare_1_digest( response_model=PaginatedDigestList, ) async def list_collections_by_offset( - dbagent=Depends(get_dbagent), page_size: int = Query(100, description="Number of results per page"), page: int = Query(0, description="Page number (0-indexed)"), names: str | None = Query(None, description="Filter by names attribute digest"), @@ -354,32 +358,28 @@ async def list_collections_by_offset( sequences: str | None = Query(None, description="Filter by sequences attribute digest"), name_length_pairs: str | None = Query(None, description="Filter by name_length_pairs digest"), sorted_sequences: str | None = Query(None, description="Filter by sorted_sequences digest"), + backend=Depends(get_backend), ): # Build filters from explicit parameters filters = { - k: v for k, v in { + k: v + for k, 
v in { "names": names, "lengths": lengths, "sequences": sequences, "name_length_pairs": name_length_pairs, "sorted_sequences": sorted_sequences, - }.items() if v is not None + }.items() + if v is not None } - if filters: - try: - # Multi-attribute filtering with AND logic - res = dbagent.seqcol.search_by_attributes( - filters, limit=page_size, offset=page * page_size - ) - except ValueError as e: - # Invalid attribute name - raise HTTPException(status_code=400, detail=str(e)) - else: - # No filters, return all collections - res = dbagent.seqcol.list_by_offset(limit=page_size, offset=page * page_size) + try: + res = backend.list_collections(page=page, page_size=page_size, filters=filters or None) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) - res["results"] = [x.digest for x in res["results"]] + # Normalize results to digest strings (DB backend returns model objects) + res["results"] = [x.digest if hasattr(x, "digest") else x for x in res["results"]] return res @@ -399,7 +399,7 @@ async def list_attributes( res = dbagent.attribute.list(attribute, limit=page_size, offset=page * page_size) res["results"] = [x.digest for x in res["results"]] return res - except KeyError as e: + except KeyError: raise HTTPException( status_code=404, detail="Error: attribute not found. Check the attribute and try again.", @@ -438,7 +438,7 @@ async def pangenome( level: int | None = Query(None, description="Recursion depth (1-4)", ge=1, le=4), collated: bool = Query(True, description="Return collated format (arrays) vs itemwise"), ): - if level == None: + if level is None: level = 2 try: if not collated: @@ -579,7 +579,9 @@ async def get_fasta_index( ) def run_compliance_endpoint( request: Request, - target_url: str | None = Query(None, description="Target server URL to test (defaults to self)"), + target_url: str | None = Query( + None, description="Target server URL to test (defaults to self)" + ), ): """ Run GA4GH SeqCol compliance structure tests against a server. @@ -606,7 +608,9 @@ def run_compliance_endpoint( ) def stream_compliance_endpoint( request: Request, - target_url: str | None = Query(None, description="Target server URL to test (defaults to self)"), + target_url: str | None = Query( + None, description="Target server URL to test (defaults to self)" + ), ): """ Stream compliance check results in real-time via Server-Sent Events. 
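The router now resolves the core seqcol endpoints through app.state.backend (anything satisfying SeqColBackend), while app.state.dbagent is kept only for database-specific routes. A minimal wiring sketch, assuming gtars is installed and that /data/store is an existing on-disk store (the path is illustrative); it mirrors the `refget store serve` command and create_store_app elsewhere in this patch:

    from fastapi import FastAPI

    from refget.backend import RefgetStoreBackend
    from refget.router import create_refget_router
    from refget.store import RefgetStore

    app = FastAPI(title="Store-backed seqcol API")
    store = RefgetStore.on_disk("/data/store")  # illustrative path
    app.state.backend = RefgetStoreBackend(store.into_readonly())
    # app.state.dbagent is intentionally not set, so DB-only routes
    # (similarities, pangenomes, /list/attributes) respond with HTTP 501.
    app.include_router(create_refget_router(sequences=False, pangenomes=False))
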
diff --git a/refget/store.py b/refget/store.py index 30379e5..41f79ee 100644 --- a/refget/store.py +++ b/refget/store.py @@ -18,11 +18,11 @@ if GTARS_INSTALLED: from gtars.refget import ( RefgetStore, + SequenceCollection, StorageMode, - digest_fasta, compute_fai, + digest_fasta, digest_sequence, - SequenceCollection, ) else: RefgetStore = None diff --git a/refget/utils.py b/refget/utils.py index 7c73799..c3b3e16 100644 --- a/refget/utils.py +++ b/refget/utils.py @@ -1,19 +1,19 @@ import json import logging - -from jsonschema import Draft7Validator from pathlib import Path from typing import Optional, Union +from jsonschema import Draft7Validator + from .const import ( - SeqColDict, DEFAULT_INHERENT_ATTRS, DEFAULT_PASSTHRU_ATTRS, - SEQCOL_SCHEMA_PATH, GTARS_INSTALLED, + SEQCOL_SCHEMA_PATH, + SeqColDict, ) +from .digests import DigestFunction, sha512t24u_digest from .exceptions import InvalidSeqColError -from .digests import sha512t24u_digest, DigestFunction _LOGGER = logging.getLogger(__name__) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt deleted file mode 100644 index 1d1c6e3..0000000 --- a/requirements/requirements-all.txt +++ /dev/null @@ -1,7 +0,0 @@ -jsonschema -gtars>=0.7.0 -pyyaml -requests -sqlmodel -tomli_w -typer>=0.9.0 diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt deleted file mode 100644 index 60c9958..0000000 --- a/requirements/requirements-dev.txt +++ /dev/null @@ -1 +0,0 @@ --e git+git://github.com/databio/henge@master#egg=henge \ No newline at end of file diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt deleted file mode 100644 index b2a9546..0000000 --- a/requirements/requirements-docs.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://github.com/refgenie/refget/archive/master.zip -https://github.com/databio/mkdocs-databio/archive/master.zip diff --git a/requirements/requirements-seqcolapi.txt b/requirements/requirements-seqcolapi.txt deleted file mode 100644 index bbd3811..0000000 --- a/requirements/requirements-seqcolapi.txt +++ /dev/null @@ -1,6 +0,0 @@ -fastapi -psycopg2-binary -refget -sqlmodel -uvicorn>=0.30.0 -ubiquerg>=0.6.1 diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt deleted file mode 100644 index aadcdae..0000000 --- a/requirements/requirements-test.txt +++ /dev/null @@ -1,3 +0,0 @@ --r requirements-all.txt -coveralls>=1.1 -pytest-cov>=6.0.0 \ No newline at end of file diff --git a/seqcolapi/__main__.py b/seqcolapi/__main__.py index 2e4396a..3ea99ba 100644 --- a/seqcolapi/__main__.py +++ b/seqcolapi/__main__.py @@ -1,4 +1,5 @@ import sys + from .main import main if __name__ == "__main__": diff --git a/seqcolapi/const.py b/seqcolapi/const.py index ccf3895..34e6aac 100644 --- a/seqcolapi/const.py +++ b/seqcolapi/const.py @@ -1,8 +1,9 @@ import os +from platform import python_version -from refget._version import __version__ as refget_version from gtars import __version__ as gtars_version -from platform import python_version + +from refget._version import __version__ as refget_version ALL_VERSIONS = { "refget_version": refget_version, diff --git a/seqcolapi/examples.py b/seqcolapi/examples.py index 2704252..032b863 100644 --- a/seqcolapi/examples.py +++ b/seqcolapi/examples.py @@ -1,7 +1,7 @@ # Models # Used for documentation examples in OpenAPI -from fastapi import Path, Body +from fastapi import Body, Path example_digest = Path( ..., diff --git a/seqcolapi/main.py b/seqcolapi/main.py index b13cc72..c21c995 100644 --- 
a/seqcolapi/main.py +++ b/seqcolapi/main.py @@ -1,22 +1,20 @@ import logging +from contextlib import asynccontextmanager -from fastapi import FastAPI, Depends -from fastapi import HTTPException +from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse, FileResponse, HTMLResponse -from refget.router import create_refget_router, get_dbagent +from fastapi.responses import FileResponse, HTMLResponse, JSONResponse +from sqlmodel import Session, select from starlette.requests import Request from starlette.staticfiles import StaticFiles -from sqlmodel import Session, select -from contextlib import asynccontextmanager -from .const import ALL_VERSIONS, STATIC_PATH, STATIC_DIRNAME +from refget.agents import RefgetDBAgent from refget.const import HUMANS_SAMPLE_LIST, MOUSE_SAMPLES_LIST from refget.models import HumanReadableNames -from .examples import * +from refget.router import _ROUTER_CONFIG, _SAMPLE_DIGESTS, create_refget_router -from refget.router import _SAMPLE_DIGESTS, _ROUTER_CONFIG -from refget.agents import RefgetDBAgent +from .const import ALL_VERSIONS, STATIC_DIRNAME, STATIC_PATH +from .examples import * global _LOGGER _LOGGER = logging.getLogger(__name__) @@ -35,6 +33,7 @@ async def lifespan_loader(app): # Initialize database agent and store in app state dbagent = RefgetDBAgent() app.state.dbagent = dbagent + app.state.backend = dbagent # RefgetDBAgent satisfies SeqColBackend species_samples = {"human": HUMANS_SAMPLE_LIST, "mouse": MOUSE_SAMPLES_LIST} @@ -121,13 +120,13 @@ async def http_exception_handler(request: Request, exc: HTTPException): @app.exception_handler(ValueError) -async def generic_exception_handler(request: Request, exc: Exception): +async def value_error_handler(request: Request, exc: Exception): raise HTTPException(status_code=404, detail=str(exc)) @app.get("favicon.ico", include_in_schema=False) async def favicon(): - return FileResponse(f"/static/favicon.ico") + return FileResponse("/static/favicon.ico") @app.get("/", summary="Home page", tags=["General endpoints"], response_class=HTMLResponse) @@ -149,10 +148,14 @@ async def service_info(): "fasta_drs": {"enabled": _ROUTER_CONFIG.get("fasta_drs", False)}, } + # Get backend capabilities + backend = getattr(app.state, "backend", None) + caps = backend.capabilities() if backend and hasattr(backend, "capabilities") else {} + # Add refget_store info store_url = _ROUTER_CONFIG.get("refget_store_url") if store_url: - seqcol_info["refget_store"] = {"enabled": True, "url": store_url} + seqcol_info["refget_store"] = {"enabled": True, "url": store_url, **caps} else: seqcol_info["refget_store"] = {"enabled": False} @@ -176,7 +179,7 @@ async def service_info(): # Mount statics after other routes for lower precedence -app.mount(f"/", StaticFiles(directory=STATIC_PATH), name=STATIC_DIRNAME) +app.mount("/", StaticFiles(directory=STATIC_PATH), name=STATIC_DIRNAME) def create_global_dbagent(): @@ -188,5 +191,37 @@ def create_global_dbagent(): return dbagent +def create_store_app(store_path: str, remote: bool = False, cache_dir: str = "/tmp/seqcol_cache"): + """Create a seqcolapi FastAPI app backed by a RefgetStore (no database). + + Args: + store_path: Path to store on disk, or S3 URL for remote stores. + remote: If True, open as a remote (S3) store. + cache_dir: Local cache directory for remote stores. + + Returns: + FastAPI app with store-backed seqcol endpoints. 
+ """ + from refget.backend import RefgetStoreBackend + from refget.store import RefgetStore + + if remote: + store = RefgetStore.open_remote(cache_dir, store_path) + else: + store = RefgetStore.on_disk(store_path) + + backend = RefgetStoreBackend(store.into_readonly()) + + store_app = FastAPI(title="Sequence Collections API (Store-backed)") + store_app.state.backend = backend + router = create_refget_router( + sequences=False, pangenomes=False, refget_store_url=store_path if remote else None + ) + store_app.include_router(router) + return store_app + + if __name__ != "__main__": - app.state.dbagent = create_global_dbagent() + _dbagent = create_global_dbagent() + app.state.dbagent = _dbagent + app.state.backend = _dbagent # RefgetDBAgent satisfies SeqColBackend diff --git a/setup.py b/setup.py deleted file mode 100644 index 4e22f29..0000000 --- a/setup.py +++ /dev/null @@ -1,56 +0,0 @@ -#! /usr/bin/env python - -import os -from setuptools import setup, find_packages -import sys - -PACKAGE = "refget" - -# Additional keyword arguments for setup(). -extra = {} - -# Ordinary dependencies -DEPENDENCIES = [] -with open("requirements/requirements-all.txt", "r") as reqs_file: - for line in reqs_file: - if not line.strip(): - continue - DEPENDENCIES.append(line) - -extra["install_requires"] = DEPENDENCIES - -with open("{}/_version.py".format(PACKAGE), "r") as versionfile: - version = versionfile.readline().split()[-1].strip("\"'\n") - -long_description = open("README.md").read() - -setup( - name=PACKAGE, - packages=find_packages(include=[PACKAGE, f"{PACKAGE}.*"]), - version=version, - description="Python client for refget", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Development Status :: 4 - Beta", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - ], - keywords="genome, assembly, bioinformatics, reference, sequence", - url="https://github.com/refgenie/refget", - author="Nathan Sheffield, Michal Stolarczyk", - author_email="nathan@code.databio.org", - license="BSD2", - entry_points={ - "console_scripts": ["refget = refget.cli:main"], - }, - # package_data={"refget": [os.path.join("refget", "*")]}, - include_package_data=True, - test_suite="tests", - tests_require=(["mock", "pytest"]), - setup_requires=(["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else []), - **extra, -) diff --git a/tests/api/conftest.py b/tests/api/conftest.py index 9ebaaca..49ea738 100644 --- a/tests/api/conftest.py +++ b/tests/api/conftest.py @@ -1,6 +1,7 @@ -import pytest from pathlib import Path +import pytest + @pytest.fixture(scope="session") def test_data_root(): diff --git a/tests/api/test_compliance.py b/tests/api/test_compliance.py index 0e8652d..8960493 100644 --- a/tests/api/test_compliance.py +++ b/tests/api/test_compliance.py @@ -10,25 +10,26 @@ # ./scripts/test-integration.sh import pytest + from refget.compliance import ( - DIGEST_TESTS, COMPARISON_FIXTURES, - check_service_info, - check_list_collections, - check_list_attributes, - check_openapi_available, + DIGEST_TESTS, + check_attribute_retrieval, check_collection_level1, check_collection_level2, + check_comparison, + check_comparison_post, + check_comparison_same_order_values, + check_comparison_structure, check_default_level_returns_level2, - check_sorted_name_length_pairs, - check_attribute_retrieval, - 
check_transient_attribute_not_served, + check_list_attributes, + check_list_collections, check_list_filter_by_attribute, check_list_multi_attribute_filter_and, - check_comparison, - check_comparison_structure, - check_comparison_same_order_values, - check_comparison_post, + check_openapi_available, + check_service_info, + check_sorted_name_length_pairs, + check_transient_attribute_not_served, ) diff --git a/tests/conftest.py b/tests/conftest.py index 1d26369..2e2f637 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ import json import os from pathlib import Path + import pytest from typer.testing import CliRunner @@ -38,7 +39,7 @@ @pytest.fixture def runner(): """Typer CLI test runner.""" - return CliRunner(mix_stderr=False) + return CliRunner() @pytest.fixture @@ -70,6 +71,8 @@ def invoke(*args): def test_data_root(): """Provides the absolute path to the test_fasta directory.""" return TEST_DATA_DIR + + DIFFERENT_NAMES_FASTA = TEST_DATA_DIR / "different_names.fa" DIFFERENT_ORDER_FASTA = TEST_DATA_DIR / "different_order.fa" PAIR_SWAP_FASTA = TEST_DATA_DIR / "pair_swap.fa" @@ -253,8 +256,12 @@ def pytest_configure(config): config.addinivalue_line("markers", "requires_network: mark test as requiring network access") config.addinivalue_line("markers", "requires_db: mark test as requiring database access") config.addinivalue_line("markers", "slow: mark test as slow running") - config.addinivalue_line("markers", "recommended: mark test as RECOMMENDED (not REQUIRED) by GA4GH spec") - config.addinivalue_line("markers", "require_service: mark test as requiring a running seqcol service") + config.addinivalue_line( + "markers", "recommended: mark test as RECOMMENDED (not REQUIRED) by GA4GH spec" + ) + config.addinivalue_line( + "markers", "require_service: mark test as requiring a running seqcol service" + ) def pytest_collection_modifyitems(config, items): @@ -286,7 +293,9 @@ def pytest_collection_modifyitems(config, items): # Skip require_service tests if no api_root or test_server available api_root = config.getoption("api_root") if api_root is None: - skip_service = pytest.mark.skip(reason="No --api-root provided and not running via integration test_server") + skip_service = pytest.mark.skip( + reason="No --api-root provided and not running via integration test_server" + ) for item in items: if "require_service" in item.keywords: # Only skip if this is the base TestAPI class, not a subclass with test_server diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 7f7cdff..d840250 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -7,12 +7,13 @@ """ import os -import pytest import socket import threading import time from pathlib import Path +import pytest + # Set environment variables BEFORE any app imports # Must match test-db.sh settings os.environ["POSTGRES_HOST"] = "localhost" @@ -78,8 +79,8 @@ def loaded_dbagent(test_dbagent, test_fasta_path): @pytest.fixture(scope="session") def client(loaded_dbagent): """Create TestClient with test database""" - from seqcolapi.main import app from refget.router import get_dbagent + from seqcolapi.main import app def override_get_dbagent(): return loaded_dbagent @@ -131,8 +132,9 @@ def test_server(request): loaded_dbagent = request.getfixturevalue("loaded_dbagent") import uvicorn - from seqcolapi.main import app + from refget.router import get_dbagent + from seqcolapi.main import app def override_get_dbagent(): return loaded_dbagent @@ -173,6 +175,7 @@ def override_get_dbagent(): 
def cli_runner(): """CLI runner for integration tests.""" from typer.testing import CliRunner + from refget.cli.main import app runner = CliRunner() diff --git a/tests/integration/test_cli_admin_integration.py b/tests/integration/test_cli_admin_integration.py index f3dc832..9642803 100644 --- a/tests/integration/test_cli_admin_integration.py +++ b/tests/integration/test_cli_admin_integration.py @@ -7,8 +7,6 @@ """ import pytest -import json -from pathlib import Path from typer.testing import CliRunner from refget.cli.main import app diff --git a/tests/integration/test_cli_seqcol_integration.py b/tests/integration/test_cli_seqcol_integration.py index 9f13a1a..da6ab12 100644 --- a/tests/integration/test_cli_seqcol_integration.py +++ b/tests/integration/test_cli_seqcol_integration.py @@ -6,7 +6,6 @@ Run with: ./scripts/test-integration.sh """ -import pytest import json diff --git a/tests/integration/test_run_compliance.py b/tests/integration/test_run_compliance.py index d240024..6cb7df7 100644 --- a/tests/integration/test_run_compliance.py +++ b/tests/integration/test_run_compliance.py @@ -1,6 +1,7 @@ """Run the standalone compliance suite against the integration test server.""" import pytest + from tests.api.test_compliance import TestAPI diff --git a/tests/local/test_aliases.py b/tests/local/test_aliases.py index 251cea4..58aeacc 100644 --- a/tests/local/test_aliases.py +++ b/tests/local/test_aliases.py @@ -8,7 +8,7 @@ from refget.store import RefgetStore try: - from gtars.refget import RefgetStore as _check + from gtars.refget import RefgetStore as _check # noqa: F401 _RUST_BINDINGS_AVAILABLE = True except ImportError: @@ -33,7 +33,7 @@ def seq_digest(store): @pytest.fixture def col_digest(store): - return store.list_collections()[0].digest + return store.list_collections()["results"][0].digest @pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") diff --git a/tests/local/test_backend.py b/tests/local/test_backend.py new file mode 100644 index 0000000..e52f309 --- /dev/null +++ b/tests/local/test_backend.py @@ -0,0 +1,216 @@ +""" +Tests for SeqColBackend protocol and RefgetStoreBackend implementation. 
+ +Verifies that: +- RefgetStoreBackend wraps RefgetStore correctly +- All SeqColBackend protocol methods work +- Error handling (ValueError, KeyError) works properly +""" + +import json +from pathlib import Path + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +try: + from refget.backend import RefgetStoreBackend, SeqColBackend + from refget.store import RefgetStore + + _RUST_BINDINGS_AVAILABLE = True +except ImportError: + _RUST_BINDINGS_AVAILABLE = False + +from refget.router import create_refget_router + +TEST_FASTA_DIR = Path("test_fasta") +BASE_FASTA = TEST_FASTA_DIR / "base.fa" +DIFFERENT_NAMES_FASTA = TEST_FASTA_DIR / "different_names.fa" + +with open(TEST_FASTA_DIR / "test_fasta_digests.json") as fp: + TEST_DIGESTS = json.load(fp) + +BASE_DIGEST = TEST_DIGESTS["base.fa"]["top_level_digest"] +BASE_LEVEL1 = TEST_DIGESTS["base.fa"]["level1"] +BASE_LEVEL2 = TEST_DIGESTS["base.fa"]["level2"] +DIFFERENT_NAMES_DIGEST = TEST_DIGESTS["different_names.fa"]["top_level_digest"] + + +@pytest.fixture +def backend(): + """Create a RefgetStoreBackend with base.fa and different_names.fa loaded.""" + store = RefgetStore.in_memory() + store.add_sequence_collection_from_fasta(str(BASE_FASTA)) + store.add_sequence_collection_from_fasta(str(DIFFERENT_NAMES_FASTA)) + return RefgetStoreBackend(store.into_readonly()) + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +class TestRefgetStoreBackend: + """Tests for RefgetStoreBackend.""" + + def test_satisfies_protocol(self, backend): + """RefgetStoreBackend satisfies the SeqColBackend protocol.""" + assert isinstance(backend, SeqColBackend) + + def test_get_collection_level2(self, backend): + """get_collection returns level2 by default.""" + result = backend.get_collection(BASE_DIGEST) + assert "names" in result + assert "lengths" in result + assert "sequences" in result + assert isinstance(result["names"], list) + + def test_get_collection_level1(self, backend): + """get_collection with level=1 returns digest strings.""" + result = backend.get_collection(BASE_DIGEST, level=1) + assert "names" in result + assert isinstance(result["names"], str) + + def test_get_collection_not_found(self, backend): + """get_collection raises ValueError for missing digest.""" + with pytest.raises(ValueError, match="not found"): + backend.get_collection("nonexistent_digest") + + def test_get_collection_attribute(self, backend): + """get_collection_attribute returns a single attribute array matching level2.""" + names = backend.get_collection_attribute(BASE_DIGEST, "names") + assert isinstance(names, list) + # Should match what get_collection returns + level2 = backend.get_collection(BASE_DIGEST, level=2) + assert names == level2["names"] + + def test_get_collection_attribute_not_found(self, backend): + """get_collection_attribute raises ValueError for missing attribute.""" + with pytest.raises(ValueError, match="not found"): + backend.get_collection_attribute(BASE_DIGEST, "nonexistent_attr") + + def test_get_collection_itemwise(self, backend): + """get_collection_itemwise returns transposed list of dicts.""" + items = backend.get_collection_itemwise(BASE_DIGEST) + assert isinstance(items, list) + assert len(items) > 0 + for item in items: + assert "names" in item + assert "lengths" in item + + def test_get_collection_itemwise_with_limit(self, backend): + """get_collection_itemwise respects limit parameter.""" + items = backend.get_collection_itemwise(BASE_DIGEST, limit=1) + assert len(items) == 1 + 
+ def test_get_attribute(self, backend): + """get_attribute returns attribute by its own digest.""" + names_digest = BASE_LEVEL1["names"] + result = backend.get_attribute("names", names_digest) + assert isinstance(result, list) + + def test_get_attribute_not_found(self, backend): + """get_attribute raises KeyError for missing attribute.""" + with pytest.raises(KeyError): + backend.get_attribute("names", "nonexistent_digest") + + def test_compare_digests(self, backend): + """compare_digests returns comparison dict.""" + result = backend.compare_digests(BASE_DIGEST, DIFFERENT_NAMES_DIGEST) + assert "attributes" in result + assert "array_elements" in result + + def test_compare_digests_not_found(self, backend): + """compare_digests raises ValueError for missing digest.""" + with pytest.raises(ValueError): + backend.compare_digests("nonexistent", DIFFERENT_NAMES_DIGEST) + + def test_compare_digest_with_level2(self, backend): + """compare_digest_with_level2 compares stored vs POSTed collection.""" + level2_b = backend.get_collection(DIFFERENT_NAMES_DIGEST, level=2) + result = backend.compare_digest_with_level2(BASE_DIGEST, level2_b) + assert "attributes" in result + assert "array_elements" in result + + def test_list_collections(self, backend): + """list_collections returns paginated results.""" + result = backend.list_collections() + assert "results" in result + assert "pagination" in result + assert result["pagination"]["total"] >= 2 + + def test_list_collections_pagination(self, backend): + """list_collections respects page_size.""" + result = backend.list_collections(page=0, page_size=1) + assert len(result["results"]) <= 1 + + def test_collection_count(self, backend): + """collection_count returns total number of collections.""" + count = backend.collection_count() + assert count >= 2 + + def test_capabilities(self, backend): + """capabilities returns expected keys for RefgetStoreBackend.""" + caps = backend.capabilities() + assert caps["backend_type"] == "refget_store" + assert "n_collections" in caps + assert "n_sequences" in caps + assert "has_sequence_data" in caps + assert isinstance(caps["collection_alias_namespaces"], list) + assert isinstance(caps["sequence_alias_namespaces"], list) + assert caps["n_collections"] >= 2 + + +@pytest.mark.skipif(not _RUST_BINDINGS_AVAILABLE, reason="gtars is not installed") +class TestStoreBackend501: + """Verify DB-only endpoints return 501 when only RefgetStoreBackend is configured.""" + + @pytest.fixture + def store_client(self): + """Create a TestClient with RefgetStoreBackend but no dbagent.""" + app = FastAPI() + router = create_refget_router(sequences=False, collections=True, pangenomes=False) + app.include_router(router, prefix="/seqcol") + + store = RefgetStore.in_memory() + store.add_sequence_collection_from_fasta(str(BASE_FASTA)) + backend = RefgetStoreBackend(store.into_readonly()) + app.state.backend = backend + # Deliberately do NOT set app.state.dbagent + return TestClient(app) + + def test_list_attributes_returns_501(self, store_client): + """GET /list/attributes/names returns 501 without dbagent.""" + response = store_client.get("/seqcol/list/attributes/names") + assert response.status_code == 501 + assert "database backend" in response.json()["detail"].lower() + + def test_similarities_post_returns_501(self, store_client): + """POST /similarities/{digest} returns 501 without dbagent.""" + response = store_client.post( + f"/seqcol/similarities/{BASE_DIGEST}", + params={"species": "human"}, + ) + assert response.status_code == 501 + + 
def test_similarities_json_post_returns_501(self, store_client): + """POST /similarities/ returns 501 without dbagent.""" + response = store_client.post( + "/seqcol/similarities/", + json={"names": ["chr1"], "lengths": [100], "sequences": ["abc"]}, + ) + assert response.status_code == 501 + + def test_backend_endpoints_still_work(self, store_client): + """Backend-powered endpoints work fine without dbagent.""" + # GET /collection/{digest} uses get_backend, should work + response = store_client.get(f"/seqcol/collection/{BASE_DIGEST}") + assert response.status_code == 200 + data = response.json() + assert "names" in data + assert "lengths" in data + + def test_list_collections_still_works(self, store_client): + """GET /list/collection uses get_backend, should work.""" + response = store_client.get("/seqcol/list/collection") + assert response.status_code == 200 + data = response.json() + assert "results" in data + assert "pagination" in data diff --git a/tests/local/test_digest_functions.py b/tests/local/test_digest_functions.py index da6fed8..d3b4b34 100644 --- a/tests/local/test_digest_functions.py +++ b/tests/local/test_digest_functions.py @@ -1,15 +1,20 @@ +from pathlib import Path + import pytest from refget import GTARS_INSTALLED -from refget.digests import ga4gh_digest, py_sha512t24u_digest, py_md5_digest -from pathlib import Path +from refget.digests import ga4gh_digest, py_md5_digest, py_sha512t24u_digest if GTARS_INSTALLED: from gtars.refget import ( - sha512t24u_digest as gtars_sha512t24u_digest, - md5_digest as gtars_md5_digest, digest_fasta, ) + from gtars.refget import ( + md5_digest as gtars_md5_digest, + ) + from gtars.refget import ( + sha512t24u_digest as gtars_sha512t24u_digest, + ) @pytest.mark.skipif(not GTARS_INSTALLED, reason="gtars is not installed") diff --git a/tests/local/test_local_models.py b/tests/local/test_local_models.py index cc84e4f..75157f1 100644 --- a/tests/local/test_local_models.py +++ b/tests/local/test_local_models.py @@ -1,11 +1,12 @@ import json import os + import pytest + from refget import InvalidSeqColError from refget.models import SequenceCollection from refget.utils import compare_seqcols, validate_seqcol - -from tests.conftest import DEMO_FILES, DIGEST_TESTS, API_TEST_DIR +from tests.conftest import API_TEST_DIR, DEMO_FILES, DIGEST_TESTS # Pairs of files to compare, with the "correct" compare response COMPARE_TESTS = [ diff --git a/tests/local/test_local_models_gtars.py b/tests/local/test_local_models_gtars.py index 8ac2445..e0b2122 100644 --- a/tests/local/test_local_models_gtars.py +++ b/tests/local/test_local_models_gtars.py @@ -1,24 +1,26 @@ -import pytest import logging - -_LOGGER = logging.getLogger(__name__) from pathlib import Path +import pytest + from refget.models import SequenceCollection as pythonSequenceCollection +from refget.store import RefgetStore -from refget.store import RefgetStore, StorageMode +_LOGGER = logging.getLogger(__name__) try: - from gtars.refget import ( + from gtars.refget import ( # noqa: F401 SequenceCollection as gtarsSequenceCollection, + ) + from gtars.refget import ( digest_fasta, ) _RUST_BINDINGS_AVAILABLE = True -except ImportError as e: +except ImportError: _LOGGER.warning( - f"Could not import gtars python bindings. `from_PySequenceCollection` will not be available." + "Could not import gtars python bindings. `from_PySequenceCollection` will not be available." 
) _RUST_BINDINGS_AVAILABLE = False @@ -35,9 +37,9 @@ def test_pysequencecollection(self): bridged_seq_col = pythonSequenceCollection.from_PySequenceCollection( gtars_seq_col=gtars_digested_seq_col ) - assert ( - bridged_seq_col.digest == python_seq_col.digest == gtars_digested_seq_col.digest - ), "Top-level digest mismatch!" + assert bridged_seq_col.digest == python_seq_col.digest == gtars_digested_seq_col.digest, ( + "Top-level digest mismatch!" + ) assert bridged_seq_col.sequences.digest == python_seq_col.sequences.digest assert bridged_seq_col.sequences.value == python_seq_col.sequences.value diff --git a/tests/local/test_refget_clients.py b/tests/local/test_refget_clients.py index 13b81e4..77941df 100644 --- a/tests/local/test_refget_clients.py +++ b/tests/local/test_refget_clients.py @@ -8,7 +8,7 @@ see tests/integration/test_seqcolapi_client.py """ -from refget.clients import SequenceCollectionClient, FastaDrsClient +from refget.clients import FastaDrsClient, SequenceCollectionClient class TestClientConstruction: diff --git a/tests/local/test_remove_collection.py b/tests/local/test_remove_collection.py index 88fb208..b998770 100644 --- a/tests/local/test_remove_collection.py +++ b/tests/local/test_remove_collection.py @@ -1,14 +1,11 @@ """Smoke test for RefgetStore.remove_collection() Python binding.""" -import os -import tempfile - import pytest from refget.store import RefgetStore try: - from gtars.refget import RefgetStore as _check + from gtars.refget import RefgetStore as _check # noqa: F401 _RUST_BINDINGS_AVAILABLE = True except ImportError: @@ -24,15 +21,15 @@ def test_remove_collection_round_trip(): store.set_quiet(True) store.add_sequence_collection_from_fasta(FASTA_PATH) - assert len(store.list_collections()) == 1 + assert len(store.list_collections()["results"]) == 1 assert len(store.list_sequences()) > 0 - digest = store.list_collections()[0].digest + digest = store.list_collections()["results"][0].digest # Nonexistent returns False assert store.remove_collection("nonexistent") is False # Real removal with orphan cleanup assert store.remove_collection(digest, remove_orphan_sequences=True) is True - assert len(store.list_collections()) == 0 + assert len(store.list_collections()["results"]) == 0 assert len(store.list_sequences()) == 0 diff --git a/tests/local/test_store_seqcol_features.py b/tests/local/test_store_seqcol_features.py index 3779eb1..8a9bae2 100644 --- a/tests/local/test_store_seqcol_features.py +++ b/tests/local/test_store_seqcol_features.py @@ -8,9 +8,10 @@ """ import json -import pytest from pathlib import Path +import pytest + try: from refget.store import RefgetStore diff --git a/tests/test_cli/test_admin_commands.py b/tests/test_cli/test_admin_commands.py index f9d80de..3761d8f 100644 --- a/tests/test_cli/test_admin_commands.py +++ b/tests/test_cli/test_admin_commands.py @@ -7,9 +7,6 @@ Database-dependent admin tests are in tests/integration/test_cli_admin_integration.py """ -import pytest -import json - class TestAdminStatus: """Tests for: refget admin status diff --git a/tests/test_cli/test_config_commands.py b/tests/test_cli/test_config_commands.py index 8e9f78a..666864e 100644 --- a/tests/test_cli/test_config_commands.py +++ b/tests/test_cli/test_config_commands.py @@ -2,7 +2,6 @@ """Tests for refget config CLI commands.""" -import pytest import json @@ -101,11 +100,14 @@ def test_creates_config_file(self, cli, tmp_path, monkeypatch): # Provide minimal input for interactive prompts from typer.testing import CliRunner + from refget.cli import app 
runner = CliRunner() result = runner.invoke( - app, ["config", "init"], input=f"{tmp_path}/store\n\n\n" # Store path + defaults + app, + ["config", "init"], + input=f"{tmp_path}/store\n\n\n", # Store path + defaults ) # Config init should succeed or prompt for input @@ -116,6 +118,7 @@ def test_init_no_overwrite(self, cli, temp_config, monkeypatch): monkeypatch.setenv("REFGET_CONFIG", str(temp_config)) from typer.testing import CliRunner + from refget.cli import app runner = CliRunner() diff --git a/tests/test_cli/test_fasta_commands.py b/tests/test_cli/test_fasta_commands.py index 2f3ea6e..99060c3 100644 --- a/tests/test_cli/test_fasta_commands.py +++ b/tests/test_cli/test_fasta_commands.py @@ -6,21 +6,23 @@ These test CLI-specific behavior: output formatting, exit codes, argument parsing. """ -import pytest +import importlib.util import json -from pathlib import Path - -import sys import os +from pathlib import Path -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from conftest import ( - BASE_FASTA, - DIFFERENT_NAMES_FASTA, - TEST_FASTA_DIGESTS, - assert_json_output, - assert_valid_digest, +_conftest_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "conftest.py" ) +_spec = importlib.util.spec_from_file_location("tests_conftest", _conftest_path) +_conftest = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_conftest) + +BASE_FASTA = _conftest.BASE_FASTA +DIFFERENT_NAMES_FASTA = _conftest.DIFFERENT_NAMES_FASTA +TEST_FASTA_DIGESTS = _conftest.TEST_FASTA_DIGESTS +assert_json_output = _conftest.assert_json_output +assert_valid_digest = _conftest.assert_valid_digest class TestFastaDigest: @@ -184,7 +186,7 @@ def test_rgsi_format_and_content(self, cli, sample_fasta): assert "##seqcol_digest=" in content assert "#name\tlength\talphabet\tsha512t24u\tmd5\tdescription" in content - data_lines = [l for l in content.strip().split("\n") if not l.startswith("#")] + data_lines = [line for line in content.strip().split("\n") if not line.startswith("#")] assert len(data_lines) == 2 # sample_fasta has 2 sequences # Verify first sequence diff --git a/tests/test_cli/test_help.py b/tests/test_cli/test_help.py index b1e599b..f80ff4d 100644 --- a/tests/test_cli/test_help.py +++ b/tests/test_cli/test_help.py @@ -2,8 +2,6 @@ """Tests for CLI help output.""" -import pytest - class TestHelpOutput: """Verify help text displays correctly.""" diff --git a/tests/test_cli/test_seqcol_commands.py b/tests/test_cli/test_seqcol_commands.py index 88887d6..398fc47 100644 --- a/tests/test_cli/test_seqcol_commands.py +++ b/tests/test_cli/test_seqcol_commands.py @@ -7,21 +7,23 @@ Network-dependent tests are in tests/integration/test_cli_seqcol_integration.py """ -import pytest +import importlib.util import json -import sys import os -from pathlib import Path - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from conftest import ( - BASE_FASTA, - DIFFERENT_NAMES_FASTA, - DIFFERENT_ORDER_FASTA, - SUBSET_FASTA, - TEST_FASTA_DIGESTS, - assert_json_output, + +_conftest_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "conftest.py" ) +_spec = importlib.util.spec_from_file_location("tests_conftest", _conftest_path) +_conftest = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_conftest) + +BASE_FASTA = _conftest.BASE_FASTA +DIFFERENT_NAMES_FASTA = _conftest.DIFFERENT_NAMES_FASTA +DIFFERENT_ORDER_FASTA = _conftest.DIFFERENT_ORDER_FASTA +SUBSET_FASTA = 
_conftest.SUBSET_FASTA +TEST_FASTA_DIGESTS = _conftest.TEST_FASTA_DIGESTS +assert_json_output = _conftest.assert_json_output class TestSeqcolCompare: diff --git a/tests/test_cli/test_store_commands.py b/tests/test_cli/test_store_commands.py index 5544151..dd3c60a 100644 --- a/tests/test_cli/test_store_commands.py +++ b/tests/test_cli/test_store_commands.py @@ -2,21 +2,23 @@ """Tests for refget store CLI commands.""" -import pytest +import importlib.util import json -import sys import os -from pathlib import Path - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from conftest import ( - BASE_FASTA, - DIFFERENT_NAMES_FASTA, - DIFFERENT_ORDER_FASTA, - SAMPLE_FHR_JSON, - TEST_FASTA_DIGESTS, - assert_json_output, + +_conftest_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "conftest.py" ) +_spec = importlib.util.spec_from_file_location("tests_conftest", _conftest_path) +_conftest = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_conftest) + +BASE_FASTA = _conftest.BASE_FASTA +DIFFERENT_NAMES_FASTA = _conftest.DIFFERENT_NAMES_FASTA +DIFFERENT_ORDER_FASTA = _conftest.DIFFERENT_ORDER_FASTA +SAMPLE_FHR_JSON = _conftest.SAMPLE_FHR_JSON +TEST_FASTA_DIGESTS = _conftest.TEST_FASTA_DIGESTS +assert_json_output = _conftest.assert_json_output class TestStoreInit: @@ -215,7 +217,7 @@ def test_get_collection(self, cli, tmp_path): result = cli("store", "get", digest, "--path", str(store_path)) - data = assert_json_output(result, ["names", "lengths", "sequences"]) + assert_json_output(result, ["names", "lengths", "sequences"]) def test_get_nonexistent_digest(self, cli, tmp_path): """Returns error for nonexistent digest.""" @@ -290,9 +292,7 @@ def test_gets_sequence_by_name(self, cli, tmp_path): add_result = cli("store", "add", str(BASE_FASTA), "--path", str(store_path)) digest = json.loads(add_result.stdout)["digest"] - result = cli( - "store", "get", digest, "-s", "--name", "chr1", "--path", str(store_path) - ) + result = cli("store", "get", digest, "-s", "--name", "chr1", "--path", str(store_path)) assert result.exit_code == 0 # Output should be sequence (GGAA for chr1 in base.fa) @@ -509,8 +509,12 @@ def test_metadata_set_from_json_file(self, cli, tmp_path): store_path, digest = _setup_store_with_fasta(cli, tmp_path) result = cli( - "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), - "--path", str(store_path), + "store", + "metadata-set", + digest, + str(SAMPLE_FHR_JSON), + "--path", + str(store_path), ) assert result.exit_code == 0 @@ -521,8 +525,12 @@ def test_metadata_read_after_set(self, cli, tmp_path): store_path, digest = _setup_store_with_fasta(cli, tmp_path) cli( - "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), - "--path", str(store_path), + "store", + "metadata-set", + digest, + str(SAMPLE_FHR_JSON), + "--path", + str(store_path), ) result = cli("store", "metadata", digest, "--path", str(store_path)) @@ -539,8 +547,12 @@ def test_metadata_output_is_valid_json(self, cli, tmp_path): store_path, digest = _setup_store_with_fasta(cli, tmp_path) cli( - "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), - "--path", str(store_path), + "store", + "metadata-set", + digest, + str(SAMPLE_FHR_JSON), + "--path", + str(store_path), ) result = cli("store", "metadata", digest, "--path", str(store_path)) @@ -564,8 +576,12 @@ def test_metadata_set_nonexistent_file(self, cli, tmp_path): store_path, digest = _setup_store_with_fasta(cli, tmp_path) result = cli( - "store", "metadata-set", digest, 
"/nonexistent/fhr.json", - "--path", str(store_path), + "store", + "metadata-set", + digest, + "/nonexistent/fhr.json", + "--path", + str(store_path), ) assert result.exit_code != 0 @@ -576,8 +592,11 @@ def test_metadata_nonexistent_digest(self, cli, tmp_path): cli("store", "init", "--path", str(store_path)) result = cli( - "store", "metadata", "nonexistent_digest_123", - "--path", str(store_path), + "store", + "metadata", + "nonexistent_digest_123", + "--path", + str(store_path), ) assert result.exit_code != 0 @@ -588,23 +607,35 @@ def test_metadata_set_then_overwrite(self, cli, tmp_path): # Set original metadata cli( - "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), - "--path", str(store_path), + "store", + "metadata-set", + digest, + str(SAMPLE_FHR_JSON), + "--path", + str(store_path), ) # Create updated FHR JSON updated_fhr = tmp_path / "updated_fhr.json" - updated_fhr.write_text(json.dumps({ - "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", - "schemaVersion": 1.0, - "genome": "Updated organism", - "version": "v2.0", - })) + updated_fhr.write_text( + json.dumps( + { + "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1.0, + "genome": "Updated organism", + "version": "v2.0", + } + ) + ) # Overwrite cli( - "store", "metadata-set", digest, str(updated_fhr), - "--path", str(store_path), + "store", + "metadata-set", + digest, + str(updated_fhr), + "--path", + str(store_path), ) result = cli("store", "metadata", digest, "--path", str(store_path)) @@ -619,8 +650,12 @@ def test_metadata_removed_with_collection(self, cli, tmp_path): # Set metadata cli( - "store", "metadata-set", digest, str(SAMPLE_FHR_JSON), - "--path", str(store_path), + "store", + "metadata-set", + digest, + str(SAMPLE_FHR_JSON), + "--path", + str(store_path), ) # Remove the collection diff --git a/tests/test_cli/test_store_crate.py b/tests/test_cli/test_store_crate.py new file mode 100644 index 0000000..2be442d --- /dev/null +++ b/tests/test_cli/test_store_crate.py @@ -0,0 +1,305 @@ +# tests/test_cli/test_store_crate.py + +"""Tests for refget store crate CLI command.""" + +import importlib.util +import json +import os + +_conftest_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "conftest.py" +) +_spec = importlib.util.spec_from_file_location("tests_conftest", _conftest_path) +_conftest = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_conftest) + +BASE_FASTA = _conftest.BASE_FASTA +assert_json_output = _conftest.assert_json_output + + +def _init_and_add(cli, tmp_path): + """Initialize a store and add a FASTA, return store_path.""" + store_path = tmp_path / "store" + cli("store", "init", "--path", str(store_path)) + cli("store", "add", str(BASE_FASTA), "--path", str(store_path)) + return store_path + + +class TestStoreCrate: + """Tests for: refget store crate""" + + def test_produces_valid_json(self, cli, tmp_path): + """Crate command produces valid JSON output file.""" + store_path = _init_and_add(cli, tmp_path) + + result = cli( + "store", "crate", + "--path", str(store_path), + "--name", "Test Store", + ) + + assert result.exit_code == 0 + crate_path = store_path / "ro-crate-metadata.json" + assert crate_path.exists() + + crate = json.loads(crate_path.read_text()) + assert "@context" in crate + assert "@graph" in crate + assert isinstance(crate["@graph"], list) + + def test_has_must_entities(self, cli, tmp_path): + """Crate contains all MUST entities per 
the profile."""
+        store_path = _init_and_add(cli, tmp_path)
+
+        cli(
+            "store", "crate",
+            "--path", str(store_path),
+            "--name", "Test Store",
+        )
+
+        crate = json.loads((store_path / "ro-crate-metadata.json").read_text())
+        ids = {e["@id"] for e in crate["@graph"]}
+
+        # MUST entities
+        assert "ro-crate-metadata.json" in ids
+        assert "./" in ids
+        assert "rgstore.json" in ids
+        assert "sequences.rgsi" in ids
+        assert "sequences/" in ids
+        assert "collections/" in ids
+
+    def test_metadata_descriptor_conformsto(self, cli, tmp_path):
+        """Metadata descriptor has correct conformsTo."""
+        store_path = _init_and_add(cli, tmp_path)
+
+        cli(
+            "store", "crate",
+            "--path", str(store_path),
+            "--name", "Test Store",
+        )
+
+        crate = json.loads((store_path / "ro-crate-metadata.json").read_text())
+        descriptor = next(e for e in crate["@graph"] if e["@id"] == "ro-crate-metadata.json")
+
+        conforms = [c["@id"] for c in descriptor["conformsTo"]]
+        assert "https://w3id.org/ro/crate/1.2" in conforms
+        assert "https://w3id.org/ga4gh/refget/refgetstore-crate/0.1" in conforms
+
+    def test_root_dataset_name(self, cli, tmp_path):
+        """Root dataset has the specified name."""
+        store_path = _init_and_add(cli, tmp_path)
+
+        cli(
+            "store", "crate",
+            "--path", str(store_path),
+            "--name", "My Genome Store",
+        )
+
+        crate = json.loads((store_path / "ro-crate-metadata.json").read_text())
+        root = next(e for e in crate["@graph"] if e["@id"] == "./")
+        assert root["name"] == "My Genome Store"
+
+    def test_property_values(self, cli, tmp_path):
+        """Crate contains PropertyValue entities with correct stats."""
+        store_path = _init_and_add(cli, tmp_path)
+
+        cli(
+            "store", "crate",
+            "--path", str(store_path),
+            "--name", "Test Store",
+        )
+
+        crate = json.loads((store_path / "ro-crate-metadata.json").read_text())
+        props = {
+            e["propertyID"]: e["value"]
+            for e in crate["@graph"]
+            if e.get("@type") == "PropertyValue"
+        }
+
+        assert "storageMode" in props
+        assert "sequenceCount" in props
+        assert props["sequenceCount"] > 0
+        assert "collectionCount" in props
+        assert props["collectionCount"] >= 1
+        assert props["refgetDigestAlgorithm"] == "sha512t24u"
+
+    def test_author_parsing_orcid(self, cli, tmp_path):
+        """Parses 'Name <URL>' format into Person entity."""
+        store_path = _init_and_add(cli, tmp_path)
+
+        cli(
+            "store", "crate",
+            "--path", str(store_path),
+            "--name", "Test Store",
+            "--author", "Jane Doe <https://orcid.org/0000-0001-1234-5678>",
+        )
+
+        crate = json.loads((store_path / "ro-crate-metadata.json").read_text())
+
+        # Find Person entity
+        person = next(
+            (e for e in crate["@graph"] if e.get("@type") == "Person"),
+            None,
+        )
+        assert person is not None
+        assert person["@id"] == "https://orcid.org/0000-0001-1234-5678"
+        assert person["name"] == "Jane Doe"
+
+        # Root dataset references author
+        root = next(e for e in crate["@graph"] if e["@id"] == "./")
+        assert root["author"]["@id"] == "https://orcid.org/0000-0001-1234-5678"
+
+    def test_author_plain_name(self, cli, tmp_path):
+        """Handles plain name without URL."""
+        store_path = _init_and_add(cli, tmp_path)
+
+        cli(
+            "store", "crate",
+            "--path", str(store_path),
+            "--name", "Test Store",
+            "--author", "John Smith",
+        )
+
+        crate = json.loads((store_path / "ro-crate-metadata.json").read_text())
+        person = next(
+            (e for e in crate["@graph"] if e.get("@type") == "Person"),
+            None,
+        )
+        assert person is not None
+        assert person["name"] == "John Smith"
+
+    def test_license(self, cli, tmp_path):
+        """License creates a CreativeWork entity."""
+        store_path = _init_and_add(cli, tmp_path)
+
+        
cli( + "store", "crate", + "--path", str(store_path), + "--name", "Test Store", + "--license", "https://creativecommons.org/publicdomain/zero/1.0/", + ) + + crate = json.loads((store_path / "ro-crate-metadata.json").read_text()) + + root = next(e for e in crate["@graph"] if e["@id"] == "./") + assert root["license"]["@id"] == "https://creativecommons.org/publicdomain/zero/1.0/" + + license_entity = next( + (e for e in crate["@graph"] + if e["@id"] == "https://creativecommons.org/publicdomain/zero/1.0/"), + None, + ) + assert license_entity is not None + assert license_entity["@type"] == "CreativeWork" + + def test_custom_output_path(self, cli, tmp_path): + """Writes to custom output path.""" + store_path = _init_and_add(cli, tmp_path) + output_path = tmp_path / "custom" / "crate.json" + + result = cli( + "store", "crate", + "--path", str(store_path), + "--name", "Test Store", + "--output", str(output_path), + ) + + assert result.exit_code == 0 + assert output_path.exists() + + crate = json.loads(output_path.read_text()) + assert "@graph" in crate + + def test_no_aliases_when_absent(self, cli, tmp_path): + """Does not include aliases/ when directory doesn't exist.""" + store_path = _init_and_add(cli, tmp_path) + + # Remove aliases dir if it exists + aliases = store_path / "aliases" + if aliases.exists(): + import shutil + shutil.rmtree(aliases) + + cli( + "store", "crate", + "--path", str(store_path), + "--name", "Test Store", + ) + + crate = json.loads((store_path / "ro-crate-metadata.json").read_text()) + ids = {e["@id"] for e in crate["@graph"]} + assert "aliases/" not in ids + + def test_create_action_provenance(self, cli, tmp_path): + """Crate includes CreateAction with refget version.""" + store_path = _init_and_add(cli, tmp_path) + + cli( + "store", "crate", + "--path", str(store_path), + "--name", "Test Store", + ) + + crate = json.loads((store_path / "ro-crate-metadata.json").read_text()) + + action = next( + (e for e in crate["@graph"] if e.get("@type") == "CreateAction"), + None, + ) + assert action is not None + assert "endTime" in action + assert action["instrument"]["@id"] == "#refget-software" + + sw = next( + (e for e in crate["@graph"] if e["@id"] == "#refget-software"), + None, + ) + assert sw is not None + assert sw["@type"] == "SoftwareApplication" + assert "version" in sw + + def test_description_optional(self, cli, tmp_path): + """Description is included when provided, absent when not.""" + store_path = _init_and_add(cli, tmp_path) + + # Without description + cli( + "store", "crate", + "--path", str(store_path), + "--name", "Test Store", + ) + crate = json.loads((store_path / "ro-crate-metadata.json").read_text()) + root = next(e for e in crate["@graph"] if e["@id"] == "./") + assert "description" not in root + + # With description + cli( + "store", "crate", + "--path", str(store_path), + "--name", "Test Store", + "--description", "A test store for genomes", + ) + crate = json.loads((store_path / "ro-crate-metadata.json").read_text()) + root = next(e for e in crate["@graph"] if e["@id"] == "./") + assert root["description"] == "A test store for genomes" + + def test_empty_store(self, cli, tmp_path): + """Crate works for empty store with zero counts.""" + store_path = tmp_path / "store" + cli("store", "init", "--path", str(store_path)) + + result = cli( + "store", "crate", + "--path", str(store_path), + "--name", "Empty Store", + ) + + assert result.exit_code == 0 + crate = json.loads((store_path / "ro-crate-metadata.json").read_text()) + props = { + e["propertyID"]: 
e["value"] + for e in crate["@graph"] + if e.get("@type") == "PropertyValue" + } + assert props["sequenceCount"] == 0 + assert props["collectionCount"] == 0 diff --git a/tests/test_cli/test_store_pull.py b/tests/test_cli/test_store_pull.py index c90cd72..0b737a4 100644 --- a/tests/test_cli/test_store_pull.py +++ b/tests/test_cli/test_store_pull.py @@ -7,22 +7,25 @@ would deadlock a Python-thread-based HTTP server. """ +import importlib.util import json import os -import signal import socket import subprocess import sys import time -from pathlib import Path import pytest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from conftest import ( - BASE_FASTA, - DIFFERENT_NAMES_FASTA, +_conftest_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "conftest.py" ) +_spec = importlib.util.spec_from_file_location("tests_conftest", _conftest_path) +_conftest = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_conftest) + +BASE_FASTA = _conftest.BASE_FASTA +DIFFERENT_NAMES_FASTA = _conftest.DIFFERENT_NAMES_FASTA # Skip entire module if gtars is not installed pytest.importorskip("gtars") @@ -168,8 +171,7 @@ def test_pull_eager_fetches_sequences(self, cli, tmp_path, remote_store_server): cli("store", "init", "--path", str(local_store)) result = cli( - "store", "pull", digest, "--server", server_url, - "--path", str(local_store), "--eager" + "store", "pull", digest, "--server", server_url, "--path", str(local_store), "--eager" ) assert result.exit_code == 0, f"Eager pull failed: {result.stdout}" @@ -204,8 +206,14 @@ def test_pull_from_file(self, cli, tmp_path, multi_remote_store_server): digest_file.write_text(f"{digest1}\n{digest2}\n") result = cli( - "store", "pull", "--file", str(digest_file), - "--server", server_url, "--path", str(local_store) + "store", + "pull", + "--file", + str(digest_file), + "--server", + server_url, + "--path", + str(local_store), ) assert result.exit_code == 0, f"Batch pull failed: {result.stdout}" @@ -223,8 +231,14 @@ def test_pull_file_with_blank_lines(self, cli, tmp_path, remote_store_server): digest_file.write_text(f"\n \n{digest}\n\n \n") result = cli( - "store", "pull", "--file", str(digest_file), - "--server", server_url, "--path", str(local_store) + "store", + "pull", + "--file", + str(digest_file), + "--server", + server_url, + "--path", + str(local_store), ) assert result.exit_code == 0 @@ -239,8 +253,14 @@ def test_pull_file_not_found(self, cli, tmp_path): cli("store", "init", "--path", str(local_store)) result = cli( - "store", "pull", "--file", "/nonexistent/digests.txt", - "--server", "http://127.0.0.1:1", "--path", str(local_store) + "store", + "pull", + "--file", + "/nonexistent/digests.txt", + "--server", + "http://127.0.0.1:1", + "--path", + str(local_store), ) assert result.exit_code != 0 @@ -255,8 +275,14 @@ def test_pull_empty_file(self, cli, tmp_path, remote_store_server): digest_file.write_text("") result = cli( - "store", "pull", "--file", str(digest_file), - "--server", server_url, "--path", str(local_store) + "store", + "pull", + "--file", + str(digest_file), + "--server", + server_url, + "--path", + str(local_store), ) assert result.exit_code != 0 @@ -292,8 +318,13 @@ def test_pull_nonexistent_digest(self, cli, tmp_path, remote_store_server): cli("store", "init", "--path", str(local_store)) result = cli( - "store", "pull", "NONEXISTENT_DIGEST_12345678901234", - "--server", server_url, "--path", str(local_store) + "store", + "pull", + 
"NONEXISTENT_DIGEST_12345678901234", + "--server", + server_url, + "--path", + str(local_store), ) assert result.exit_code != 0 @@ -306,8 +337,13 @@ def test_pull_unreachable_server(self, cli, tmp_path): cli("store", "init", "--path", str(local_store)) result = cli( - "store", "pull", "some_digest_abc123", - "--server", "http://127.0.0.1:1", "--path", str(local_store) + "store", + "pull", + "some_digest_abc123", + "--server", + "http://127.0.0.1:1", + "--path", + str(local_store), ) assert result.exit_code != 0 @@ -317,10 +353,7 @@ def test_pull_no_digest_or_file(self, cli, tmp_path): local_store = tmp_path / "noarg_store" cli("store", "init", "--path", str(local_store)) - result = cli( - "store", "pull", - "--server", "http://127.0.0.1:1", "--path", str(local_store) - ) + result = cli("store", "pull", "--server", "http://127.0.0.1:1", "--path", str(local_store)) assert result.exit_code != 0 @@ -333,9 +366,15 @@ def test_pull_both_digest_and_file(self, cli, tmp_path): digest_file.write_text("some_digest\n") result = cli( - "store", "pull", "some_digest", - "--file", str(digest_file), - "--server", "http://127.0.0.1:1", "--path", str(local_store) + "store", + "pull", + "some_digest", + "--file", + str(digest_file), + "--server", + "http://127.0.0.1:1", + "--path", + str(local_store), ) assert result.exit_code != 0 @@ -346,15 +385,9 @@ def test_pull_no_server_configured(self, cli, tmp_path, monkeypatch): cli("store", "init", "--path", str(local_store)) # Patch _find_remote_urls to return empty list - monkeypatch.setattr( - "refget.cli.store._find_remote_urls", - lambda server_override=None: [] - ) + monkeypatch.setattr("refget.cli.store._find_remote_urls", lambda server_override=None: []) - result = cli( - "store", "pull", "some_digest", - "--path", str(local_store) - ) + result = cli("store", "pull", "some_digest", "--path", str(local_store)) assert result.exit_code != 0 @@ -383,7 +416,7 @@ def test_pull_tries_next_remote_on_failure( # Patch to return empty server first, then the populated one monkeypatch.setattr( "refget.cli.store._find_remote_urls", - lambda server_override=None: [empty_url, server_url] + lambda server_override=None: [empty_url, server_url], ) result = cli("store", "pull", digest, "--path", str(local_store), "--quiet") diff --git a/tests/test_cli_integration/test_workflows.py b/tests/test_cli_integration/test_workflows.py index 2fce20c..8872f6a 100644 --- a/tests/test_cli_integration/test_workflows.py +++ b/tests/test_cli_integration/test_workflows.py @@ -6,20 +6,22 @@ These tests verify that commands work together correctly in typical usage patterns. 
""" -import pytest +import importlib.util import json -import sys import os -from pathlib import Path - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from conftest import ( - BASE_FASTA, - DIFFERENT_NAMES_FASTA, - DIFFERENT_ORDER_FASTA, - SUBSET_FASTA, - TEST_FASTA_DIGESTS, + +_conftest_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "conftest.py" ) +_spec = importlib.util.spec_from_file_location("tests_conftest", _conftest_path) +_conftest = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(_conftest) + +BASE_FASTA = _conftest.BASE_FASTA +DIFFERENT_NAMES_FASTA = _conftest.DIFFERENT_NAMES_FASTA +DIFFERENT_ORDER_FASTA = _conftest.DIFFERENT_ORDER_FASTA +SUBSET_FASTA = _conftest.SUBSET_FASTA +TEST_FASTA_DIGESTS = _conftest.TEST_FASTA_DIGESTS class TestDigestAndCompare: From 82e41553f906754c6a33d1f4fc8b6c35ee25916d Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 13 Mar 2026 08:10:22 -0400 Subject: [PATCH 20/31] Reorganize data_loaders into task-specific subdirectories --- data_loaders/ref-genome-analysis/README.md | 60 +++ .../aliases/build_ncbi_alias_table.py | 381 ++++++++++++++++++ .../aliases/register_aliases.sbatch | 15 + .../aliases/register_ncbi_aliases.py | 231 +++++++++++ .../ref-genome-analysis/build_refgetstore.py | 132 ------ .../examples/test_20_genomes.py | 165 ++++++++ .../fhr/batch_generate_fhr.py | 98 +++++ .../fhr/genomeark_to_fhr.py | 214 ++++++++++ .../fhr/load_fhr_metadata.py | 97 +++++ .../fhr/metadata/GCA_000001405.29.fhr.json | 36 ++ .../fhr/metadata/GCA_000001405.fhr.json | 36 ++ .../fhr/metadata/GCA_964261635.1.fhr.json | 41 ++ .../fhr/metadata/GCA_964263255.1.fhr.json | 41 ++ .../{ => inventory}/inventory_genomes.py | 0 .../process-all-genomes.sbatch | 15 - .../profiling/profile_all.py | 48 +++ .../profiling/profile_all.sbatch | 15 + .../profiling/profile_batch.py | 28 ++ .../profiling/profile_memory.py | 87 ++++ .../profiling/profile_memory.sbatch | 15 + .../profiling/profile_newt.py | 57 +++ .../profiling/profile_newt.sbatch | 15 + .../profiling/profile_normal.py | 34 ++ .../profiling/profile_normal.sbatch | 15 + .../{ => verify}/verify_refgetstore.py | 15 +- .../riva_pangenome_analysis/update-gtars.sh | 25 ++ 26 files changed, 1762 insertions(+), 154 deletions(-) create mode 100644 data_loaders/ref-genome-analysis/README.md create mode 100644 data_loaders/ref-genome-analysis/aliases/build_ncbi_alias_table.py create mode 100644 data_loaders/ref-genome-analysis/aliases/register_aliases.sbatch create mode 100644 data_loaders/ref-genome-analysis/aliases/register_ncbi_aliases.py delete mode 100644 data_loaders/ref-genome-analysis/build_refgetstore.py create mode 100644 data_loaders/ref-genome-analysis/examples/test_20_genomes.py create mode 100755 data_loaders/ref-genome-analysis/fhr/batch_generate_fhr.py create mode 100755 data_loaders/ref-genome-analysis/fhr/genomeark_to_fhr.py create mode 100644 data_loaders/ref-genome-analysis/fhr/load_fhr_metadata.py create mode 100755 data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.29.fhr.json create mode 100755 data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.fhr.json create mode 100644 data_loaders/ref-genome-analysis/fhr/metadata/GCA_964261635.1.fhr.json create mode 100644 data_loaders/ref-genome-analysis/fhr/metadata/GCA_964263255.1.fhr.json rename data_loaders/ref-genome-analysis/{ => inventory}/inventory_genomes.py (100%) delete mode 100644 data_loaders/ref-genome-analysis/process-all-genomes.sbatch create mode 
100644 data_loaders/ref-genome-analysis/profiling/profile_all.py create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_all.sbatch create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_batch.py create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_memory.py create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_memory.sbatch create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_newt.py create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_newt.sbatch create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_normal.py create mode 100644 data_loaders/ref-genome-analysis/profiling/profile_normal.sbatch rename data_loaders/ref-genome-analysis/{ => verify}/verify_refgetstore.py (97%) create mode 100644 data_loaders/riva_pangenome_analysis/update-gtars.sh diff --git a/data_loaders/ref-genome-analysis/README.md b/data_loaders/ref-genome-analysis/README.md new file mode 100644 index 0000000..6f71ee6 --- /dev/null +++ b/data_loaders/ref-genome-analysis/README.md @@ -0,0 +1,60 @@ +# ref-genome-analysis + +Pipeline for loading reference genome FASTA files into a RefgetStore and enriching them with NCBI aliases and FHR provenance metadata. + +## Pipeline stages + +Execute in order: + +``` +inventory --> build --> aliases --> fhr --> verify +``` + +| Stage | Directory | Purpose | +|---|---|---| +| **inventory** | `inventory/` | Scan brickyard FASTA files, produce `refgenomes_inventory.csv` | +| **build** | `build/` | Load FASTAs into RefgetStore, produce `digest_map.csv` | +| **aliases** | `aliases/` | Download NCBI assembly reports, build alias table, register sequence/collection aliases | +| **fhr** | `fhr/` | Generate and attach FHR provenance metadata (species, taxon, accession, submitter, etc.) | +| **verify** | `verify/` | Automated pass/fail checks against the store | +| **profiling** | `profiling/` | Memory and timing benchmarks | +| **examples** | `examples/` | End-to-end test scripts (e.g., load 20 genomes with FHR) | + +## Rivanna paths + +All data lives within the `refgenomes_fasta` brickyard brick: + +``` +/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/ +├── homo_sapiens/... # Source FASTAs +├── mus_musculus/... +├── refgenomes_inventory.csv # Inventory of all FASTAs +├── refget_store/ # The RefgetStore (fixed-format, don't modify manually) +└── refget_staging/ # Pipeline intermediates + ├── assembly_reports/ # Downloaded NCBI assembly_report.txt files + ├── ncbi_alias_table.csv # Parsed alias table (367K sequence rows) + ├── fhr_metadata/ # Generated FHR provenance JSON files + └── digest_map.csv # Build output mapping FASTAs to digests +``` + +- **Store**: `.../refgenomes_fasta/refget_store` +- **Staging**: `.../refgenomes_fasta/refget_staging` +- **This pipeline**: `.../refgenomes_fasta/refget/data_loaders/ref-genome-analysis/` + +## Quick start (Rivanna) + +```bash +module load miniforge/24.3.0-py3.11 + +# 1. Build store +sbatch build/build_refgetstore.sbatch + +# 2. Register NCBI aliases +sbatch aliases/register_aliases.sbatch + +# 3. Attach FHR metadata +cd fhr && python load_fhr_metadata.py --store-path /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget_store --fhr-dir metadata/ + +# 4. 
Verify +cd verify && python verify_refgetstore.py +``` diff --git a/data_loaders/ref-genome-analysis/aliases/build_ncbi_alias_table.py b/data_loaders/ref-genome-analysis/aliases/build_ncbi_alias_table.py new file mode 100644 index 0000000..8c0d07c --- /dev/null +++ b/data_loaders/ref-genome-analysis/aliases/build_ncbi_alias_table.py @@ -0,0 +1,381 @@ +#!/usr/bin/env python3 +""" +Build NCBI alias mapping table from assembly reports. + +Downloads NCBI assembly_report.txt files for each accession in the inventory +CSV and parses them into a flat CSV mapping sequence names to accessions. + +This is Phase A of the alias registration pipeline -- it produces a standalone +CSV with no store dependency. Needs only the inventory CSV and internet access. + +Usage: + python build_ncbi_alias_table.py --inventory refgenomes_inventory.csv + python build_ncbi_alias_table.py --inventory refgenomes_inventory.csv --limit 3 + python build_ncbi_alias_table.py --inventory refgenomes_inventory.csv --download-only +""" + +import argparse +import csv +import os +import re +import sys +import time +import urllib.error +import urllib.request + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +INVENTORY_CSV = f"{BRICK_ROOT}/refgenomes_inventory.csv" +STAGING_DIR = f"{BRICK_ROOT}/refget_staging" +ACCESSION_PATTERN = re.compile(r"(GC[AF]_\d+\.\d+)") +NCBI_FTP_BASE = "https://ftp.ncbi.nlm.nih.gov/genomes/all" + +OUTPUT_COLUMNS = [ + "accession", + "sequence_name", + "sequence_length", + "refseq_accn", + "genbank_accn", + "ucsc_name", + "genbank_assembly_accn", + "refseq_assembly_accn", +] + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Download NCBI assembly reports and build alias mapping table." + ) + parser.add_argument( + "--inventory", default=INVENTORY_CSV, help="Path to refgenomes_inventory.csv" + ) + parser.add_argument( + "--report-cache", + default=f"{STAGING_DIR}/assembly_reports", + help="Directory to cache downloaded assembly_report.txt files", + ) + parser.add_argument( + "--output", + default=f"{STAGING_DIR}/ncbi_alias_table.csv", + help="Output CSV path", + ) + parser.add_argument( + "--limit", type=int, default=None, help="Process only first N accessions" + ) + parser.add_argument( + "--offset", type=int, default=0, help="Skip first N accessions" + ) + parser.add_argument( + "--download-only", + action="store_true", + help="Download reports but don't parse into table", + ) + return parser.parse_args() + + +# --------------------------------------------------------------------------- +# Step A2: Read inventory and extract accessions +# --------------------------------------------------------------------------- + +def read_accessions_from_inventory(csv_path): + """Read inventory CSV and return list of (accession, filename) pairs. + + Filters to rows with a non-empty accession matching the GCF_/GCA_ pattern. 
+ """ + pairs = [] + seen_accessions = set() + with open(csv_path, newline="") as f: + reader = csv.DictReader(f) + if reader.fieldnames is None: + print(f"ERROR: {csv_path} appears to be empty", file=sys.stderr) + sys.exit(1) + for row in reader: + accession = row.get("accession", "").strip() + filename = row.get("filename", "").strip() + if not accession or not ACCESSION_PATTERN.match(accession): + continue + if accession in seen_accessions: + continue + seen_accessions.add(accession) + pairs.append((accession, filename)) + return pairs + + +# --------------------------------------------------------------------------- +# Step A3: Construct NCBI FTP URLs from filename +# --------------------------------------------------------------------------- + +def derive_assembly_name(accession, filename): + """Derive the assembly name from the FASTA filename. + + Example: + accession = "GCF_000001405.40" + filename = "GCF_000001405.40_GRCh38.p14_genomic.fna.gz" + returns "GRCh38.p14" + + The filename pattern is: {accession}_{assembly_name}_genomic.fna[.gz] + """ + # Strip the accession prefix and _genomic.fna[.gz] suffix + prefix = accession + "_" + if not filename.startswith(prefix): + return None + rest = filename[len(prefix):] + # Remove _genomic.fna, _genomic.fna.gz, _genomic.fa.gz, etc. + rest = re.sub(r"_genomic\.(fna|fa|fasta)(\.gz)?$", "", rest) + if not rest: + return None + return rest + + +def accession_to_ftp_dir(accession): + """Convert an accession to its NCBI FTP parent directory URL. + + GCF_963692335.1 -> https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/963/692/335/ + """ + match = re.match(r"(GC[AF])_(\d+)\.\d+", accession) + if not match: + return None + prefix = match.group(1) + numeric = match.group(2).zfill(9) + d1, d2, d3 = numeric[0:3], numeric[3:6], numeric[6:9] + return f"{NCBI_FTP_BASE}/{prefix}/{d1}/{d2}/{d3}/" + + +def lookup_assembly_name_from_ftp(accession): + """Scrape the NCBI FTP directory listing to find the assembly name. + + The directory contains a single subdirectory like GCF_963692335.1_fOsmEpe2.1/. + We extract the assembly name from that. + """ + dir_url = accession_to_ftp_dir(accession) + if not dir_url: + return None + try: + req = urllib.request.Request(dir_url, headers={"User-Agent": "refget-alias-builder/1.0"}) + with urllib.request.urlopen(req, timeout=15) as response: + html = response.read().decode("utf-8", errors="replace") + # Look for a link like GCF_963692335.1_fOsmEpe2.1/ + pattern = re.escape(accession) + r"_([^/\"]+)/" + m = re.search(pattern, html) + if m: + return m.group(1) + except (urllib.error.URLError, urllib.error.HTTPError, OSError): + pass + return None + + +def construct_report_url(accession, assembly_name): + """Construct the NCBI FTP URL for an assembly_report.txt. + + URL pattern: + https://ftp.ncbi.nlm.nih.gov/genomes/all/{GCF|GCA}/{d1}/{d2}/{d3}/ + {accession}_{assembly_name}/{accession}_{assembly_name}_assembly_report.txt + + Where d1/d2/d3 are 3-char chunks of the numeric part of the accession + (the digits between the underscore and the dot). 
+ """ + dir_url = accession_to_ftp_dir(accession) + if not dir_url: + return None + stem = f"{accession}_{assembly_name}" + return f"{dir_url}{stem}/{stem}_assembly_report.txt" + + +# --------------------------------------------------------------------------- +# Step A4: Download with caching and rate limiting +# --------------------------------------------------------------------------- + +def download_report(accession, filename, cache_dir, sleep_sec=0.3): + """Download assembly_report.txt for a given accession. + + Returns (cache_path, status) where status is one of: + "cached" - already existed in cache + "downloaded" - freshly downloaded + "failed" - download failed (logged to stderr) + "skipped" - could not derive assembly name from filename + """ + cache_path = os.path.join(cache_dir, f"{accession}_assembly_report.txt") + + # Check cache first + if os.path.exists(cache_path) and os.path.getsize(cache_path) > 0: + return cache_path, "cached" + + # Derive assembly name from filename, fall back to FTP directory lookup + assembly_name = derive_assembly_name(accession, filename) + if not assembly_name: + assembly_name = lookup_assembly_name_from_ftp(accession) + if assembly_name: + time.sleep(sleep_sec) # Rate limit the directory lookup too + else: + return cache_path, "skipped" + + url = construct_report_url(accession, assembly_name) + if not url: + print(f" WARNING: Cannot construct URL for {accession}", file=sys.stderr) + return cache_path, "skipped" + + # Download + try: + req = urllib.request.Request(url, headers={"User-Agent": "refget-alias-builder/1.0"}) + with urllib.request.urlopen(req, timeout=30) as response: + data = response.read() + with open(cache_path, "wb") as f: + f.write(data) + time.sleep(sleep_sec) + return cache_path, "downloaded" + except (urllib.error.URLError, urllib.error.HTTPError, OSError) as e: + print(f" FAILED: {accession} ({url}): {e}", file=sys.stderr) + return cache_path, "failed" + + +# --------------------------------------------------------------------------- +# Step A5: Parse reports into flat CSV +# --------------------------------------------------------------------------- + +def parse_assembly_report(filepath, accession): + """Parse an assembly_report.txt file into a list of row dicts. + + Returns (rows, genbank_assembly_accn, refseq_assembly_accn). 
+ """ + genbank_assembly_accn = "" + refseq_assembly_accn = "" + rows = [] + + with open(filepath, "r", errors="replace") as f: + for line in f: + line = line.rstrip("\n") + # Parse header metadata + if line.startswith("#"): + if "GenBank assembly accession:" in line: + m = ACCESSION_PATTERN.search(line) + if m: + genbank_assembly_accn = m.group(1) + elif "RefSeq assembly accession:" in line: + m = ACCESSION_PATTERN.search(line) + if m: + refseq_assembly_accn = m.group(1) + continue + + # Data rows: tab-separated, 10 columns + fields = line.split("\t") + if len(fields) < 9: + continue + + sequence_name = fields[0].strip() + genbank_accn = fields[4].strip() if len(fields) > 4 else "na" + refseq_accn = fields[6].strip() if len(fields) > 6 else "na" + sequence_length = fields[8].strip() if len(fields) > 8 else "na" + ucsc_name = fields[9].strip() if len(fields) > 9 else "na" + + # Normalize "na" to empty string + if genbank_accn == "na": + genbank_accn = "" + if refseq_accn == "na": + refseq_accn = "" + if ucsc_name == "na": + ucsc_name = "" + if sequence_length == "na": + sequence_length = "" + + rows.append({ + "accession": accession, + "sequence_name": sequence_name, + "sequence_length": sequence_length, + "refseq_accn": refseq_accn, + "genbank_accn": genbank_accn, + "ucsc_name": ucsc_name, + "genbank_assembly_accn": genbank_assembly_accn, + "refseq_assembly_accn": refseq_assembly_accn, + }) + + return rows + + +def write_alias_table(output_path, all_rows): + """Write the alias table CSV.""" + with open(output_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=OUTPUT_COLUMNS) + writer.writeheader() + writer.writerows(all_rows) + + +def main(): + args = parse_args() + + # Step A2: Read inventory and extract accessions + print(f"Reading inventory from {args.inventory}", file=sys.stderr) + pairs = read_accessions_from_inventory(args.inventory) + print(f"Found {len(pairs)} unique accessions", file=sys.stderr) + + # Apply offset and limit + if args.offset: + pairs = pairs[args.offset:] + print(f"Skipped first {args.offset} accessions", file=sys.stderr) + if args.limit: + pairs = pairs[: args.limit] + print(f"Limited to {args.limit} accessions", file=sys.stderr) + + # Create cache directory + os.makedirs(args.report_cache, exist_ok=True) + + # Step A4: Download reports + n_cached = 0 + n_downloaded = 0 + n_failed = 0 + n_skipped = 0 + downloaded_reports = [] # (accession, cache_path) + + print(f"\nDownloading assembly reports...", file=sys.stderr) + for i, (accession, filename) in enumerate(pairs, 1): + print( + f"[{i}/{len(pairs)}] {accession}...", + end=" ", + flush=True, + file=sys.stderr, + ) + cache_path, status = download_report(accession, filename, args.report_cache) + print(status, file=sys.stderr) + + if status == "cached": + n_cached += 1 + downloaded_reports.append((accession, cache_path)) + elif status == "downloaded": + n_downloaded += 1 + downloaded_reports.append((accession, cache_path)) + elif status == "failed": + n_failed += 1 + elif status == "skipped": + n_skipped += 1 + + print( + f"\nDownload summary: {n_downloaded} downloaded, {n_cached} cached, " + f"{n_failed} failed, {n_skipped} skipped", + file=sys.stderr, + ) + + if args.download_only: + print("--download-only specified, stopping before parsing.", file=sys.stderr) + return + + # Step A5: Parse reports into flat CSV + print(f"\nParsing assembly reports...", file=sys.stderr) + all_rows = [] + n_parsed = 0 + for accession, cache_path in downloaded_reports: + if not os.path.exists(cache_path) or 
os.path.getsize(cache_path) == 0: + continue + rows = parse_assembly_report(cache_path, accession) + all_rows.extend(rows) + n_parsed += 1 + + write_alias_table(args.output, all_rows) + + # Summary + print(f"\nResults:", file=sys.stderr) + print(f" Accessions processed: {len(pairs)}", file=sys.stderr) + print(f" Reports parsed: {n_parsed}", file=sys.stderr) + print(f" Total sequence rows: {len(all_rows)}", file=sys.stderr) + print(f" Output written to: {args.output}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/aliases/register_aliases.sbatch b/data_loaders/ref-genome-analysis/aliases/register_aliases.sbatch new file mode 100644 index 0000000..10dfe48 --- /dev/null +++ b/data_loaders/ref-genome-analysis/aliases/register_aliases.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=ncbi_aliases +#SBATCH --output=ncbi_aliases_%j.log +#SBATCH --error=ncbi_aliases_%j.log +#SBATCH --partition=standard +#SBATCH --time=4:00:00 +#SBATCH --mem=8G +#SBATCH --cpus-per-task=1 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/aliases + +python register_ncbi_aliases.py --store-path /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget_store diff --git a/data_loaders/ref-genome-analysis/aliases/register_ncbi_aliases.py b/data_loaders/ref-genome-analysis/aliases/register_ncbi_aliases.py new file mode 100644 index 0000000..c344d75 --- /dev/null +++ b/data_loaders/ref-genome-analysis/aliases/register_ncbi_aliases.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +""" +Register NCBI sequence and collection aliases in a RefgetStore. + +Phase B of the alias registration pipeline. Reads the ncbi_alias_table.csv +(from Phase A), matches sequences to store digests, and bulk-loads aliases +via temporary TSV files. 
+ +Usage: + python register_ncbi_aliases.py --store-path /path/to/store + python register_ncbi_aliases.py --store-path /path/to/store --dry-run + python register_ncbi_aliases.py --store-path /path/to/store --limit 5 +""" + +import argparse +import csv +import os +import sys +import tempfile +import time +from collections import defaultdict + +from refget.store import RefgetStore + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +STORE_PATH = f"{BRICK_ROOT}/refget_store" +INVENTORY_CSV = f"{BRICK_ROOT}/refgenomes_inventory.csv" +ALIAS_TABLE_CSV = f"{BRICK_ROOT}/refget_staging/ncbi_alias_table.csv" + + +def parse_args(): + parser = argparse.ArgumentParser(description="Register NCBI aliases in RefgetStore") + parser.add_argument("--store-path", default=STORE_PATH, help="Path to RefgetStore") + parser.add_argument("--alias-table", default=ALIAS_TABLE_CSV, help="Path to ncbi_alias_table.csv") + parser.add_argument("--inventory", default=INVENTORY_CSV, help="Path to refgenomes_inventory.csv") + parser.add_argument("--dry-run", action="store_true", help="Parse and match but don't register") + parser.add_argument("--limit", type=int, default=None, help="Process only first N accessions") + parser.add_argument("--offset", type=int, default=0, help="Skip first N accessions") + return parser.parse_args() + + +def read_inventory(csv_path): + """Read inventory CSV, return accession -> path mapping.""" + acc_to_path = {} + with open(csv_path, newline="") as f: + for row in csv.DictReader(f): + acc = row.get("accession", "").strip() + path = row.get("path", "").strip() + if acc and path: + acc_to_path[acc] = path + return acc_to_path + + +def read_alias_table(csv_path): + """Read alias table CSV, return accession -> list of row dicts.""" + acc_to_rows = defaultdict(list) + with open(csv_path, newline="") as f: + for row in csv.DictReader(f): + acc = row.get("accession", "").strip() + if acc: + acc_to_rows[acc].append(row) + return acc_to_rows + + +def write_tsv(path, pairs): + """Write alias\tdigest pairs to a TSV file.""" + with open(path, "w") as f: + for alias, digest in pairs: + f.write(f"{alias}\t{digest}\n") + + +def main(): + args = parse_args() + + # Read inputs + print(f"Reading inventory from {args.inventory}") + acc_to_path = read_inventory(args.inventory) + print(f" {len(acc_to_path)} accessions with paths") + + print(f"Reading alias table from {args.alias_table}") + acc_to_rows = read_alias_table(args.alias_table) + print(f" {len(acc_to_rows)} accessions, {sum(len(v) for v in acc_to_rows.values())} sequence rows") + + # Filter to accessions present in both + common_accessions = sorted(set(acc_to_path) & set(acc_to_rows)) + print(f" {len(common_accessions)} accessions in both inventory and alias table") + + if args.offset: + common_accessions = common_accessions[args.offset:] + print(f" Skipped first {args.offset}") + if args.limit: + common_accessions = common_accessions[:args.limit] + print(f" Limited to {args.limit}") + + # Open store + store = RefgetStore.on_disk(args.store_path) + store.set_quiet(True) + print(f"Store opened: {store.stats()}") + + # Accumulate all aliases in memory, then bulk-load at the end + seq_aliases = {"refseq": [], "insdc": [], "ucsc": []} + coll_aliases = {"refseq": [], "insdc": []} + + n_collections = 0 + n_matched = 0 + n_unmatched = 0 + n_skipped_files = 0 + t_start = time.time() + + for i, accession in enumerate(common_accessions, 1): + fasta_path = acc_to_path[accession] + alias_rows = acc_to_rows[accession] + + 
print(f"[{i}/{len(common_accessions)}] {accession} ({len(alias_rows)} seqs)...", end=" ", flush=True) + + # Get collection digest by loading (returns immediately if exists) + if not os.path.exists(fasta_path): + print("SKIP (file missing)") + n_skipped_files += 1 + continue + + try: + meta, was_new = store.add_sequence_collection_from_fasta(fasta_path) + except Exception as e: + print(f"SKIP ({e})") + n_skipped_files += 1 + continue + + coll_digest = meta.digest + n_collections += 1 + + # Collection-level aliases from report header + first_row = alias_rows[0] + genbank_acc = first_row.get("genbank_assembly_accn", "").strip() + refseq_acc = first_row.get("refseq_assembly_accn", "").strip() + + if refseq_acc: + coll_aliases["refseq"].append((refseq_acc, coll_digest)) + if genbank_acc: + coll_aliases["insdc"].append((genbank_acc, coll_digest)) + + # Get collection's sequences to match against alias table + level2 = store.get_collection_level2(coll_digest) + names = level2.get("names", []) + lengths = level2.get("lengths", []) + sequences = level2.get("sequences", []) + + # Build name -> (seq_digest, length) lookup + name_to_info = {} + for name, length, seq_digest in zip(names, lengths, sequences): + name_to_info[name] = (seq_digest, int(length)) + + # Match alias table rows to store sequences + matched_this = 0 + unmatched_this = 0 + for row in alias_rows: + seq_name = row.get("sequence_name", "").strip() + seq_length_str = row.get("sequence_length", "").strip() + refseq_accn = row.get("refseq_accn", "").strip() + genbank_accn = row.get("genbank_accn", "").strip() + ucsc_name = row.get("ucsc_name", "").strip() + + seq_length = int(seq_length_str) if seq_length_str else None + + # Try matching by sequence_name, then refseq_accn, then genbank_accn, then ucsc_name + seq_digest = None + for candidate in [seq_name, refseq_accn, genbank_accn, ucsc_name]: + if candidate and candidate in name_to_info: + store_digest, store_length = name_to_info[candidate] + if seq_length is None or store_length == seq_length: + seq_digest = store_digest + break + + if seq_digest is None: + unmatched_this += 1 + continue + + matched_this += 1 + + if refseq_accn: + seq_aliases["refseq"].append((refseq_accn, seq_digest)) + if genbank_accn: + seq_aliases["insdc"].append((genbank_accn, seq_digest)) + if ucsc_name: + seq_aliases["ucsc"].append((ucsc_name, seq_digest)) + + n_matched += matched_this + n_unmatched += unmatched_this + print(f"{coll_digest[:12]}... 
{matched_this}/{len(alias_rows)} matched") + + match_elapsed = time.time() - t_start + + # Summary of what was collected + n_seq_aliases = sum(len(v) for v in seq_aliases.values()) + n_coll_aliases = sum(len(v) for v in coll_aliases.values()) + print(f"\nMatching done in {match_elapsed:.1f}s") + print(f" Collections: {n_collections}, skipped: {n_skipped_files}") + print(f" Sequences matched: {n_matched}, unmatched: {n_unmatched}") + print(f" Sequence aliases to register: {n_seq_aliases}") + print(f" Collection aliases to register: {n_coll_aliases}") + + if args.dry_run: + print("\n[DRY RUN] Skipping alias registration.") + return + + # Bulk-load aliases via temp TSV files + print(f"\nRegistering aliases...") + with tempfile.TemporaryDirectory() as tmpdir: + for namespace, pairs in seq_aliases.items(): + if not pairs: + continue + tsv_path = os.path.join(tmpdir, f"seq_{namespace}.tsv") + write_tsv(tsv_path, pairs) + n = store.load_sequence_aliases(namespace, tsv_path) + print(f" sequences/{namespace}: {n} aliases loaded") + + for namespace, pairs in coll_aliases.items(): + if not pairs: + continue + tsv_path = os.path.join(tmpdir, f"coll_{namespace}.tsv") + write_tsv(tsv_path, pairs) + n = store.load_collection_aliases(namespace, tsv_path) + print(f" collections/{namespace}: {n} aliases loaded") + + total_elapsed = time.time() - t_start + print(f"\nDone in {total_elapsed:.1f}s") + print(f" Store stats: {store.stats()}") + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/build_refgetstore.py b/data_loaders/ref-genome-analysis/build_refgetstore.py deleted file mode 100644 index 68ef281..0000000 --- a/data_loaders/ref-genome-analysis/build_refgetstore.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -Build a RefgetStore from the refgenomes inventory CSV. - -Reads refgenomes_inventory.csv and populates a RefgetStore with all FASTA -files. No alias registration -- that is a separate, deliberate step. 
- -Usage: - python build_refgetstore.py [--inventory PATH] [--store-path PATH] [--output PATH] [--limit N] -""" - -import argparse -import csv -import sys -import time - -from refget.store import RefgetStore - -STORE_PATH = "/project/shefflab/brickyard/refget_store" -INVENTORY_CSV = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refgenomes_inventory.csv" -OUTPUT_CSV = "digest_map.csv" - - -def parse_args(): - parser = argparse.ArgumentParser(description="Build RefgetStore from inventory CSV") - parser.add_argument("--inventory", default=INVENTORY_CSV, help="Input inventory CSV") - parser.add_argument("--store-path", default=STORE_PATH, help="RefgetStore path") - parser.add_argument("--output", default=OUTPUT_CSV, help="Output digest map CSV") - parser.add_argument("--limit", type=int, default=None, help="Process only first N rows (for testing)") - parser.add_argument("--offset", type=int, default=0, help="Skip first N rows") - return parser.parse_args() - - -def read_inventory(csv_path): - """Read inventory CSV and return list of row dicts.""" - rows = [] - with open(csv_path, newline="") as f: - reader = csv.DictReader(f) - if reader.fieldnames is None: - print(f"ERROR: {csv_path} appears to be empty", file=sys.stderr) - sys.exit(1) - if "path" not in reader.fieldnames: - print(f"ERROR: {csv_path} missing required 'path' column", file=sys.stderr) - sys.exit(1) - for row in reader: - rows.append(row) - return rows - - -def write_digest_map(output_path, results): - """Write results to digest_map.csv.""" - fieldnames = ["path", "filename", "digest", "n_sequences", "was_new", "error"] - with open(output_path, "w", newline="") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(results) - - -def main(): - args = parse_args() - - inventory = read_inventory(args.inventory) - if args.offset: - inventory = inventory[args.offset:] - print(f"Skipped first {args.offset} records") - if args.limit: - inventory = inventory[:args.limit] - print(f"Limited to {args.limit} records") - total = len(inventory) - print(f"Processing {total} records from {args.inventory}") - - store = RefgetStore.on_disk(args.store_path) - print(f"Store initialized at {args.store_path}") - - results = [] - n_success = 0 - n_failed = 0 - n_new = 0 - t_start = time.time() - - for i, row in enumerate(inventory, 1): - fasta_path = row["path"] - filename = row.get("filename", "") - - t0 = time.time() - print(f"[{i}/{total}] {filename}...", end=" ", flush=True) - - try: - meta, was_new = store.add_sequence_collection_from_fasta(fasta_path, threads=4) - elapsed = time.time() - t0 - status = "NEW" if was_new else "exists" - if was_new: - n_new += 1 - print(f"{meta.digest} ({meta.n_sequences} seqs, {status}, {elapsed:.1f}s)") - n_success += 1 - results.append({ - "path": fasta_path, - "filename": filename, - "digest": meta.digest, - "n_sequences": meta.n_sequences, - "was_new": was_new, - "error": "", - }) - except Exception as e: - error_msg = f"{type(e).__name__}: {e}" - print(f"FAILED: {error_msg}") - n_failed += 1 - results.append({ - "path": fasta_path, - "filename": filename, - "digest": "", - "n_sequences": 0, - "was_new": False, - "error": error_msg, - }) - - write_digest_map(args.output, results) - - total_time = time.time() - t_start - print(f"\nDone in {total_time:.1f}s. 
{n_success}/{total} succeeded, {n_new} new, {n_failed} failed.") - print(f"Digest map written to {args.output}") - print(f"\nStore stats: {store.stats()}") - - if n_failed > 0: - print(f"\nFailed files:") - for r in results: - if r["error"]: - print(f" {r['filename']}: {r['error']}") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/data_loaders/ref-genome-analysis/examples/test_20_genomes.py b/data_loaders/ref-genome-analysis/examples/test_20_genomes.py new file mode 100644 index 0000000..6e98e85 --- /dev/null +++ b/data_loaders/ref-genome-analysis/examples/test_20_genomes.py @@ -0,0 +1,165 @@ +""" +Quick test: load 20 genomes into a RefgetStore and attach FHR metadata. + +Usage: + python test_20_genomes.py [--inventory PATH] [--limit N] +""" + +import argparse +import csv +import json +import os +import re +import sys +import tempfile +import time + +from gtars.refget import RefgetStore + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +INVENTORY_CSV = f"{BRICK_ROOT}/refgenomes_inventory.csv" +FHR_DIR = f"{BRICK_ROOT}/refget_staging/fhr_metadata" +STORE_PATH = "/scratch/ns5bc/test_refget_store_20" + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--inventory", default=INVENTORY_CSV) + parser.add_argument("--store-path", default=STORE_PATH) + parser.add_argument("--limit", type=int, default=20) + args = parser.parse_args() + + # Read inventory + with open(args.inventory, newline="") as f: + rows = list(csv.DictReader(f)) + rows = rows[:args.limit] + + print(f"Loading {len(rows)} genomes into {args.store_path}") + os.makedirs(args.store_path, exist_ok=True) + store = RefgetStore.on_disk(args.store_path) + + # Phase 1: Load FASTAs + print("\n=== Phase 1: Load FASTAs ===") + digest_map = {} # filename -> digest + t_start = time.time() + + for i, row in enumerate(rows, 1): + fasta_path = row["path"] + filename = row.get("filename", os.path.basename(fasta_path)) + t0 = time.time() + print(f"[{i}/{len(rows)}] {filename}...", end=" ", flush=True) + try: + meta, was_new = store.add_sequence_collection_from_fasta(fasta_path) + elapsed = time.time() - t0 + status = "NEW" if was_new else "exists" + print(f"{meta.digest} ({meta.n_sequences} seqs, {status}, {elapsed:.1f}s)") + digest_map[filename] = meta.digest + except Exception as e: + print(f"FAILED: {e}") + + t_fasta = time.time() - t_start + print(f"\nPhase 1 done: {len(digest_map)} loaded in {t_fasta:.1f}s") + + # Phase 2: Load FHR metadata (provenance only, no vitalStats) for all collections + # Map build names to GCA accessions for known species + BUILD_TO_ACCESSION = { + ("homo_sapiens", "hg19"): "GCA_000001405", + ("homo_sapiens", "hg38"): "GCA_000001405", + ("mus_musculus", "mm9"): "GCA_000001635", + ("mus_musculus", "mm10"): "GCA_000001635", + ("mus_musculus", "mm39"): "GCA_000001635", + } + + print("\n=== Phase 2: Load FHR metadata ===") + fhr_loaded = 0 + + # Build accession -> set of digests from inventory metadata + accession_digests = {} # accession -> set of digests + for row in rows: + filename = row.get("filename", "") + if filename not in digest_map: + continue + digest = digest_map[filename] + + # Try explicit accession column first + accession = row.get("accession", "").strip() + + # Try extracting from filename + if not accession: + m = re.search(r'(GCA_\d+(?:\.\d+)?)', filename) + if m: + accession = m.group(1) + + # Fall back to group+build mapping + if not accession: + group = row.get("group", "").strip() + build = row.get("build", "").strip() + 
accession = BUILD_TO_ACCESSION.get((group, build), "") + + if accession: + accession_digests.setdefault(accession, set()).add(digest) + + print(f" Found {len(accession_digests)} accessions across {sum(len(v) for v in accession_digests.values())} collections") + + def load_fhr_for_accession(store, accession, fhr_data, digests): + """Strip vitalStats and attach provenance FHR to all matching collections.""" + provenance = {k: v for k, v in fhr_data.items() if k != "vitalStats"} + loaded = 0 + with tempfile.NamedTemporaryFile(mode="w", suffix=".fhr.json", delete=False) as tmp: + json.dump(provenance, tmp, indent=2) + tmp_path = tmp.name + try: + for digest in digests: + store.load_fhr_metadata(digest, tmp_path) + print(f" {accession} -> {digest}") + loaded += 1 + finally: + os.unlink(tmp_path) + return loaded + + for accession, digests in sorted(accession_digests.items()): + # Check for pre-generated FHR file + fhr_path = os.path.join(FHR_DIR, f"{accession}.fhr.json") + if os.path.exists(fhr_path): + with open(fhr_path) as f: + fhr_data = json.load(f) + print(f" {accession}: loading from file ({len(digests)} collections)") + fhr_loaded += load_fhr_for_accession(store, accession, fhr_data, digests) + continue + + # Try NCBI API + try: + sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "fhr")) + from genomeark_to_fhr import fetch_ncbi_report, ncbi_to_fhr + print(f" {accession}: fetching from NCBI...", end=" ", flush=True) + report = fetch_ncbi_report(accession) + fhr_data = ncbi_to_fhr(report) + # Save full FHR (with vitalStats) for reference + os.makedirs(FHR_DIR, exist_ok=True) + with open(fhr_path, "w") as f: + json.dump(fhr_data, f, indent=2) + print(f"OK ({len(digests)} collections)") + fhr_loaded += load_fhr_for_accession(store, accession, fhr_data, digests) + except Exception as e: + print(f" {accession}: SKIP ({e})") + + print(f"\nPhase 2 done: {fhr_loaded} FHR entries loaded") + + # Summary + print("\n=== Summary ===") + store_stats = store.stats() + print(f"Store stats: {store_stats}") + fhr_digests = store.list_fhr_metadata() + print(f"FHR entries: {len(fhr_digests)}") + + # Verify FHR data is readable + for digest in fhr_digests: + fhr = store.get_fhr_metadata(digest) + print(f" {digest}: genome={fhr.genome}, version={fhr.version}") + + print(f"\nStore path: {args.store_path}") + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/fhr/batch_generate_fhr.py b/data_loaders/ref-genome-analysis/fhr/batch_generate_fhr.py new file mode 100755 index 0000000..63e3d81 --- /dev/null +++ b/data_loaders/ref-genome-analysis/fhr/batch_generate_fhr.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +"""Batch-generate FHR metadata for all VGP vertebrate genomes. + +Reads the inventory CSV, extracts unique GCA accessions for vertebrate genomes, +and fetches FHR metadata from NCBI for each. Skips accessions that already have +an FHR file in the output directory, so it's safe to re-run. 
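+Requests are throttled with a short pause between accessions and retried with
+backoff when NCBI responds with HTTP 429.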
+ +Usage: + python batch_generate_fhr.py --inventory /path/to/inventory.csv --output-dir /path/to/fhr_metadata/ + python batch_generate_fhr.py --inventory /path/to/inventory.csv --output-dir /path/to/fhr_metadata/ --group vertebrates +""" + +import argparse +import csv +import re +import sys +import os +import time + +from genomeark_to_fhr import process_accession + + +def main(): + parser = argparse.ArgumentParser(description="Batch-generate FHR metadata from inventory CSV") + parser.add_argument("--inventory", required=True, help="Path to refgenomes_inventory.csv") + parser.add_argument("--output-dir", required=True, help="Output directory for .fhr.json files") + parser.add_argument("--group", default="vertebrates", help="Filter by group column (default: vertebrates)") + parser.add_argument("--limit", type=int, default=None, help="Process only first N accessions") + parser.add_argument("--skip-genomeark", action="store_true", help="Skip GenomeArk YAML fetch (faster)") + args = parser.parse_args() + + # Read inventory and extract unique accessions for the target group + with open(args.inventory, newline="") as f: + rows = list(csv.DictReader(f)) + + accessions = set() + for row in rows: + if row.get("group", "").strip() != args.group: + continue + acc = row.get("accession", "").strip() + if not acc: + m = re.search(r'(GCA_\d+(?:\.\d+)?)', row.get("filename", "")) + if m: + acc = m.group(1) + if acc: + accessions.add(acc) + + accessions = sorted(accessions) + if args.limit: + accessions = accessions[:args.limit] + + # Check which ones already exist + os.makedirs(args.output_dir, exist_ok=True) + existing = {f.replace(".fhr.json", "") for f in os.listdir(args.output_dir) if f.endswith(".fhr.json")} + todo = [a for a in accessions if a not in existing] + + print(f"Group: {args.group}", file=sys.stderr) + print(f"Total accessions: {len(accessions)}", file=sys.stderr) + print(f"Already done: {len(accessions) - len(todo)}", file=sys.stderr) + print(f"To process: {len(todo)}", file=sys.stderr) + + if not todo: + print("Nothing to do!", file=sys.stderr) + return + + n_ok = 0 + n_fail = 0 + t_start = time.time() + + for i, acc in enumerate(todo, 1): + output_path = os.path.join(args.output_dir, f"{acc}.fhr.json") + ok = False + for attempt in range(3): + try: + print(f"[{i}/{len(todo)}] ", end="", file=sys.stderr) + process_accession(acc, output_path) + n_ok += 1 + ok = True + break + except Exception as e: + if "429" in str(e) and attempt < 2: + wait = 5 * (attempt + 1) + print(f"[{i}/{len(todo)}] {acc}: rate limited, waiting {wait}s...", file=sys.stderr) + time.sleep(wait) + else: + print(f"[{i}/{len(todo)}] {acc}: FAILED ({e})", file=sys.stderr) + n_fail += 1 + break + + # Throttle to ~3 requests/sec (NCBI + GenomeArk = 2 requests per accession) + time.sleep(0.3) + + elapsed = time.time() - t_start + print(f"\nDone in {elapsed:.0f}s: {n_ok} OK, {n_fail} failed out of {len(todo)}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/fhr/genomeark_to_fhr.py b/data_loaders/ref-genome-analysis/fhr/genomeark_to_fhr.py new file mode 100755 index 0000000..e8a1e72 --- /dev/null +++ b/data_loaders/ref-genome-analysis/fhr/genomeark_to_fhr.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +"""Generate FHR metadata JSON files from GenomeArk + NCBI Datasets API. + +Given a GCA accession, fetches: + 1. Assembly metadata from NCBI Datasets API (taxonomy, stats, sequencing tech) + 2. 
Species metadata from GenomeArk GitHub repo (common name, genome size, project) + +Outputs an FHR-compatible JSON file that can be loaded into a RefgetStore via +store.load_fhr_metadata(digest, path). + +Usage: + python genomeark_to_fhr.py GCA_964261635.1 [output.fhr.json] + python genomeark_to_fhr.py GCA_964261635.1 GCA_964263255.1 # multiple accessions +""" + +import json +import sys +import urllib.request +from pathlib import Path + + +def fetch_ncbi_report(accession: str) -> dict: + """Fetch assembly report from NCBI Datasets API.""" + url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{accession}/dataset_report" + req = urllib.request.Request(url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req) as resp: + data = json.loads(resp.read()) + reports = data.get("reports", []) + if not reports: + raise ValueError(f"No assembly report found for {accession}") + return reports[0] + + +def fetch_genomeark_yaml(species_name: str) -> dict | None: + """Fetch species YAML from genomeark-metadata GitHub repo.""" + filename = species_name.replace(" ", "_") + url = f"https://raw.githubusercontent.com/genomeark/genomeark-metadata/main/species/{filename}.yaml" + try: + import yaml + except ImportError: + # Fall back to basic parsing if PyYAML not available + try: + with urllib.request.urlopen(url) as resp: + text = resp.read().decode() + # Basic extraction without full YAML parsing + result = {"_raw": text} + for line in text.split("\n"): + line = line.strip() + if line.startswith("common_name:"): + result["common_name"] = line.split(":", 1)[1].strip().strip("'\"") + elif line.startswith("genome_size:"): + try: + result["genome_size"] = int(line.split(":", 1)[1].strip()) + except ValueError: + pass + elif line.startswith("project:"): + result["project"] = line.split(":", 1)[1].strip() + return result + except Exception: + return None + + try: + with urllib.request.urlopen(url) as resp: + return yaml.safe_load(resp.read()) + except Exception: + return None + + +def ncbi_to_fhr(report: dict, genomeark: dict | None = None) -> dict: + """Convert NCBI assembly report + GenomeArk data to FHR metadata.""" + organism = report.get("organism", {}) + assembly = report.get("assembly_info", {}) + stats = report.get("assembly_stats", {}) + + species_name = organism.get("organism_name", "") + tax_id = organism.get("tax_id") + common_name = organism.get("common_name", "") + + # GenomeArk may have a better common name + if genomeark: + species = genomeark.get("species", genomeark) + common_name = common_name or species.get("common_name", "") + + fhr = { + "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1, + "genome": species_name, + "version": assembly.get("assembly_name", ""), + "dateCreated": assembly.get("release_date", ""), + } + + # Taxonomy + if tax_id: + fhr["taxon"] = { + "name": species_name, + "uri": f"https://identifiers.org/taxonomy:{tax_id}", + } + + # Common name as synonym + if common_name: + fhr["genomeSynonym"] = [common_name] + + # Accession + accession = report.get("accession", "") + if accession: + fhr["accessionID"] = { + "name": accession, + "url": f"https://www.ncbi.nlm.nih.gov/datasets/genome/{accession}/", + } + + # Submitter as assembly author + submitter = assembly.get("submitter", "") + if submitter: + fhr["assemblyAuthor"] = [{"name": submitter}] + + # Sequencing technology + seq_tech = assembly.get("sequencing_tech", "") + if seq_tech: + fhr["instrument"] = [t.strip() for t in 
seq_tech.split(",")] + + # Assembly method + method = assembly.get("assembly_method", "") + if method and method != "various": + fhr["assemblySoftware"] = method + + # Vital statistics + vital = {} + if stats.get("contig_n50"): + vital["N50"] = stats["contig_n50"] + if stats.get("contig_l50"): + vital["L50"] = stats["contig_l50"] + if stats.get("total_sequence_length"): + vital["totalBasePairs"] = int(stats["total_sequence_length"]) + if stats.get("number_of_contigs"): + vital["numberContigs"] = stats["number_of_contigs"] + if stats.get("number_of_scaffolds"): + vital["numberScaffolds"] = stats["number_of_scaffolds"] + if stats.get("scaffold_n50"): + vital["scaffoldN50"] = stats["scaffold_n50"] + if vital: + fhr["vitalStats"] = vital + + # Related links + links = [] + links.append(f"https://www.genomeark.org/genomeark-all/{species_name.replace(' ', '_')}.html") + if accession: + links.append(f"https://www.ncbi.nlm.nih.gov/datasets/genome/{accession}/") + fhr["relatedLink"] = links + + # BioProject lineage — note VGP/DToL/EBP affiliations + projects = [] + for lineage in assembly.get("bioproject_lineage", []): + for bp in lineage.get("bioprojects", []): + title = bp.get("title", "") + if any(kw in title.lower() for kw in ["vertebrate genomes", "darwin tree", "earth biogenome"]): + projects.append(title) + if projects: + fhr["documentation"] = "Projects: " + "; ".join(projects) + + # License + fhr["license"] = "https://www.genomeark.org/documentation/data-use-policy.html" + + return fhr + + +def process_accession(accession: str, output_path: str | None = None) -> str: + """Process a single accession and write FHR JSON.""" + print(f"Fetching NCBI report for {accession}...", file=sys.stderr) + report = fetch_ncbi_report(accession) + + species_name = report.get("organism", {}).get("organism_name", "") + print(f" Species: {species_name}", file=sys.stderr) + + print(f" Fetching GenomeArk metadata...", file=sys.stderr) + genomeark = fetch_genomeark_yaml(species_name) if species_name else None + + fhr = ncbi_to_fhr(report, genomeark) + + if output_path is None: + output_path = f"{accession}.fhr.json" + + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w") as f: + json.dump(fhr, f, indent=2) + + print(f" Wrote: {output_path}", file=sys.stderr) + return output_path + + +def main(): + if len(sys.argv) < 2: + print("Usage: genomeark_to_fhr.py [accession2 ...] [--output-dir DIR]") + print(" genomeark_to_fhr.py GCA_964261635.1") + print(" genomeark_to_fhr.py GCA_964261635.1 GCA_964263255.1 --output-dir fhr/") + sys.exit(1) + + args = sys.argv[1:] + output_dir = None + + if "--output-dir" in args: + idx = args.index("--output-dir") + output_dir = args[idx + 1] + args = args[:idx] + args[idx + 2:] + + for accession in args: + if output_dir: + output_path = f"{output_dir}/{accession}.fhr.json" + else: + output_path = f"{accession}.fhr.json" + process_accession(accession, output_path) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/fhr/load_fhr_metadata.py b/data_loaders/ref-genome-analysis/fhr/load_fhr_metadata.py new file mode 100644 index 0000000..78a9bfd --- /dev/null +++ b/data_loaders/ref-genome-analysis/fhr/load_fhr_metadata.py @@ -0,0 +1,97 @@ +""" +Load FHR metadata JSON files into an existing RefgetStore. + +Resolves accessions to collection digests via the store's 'insdc' alias +namespace. Strips vitalStats before loading, since those describe the source +assembly, not the specific sequence collection. 
+ +Usage: + python load_fhr_metadata.py --store-path /path/to/store --fhr-dir fhr_metadata/ + python load_fhr_metadata.py --store-path /path/to/store --fhr file.fhr.json --digest abc123 +""" + +import argparse +import glob +import json +import os +import sys +import tempfile + +from gtars.refget import RefgetStore + + +def strip_vital_stats(fhr_path): + """Write a temp FHR file with vitalStats removed. Returns temp path.""" + with open(fhr_path) as f: + fhr_data = json.load(f) + provenance = {k: v for k, v in fhr_data.items() if k != "vitalStats"} + tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".fhr.json", delete=False) + json.dump(provenance, tmp, indent=2) + tmp.close() + return tmp.name + + +def load_fhr_dir(store, fhr_dir, namespaces=("insdc", "refseq")): + """Load all .fhr.json files, resolving accession -> digest via alias namespaces.""" + fhr_files = sorted(glob.glob(os.path.join(fhr_dir, "*.fhr.json"))) + if not fhr_files: + print(f"No .fhr.json files found in {fhr_dir}", file=sys.stderr) + return + + print(f"Loading {len(fhr_files)} FHR files, resolving via {namespaces} aliases...", file=sys.stderr) + + n_loaded = 0 + n_skipped = 0 + for fhr_path in fhr_files: + basename = os.path.basename(fhr_path) + accession = basename.replace(".fhr.json", "") + + meta = None + for ns in namespaces: + meta = store.get_collection_metadata_by_alias(ns, accession) + if meta is not None: + break + + if meta is None: + n_skipped += 1 + continue + + tmp_path = strip_vital_stats(fhr_path) + try: + store.load_fhr_metadata(meta.digest, tmp_path) + finally: + os.unlink(tmp_path) + n_loaded += 1 + + if n_loaded % 100 == 0: + print(f" ... {n_loaded} loaded", file=sys.stderr) + + print(f"\nLoaded {n_loaded}, skipped {n_skipped} (no alias match)", file=sys.stderr) + + +def main(): + parser = argparse.ArgumentParser(description="Load FHR metadata into RefgetStore") + parser.add_argument("--store-path", required=True, help="Path to RefgetStore") + parser.add_argument("--fhr-dir", help="Directory of .fhr.json files") + parser.add_argument("--fhr", help="Single .fhr.json file") + parser.add_argument("--digest", help="Collection digest (required with --fhr)") + args = parser.parse_args() + + store = RefgetStore.on_disk(args.store_path) + + if args.fhr_dir: + load_fhr_dir(store, args.fhr_dir) + elif args.fhr and args.digest: + tmp_path = strip_vital_stats(args.fhr) + try: + store.load_fhr_metadata(args.digest, tmp_path) + finally: + os.unlink(tmp_path) + print(f"Loaded {args.fhr} -> {args.digest}", file=sys.stderr) + else: + print("Provide --fhr-dir or --fhr + --digest", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.29.fhr.json b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.29.fhr.json new file mode 100755 index 0000000..4c3b322 --- /dev/null +++ b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.29.fhr.json @@ -0,0 +1,36 @@ +{ + "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1, + "genome": "Homo sapiens", + "version": "GRCh38.p14", + "dateCreated": "2022-02-03", + "taxon": { + "name": "Homo sapiens", + "uri": "https://identifiers.org/taxonomy:9606" + }, + "genomeSynonym": [ + "human" + ], + "accessionID": { + "name": "GCA_000001405.29", + "url": "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_000001405.29/" + }, + "assemblyAuthor": [ + { + "name": "Genome Reference Consortium" + } + ], + "vitalStats": { + "N50": 
57879411, + "L50": 18, + "totalBasePairs": 3099734149, + "numberContigs": 999, + "numberScaffolds": 473, + "scaffoldN50": 67794873 + }, + "relatedLink": [ + "https://www.genomeark.org/genomeark-all/Homo_sapiens.html", + "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_000001405.29/" + ], + "license": "https://www.genomeark.org/documentation/data-use-policy.html" +} \ No newline at end of file diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.fhr.json b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.fhr.json new file mode 100755 index 0000000..4c3b322 --- /dev/null +++ b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.fhr.json @@ -0,0 +1,36 @@ +{ + "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1, + "genome": "Homo sapiens", + "version": "GRCh38.p14", + "dateCreated": "2022-02-03", + "taxon": { + "name": "Homo sapiens", + "uri": "https://identifiers.org/taxonomy:9606" + }, + "genomeSynonym": [ + "human" + ], + "accessionID": { + "name": "GCA_000001405.29", + "url": "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_000001405.29/" + }, + "assemblyAuthor": [ + { + "name": "Genome Reference Consortium" + } + ], + "vitalStats": { + "N50": 57879411, + "L50": 18, + "totalBasePairs": 3099734149, + "numberContigs": 999, + "numberScaffolds": 473, + "scaffoldN50": 67794873 + }, + "relatedLink": [ + "https://www.genomeark.org/genomeark-all/Homo_sapiens.html", + "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_000001405.29/" + ], + "license": "https://www.genomeark.org/documentation/data-use-policy.html" +} \ No newline at end of file diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964261635.1.fhr.json b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964261635.1.fhr.json new file mode 100644 index 0000000..37fdd6f --- /dev/null +++ b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964261635.1.fhr.json @@ -0,0 +1,41 @@ +{ + "schema": "https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1, + "genome": "Lissotriton helveticus", + "version": "aLisHel1.1", + "dateCreated": "2024-10-17", + "taxon": { + "name": "Lissotriton helveticus", + "uri": "https://identifiers.org/taxonomy:256425" + }, + "genomeSynonym": [ + "palmate newt" + ], + "accessionID": { + "name": "GCA_964261635.1", + "url": "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_964261635.1/" + }, + "assemblyAuthor": [ + { + "name": "WELLCOME SANGER INSTITUTE" + } + ], + "instrument": [ + "PacBio", + "Arima2" + ], + "vitalStats": { + "N50": 7795245, + "L50": 941, + "totalBasePairs": 23170028842, + "numberContigs": 5693, + "numberScaffolds": 448, + "scaffoldN50": 2132484007 + }, + "relatedLink": [ + "https://www.genomeark.org/genomeark-all/Lissotriton_helveticus.html", + "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_964261635.1/" + ], + "documentation": "Projects: Vertebrate Genomes Project; Darwin Tree of Life Project: Genome Data and Assemblies; Earth BioGenome Project (EBP)", + "license": "https://www.genomeark.org/documentation/data-use-policy.html" +} \ No newline at end of file diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964263255.1.fhr.json b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964263255.1.fhr.json new file mode 100644 index 0000000..e8a6bda --- /dev/null +++ b/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964263255.1.fhr.json @@ -0,0 +1,41 @@ +{ + "schema": 
"https://raw.githubusercontent.com/FAIR-bioHeaders/FHR-Specification/main/fhr.json", + "schemaVersion": 1, + "genome": "Lissotriton vulgaris", + "version": "aLisVul1.1", + "dateCreated": "2024-10-17", + "taxon": { + "name": "Lissotriton vulgaris", + "uri": "https://identifiers.org/taxonomy:8324" + }, + "genomeSynonym": [ + "common newt" + ], + "accessionID": { + "name": "GCA_964263255.1", + "url": "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_964263255.1/" + }, + "assemblyAuthor": [ + { + "name": "WELLCOME SANGER INSTITUTE" + } + ], + "instrument": [ + "PacBio", + "Arima2" + ], + "vitalStats": { + "N50": 6568731, + "L50": 1102, + "totalBasePairs": 24226223864, + "numberContigs": 19295, + "numberScaffolds": 15265, + "scaffoldN50": 1925992481 + }, + "relatedLink": [ + "https://www.genomeark.org/genomeark-all/Lissotriton_vulgaris.html", + "https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_964263255.1/" + ], + "documentation": "Projects: Vertebrate Genomes Project; Darwin Tree of Life Project: Genome Data and Assemblies; Earth BioGenome Project (EBP)", + "license": "https://www.genomeark.org/documentation/data-use-policy.html" +} \ No newline at end of file diff --git a/data_loaders/ref-genome-analysis/inventory_genomes.py b/data_loaders/ref-genome-analysis/inventory/inventory_genomes.py similarity index 100% rename from data_loaders/ref-genome-analysis/inventory_genomes.py rename to data_loaders/ref-genome-analysis/inventory/inventory_genomes.py diff --git a/data_loaders/ref-genome-analysis/process-all-genomes.sbatch b/data_loaders/ref-genome-analysis/process-all-genomes.sbatch deleted file mode 100644 index 28f3de7..0000000 --- a/data_loaders/ref-genome-analysis/process-all-genomes.sbatch +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=refgetstore -#SBATCH --output=refgetstore_%j.log -#SBATCH --error=refgetstore_%j.log -#SBATCH --partition=standard -#SBATCH --time=24:00:00 -#SBATCH --mem=16G -#SBATCH --cpus-per-task=4 -#SBATCH --account=shefflab - -module load miniforge/24.3.0-py3.11 - -cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis - -python build_refgetstore.py --store-path /project/shefflab/brickyard/refget_store diff --git a/data_loaders/ref-genome-analysis/profiling/profile_all.py b/data_loaders/ref-genome-analysis/profiling/profile_all.py new file mode 100644 index 0000000..e9cf60b --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_all.py @@ -0,0 +1,48 @@ +"""Profile RefgetStore on 5 genomes: newt + 4 normal. 
Compare timing and memory.""" +import time +import resource + +def peak_mb(): + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + +def rss_mb(): + try: + with open("/proc/self/status") as f: + for line in f: + if line.startswith("VmRSS:"): + return int(line.split()[1]) / 1024 + except: + pass + return peak_mb() + +from gtars.refget import RefgetStore + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +STORE_PATH = f"{BRICK_ROOT}/refget_store" +GENOMES = [ + # (path, old_total_time, n_seqs, label) + ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964261635.1.fa.gz", 183.7, 448, "newt (2GB chr)"), + ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263255.1.fa.gz", 213.2, 15265, "15K seqs"), + ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263955.1.fa.gz", 42.7, 11150, "11K seqs"), + ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964264875.2.fa.gz", 27.4, 585, "585 seqs"), + ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964266715.1.fa.gz", 17.0, 1581, "1.6K seqs"), +] + +store = RefgetStore.on_disk(STORE_PATH) +print(f"Store opened. Stats: {store.stats()}") +print(f"RSS after open: {rss_mb():.0f} MB\n") + +print(f"{'Genome':<30} {'Seqs':>6} {'New(s)':>8} {'Old(s)':>8} {'Ratio':>7} {'Peak MB':>8}") +print("-" * 75) + +for fasta, old_total, old_nseqs, label in GENOMES: + name = fasta.split("/")[-1] + t0 = time.time() + meta, was_new = store.add_sequence_collection_from_fasta(fasta) + elapsed = time.time() - t0 + ratio = elapsed / old_total + status = "NEW" if was_new else "SKIP" + print(f"{label:<30} {meta.n_sequences:>6} {elapsed:>7.1f}s {old_total:>7.1f}s {ratio:>6.2f}x {peak_mb():>7.0f}", flush=True) + +print(f"\nFinal RSS: {rss_mb():.0f} MB, Peak: {peak_mb():.0f} MB") +print(f"Store stats: {store.stats()}") diff --git a/data_loaders/ref-genome-analysis/profiling/profile_all.sbatch b/data_loaders/ref-genome-analysis/profiling/profile_all.sbatch new file mode 100644 index 0000000..e1edf19 --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_all.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=profile_all +#SBATCH --output=profile_all_%j.log +#SBATCH --error=profile_all_%j.log +#SBATCH --partition=standard +#SBATCH --time=2:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=4 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling + +python profile_all.py diff --git a/data_loaders/ref-genome-analysis/profiling/profile_batch.py b/data_loaders/ref-genome-analysis/profiling/profile_batch.py new file mode 100644 index 0000000..b291e78 --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_batch.py @@ -0,0 +1,28 @@ +"""Profile RefgetStore on several normal genomes for timing comparison.""" +import time +import resource + +def peak_mb(): + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + +from gtars.refget import RefgetStore + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +STORE_PATH = f"{BRICK_ROOT}/refget_store" +GENOMES = [ + # (path, old_pipeline_time, old_total_time, n_seqs) + ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263255.1.fa.gz", 203.1, 213.2, 15265), + 
("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263955.1.fa.gz", 32.6, 42.7, 11150), + ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964266715.1.fa.gz", 7.2, 17.0, 1581), +] + +store = RefgetStore.on_disk(STORE_PATH) +print(f"Store opened. Stats: {store.stats()}\n") + +for fasta, old_pipe, old_total, old_nseqs in GENOMES: + name = fasta.split("/")[-1] + t0 = time.time() + meta, was_new = store.add_sequence_collection_from_fasta(fasta) + elapsed = time.time() - t0 + status = "NEW" if was_new else "SKIP" + print(f"{status} {name}: {meta.n_sequences} seqs, {elapsed:.1f}s (old: {old_pipe:.1f}s pipe / {old_total:.1f}s total), Peak={peak_mb():.0f} MB") diff --git a/data_loaders/ref-genome-analysis/profiling/profile_memory.py b/data_loaders/ref-genome-analysis/profiling/profile_memory.py new file mode 100644 index 0000000..5511032 --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_memory.py @@ -0,0 +1,87 @@ +"""Profile RefgetStore memory usage on Rivanna.""" +import os +import sys +import time +import resource +import csv + +def rss_mb(): + """Current RSS in MB from /proc/self/status (more accurate than ru_maxrss).""" + try: + with open("/proc/self/status") as f: + for line in f: + if line.startswith("VmRSS:"): + return int(line.split()[1]) / 1024 # KB to MB + except: + pass + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + +def peak_mb(): + """Peak RSS (high-water mark).""" + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + +def print_mem(label): + print(f"[MEM] {label}: RSS={rss_mb():.1f} MB, Peak={peak_mb():.1f} MB", flush=True) + +print_mem("startup") + +from gtars.refget import RefgetStore +print_mem("after import") + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +STORE_PATH = f"{BRICK_ROOT}/refget_store" +INVENTORY_CSV = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refgenomes_inventory.csv" + +# Open the store +t0 = time.time() +store = RefgetStore.on_disk(STORE_PATH) +t1 = time.time() +print_mem(f"after open_local ({t1-t0:.1f}s)") +print(f"Store stats: {store.stats()}") + +# Read inventory +rows = [] +with open(INVENTORY_CSV) as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(row) + +# Use offset to skip to unprocessed files +OFFSET = int(sys.argv[1]) if len(sys.argv) > 1 else 0 +TARGET_NEW = int(sys.argv[2]) if len(sys.argv) > 2 else 5 + +if OFFSET: + rows = rows[OFFSET:] + print(f"Skipped to offset {OFFSET}, {len(rows)} remaining") + +print(f"Total rows to process: {len(rows)}, targeting {TARGET_NEW} new files") +print_mem("before processing loop") + +n_new = 0 +n_skipped = 0 + +for i, row in enumerate(rows): + fasta_path = row["path"] + filename = row.get("filename", "") + + t0 = time.time() + try: + meta, was_new = store.add_sequence_collection_from_fasta(fasta_path, threads=4) + elapsed = time.time() - t0 + + if was_new: + n_new += 1 + print(f"\n[{OFFSET+i+1}] NEW: {filename} -> {meta.digest} ({meta.n_sequences} seqs, {elapsed:.1f}s)") + print_mem(f"after NEW #{n_new}") + print(f"Store stats: {store.stats()}") + if n_new >= TARGET_NEW: + break + else: + n_skipped += 1 + if n_skipped % 50 == 0: + print_mem(f"skipping... 
({n_skipped} skipped, row {OFFSET+i+1})") + except Exception as e: + print(f"[{OFFSET+i+1}] FAILED {filename}: {e}") + +print(f"\nDone: {n_new} new, {n_skipped} skipped") +print_mem("final") diff --git a/data_loaders/ref-genome-analysis/profiling/profile_memory.sbatch b/data_loaders/ref-genome-analysis/profiling/profile_memory.sbatch new file mode 100644 index 0000000..b6ddc84 --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_memory.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=profile_mem +#SBATCH --output=profile_mem_%j.log +#SBATCH --error=profile_mem_%j.log +#SBATCH --partition=standard +#SBATCH --time=2:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=4 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling + +python profile_memory.py 850 5 diff --git a/data_loaders/ref-genome-analysis/profiling/profile_newt.py b/data_loaders/ref-genome-analysis/profiling/profile_newt.py new file mode 100644 index 0000000..5343eba --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_newt.py @@ -0,0 +1,57 @@ +"""Profile RefgetStore memory on the palmate newt genome (GCA_964261635.1). + +This genome has a single 2 GB chromosome — the worst case for pipeline memory. +Run via sbatch after removing the genome from the store to force re-processing. +""" +import os +import sys +import time +import resource + +def rss_mb(): + """Current RSS in MB from /proc/self/status.""" + try: + with open("/proc/self/status") as f: + for line in f: + if line.startswith("VmRSS:"): + return int(line.split()[1]) / 1024 + except: + pass + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + +def peak_mb(): + """Peak RSS (high-water mark).""" + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + +def print_mem(label): + print(f"[MEM] {label}: RSS={rss_mb():.1f} MB, Peak={peak_mb():.1f} MB", flush=True) + +print_mem("startup") + +from gtars.refget import RefgetStore +print_mem("after import") + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +STORE_PATH = f"{BRICK_ROOT}/refget_store" +NEWT_FASTA = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964261635.1.fa.gz" + +# Open the store +t0 = time.time() +store = RefgetStore.on_disk(STORE_PATH) +t1 = time.time() +print_mem(f"after open_local ({t1-t0:.1f}s)") +print(f"Store stats: {store.stats()}") + +# Process the newt genome +print(f"\nProcessing newt genome: {NEWT_FASTA}") +t0 = time.time() +meta, was_new = store.add_sequence_collection_from_fasta(NEWT_FASTA) +elapsed = time.time() - t0 + +status = "NEW" if was_new else "SKIPPED (already exists)" +print(f"\nResult: {status}") +print(f"Digest: {meta.digest}") +print(f"Sequences: {meta.n_sequences}") +print(f"Time: {elapsed:.1f}s") +print_mem("after processing") +print(f"Store stats: {store.stats()}") diff --git a/data_loaders/ref-genome-analysis/profiling/profile_newt.sbatch b/data_loaders/ref-genome-analysis/profiling/profile_newt.sbatch new file mode 100644 index 0000000..e3595af --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_newt.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=profile_newt +#SBATCH --output=profile_newt_%j.log +#SBATCH --error=profile_newt_%j.log +#SBATCH --partition=standard +#SBATCH --time=2:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=4 +#SBATCH --account=shefflab + +module load 
miniforge/24.3.0-py3.11 + +cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling + +python profile_newt.py diff --git a/data_loaders/ref-genome-analysis/profiling/profile_normal.py b/data_loaders/ref-genome-analysis/profiling/profile_normal.py new file mode 100644 index 0000000..ddec583 --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_normal.py @@ -0,0 +1,34 @@ +"""Profile RefgetStore on a normal-sized genome (GCA_964264875.2, 585 seqs). +Compare timing with old code (17.7s pipeline time).""" +import time +import resource + +def peak_mb(): + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + +def print_mem(label): + print(f"[MEM] {label}: Peak={peak_mb():.1f} MB", flush=True) + +print_mem("startup") + +from gtars.refget import RefgetStore +print_mem("after import") + +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +STORE_PATH = f"{BRICK_ROOT}/refget_store" +FASTA = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964264875.2.fa.gz" + +t0 = time.time() +store = RefgetStore.on_disk(STORE_PATH) +print_mem(f"after open_local ({time.time()-t0:.1f}s)") + +print(f"\nProcessing: {FASTA}") +t0 = time.time() +meta, was_new = store.add_sequence_collection_from_fasta(FASTA) +elapsed = time.time() - t0 + +print(f"Result: {'NEW' if was_new else 'SKIPPED'}") +print(f"Digest: {meta.digest}") +print(f"Sequences: {meta.n_sequences}") +print(f"Time: {elapsed:.1f}s (old code: 17.7s pipeline / 27.4s total)") +print_mem("after processing") diff --git a/data_loaders/ref-genome-analysis/profiling/profile_normal.sbatch b/data_loaders/ref-genome-analysis/profiling/profile_normal.sbatch new file mode 100644 index 0000000..a6e7b88 --- /dev/null +++ b/data_loaders/ref-genome-analysis/profiling/profile_normal.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=profile_normal +#SBATCH --output=profile_normal_%j.log +#SBATCH --error=profile_normal_%j.log +#SBATCH --partition=standard +#SBATCH --time=0:30:00 +#SBATCH --mem=8G +#SBATCH --cpus-per-task=4 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling + +python profile_normal.py diff --git a/data_loaders/ref-genome-analysis/verify_refgetstore.py b/data_loaders/ref-genome-analysis/verify/verify_refgetstore.py similarity index 97% rename from data_loaders/ref-genome-analysis/verify_refgetstore.py rename to data_loaders/ref-genome-analysis/verify/verify_refgetstore.py index fc53a59..e054393 100644 --- a/data_loaders/ref-genome-analysis/verify_refgetstore.py +++ b/data_loaders/ref-genome-analysis/verify/verify_refgetstore.py @@ -26,9 +26,10 @@ import tempfile import time -STORE_PATH = "/project/shefflab/brickyard/refget_store" -INVENTORY_CSV = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refgenomes_inventory.csv" -DIGEST_MAP_CSV = "/home/nsheff/Dropbox/workspaces/refgenie/repos/refget/data_loaders/ref-genome-analysis/digest_map.csv" +BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +STORE_PATH = f"{BRICK_ROOT}/refget_store" +INVENTORY_CSV = f"{BRICK_ROOT}/refgenomes_inventory.csv" +DIGEST_MAP_CSV = f"{BRICK_ROOT}/refget_staging/digest_map.csv" results = [] @@ -75,7 +76,7 @@ def check_store_opens(store_path): # Count collections and sequences try: - collections = list(store.list_collections()) + collections = 
list(store.list_collections()["results"]) n_collections = len(collections) except Exception as e: check("list_collections", False, f"error={e}") @@ -130,7 +131,7 @@ def check_digest_map(store, digest_map_path): ) # Get store collection digests for comparison - store_digests = {meta.digest for meta in store.list_collections()} + store_digests = {meta.digest for meta in store.list_collections()["results"]} # Check how many digest_map digests are in the store matched = 0 @@ -155,7 +156,7 @@ def check_digest_map(store, digest_map_path): def check_level2_integrity(store, n_to_check=3): """Verify level2 data for a sample of collections.""" - collections = list(store.list_collections()) + collections = list(store.list_collections()["results"]) if not collections: check("level2_integrity", False, "no collections to check") return @@ -232,7 +233,7 @@ def check_roundtrip_export(store, store_path, digest_map_path, inventory_path, l return # Pick a sample of collections that have original files - collections = list(store.list_collections()) + collections = list(store.list_collections()["results"]) test_pairs = [] for meta in collections: if meta.digest in digest_to_original: diff --git a/data_loaders/riva_pangenome_analysis/update-gtars.sh b/data_loaders/riva_pangenome_analysis/update-gtars.sh new file mode 100644 index 0000000..44d22c5 --- /dev/null +++ b/data_loaders/riva_pangenome_analysis/update-gtars.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Reinstall gtars and refget on rivanna + +ssh riva 'bash --login -s' << 'EOF' +set -e +source /etc/profile.d/modules.sh +module load miniforge/24.3.0-py3.11 + +# Build gtars (refget module only) +cd ~/code/gtars +git checkout refgetstore +git pull +cd gtars-python +rm -f ../target/wheels/gtars-*.whl +maturin build --release --no-default-features --features refget +pip install ../target/wheels/gtars-*.whl --force-reinstall --no-deps + +# Install local refget +cd ~/code/refget +git checkout dev +git pull +python -m pip install -e . + +echo "Done!" 
+EOF From e09ccdd3222d5fc52d27e26a2990d287e885c89f Mon Sep 17 00:00:00 2001 From: nsheff Date: Mon, 16 Mar 2026 22:20:41 -0400 Subject: [PATCH 21/31] build up store-backed seqcolapi --- .github/workflows/deploy_store.yml | 56 ++++++++++++ deployment/seqcolapi-store/Dockerfile | 5 ++ deployment/seqcolapi-store/production.env | 2 + deployment/seqcolapi-store/task_def.json | 101 ++++++++++++++++++++++ refget-r/R/getSeq-methods.R | 42 +++++++-- refget/middleware.py | 59 +++++++++++++ refget/router.py | 31 +++++-- seqcolapi/main.py | 81 ++++++++++++----- 8 files changed, 339 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/deploy_store.yml create mode 100644 deployment/seqcolapi-store/Dockerfile create mode 100644 deployment/seqcolapi-store/production.env create mode 100644 deployment/seqcolapi-store/task_def.json create mode 100644 refget/middleware.py diff --git a/.github/workflows/deploy_store.yml b/.github/workflows/deploy_store.yml new file mode 100644 index 0000000..6a88016 --- /dev/null +++ b/.github/workflows/deploy_store.yml @@ -0,0 +1,56 @@ +on: + workflow_dispatch: + inputs: null + workflow_run: + workflows: ["Deploy to Dockerhub on release"] + types: + - completed + +name: Deploy store-backed seqcolapi to Amazon ECS + +jobs: + deploy: + name: Deploy + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Build, tag, and push image to Amazon ECR + id: build-image + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: seqcolapi-store + IMAGE_TAG: ${{ github.sha }} + run: | + cd deployment/seqcolapi-store/ + docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG -f Dockerfile . 
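+          # Push the freshly built image and expose its full tag for the
+          # task-definition render step below.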
+ docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG + echo "::set-output name=image::$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" + + - name: Fill in the new image ID in the Amazon ECS task definition + id: task-def + uses: aws-actions/amazon-ecs-render-task-definition@v1 + with: + task-definition: deployment/seqcolapi-store/task_def.json + container-name: seqcolapi-store + image: ${{ steps.build-image.outputs.image }} + + - name: Deploy Amazon ECS task definition + uses: aws-actions/amazon-ecs-deploy-task-definition@v1 + with: + task-definition: ${{ steps.task-def.outputs.task-definition }} + service: seqcolapi-store-service + cluster: yeti + wait-for-service-stability: true diff --git a/deployment/seqcolapi-store/Dockerfile b/deployment/seqcolapi-store/Dockerfile new file mode 100644 index 0000000..e138239 --- /dev/null +++ b/deployment/seqcolapi-store/Dockerfile @@ -0,0 +1,5 @@ +FROM tiangolo/uvicorn-gunicorn:python3.11-slim +LABEL authors="Nathan Sheffield" +RUN pip install https://github.com/refgenie/refget/archive/dev.zip +RUN pip install gtars +CMD ["uvicorn", "seqcolapi.main:store_app", "--host", "0.0.0.0", "--port", "80"] diff --git a/deployment/seqcolapi-store/production.env b/deployment/seqcolapi-store/production.env new file mode 100644 index 0000000..fa32c22 --- /dev/null +++ b/deployment/seqcolapi-store/production.env @@ -0,0 +1,2 @@ +export REFGET_STORE_URL="s3://seqcolapi-store/refget/" +export SERVER_ENV="production" diff --git a/deployment/seqcolapi-store/task_def.json b/deployment/seqcolapi-store/task_def.json new file mode 100644 index 0000000..4a3d22a --- /dev/null +++ b/deployment/seqcolapi-store/task_def.json @@ -0,0 +1,101 @@ +{ + "ipcMode": null, + "executionRoleArn": "arn:aws:iam::235728444054:role/ecsTaskExecutionRole", + "containerDefinitions": [ + { + "dnsSearchDomains": null, + "environmentFiles": null, + "logConfiguration": null, + "entryPoint": null, + "portMappings": [ + { + "hostPort": 8106, + "protocol": "tcp", + "containerPort": 80 + } + ], + "command": null, + "linuxParameters": null, + "cpu": 0, + "environment": [ + { + "name": "REFGET_STORE_URL", + "value": "s3://seqcolapi-store/refget/" + } + ], + "resourceRequirements": null, + "ulimits": null, + "dnsServers": null, + "mountPoints": [], + "workingDirectory": null, + "secrets": [], + "dockerSecurityOptions": null, + "memory": 2048, + "memoryReservation": 512, + "volumesFrom": [], + "stopTimeout": null, + "image": "235728444054.dkr.ecr.us-east-1.amazonaws.com/my-ecr-repo:latest", + "startTimeout": null, + "firelensConfiguration": null, + "dependsOn": null, + "disableNetworking": null, + "interactive": null, + "healthCheck": null, + "essential": true, + "links": null, + "hostname": null, + "extraHosts": null, + "pseudoTerminal": null, + "user": null, + "readonlyRootFilesystem": null, + "dockerLabels": null, + "systemControls": null, + "privileged": null, + "name": "seqcolapi-store" + } + ], + "placementConstraints": [], + "memory": null, + "taskRoleArn": "ecsTaskExecutionRole", + "compatibilities": [ + "EC2" + ], + "family": "seqcolapi-store-task", + "requiresAttributes": [ + { + "targetId": null, + "targetType": null, + "value": null, + "name": "com.amazonaws.ecs.capability.ecr-auth" + }, + { + "targetId": null, + "targetType": null, + "value": null, + "name": "com.amazonaws.ecs.capability.docker-remote-api.1.21" + }, + { + "targetId": null, + "targetType": null, + "value": null, + "name": "com.amazonaws.ecs.capability.task-iam-role" + }, + { + "targetId": null, + "targetType": null, + "value": null, 
+ "name": "ecs.capability.execution-role-ecr-pull" + } + ], + "pidMode": null, + "requiresCompatibilities": [ + "EC2" + ], + "networkMode": "bridge", + "cpu": "128", + "revision": 1, + "status": "ACTIVE", + "inferenceAccelerators": null, + "proxyConfiguration": null, + "volumes": [] +} diff --git a/refget-r/R/getSeq-methods.R b/refget-r/R/getSeq-methods.R index 7b0f9f2..0cab648 100644 --- a/refget-r/R/getSeq-methods.R +++ b/refget-r/R/getSeq-methods.R @@ -113,17 +113,43 @@ setMethod("getSeq", "RefgetGenome", stop("Length mismatch: names, start, end, and strand must have compatible lengths") } - # Extract each sequence - seqs <- vapply(seq_len(n), function(i) { - .getSeq_single(genome, names[i], start[i], end[i], strand[i], as.character = TRUE) - }, character(1)) + # Use bulk BED extraction if all regions have coordinates + if (!any(is.na(start)) && !any(is.na(end))) { + # Write temp BED file (convert 1-based closed to 0-based half-open) + bed_file <- tempfile(fileext = ".bed") + on.exit(unlink(bed_file), add = TRUE) + + bed_df <- data.frame( + chrom = names, + start = as.integer(start - 1L), + end = as.integer(end) + ) + write.table(bed_df, bed_file, sep = "\t", row.names = FALSE, + col.names = FALSE, quote = FALSE) + + # Single Rust FFI call for all regions + retrieved <- gtars::get_seqs_bed_file_to_vec( + genome@store, genome@collection_digest, bed_file + ) + + seqs <- vapply(retrieved, function(r) r@sequence, character(1)) + + # Handle negative strand + minus_idx <- which(strand == "-") + if (length(minus_idx) > 0) { + seqs[minus_idx] <- vapply(seqs[minus_idx], .reverse_complement, character(1)) + } - # Name the results - if (all(is.na(start)) || all(is.na(end))) { - result_names <- names - } else { result_names <- sprintf("%s:%d-%d", names, start, end) + } else { + # Fallback: full chromosome extraction (no coordinates) + seqs <- vapply(seq_len(n), function(i) { + .getSeq_single(genome, names[i], start[i], end[i], strand[i], as.character = TRUE) + }, character(1)) + + result_names <- names } + names(seqs) <- result_names # Convert to DNAStringSet if requested diff --git a/refget/middleware.py b/refget/middleware.py new file mode 100644 index 0000000..a32d37d --- /dev/null +++ b/refget/middleware.py @@ -0,0 +1,59 @@ +""" +Middleware for store-backed seqcolapi deployments. + +StoreFreshnessMiddleware periodically checks if the remote store has changed +(via rgstore.json digest) and reloads the backend when new data is available. +""" + +import json +import logging +import time +import urllib.request + +from starlette.middleware.base import BaseHTTPMiddleware + +_LOGGER = logging.getLogger(__name__) + + +class StoreFreshnessMiddleware(BaseHTTPMiddleware): + """On each request, if >N seconds since last check, fetch rgstore.json + and compare collections_digest. If changed, re-open the store and + swap the backend. 
Lazy, request-triggered, no background threads.""" + + def __init__(self, app, store_url: str, cache_dir: str, check_interval: int = 300): + super().__init__(app) + self.store_url = store_url + self.cache_dir = cache_dir + self.check_interval = check_interval + self.last_check = time.time() + self.last_digest = None + + async def dispatch(self, request, call_next): + now = time.time() + if now - self.last_check > self.check_interval: + self.last_check = now + self._check_and_reload(request.app) + return await call_next(request) + + def _check_and_reload(self, app): + try: + metadata = self._fetch_metadata() + digest = metadata.get("collections_digest") + if digest and digest != self.last_digest: + self.last_digest = digest + self._reload_backend(app) + except Exception as e: + _LOGGER.warning(f"Store freshness check failed: {e}") + + def _fetch_metadata(self) -> dict: + url = self.store_url.rstrip("/") + "/rgstore.json" + with urllib.request.urlopen(url) as resp: + return json.loads(resp.read()) + + def _reload_backend(self, app): + from refget.backend import RefgetStoreBackend + from refget.store import RefgetStore + + _LOGGER.info(f"Store changed, reloading from {self.store_url}") + store = RefgetStore.open_remote(self.cache_dir, self.store_url) + app.state.backend = RefgetStoreBackend(store.into_readonly()) diff --git a/refget/router.py b/refget/router.py index a2af04d..815c1e3 100644 --- a/refget/router.py +++ b/refget/router.py @@ -6,17 +6,14 @@ This router does not supply the /service-info endpoint, which should be created by the main app. -To use, first import it, then attach it to the app, -then create a backend object and attach it to the app state like this: +To use, import the router and setup_backend, then wire them up: -from refget.router import create_refget_router -from refget.agents import RefgetDBAgent +from refget.router import create_refget_router, setup_backend router = create_refget_router(sequences=False, collections=True, pangenomes=False) app.include_router(router, prefix="/seqcol") -dbagent = RefgetDBAgent() -app.state.backend = dbagent # RefgetDBAgent satisfies SeqColBackend -app.state.dbagent = dbagent # For DB-only endpoints (similarities, pangenomes, DRS) +setup_backend(app, store=my_store) # RefgetStore backend (no database) +# OR: setup_backend(app, engine=engine) # PostgreSQL via RefgetDBAgent """ import logging @@ -37,6 +34,26 @@ _ROUTER_CONFIG: dict = {} +def setup_backend(app, store=None, engine=None): + """Configure the seqcol backend on a FastAPI app. + + Pass a RefgetStore to serve from the store (default, no database needed). + Pass a SQLAlchemy engine to serve from PostgreSQL via RefgetDBAgent. 
+ """ + if store is not None: + from .backend import RefgetStoreBackend + + app.state.backend = RefgetStoreBackend(store.into_readonly()) + elif engine is not None: + from .agents import RefgetDBAgent + + dbagent = RefgetDBAgent(engine=engine) + app.state.dbagent = dbagent + app.state.backend = dbagent + else: + raise ValueError("setup_backend requires either store or engine") + + async def get_backend(request: Request) -> SeqColBackend: """Get the SeqColBackend from the app state.""" return request.app.state.backend diff --git a/seqcolapi/main.py b/seqcolapi/main.py index c21c995..04b302f 100644 --- a/seqcolapi/main.py +++ b/seqcolapi/main.py @@ -11,7 +11,7 @@ from refget.agents import RefgetDBAgent from refget.const import HUMANS_SAMPLE_LIST, MOUSE_SAMPLES_LIST from refget.models import HumanReadableNames -from refget.router import _ROUTER_CONFIG, _SAMPLE_DIGESTS, create_refget_router +from refget.router import _ROUTER_CONFIG, _SAMPLE_DIGESTS, create_refget_router, setup_backend from .const import ALL_VERSIONS, STATIC_DIRNAME, STATIC_PATH from .examples import * @@ -30,10 +30,8 @@ async def lifespan_loader(app): """ _LOGGER.info("Starting lifespan: Loading sample data...") - # Initialize database agent and store in app state - dbagent = RefgetDBAgent() - app.state.dbagent = dbagent - app.state.backend = dbagent # RefgetDBAgent satisfies SeqColBackend + # Initialize backend via setup_backend + setup_backend(app, engine=RefgetDBAgent().engine) species_samples = {"human": HUMANS_SAMPLE_LIST, "mouse": MOUSE_SAMPLES_LIST} @@ -41,7 +39,7 @@ async def lifespan_loader(app): try: _LOGGER.info(f"Loading {len(sample_names)} sample names for {species}") - with Session(dbagent.engine) as session: + with Session(app.state.dbagent.engine) as session: statement = select(HumanReadableNames).where( HumanReadableNames.human_readable_name.in_(sample_names) ) @@ -143,7 +141,7 @@ async def index(request: Request): async def service_info(): # Build seqcol capabilities object seqcol_info = { - "schema": dbagent.schema_dict, + "schema": getattr(app.state.dbagent, "schema_dict", None) if hasattr(app.state, "dbagent") else None, "sorted_name_length_pairs": True, "fasta_drs": {"enabled": _ROUTER_CONFIG.get("fasta_drs", False)}, } @@ -182,15 +180,6 @@ async def service_info(): app.mount("/", StaticFiles(directory=STATIC_PATH), name=STATIC_DIRNAME) -def create_global_dbagent(): - """ - Create a global database agent for use in the app. - """ - global dbagent - dbagent = RefgetDBAgent() # Configured via env vars - return dbagent - - def create_store_app(store_path: str, remote: bool = False, cache_dir: str = "/tmp/seqcol_cache"): """Create a seqcolapi FastAPI app backed by a RefgetStore (no database). @@ -202,7 +191,6 @@ def create_store_app(store_path: str, remote: bool = False, cache_dir: str = "/t Returns: FastAPI app with store-backed seqcol endpoints. 
""" - from refget.backend import RefgetStoreBackend from refget.store import RefgetStore if remote: @@ -210,18 +198,65 @@ def create_store_app(store_path: str, remote: bool = False, cache_dir: str = "/t else: store = RefgetStore.on_disk(store_path) - backend = RefgetStoreBackend(store.into_readonly()) + store_app = FastAPI( + title="Sequence Collections API (Store-backed)", + version=ALL_VERSIONS["refget_version"], + ) + + store_app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) - store_app = FastAPI(title="Sequence Collections API (Store-backed)") - store_app.state.backend = backend + setup_backend(store_app, store=store) router = create_refget_router( sequences=False, pangenomes=False, refget_store_url=store_path if remote else None ) store_app.include_router(router) + + if remote: + from refget.middleware import StoreFreshnessMiddleware + + store_app.add_middleware( + StoreFreshnessMiddleware, + store_url=store_path, + cache_dir=cache_dir, + ) + + @store_app.get("/service-info", summary="GA4GH service info", tags=["General endpoints"]) + async def store_service_info(): + backend = getattr(store_app.state, "backend", None) + caps = backend.capabilities() if backend and hasattr(backend, "capabilities") else {} + return { + "id": "org.databio.seqcolapi.store", + "name": "Sequence collections (store-backed)", + "type": { + "group": "org.ga4gh", + "artifact": "refget-seqcol", + "version": ALL_VERSIONS["seqcol_spec_version"], + }, + "description": "Store-backed API providing metadata for collections of reference sequences", + "organization": {"name": "Databio Lab", "url": "https://databio.org"}, + "contactUrl": "https://github.com/refgenie/refget/issues", + "version": ALL_VERSIONS, + "seqcol": { + "refget_store": {"enabled": True, "url": store_path, **caps}, + }, + } + return store_app +import os + +_STORE_URL_ENV = os.environ.get("REFGET_STORE_URL") + +if _STORE_URL_ENV: + store_app = create_store_app(_STORE_URL_ENV, remote=True) + + if __name__ != "__main__": - _dbagent = create_global_dbagent() - app.state.dbagent = _dbagent - app.state.backend = _dbagent # RefgetDBAgent satisfies SeqColBackend + setup_backend(app, engine=RefgetDBAgent().engine) From be71c4e2c4898ef8a4dee7cfb422f28bc1d0e5b9 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 17 Mar 2026 08:06:42 -0400 Subject: [PATCH 22/31] restructure data loading --- data_loaders/ref-genome-analysis/.gitignore | 3 + data_loaders/ref-genome-analysis/CLAUDE.md | 49 ++++++ data_loaders/ref-genome-analysis/README.md | 70 +++++---- .../ref-genome-analysis/env/deploy-deps.sh | 32 ++++ .../ref-genome-analysis/env/mutagen-setup.sh | 55 +++++++ .../ref-genome-analysis/env/on-cluster.env | 6 + .../ref-genome-analysis/env/remote-hpc.env | 9 ++ .../01_inventory}/inventory_genomes.py | 4 +- .../02_aliases}/build_ncbi_alias_table.py | 6 +- .../02_aliases}/register_aliases.sbatch | 5 +- .../02_aliases}/register_ncbi_aliases.py | 9 +- .../src/02_build/build_digest_map.py | 135 ++++++++++++++++ .../src/02_build/build_digest_map.sbatch | 16 ++ .../{fhr => src/03_fhr}/batch_generate_fhr.py | 0 .../{fhr => src/03_fhr}/genomeark_to_fhr.py | 0 .../{fhr => src/03_fhr}/load_fhr_metadata.py | 0 .../metadata/GCA_000001405.29.fhr.json | 0 .../03_fhr}/metadata/GCA_000001405.fhr.json | 0 .../03_fhr}/metadata/GCA_964261635.1.fhr.json | 0 .../03_fhr}/metadata/GCA_964263255.1.fhr.json | 0 .../04_verify}/verify_refgetstore.py | 14 +- .../05_profiling}/profile_all.py | 15 +- 
.../05_profiling}/profile_all.sbatch | 5 +- .../05_profiling}/profile_batch.py | 11 +- .../05_profiling}/profile_memory.py | 6 +- .../05_profiling}/profile_memory.sbatch | 5 +- .../05_profiling}/profile_newt.py | 6 +- .../05_profiling}/profile_newt.sbatch | 5 +- .../05_profiling}/profile_normal.py | 7 +- .../05_profiling}/profile_normal.sbatch | 5 +- .../ref-genome-analysis/src/90_split_store.py | 145 ++++++++++++++++++ .../src/90_split_store.sbatch | 16 ++ .../{ => src}/examples/test_20_genomes.py | 9 +- .../riva_pangenome_analysis/update-gtars.sh | 2 +- 34 files changed, 565 insertions(+), 85 deletions(-) create mode 100644 data_loaders/ref-genome-analysis/.gitignore create mode 100644 data_loaders/ref-genome-analysis/CLAUDE.md create mode 100644 data_loaders/ref-genome-analysis/env/deploy-deps.sh create mode 100755 data_loaders/ref-genome-analysis/env/mutagen-setup.sh create mode 100644 data_loaders/ref-genome-analysis/env/on-cluster.env create mode 100644 data_loaders/ref-genome-analysis/env/remote-hpc.env rename data_loaders/ref-genome-analysis/{inventory => src/01_inventory}/inventory_genomes.py (97%) rename data_loaders/ref-genome-analysis/{aliases => src/02_aliases}/build_ncbi_alias_table.py (98%) rename data_loaders/ref-genome-analysis/{aliases => src/02_aliases}/register_aliases.sbatch (53%) rename data_loaders/ref-genome-analysis/{aliases => src/02_aliases}/register_ncbi_aliases.py (96%) create mode 100644 data_loaders/ref-genome-analysis/src/02_build/build_digest_map.py create mode 100644 data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch rename data_loaders/ref-genome-analysis/{fhr => src/03_fhr}/batch_generate_fhr.py (100%) rename data_loaders/ref-genome-analysis/{fhr => src/03_fhr}/genomeark_to_fhr.py (100%) rename data_loaders/ref-genome-analysis/{fhr => src/03_fhr}/load_fhr_metadata.py (100%) rename data_loaders/ref-genome-analysis/{fhr => src/03_fhr}/metadata/GCA_000001405.29.fhr.json (100%) rename data_loaders/ref-genome-analysis/{fhr => src/03_fhr}/metadata/GCA_000001405.fhr.json (100%) rename data_loaders/ref-genome-analysis/{fhr => src/03_fhr}/metadata/GCA_964261635.1.fhr.json (100%) rename data_loaders/ref-genome-analysis/{fhr => src/03_fhr}/metadata/GCA_964263255.1.fhr.json (100%) rename data_loaders/ref-genome-analysis/{verify => src/04_verify}/verify_refgetstore.py (97%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_all.py (61%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_all.sbatch (66%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_batch.py (60%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_memory.py (91%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_memory.sbatch (64%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_newt.py (86%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_newt.sbatch (66%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_normal.py (79%) rename data_loaders/ref-genome-analysis/{profiling => src/05_profiling}/profile_normal.sbatch (66%) create mode 100644 data_loaders/ref-genome-analysis/src/90_split_store.py create mode 100644 data_loaders/ref-genome-analysis/src/90_split_store.sbatch rename data_loaders/ref-genome-analysis/{ => src}/examples/test_20_genomes.py (94%) diff --git a/data_loaders/ref-genome-analysis/.gitignore 
b/data_loaders/ref-genome-analysis/.gitignore new file mode 100644 index 0000000..ff7f203 --- /dev/null +++ b/data_loaders/ref-genome-analysis/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.pyc +*.log diff --git a/data_loaders/ref-genome-analysis/CLAUDE.md b/data_loaders/ref-genome-analysis/CLAUDE.md new file mode 100644 index 0000000..cbe896b --- /dev/null +++ b/data_loaders/ref-genome-analysis/CLAUDE.md @@ -0,0 +1,49 @@ +# ref-genome-analysis + +Pipeline for building a RefgetStore from reference genome FASTA files. Inventories genomes from the brickyard, loads them into a refget store, registers NCBI aliases, generates FAIR Header Representation (FHR) metadata, and verifies the result. + +## Setup + +Source the environment for your compute target: +- HPC (from laptop): `source env/remote-hpc.env` +- HPC (direct): `source env/on-cluster.env` + +To start mutagen sync: `./env/mutagen-setup.sh` + +## Pipeline Phases + +Execute in order: + +1. **01_inventory** -- Scan brickyard, generate CSV inventory of all FASTA files + - `python src/01_inventory/inventory_genomes.py` + +2. **02_aliases** -- Download NCBI assembly reports, build alias table, register in store + - Phase A: `python src/02_aliases/build_ncbi_alias_table.py` (downloads from NCBI, slow) + - Phase B: `sbatch src/02_aliases/register_aliases.sbatch` + +3. **03_fhr** -- Generate FAIR Header Representation metadata, load into store + - `python src/03_fhr/batch_generate_fhr.py --inventory $INVENTORY_CSV --output-dir $STAGING/fhr_metadata` + - `python src/03_fhr/load_fhr_metadata.py --store-path $STORE_PATH --fhr-dir $STAGING/fhr_metadata` + +4. **04_verify** -- Validate store integrity + - `python src/04_verify/verify_refgetstore.py` + +## Key Environment Variables + +- `BRICK_ROOT` -- Root of the refgenomes_fasta brick +- `STORE_PATH` -- Path to the RefgetStore database +- `STAGING` -- Staging area for intermediates (assembly reports, alias tables, FHR JSON) +- `INVENTORY_CSV` -- Path to the genome inventory CSV + +## Dependencies + +- Python 3.11+ (via `module load miniforge/24.3.0-py3.11` on Rivanna) +- `refget` or `gtars` Python package (for RefgetStore) +- Internet access for NCBI API calls (phases 2 and 3) + +## Notes + +- All phases are resumable -- cached downloads, idempotent store operations +- Phase 2A rate-limits NCBI requests (0.3s between calls) +- `src/05_profiling/` contains memory/timing benchmarks (not part of the main pipeline) +- `src/examples/` contains a 20-genome integration test diff --git a/data_loaders/ref-genome-analysis/README.md b/data_loaders/ref-genome-analysis/README.md index 6f71ee6..1ccf999 100644 --- a/data_loaders/ref-genome-analysis/README.md +++ b/data_loaders/ref-genome-analysis/README.md @@ -2,6 +2,14 @@ Pipeline for loading reference genome FASTA files into a RefgetStore and enriching them with NCBI aliases and FHR provenance metadata. 
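For orientation, here is a minimal, illustrative sketch of the core loading step that the build and alias stages automate for each inventory entry. It is not one of the pipeline scripts: it assumes the RefgetStore API used by `data_loaders/demo_build_store.py` (`on_disk`, `add_sequence_collection_from_fasta`, `add_collection_alias`), and the FASTA path and alias value are placeholders.

```python
# Illustrative sketch only -- not a pipeline script.
# Assumes the RefgetStore API used in data_loaders/demo_build_store.py.
import os
from refget.store import RefgetStore

store = RefgetStore.on_disk(os.environ["STORE_PATH"])

fasta = "example.fa.gz"  # placeholder; the pipeline iterates over $INVENTORY_CSV rows
result = store.add_sequence_collection_from_fasta(fasta)
meta = result[0] if isinstance(result, tuple) else result  # handle tuple or bare metadata
store.add_collection_alias("fasta_filename", "example", meta.digest)  # placeholder alias
print(f"{fasta} -> {meta.digest}")
```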
+## Setup + +```bash +source env/on-cluster.env # on Rivanna directly +source env/remote-hpc.env # from laptop, targeting Rivanna +./env/mutagen-setup.sh # start file sync (laptop only) +``` + ## Pipeline stages Execute in order: @@ -10,51 +18,45 @@ Execute in order: inventory --> build --> aliases --> fhr --> verify ``` -| Stage | Directory | Purpose | +| Stage | Location | Purpose | |---|---|---| -| **inventory** | `inventory/` | Scan brickyard FASTA files, produce `refgenomes_inventory.csv` | -| **build** | `build/` | Load FASTAs into RefgetStore, produce `digest_map.csv` | -| **aliases** | `aliases/` | Download NCBI assembly reports, build alias table, register sequence/collection aliases | -| **fhr** | `fhr/` | Generate and attach FHR provenance metadata (species, taxon, accession, submitter, etc.) | -| **verify** | `verify/` | Automated pass/fail checks against the store | -| **profiling** | `profiling/` | Memory and timing benchmarks | -| **examples** | `examples/` | End-to-end test scripts (e.g., load 20 genomes with FHR) | - -## Rivanna paths - -All data lives within the `refgenomes_fasta` brickyard brick: - -``` -/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/ -├── homo_sapiens/... # Source FASTAs -├── mus_musculus/... -├── refgenomes_inventory.csv # Inventory of all FASTAs -├── refget_store/ # The RefgetStore (fixed-format, don't modify manually) -└── refget_staging/ # Pipeline intermediates - ├── assembly_reports/ # Downloaded NCBI assembly_report.txt files - ├── ncbi_alias_table.csv # Parsed alias table (367K sequence rows) - ├── fhr_metadata/ # Generated FHR provenance JSON files - └── digest_map.csv # Build output mapping FASTAs to digests -``` - -- **Store**: `.../refgenomes_fasta/refget_store` -- **Staging**: `.../refgenomes_fasta/refget_staging` -- **This pipeline**: `.../refgenomes_fasta/refget/data_loaders/ref-genome-analysis/` +| **inventory** | `src/01_inventory/` | Scan brickyard FASTA files, produce `refgenomes_inventory.csv` | +| **build** | `src/02_build/` | Compute seqcol digests for all FASTAs, produce `digest_map.csv` | +| **aliases** | `src/02_aliases/` | Download NCBI assembly reports, build alias table, register sequence/collection aliases | +| **fhr** | `src/03_fhr/` | Generate and attach FHR provenance metadata (species, taxon, accession, submitter, etc.) | +| **verify** | `src/04_verify/` | Automated pass/fail checks against the store | +| **profiling** | `src/05_profiling/` | Memory and timing benchmarks | +| **split** | `src/90_split_store.py` | Split combined store into VGP and reference genome stores | +| **examples** | `src/examples/` | End-to-end test scripts (e.g., load 20 genomes with FHR) | + +## Environment variables + +All paths come from environment variables set by sourcing an env file. No hardcoded paths in scripts. + +| Variable | Purpose | +|---|---| +| `BRICKYARD` | Lab-wide brickyard root | +| `BRICK_ROOT` | This project's brick (`$BRICKYARD/datasets_downloaded/refgenomes_fasta`) | +| `STORE_PATH` | The RefgetStore database | +| `STAGING` | Pipeline intermediates (assembly reports, alias tables, FHR JSON) | +| `INVENTORY_CSV` | Inventory of all FASTAs | ## Quick start (Rivanna) ```bash +source env/on-cluster.env module load miniforge/24.3.0-py3.11 -# 1. Build store -sbatch build/build_refgetstore.sbatch +# 1. Inventory +python src/01_inventory/inventory_genomes.py # 2. Register NCBI aliases -sbatch aliases/register_aliases.sbatch +sbatch src/02_aliases/register_aliases.sbatch # 3. 
Attach FHR metadata -cd fhr && python load_fhr_metadata.py --store-path /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget_store --fhr-dir metadata/ +python src/03_fhr/batch_generate_fhr.py --inventory $INVENTORY_CSV --output-dir $STAGING/fhr_metadata +python src/03_fhr/load_fhr_metadata.py --store-path $STORE_PATH --fhr-dir $STAGING/fhr_metadata # 4. Verify -cd verify && python verify_refgetstore.py +python src/04_verify/verify_refgetstore.py ``` diff --git a/data_loaders/ref-genome-analysis/env/deploy-deps.sh b/data_loaders/ref-genome-analysis/env/deploy-deps.sh new file mode 100644 index 0000000..5052be2 --- /dev/null +++ b/data_loaders/ref-genome-analysis/env/deploy-deps.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# env/deploy-deps.sh — Build and install dependencies on Rivanna from mutagen-synced source +# +# Requires: DEPLOY_HOST and DEPLOY_DIR set in env file +# Requires: mutagen syncs running (via mutagen-setup.sh) + +if [ -z "$DEPLOY_HOST" ] || [ -z "$DEPLOY_DIR" ]; then + echo "DEPLOY_HOST and DEPLOY_DIR must be set. Source your env file first." + exit 1 +fi + +ssh "$DEPLOY_HOST" 'bash --login -s' << EOF +set -e +source /etc/profile.d/modules.sh +module load miniforge/24.3.0-py3.11 + +# Build gtars from synced source +cd ${DEPLOY_DIR}/gtars/gtars-python +rm -f ../target/wheels/gtars-*.whl +echo "Building gtars..." +maturin build --release --no-default-features --features refget +pip install ../target/wheels/gtars-*.whl --force-reinstall --no-deps +echo "gtars installed." + +# Install refget from synced source +cd ${DEPLOY_DIR}/refget +echo "Installing refget..." +python -m pip install -e . +echo "refget installed." + +echo "Done!" +EOF diff --git a/data_loaders/ref-genome-analysis/env/mutagen-setup.sh b/data_loaders/ref-genome-analysis/env/mutagen-setup.sh new file mode 100755 index 0000000..e17922f --- /dev/null +++ b/data_loaders/ref-genome-analysis/env/mutagen-setup.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# env/mutagen-setup.sh — Start mutagen sync for this project and its dependencies + +if [ -z "$SYNC_REMOTE" ]; then + echo "SYNC_REMOTE is not set. Set it in your env file to enable sync." + echo "Example: export SYNC_REMOTE=user@host:/path/to/project" + exit 0 +fi + +if [ -z "$PROJECT_NAME" ]; then + PROJECT_NAME=$(basename "$PWD") +fi + +# Sync the project itself +mutagen sync create \ + --name="${PROJECT_NAME}-pipeline" \ + --ignore=__pycache__ \ + --ignore="*.pyc" \ + --ignore="*.log" \ + --ignore=.git \ + . 
"$SYNC_REMOTE" + +echo "Sync started: ${PROJECT_NAME}-pipeline → $SYNC_REMOTE" + +# Sync dependencies for deployment +if [ -n "$DEPLOY_HOST" ] && [ -n "$DEPLOY_DIR" ]; then + # gtars — local source synced to remote deploy dir + GTARS_LOCAL="$HOME/Dropbox/workspaces/intervals/repos/gtars" + if [ -d "$GTARS_LOCAL" ]; then + mutagen sync create \ + --name="deploy-gtars" \ + --ignore=target \ + --ignore=__pycache__ \ + --ignore="*.pyc" \ + --ignore=.git \ + "$GTARS_LOCAL" "${DEPLOY_HOST}:${DEPLOY_DIR}/gtars" + echo "Sync started: deploy-gtars → ${DEPLOY_HOST}:${DEPLOY_DIR}/gtars" + else + echo "Warning: $GTARS_LOCAL not found, skipping gtars sync" + fi + + # refget — local source synced to remote deploy dir + REFGET_LOCAL="$HOME/Dropbox/workspaces/intervals/repos/refget" + if [ -d "$REFGET_LOCAL" ]; then + mutagen sync create \ + --name="deploy-refget" \ + --ignore=__pycache__ \ + --ignore="*.pyc" \ + --ignore=.git \ + "$REFGET_LOCAL" "${DEPLOY_HOST}:${DEPLOY_DIR}/refget" + echo "Sync started: deploy-refget → ${DEPLOY_HOST}:${DEPLOY_DIR}/refget" + else + echo "Warning: $REFGET_LOCAL not found, skipping refget sync" + fi +fi diff --git a/data_loaders/ref-genome-analysis/env/on-cluster.env b/data_loaders/ref-genome-analysis/env/on-cluster.env new file mode 100644 index 0000000..167b5e4 --- /dev/null +++ b/data_loaders/ref-genome-analysis/env/on-cluster.env @@ -0,0 +1,6 @@ +export PROJECT_NAME="ref-genome-analysis" +export BRICKYARD=/project/shefflab/brickyard +export BRICK_ROOT=$BRICKYARD/datasets_downloaded/refgenomes_fasta +export STORE_PATH=$BRICK_ROOT/refget_store +export STAGING=$BRICK_ROOT/refget_staging +export INVENTORY_CSV=$BRICK_ROOT/refgenomes_inventory.csv diff --git a/data_loaders/ref-genome-analysis/env/remote-hpc.env b/data_loaders/ref-genome-analysis/env/remote-hpc.env new file mode 100644 index 0000000..6504138 --- /dev/null +++ b/data_loaders/ref-genome-analysis/env/remote-hpc.env @@ -0,0 +1,9 @@ +export PROJECT_NAME="ref-genome-analysis" +export BRICKYARD=/project/shefflab/brickyard +export BRICK_ROOT=$BRICKYARD/datasets_downloaded/refgenomes_fasta +export STORE_PATH=$BRICK_ROOT/refget_store +export STAGING=$BRICK_ROOT/refget_staging +export INVENTORY_CSV=$BRICK_ROOT/refgenomes_inventory.csv +export SYNC_REMOTE=ns5bc@login.hpc.virginia.edu:/home/ns5bc/code/ref-genome-analysis +export DEPLOY_HOST=ns5bc@login.hpc.virginia.edu +export DEPLOY_DIR=/home/ns5bc/deploy diff --git a/data_loaders/ref-genome-analysis/inventory/inventory_genomes.py b/data_loaders/ref-genome-analysis/src/01_inventory/inventory_genomes.py similarity index 97% rename from data_loaders/ref-genome-analysis/inventory/inventory_genomes.py rename to data_loaders/ref-genome-analysis/src/01_inventory/inventory_genomes.py index d741601..b7881b8 100644 --- a/data_loaders/ref-genome-analysis/inventory/inventory_genomes.py +++ b/data_loaders/ref-genome-analysis/src/01_inventory/inventory_genomes.py @@ -25,9 +25,9 @@ import urllib.error import urllib.request -BRICKYARD_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" +BRICKYARD_ROOT = os.environ["BRICK_ROOT"] PEP_URL = "https://pephub-api.databio.org/api/v1/projects/donaldcampbelljr/human_mouse_fasta_brickyard/samples?tag=default" -OUTPUT_FILE = os.path.join(BRICKYARD_ROOT, "refgenomes_inventory.csv") +OUTPUT_FILE = os.environ.get("INVENTORY_CSV", os.path.join(BRICKYARD_ROOT, "refgenomes_inventory.csv")) FASTA_EXTENSIONS = {".fa", ".fa.gz", ".fna", ".fna.gz", ".fasta", ".fasta.gz"} ACCESSION_PATTERN = re.compile(r"(GC[AF]_\d+\.\d+)") diff --git 
a/data_loaders/ref-genome-analysis/aliases/build_ncbi_alias_table.py b/data_loaders/ref-genome-analysis/src/02_aliases/build_ncbi_alias_table.py similarity index 98% rename from data_loaders/ref-genome-analysis/aliases/build_ncbi_alias_table.py rename to data_loaders/ref-genome-analysis/src/02_aliases/build_ncbi_alias_table.py index 8c0d07c..4ceca3f 100644 --- a/data_loaders/ref-genome-analysis/aliases/build_ncbi_alias_table.py +++ b/data_loaders/ref-genome-analysis/src/02_aliases/build_ncbi_alias_table.py @@ -23,9 +23,9 @@ import urllib.error import urllib.request -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -INVENTORY_CSV = f"{BRICK_ROOT}/refgenomes_inventory.csv" -STAGING_DIR = f"{BRICK_ROOT}/refget_staging" +BRICK_ROOT = os.environ["BRICK_ROOT"] +INVENTORY_CSV = os.environ.get("INVENTORY_CSV", f"{BRICK_ROOT}/refgenomes_inventory.csv") +STAGING_DIR = os.environ.get("STAGING", f"{BRICK_ROOT}/refget_staging") ACCESSION_PATTERN = re.compile(r"(GC[AF]_\d+\.\d+)") NCBI_FTP_BASE = "https://ftp.ncbi.nlm.nih.gov/genomes/all" diff --git a/data_loaders/ref-genome-analysis/aliases/register_aliases.sbatch b/data_loaders/ref-genome-analysis/src/02_aliases/register_aliases.sbatch similarity index 53% rename from data_loaders/ref-genome-analysis/aliases/register_aliases.sbatch rename to data_loaders/ref-genome-analysis/src/02_aliases/register_aliases.sbatch index 10dfe48..dc50581 100644 --- a/data_loaders/ref-genome-analysis/aliases/register_aliases.sbatch +++ b/data_loaders/ref-genome-analysis/src/02_aliases/register_aliases.sbatch @@ -10,6 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/aliases +cd /home/ns5bc/code/ref-genome-analysis +source env/on-cluster.env -python register_ncbi_aliases.py --store-path /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget_store +python src/02_aliases/register_ncbi_aliases.py diff --git a/data_loaders/ref-genome-analysis/aliases/register_ncbi_aliases.py b/data_loaders/ref-genome-analysis/src/02_aliases/register_ncbi_aliases.py similarity index 96% rename from data_loaders/ref-genome-analysis/aliases/register_ncbi_aliases.py rename to data_loaders/ref-genome-analysis/src/02_aliases/register_ncbi_aliases.py index c344d75..f3f545b 100644 --- a/data_loaders/ref-genome-analysis/aliases/register_ncbi_aliases.py +++ b/data_loaders/ref-genome-analysis/src/02_aliases/register_ncbi_aliases.py @@ -22,10 +22,11 @@ from refget.store import RefgetStore -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -STORE_PATH = f"{BRICK_ROOT}/refget_store" -INVENTORY_CSV = f"{BRICK_ROOT}/refgenomes_inventory.csv" -ALIAS_TABLE_CSV = f"{BRICK_ROOT}/refget_staging/ncbi_alias_table.csv" +BRICK_ROOT = os.environ["BRICK_ROOT"] +STORE_PATH = os.environ.get("STORE_PATH", f"{BRICK_ROOT}/refget_store") +INVENTORY_CSV = os.environ.get("INVENTORY_CSV", f"{BRICK_ROOT}/refgenomes_inventory.csv") +STAGING = os.environ.get("STAGING", f"{BRICK_ROOT}/refget_staging") +ALIAS_TABLE_CSV = f"{STAGING}/ncbi_alias_table.csv" def parse_args(): diff --git a/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.py b/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.py new file mode 100644 index 0000000..f4c2487 --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +""" +Build a complete digest_map.csv from the inventory CSV. 
+ +For each FASTA in the inventory, reads the seqcol digest from the .rgsi cache +file next to it (instant — just reads the first line). Falls back to computing +the digest with digest_fasta() if no .rgsi exists. + +Outputs: $STAGING/digest_map.csv with columns: + path, filename, digest, n_sequences, group + +Usage: + python src/02_build/build_digest_map.py + python src/02_build/build_digest_map.py --dry-run +""" + +import argparse +import csv +import os +import re +import sys +import time + +BRICK_ROOT = os.environ["BRICK_ROOT"] +STAGING = os.environ.get("STAGING", os.path.join(BRICK_ROOT, "refget_staging")) +INVENTORY_CSV = os.environ.get("INVENTORY_CSV", os.path.join(BRICK_ROOT, "refgenomes_inventory.csv")) +OUTPUT_CSV = os.path.join(STAGING, "digest_map.csv") + +# Pattern to strip FASTA extensions and get the RGSI path +FASTA_EXTS = re.compile(r'\.(fa|fasta|fna)(\.gz)?$') + + +def rgsi_path_for(fasta_path: str) -> str: + """Get the .rgsi cache path for a FASTA file.""" + return FASTA_EXTS.sub('.rgsi', fasta_path) + + +def read_rgsi_digest(rgsi_path: str) -> tuple[str, int] | None: + """Read seqcol digest and sequence count from an .rgsi file. + + Returns (digest, n_sequences) or None if file doesn't exist or is malformed. + """ + if not os.path.exists(rgsi_path): + return None + digest = None + n_sequences = 0 + with open(rgsi_path) as f: + for line in f: + if line.startswith("##seqcol_digest="): + digest = line.strip().split("=", 1)[1] + elif not line.startswith("#"): + n_sequences += 1 + if digest: + return digest, n_sequences + return None + + +def build_digest_map(inventory_path: str, output_path: str, dry_run: bool = False): + with open(inventory_path) as f: + rows = list(csv.DictReader(f)) + + total = len(rows) + print(f"Inventory: {total} FASTAs from {inventory_path}") + + if dry_run: + # Just count how many have .rgsi files + have_rgsi = sum(1 for r in rows if os.path.exists(rgsi_path_for(r["path"]))) + print(f"FASTAs with .rgsi cache: {have_rgsi}/{total}") + print("--dry-run: stopping here.") + return + + results = [] + from_cache = 0 + skipped = 0 + t0 = time.time() + + for i, row in enumerate(rows, 1): + fasta_path = row["path"] + group = row.get("group", "") + filename = row.get("filename", os.path.basename(fasta_path)) + + # Try .rgsi cache first + rgsi = rgsi_path_for(fasta_path) + cached = read_rgsi_digest(rgsi) + if cached: + digest, n_sequences = cached + from_cache += 1 + results.append({ + "path": fasta_path, + "filename": filename, + "digest": digest, + "n_sequences": n_sequences, + "group": group, + }) + print(f" [{i}/{total}] (cache) {group}/{filename} -> {digest}") + continue + + # No cache — skip (these FASTAs were never successfully loaded) + print(f" [{i}/{total}] NO CACHE: {group}/{filename}", file=sys.stderr) + skipped += 1 + + elapsed = time.time() - t0 + + # Write output + os.makedirs(os.path.dirname(output_path), exist_ok=True) + with open(output_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=["path", "filename", "digest", "n_sequences", "group"]) + writer.writeheader() + writer.writerows(results) + + print(f"\nDone in {elapsed:.1f}s") + print(f" Written: {len(results)} entries to {output_path}") + print(f" From cache: {from_cache}") + print(f" No cache: {skipped}") + + # Summary by group + from collections import Counter + group_counts = Counter(r["group"] for r in results) + print(f"\nBy group:") + for group, count in sorted(group_counts.items(), key=lambda x: -x[1]): + print(f" {group}: {count}") + + +def main(): + parser = 
argparse.ArgumentParser(description="Build complete digest_map.csv from inventory.") + parser.add_argument("--inventory", default=INVENTORY_CSV) + parser.add_argument("--output", default=OUTPUT_CSV) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args() + + build_digest_map(args.inventory, args.output, args.dry_run) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch b/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch new file mode 100644 index 0000000..d4c042b --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch @@ -0,0 +1,16 @@ +#!/bin/bash +#SBATCH --job-name=build_digest_map +#SBATCH --output=build_digest_map_%j.log +#SBATCH --error=build_digest_map_%j.log +#SBATCH --partition=standard +#SBATCH --time=4:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=1 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd /home/ns5bc/code/ref-genome-analysis +source env/on-cluster.env + +python src/02_build/build_digest_map.py diff --git a/data_loaders/ref-genome-analysis/fhr/batch_generate_fhr.py b/data_loaders/ref-genome-analysis/src/03_fhr/batch_generate_fhr.py similarity index 100% rename from data_loaders/ref-genome-analysis/fhr/batch_generate_fhr.py rename to data_loaders/ref-genome-analysis/src/03_fhr/batch_generate_fhr.py diff --git a/data_loaders/ref-genome-analysis/fhr/genomeark_to_fhr.py b/data_loaders/ref-genome-analysis/src/03_fhr/genomeark_to_fhr.py similarity index 100% rename from data_loaders/ref-genome-analysis/fhr/genomeark_to_fhr.py rename to data_loaders/ref-genome-analysis/src/03_fhr/genomeark_to_fhr.py diff --git a/data_loaders/ref-genome-analysis/fhr/load_fhr_metadata.py b/data_loaders/ref-genome-analysis/src/03_fhr/load_fhr_metadata.py similarity index 100% rename from data_loaders/ref-genome-analysis/fhr/load_fhr_metadata.py rename to data_loaders/ref-genome-analysis/src/03_fhr/load_fhr_metadata.py diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.29.fhr.json b/data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_000001405.29.fhr.json similarity index 100% rename from data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.29.fhr.json rename to data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_000001405.29.fhr.json diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.fhr.json b/data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_000001405.fhr.json similarity index 100% rename from data_loaders/ref-genome-analysis/fhr/metadata/GCA_000001405.fhr.json rename to data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_000001405.fhr.json diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964261635.1.fhr.json b/data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_964261635.1.fhr.json similarity index 100% rename from data_loaders/ref-genome-analysis/fhr/metadata/GCA_964261635.1.fhr.json rename to data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_964261635.1.fhr.json diff --git a/data_loaders/ref-genome-analysis/fhr/metadata/GCA_964263255.1.fhr.json b/data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_964263255.1.fhr.json similarity index 100% rename from data_loaders/ref-genome-analysis/fhr/metadata/GCA_964263255.1.fhr.json rename to data_loaders/ref-genome-analysis/src/03_fhr/metadata/GCA_964263255.1.fhr.json diff --git a/data_loaders/ref-genome-analysis/verify/verify_refgetstore.py 
b/data_loaders/ref-genome-analysis/src/04_verify/verify_refgetstore.py similarity index 97% rename from data_loaders/ref-genome-analysis/verify/verify_refgetstore.py rename to data_loaders/ref-genome-analysis/src/04_verify/verify_refgetstore.py index e054393..b7b4686 100644 --- a/data_loaders/ref-genome-analysis/verify/verify_refgetstore.py +++ b/data_loaders/ref-genome-analysis/src/04_verify/verify_refgetstore.py @@ -26,10 +26,11 @@ import tempfile import time -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -STORE_PATH = f"{BRICK_ROOT}/refget_store" -INVENTORY_CSV = f"{BRICK_ROOT}/refgenomes_inventory.csv" -DIGEST_MAP_CSV = f"{BRICK_ROOT}/refget_staging/digest_map.csv" +BRICK_ROOT = os.environ["BRICK_ROOT"] +STORE_PATH = os.environ.get("STORE_PATH", f"{BRICK_ROOT}/refget_store") +INVENTORY_CSV = os.environ.get("INVENTORY_CSV", f"{BRICK_ROOT}/refgenomes_inventory.csv") +STAGING = os.environ.get("STAGING", f"{BRICK_ROOT}/refget_staging") +DIGEST_MAP_CSV = f"{STAGING}/digest_map.csv" results = [] @@ -375,8 +376,9 @@ def print_summary(store_path): if r["status"] == "FAIL": print(f" - {r['name']}: {r['detail']}") - # Write JSON report next to the store - report_dir = os.path.dirname(os.path.abspath(__file__)) + # Write JSON report to staging area + report_dir = STAGING + os.makedirs(report_dir, exist_ok=True) report_path = os.path.join(report_dir, "verification_report.json") with open(report_path, "w") as f: json.dump( diff --git a/data_loaders/ref-genome-analysis/profiling/profile_all.py b/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.py similarity index 61% rename from data_loaders/ref-genome-analysis/profiling/profile_all.py rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_all.py index e9cf60b..3fac641 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_all.py +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.py @@ -17,15 +17,16 @@ def rss_mb(): from gtars.refget import RefgetStore -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -STORE_PATH = f"{BRICK_ROOT}/refget_store" +import os +BRICK_ROOT = os.environ["BRICK_ROOT"] +STORE_PATH = os.environ.get("STORE_PATH", f"{BRICK_ROOT}/refget_store") GENOMES = [ # (path, old_total_time, n_seqs, label) - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964261635.1.fa.gz", 183.7, 448, "newt (2GB chr)"), - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263255.1.fa.gz", 213.2, 15265, "15K seqs"), - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263955.1.fa.gz", 42.7, 11150, "11K seqs"), - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964264875.2.fa.gz", 27.4, 585, "585 seqs"), - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964266715.1.fa.gz", 17.0, 1581, "1.6K seqs"), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964261635.1.fa.gz", 183.7, 448, "newt (2GB chr)"), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964263255.1.fa.gz", 213.2, 15265, "15K seqs"), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964263955.1.fa.gz", 42.7, 11150, "11K seqs"), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964264875.2.fa.gz", 27.4, 585, "585 seqs"), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964266715.1.fa.gz", 17.0, 1581, "1.6K seqs"), ] store = RefgetStore.on_disk(STORE_PATH) diff --git a/data_loaders/ref-genome-analysis/profiling/profile_all.sbatch 
b/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.sbatch similarity index 66% rename from data_loaders/ref-genome-analysis/profiling/profile_all.sbatch rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_all.sbatch index e1edf19..d96a2c0 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_all.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.sbatch @@ -10,6 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling +cd /home/ns5bc/code/ref-genome-analysis +source env/on-cluster.env -python profile_all.py +python src/05_profiling/profile_all.py diff --git a/data_loaders/ref-genome-analysis/profiling/profile_batch.py b/data_loaders/ref-genome-analysis/src/05_profiling/profile_batch.py similarity index 60% rename from data_loaders/ref-genome-analysis/profiling/profile_batch.py rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_batch.py index b291e78..760d64c 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_batch.py +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_batch.py @@ -7,13 +7,14 @@ def peak_mb(): from gtars.refget import RefgetStore -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -STORE_PATH = f"{BRICK_ROOT}/refget_store" +import os +BRICK_ROOT = os.environ["BRICK_ROOT"] +STORE_PATH = os.environ.get("STORE_PATH", f"{BRICK_ROOT}/refget_store") GENOMES = [ # (path, old_pipeline_time, old_total_time, n_seqs) - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263255.1.fa.gz", 203.1, 213.2, 15265), - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964263955.1.fa.gz", 32.6, 42.7, 11150), - ("/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964266715.1.fa.gz", 7.2, 17.0, 1581), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964263255.1.fa.gz", 203.1, 213.2, 15265), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964263955.1.fa.gz", 32.6, 42.7, 11150), + (f"{BRICK_ROOT}/vertebrates/fasta/GCA_964266715.1.fa.gz", 7.2, 17.0, 1581), ] store = RefgetStore.on_disk(STORE_PATH) diff --git a/data_loaders/ref-genome-analysis/profiling/profile_memory.py b/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.py similarity index 91% rename from data_loaders/ref-genome-analysis/profiling/profile_memory.py rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.py index 5511032..07a3ad7 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_memory.py +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.py @@ -28,9 +28,9 @@ def print_mem(label): from gtars.refget import RefgetStore print_mem("after import") -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -STORE_PATH = f"{BRICK_ROOT}/refget_store" -INVENTORY_CSV = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refgenomes_inventory.csv" +BRICK_ROOT = os.environ["BRICK_ROOT"] +STORE_PATH = os.environ.get("STORE_PATH", f"{BRICK_ROOT}/refget_store") +INVENTORY_CSV = os.environ.get("INVENTORY_CSV", f"{BRICK_ROOT}/refgenomes_inventory.csv") # Open the store t0 = time.time() diff --git a/data_loaders/ref-genome-analysis/profiling/profile_memory.sbatch b/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.sbatch similarity index 64% rename from 
data_loaders/ref-genome-analysis/profiling/profile_memory.sbatch rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.sbatch index b6ddc84..0e70a7b 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_memory.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.sbatch @@ -10,6 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling +cd /home/ns5bc/code/ref-genome-analysis +source env/on-cluster.env -python profile_memory.py 850 5 +python src/05_profiling/profile_memory.py 850 5 diff --git a/data_loaders/ref-genome-analysis/profiling/profile_newt.py b/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.py similarity index 86% rename from data_loaders/ref-genome-analysis/profiling/profile_newt.py rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.py index 5343eba..7d2285a 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_newt.py +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.py @@ -31,9 +31,9 @@ def print_mem(label): from gtars.refget import RefgetStore print_mem("after import") -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -STORE_PATH = f"{BRICK_ROOT}/refget_store" -NEWT_FASTA = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964261635.1.fa.gz" +BRICK_ROOT = os.environ["BRICK_ROOT"] +STORE_PATH = os.environ.get("STORE_PATH", f"{BRICK_ROOT}/refget_store") +NEWT_FASTA = f"{BRICK_ROOT}/vertebrates/fasta/GCA_964261635.1.fa.gz" # Open the store t0 = time.time() diff --git a/data_loaders/ref-genome-analysis/profiling/profile_newt.sbatch b/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.sbatch similarity index 66% rename from data_loaders/ref-genome-analysis/profiling/profile_newt.sbatch rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.sbatch index e3595af..ee37f56 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_newt.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.sbatch @@ -10,6 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling +cd /home/ns5bc/code/ref-genome-analysis +source env/on-cluster.env -python profile_newt.py +python src/05_profiling/profile_newt.py diff --git a/data_loaders/ref-genome-analysis/profiling/profile_normal.py b/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.py similarity index 79% rename from data_loaders/ref-genome-analysis/profiling/profile_normal.py rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.py index ddec583..6c414ca 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_normal.py +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.py @@ -14,9 +14,10 @@ def print_mem(label): from gtars.refget import RefgetStore print_mem("after import") -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -STORE_PATH = f"{BRICK_ROOT}/refget_store" -FASTA = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/vertebrates/fasta/GCA_964264875.2.fa.gz" +import os +BRICK_ROOT = os.environ["BRICK_ROOT"] +STORE_PATH = os.environ.get("STORE_PATH", f"{BRICK_ROOT}/refget_store") +FASTA = f"{BRICK_ROOT}/vertebrates/fasta/GCA_964264875.2.fa.gz" t0 = time.time() store = 
RefgetStore.on_disk(STORE_PATH) diff --git a/data_loaders/ref-genome-analysis/profiling/profile_normal.sbatch b/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.sbatch similarity index 66% rename from data_loaders/ref-genome-analysis/profiling/profile_normal.sbatch rename to data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.sbatch index a6e7b88..dd7eb04 100644 --- a/data_loaders/ref-genome-analysis/profiling/profile_normal.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.sbatch @@ -10,6 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta/refget/data_loaders/ref-genome-analysis/profiling +cd /home/ns5bc/code/ref-genome-analysis +source env/on-cluster.env -python profile_normal.py +python src/05_profiling/profile_normal.py diff --git a/data_loaders/ref-genome-analysis/src/90_split_store.py b/data_loaders/ref-genome-analysis/src/90_split_store.py new file mode 100644 index 0000000..86a4d0c --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/90_split_store.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +""" +Split the combined refget store into two stores: VGP vertebrates and reference genomes. + +Reads digest_map.csv (produced by 02_build/build_digest_map.py) which has a 'group' +column for every FASTA. Collections with group='vertebrates' go to the VGP store, +everything else goes to the ref store. + +Usage: + python src/90_split_store.py --dry-run + python src/90_split_store.py +""" + +import argparse +import csv +import os +import sys +import time + +from refget.store import RefgetStore + +BRICK_ROOT = os.environ["BRICK_ROOT"] +DEFAULT_SOURCE = os.environ.get("STORE_PATH", os.path.join(BRICK_ROOT, "refget_store")) +STAGING = os.environ.get("STAGING", os.path.join(BRICK_ROOT, "refget_staging")) +DEFAULT_DIGEST_MAP = os.path.join(STAGING, "digest_map.csv") +DEFAULT_VGP_OUTPUT = os.path.join(BRICK_ROOT, "vgp_reference_store") +DEFAULT_REF_OUTPUT = os.path.join(BRICK_ROOT, "refgenome_jungle_store") + +VGP_GROUPS = {"vertebrates"} + + +def load_digest_map(digest_map_path: str) -> dict[str, set[str]]: + """Read digest_map.csv and return group -> set of digests.""" + groups: dict[str, set[str]] = {} + with open(digest_map_path) as f: + for row in csv.DictReader(f): + digest = row.get("digest", "").strip() + group = row.get("group", "unknown").strip() + if digest: + groups.setdefault(group, set()).add(digest) + return groups + + +def split_store( + source_path: str, + digest_map_path: str, + vgp_output: str, + ref_output: str, + dry_run: bool = False, +): + # Load group -> digest mapping + group_digests = load_digest_map(digest_map_path) + + vgp_digests = set() + ref_digests = set() + for group, digests in group_digests.items(): + label = "VGP" if group in VGP_GROUPS else "ref" + print(f" {group}: {len(digests)} collections ({label})") + if group in VGP_GROUPS: + vgp_digests |= digests + else: + ref_digests |= digests + + # Open source store and load all collections (metadata only) + print(f"\nOpening source store: {source_path}") + source = RefgetStore.on_disk(source_path) + source.load_all_collections() + + # Get all store digests + all_store_digests = set() + page = 0 + while True: + result = source.list_collections(page, 1000) + for c in result["results"]: + all_store_digests.add(c.digest) + if len(result["results"]) < 1000: + break + page += 1 + + vgp_in_store = vgp_digests & all_store_digests + ref_in_store = ref_digests & all_store_digests + unaccounted = 
all_store_digests - vgp_digests - ref_digests + + print(f"\nTotal in store: {len(all_store_digests)}") + print(f"VGP to import: {len(vgp_in_store)}") + print(f"Ref to import: {len(ref_in_store)}") + if unaccounted: + print(f"Unaccounted: {len(unaccounted)} (in store but not in digest_map)") + + if vgp_digests - all_store_digests: + print(f"Warning: {len(vgp_digests - all_store_digests)} VGP digests not in store", file=sys.stderr) + if ref_digests - all_store_digests: + print(f"Warning: {len(ref_digests - all_store_digests)} ref digests not in store", file=sys.stderr) + + if dry_run: + print("\n--dry-run: stopping here.") + return + + # Import VGP collections + print(f"\nCreating VGP store: {vgp_output}") + vgp_store = RefgetStore.on_disk(vgp_output) + print(f"Importing {len(vgp_in_store)} VGP collections...") + t0 = time.time() + for i, digest in enumerate(sorted(vgp_in_store), 1): + print(f" [{i}/{len(vgp_in_store)}] {digest}") + vgp_store.import_collection(source, digest) + print(f"VGP import done in {time.time() - t0:.1f}s") + + # Import ref collections + print(f"\nCreating ref store: {ref_output}") + ref_store = RefgetStore.on_disk(ref_output) + print(f"Importing {len(ref_in_store)} ref genome collections...") + t0 = time.time() + for i, digest in enumerate(sorted(ref_in_store), 1): + print(f" [{i}/{len(ref_in_store)}] {digest}") + ref_store.import_collection(source, digest) + print(f"Ref import done in {time.time() - t0:.1f}s") + + print("\nDone!") + print(f" VGP store: {vgp_output}") + print(f" Ref store: {ref_output}") + + +def main(): + parser = argparse.ArgumentParser( + description="Split combined refget store into VGP and ref genome stores." + ) + parser.add_argument("--source", default=DEFAULT_SOURCE) + parser.add_argument("--digest-map", default=DEFAULT_DIGEST_MAP) + parser.add_argument("--vgp-output", default=DEFAULT_VGP_OUTPUT) + parser.add_argument("--ref-output", default=DEFAULT_REF_OUTPUT) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args() + + split_store( + source_path=args.source, + digest_map_path=args.digest_map, + vgp_output=args.vgp_output, + ref_output=args.ref_output, + dry_run=args.dry_run, + ) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/src/90_split_store.sbatch b/data_loaders/ref-genome-analysis/src/90_split_store.sbatch new file mode 100644 index 0000000..83e19e4 --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/90_split_store.sbatch @@ -0,0 +1,16 @@ +#!/bin/bash +#SBATCH --job-name=split_store +#SBATCH --output=split_store_%j.log +#SBATCH --error=split_store_%j.log +#SBATCH --partition=standard +#SBATCH --time=4:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=1 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd /home/ns5bc/code/ref-genome-analysis +source env/on-cluster.env + +python src/90_split_store.py diff --git a/data_loaders/ref-genome-analysis/examples/test_20_genomes.py b/data_loaders/ref-genome-analysis/src/examples/test_20_genomes.py similarity index 94% rename from data_loaders/ref-genome-analysis/examples/test_20_genomes.py rename to data_loaders/ref-genome-analysis/src/examples/test_20_genomes.py index 6e98e85..3b0eda7 100644 --- a/data_loaders/ref-genome-analysis/examples/test_20_genomes.py +++ b/data_loaders/ref-genome-analysis/src/examples/test_20_genomes.py @@ -16,10 +16,11 @@ from gtars.refget import RefgetStore -BRICK_ROOT = "/project/shefflab/brickyard/datasets_downloaded/refgenomes_fasta" -INVENTORY_CSV = 
f"{BRICK_ROOT}/refgenomes_inventory.csv" -FHR_DIR = f"{BRICK_ROOT}/refget_staging/fhr_metadata" -STORE_PATH = "/scratch/ns5bc/test_refget_store_20" +BRICK_ROOT = os.environ["BRICK_ROOT"] +INVENTORY_CSV = os.environ.get("INVENTORY_CSV", f"{BRICK_ROOT}/refgenomes_inventory.csv") +STAGING = os.environ.get("STAGING", f"{BRICK_ROOT}/refget_staging") +FHR_DIR = f"{STAGING}/fhr_metadata" +STORE_PATH = os.environ.get("STORE_PATH", "/scratch/ns5bc/test_refget_store_20") def main(): diff --git a/data_loaders/riva_pangenome_analysis/update-gtars.sh b/data_loaders/riva_pangenome_analysis/update-gtars.sh index 44d22c5..2664b00 100644 --- a/data_loaders/riva_pangenome_analysis/update-gtars.sh +++ b/data_loaders/riva_pangenome_analysis/update-gtars.sh @@ -8,7 +8,7 @@ module load miniforge/24.3.0-py3.11 # Build gtars (refget module only) cd ~/code/gtars -git checkout refgetstore +git checkout dev git pull cd gtars-python rm -f ../target/wheels/gtars-*.whl From 8324650af623c238a872eaf2058b1d1d6eb4b06f Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 18 Mar 2026 09:27:22 -0400 Subject: [PATCH 23/31] major work on building stores, frontend --- README.md | 45 +- data_loaders/demo_build_store.py | 9 +- data_loaders/ref-genome-analysis/README.md | 39 ++ .../docs/missing_seqcolapi_collections.md | 22 + .../docs/pephubclient-issues.md | 47 ++ .../ref-genome-analysis/env/mutagen-setup.sh | 2 +- .../ref-genome-analysis/env/on-cluster.env | 13 + .../ref-genome-analysis/env/remote-hpc.env | 21 +- .../src/02_aliases/register_aliases.sbatch | 2 +- .../src/02_build/build_digest_map.sbatch | 2 +- .../src/05_profiling/profile_all.sbatch | 2 +- .../src/05_profiling/profile_memory.sbatch | 2 +- .../src/05_profiling/profile_newt.sbatch | 2 +- .../src/05_profiling/profile_normal.sbatch | 2 +- .../ref-genome-analysis/src/90_split_store.py | 27 +- .../src/90_split_store.sbatch | 2 +- .../src/backfill_sequence_aliases.py | 208 +++++++ .../src/backfill_sequence_aliases.sbatch | 22 + .../src/examples/test_20_genomes.py | 2 +- .../ref-genome-analysis/src/push_to_s3.sh | 56 ++ .../src/validate_split_stores.py | 538 ++++++++++++++++++ .../src/validate_split_stores.sbatch | 16 + deployment/seqcolapi-store/production.env | 2 +- deployment/seqcolapi-store/task_def.json | 2 +- deployment/store_demo/store_demo.env | 3 + deployment/store_demo_up.sh | 61 ++ frontend/src/components/APINav.jsx | 50 ++ frontend/src/components/CompareTable.jsx | 2 +- frontend/src/components/CopyableDigest.jsx | 29 + frontend/src/components/ExplorerNav.jsx | 37 ++ frontend/src/components/SequenceTable.jsx | 188 ++++++ frontend/src/components/StoreNav.jsx | 8 +- frontend/src/main.jsx | 305 ++++++---- frontend/src/pages/APICollectionView.jsx | 147 +++++ frontend/src/pages/APICollections.jsx | 99 ++++ frontend/src/pages/APICompare.jsx | 26 + frontend/src/pages/APICompliance.jsx | 26 + frontend/src/pages/APIExplorer.jsx | 115 ++++ frontend/src/pages/Explorer.jsx | 228 ++++++++ frontend/src/pages/ExplorerAliases.jsx | 172 ++++++ frontend/src/pages/ExplorerCollection.jsx | 334 +++++++++++ frontend/src/pages/ExplorerSequences.jsx | 152 +++++ frontend/src/pages/LandingPage.jsx | 231 ++++++++ frontend/src/pages/StoreAliases.jsx | 4 +- frontend/src/pages/StoreCollection.jsx | 2 +- frontend/src/pages/StoreExplorer.jsx | 34 +- frontend/src/pages/StoreOverview.jsx | 14 +- frontend/src/pages/StoreSequences.jsx | 2 +- frontend/src/services/fetchData.jsx | 53 +- frontend/src/stores/apiExplorerStore.js | 55 ++ frontend/src/stores/unifiedStore.js | 67 +++ refget/__init__.py | 2 - 
refget/agents.py | 5 + refget/backend.py | 137 ++++- refget/cli/store.py | 2 +- refget/compliance.py | 53 +- refget/middleware.py | 2 +- refget/router.py | 107 ++-- scripts/test-store-integration.sh | 11 + seqcolapi/main.py | 59 +- test_fasta/pair_swap.rgsi | 11 + test_fasta/swap_wo_coords.rgsi | 11 + tests/api/test_compliance.py | 8 +- tests/conftest.py | 2 +- tests/integration/conftest.py | 112 ++++ tests/integration/test_store_compliance.py | 102 ++++ tests/local/test_backend.py | 14 +- 67 files changed, 3870 insertions(+), 297 deletions(-) create mode 100644 data_loaders/ref-genome-analysis/docs/missing_seqcolapi_collections.md create mode 100644 data_loaders/ref-genome-analysis/docs/pephubclient-issues.md create mode 100644 data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.py create mode 100644 data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.sbatch create mode 100644 data_loaders/ref-genome-analysis/src/push_to_s3.sh create mode 100644 data_loaders/ref-genome-analysis/src/validate_split_stores.py create mode 100644 data_loaders/ref-genome-analysis/src/validate_split_stores.sbatch create mode 100644 deployment/store_demo/store_demo.env create mode 100755 deployment/store_demo_up.sh create mode 100644 frontend/src/components/APINav.jsx create mode 100644 frontend/src/components/CopyableDigest.jsx create mode 100644 frontend/src/components/ExplorerNav.jsx create mode 100644 frontend/src/components/SequenceTable.jsx create mode 100644 frontend/src/pages/APICollectionView.jsx create mode 100644 frontend/src/pages/APICollections.jsx create mode 100644 frontend/src/pages/APICompare.jsx create mode 100644 frontend/src/pages/APICompliance.jsx create mode 100644 frontend/src/pages/APIExplorer.jsx create mode 100644 frontend/src/pages/Explorer.jsx create mode 100644 frontend/src/pages/ExplorerAliases.jsx create mode 100644 frontend/src/pages/ExplorerCollection.jsx create mode 100644 frontend/src/pages/ExplorerSequences.jsx create mode 100644 frontend/src/pages/LandingPage.jsx create mode 100644 frontend/src/stores/apiExplorerStore.js create mode 100644 frontend/src/stores/unifiedStore.js create mode 100755 scripts/test-store-integration.sh create mode 100644 test_fasta/pair_swap.rgsi create mode 100644 test_fasta/swap_wo_coords.rgsi create mode 100644 tests/integration/test_store_compliance.py diff --git a/README.md b/README.md index dec9086..c595362 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,48 @@ This starts the test database, runs tests, and cleans up automatically. ## Development and deployment: Backend -### Easy-peasy way +### Store-backed (no database) -In a moment I'll show you how to do these steps individually, but if you're in a hurry, the easy way get a development API running for testing is to just use my very simple shell script like this (no data persistence, just loads demo data): +The store-backed seqcolapi uses a RefgetStore (local files) instead of PostgreSQL. This is the simplest way to run the API: + +#### Quick start + +```console +bash deployment/store_demo_up.sh +``` + +This will: +- Build a local RefgetStore from test FASTA files +- Run the store-backed seqcolapi with uvicorn +- Block the terminal until you press Ctrl+C, which cleans up + +No Docker or database required. + +#### Step-by-step + +1. Build a store from FASTA files: + +```console +python data_loaders/demo_build_store.py test_fasta /tmp/refget_demo_store +``` + +2. 
Start the store-backed API: + +```console +REFGET_STORE_PATH=/tmp/refget_demo_store uvicorn seqcolapi.main:store_app --reload --port 8100 +``` + +#### Remote store + +To run against a remote (S3) store: + +```console +REFGET_STORE_URL=https://example.com/store uvicorn seqcolapi.main:store_app --port 8100 +``` + +### DB-backed (PostgreSQL) + +If you need a database-backed instance (e.g., for mutable data, advanced queries), use the DB-backed workflow. In a moment I'll show you how to do these steps individually, but if you're in a hurry, the easy way to get a development API running for testing is to just use my very simple shell script like this (no data persistence, just loads demo data): ```console bash deployment/demo_up.sh @@ -58,7 +97,7 @@ This will: - load up the demo data - block the terminal until you press Ctrl+C, which will shut down all services. -### Step-by-step process +### Step-by-step process (DB-backed) Alternatively, if you want to run each step separately to see what's really going on, start here. diff --git a/data_loaders/demo_build_store.py b/data_loaders/demo_build_store.py index 39ae6c0..4fa1669 100644 --- a/data_loaders/demo_build_store.py +++ b/data_loaders/demo_build_store.py @@ -38,7 +38,14 @@ def main(): store = RefgetStore.on_disk(store_path) for fasta in fasta_files: - store.add_sequence_collection_from_fasta(fasta) + result = store.add_sequence_collection_from_fasta(fasta) + # Register the filename (without extension) as a collection alias + basename = os.path.basename(fasta) + name = basename.split(".")[0] # strip .fa, .fasta, .fa.gz, etc. + meta = result[0] if isinstance(result, tuple) else result + if meta: + store.add_collection_alias("fasta_filename", name, meta.digest) + print(f" {name} → {meta.digest}") print(f"Done. Store at: {store_path}") print(f"Stats: {store.stats()}") diff --git a/data_loaders/ref-genome-analysis/README.md b/data_loaders/ref-genome-analysis/README.md index 1ccf999..a93b618 100644 --- a/data_loaders/ref-genome-analysis/README.md +++ b/data_loaders/ref-genome-analysis/README.md @@ -27,6 +27,9 @@ inventory --> build --> aliases --> fhr --> verify | **verify** | `src/04_verify/` | Automated pass/fail checks against the store | | **profiling** | `src/05_profiling/` | Memory and timing benchmarks | | **split** | `src/90_split_store.py` | Split combined store into VGP and reference genome stores | +| **backfill** | `src/backfill_sequence_aliases.py` | Re-register aliases into split stores from NCBI alias table | +| **validate** | `src/validate_split_stores.py` | Validate split stores (counts, aliases, FHR, sequences, cross-store) | +| **push** | `src/push_to_s3.sh` | Push split stores to S3 (`s3://refgenie/`) | | **examples** | `src/examples/` | End-to-end test scripts (e.g., load 20 genomes with FHR) | ## Environment variables @@ -59,4 +62,40 @@ python src/03_fhr/load_fhr_metadata.py --store-path $STORE_PATH --fhr-dir $STAGI # 4. Verify python src/04_verify/verify_refgetstore.py + +# 5. Split into VGP + ref stores +sbatch src/90_split_store.sbatch + +# 6. Backfill aliases into split stores +python src/backfill_sequence_aliases.py --target $BRICK_ROOT/vgp_reference_store +python src/backfill_sequence_aliases.py --target $BRICK_ROOT/refgenome_jungle_store + +# 7. Validate split stores +sbatch src/validate_split_stores.sbatch + +# 8. Push to S3 (requires GPG agent forwarding: ssh riva1_gpg) +bash src/push_to_s3.sh both +``` + +## S3 deployment + +Requires GPG agent forwarding for `pass` credentials (see `ssh riva1_gpg` in SSH config). 
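+The `riva1_gpg` host entry is assumed to forward your local GPG agent's extra socket to the remote agent socket, so `pass` can decrypt the AWS credentials on the cluster. A minimal sketch of such an entry (hostname, UID, and socket paths are placeholders; find the local path with `gpgconf --list-dir agent-extra-socket`):
+
+```
+Host riva1_gpg
+    HostName <cluster login node>
+    RemoteForward /run/user/<remote-uid>/gnupg/S.gpg-agent <local agent-extra-socket>
+```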
+ +```bash +ssh riva1_gpg +cd code/ref-genome-analysis +source env/on-cluster.env +bash src/push_to_s3.sh vgp # or: ref, both, "vgp --dry-run" +``` + +Stores are pushed to `s3://refgenie/refget-store/vgp` and `s3://refgenie/refget-store/jungle`. + +To load from S3: + +```python +from refget.store import RefgetStore +store = RefgetStore.open_remote( + "~/.refget/vgp_cache", + "https://refgenie.s3.us-east-1.amazonaws.com/refget-store/vgp" +) ``` diff --git a/data_loaders/ref-genome-analysis/docs/missing_seqcolapi_collections.md b/data_loaders/ref-genome-analysis/docs/missing_seqcolapi_collections.md new file mode 100644 index 0000000..3ba8d34 --- /dev/null +++ b/data_loaders/ref-genome-analysis/docs/missing_seqcolapi_collections.md @@ -0,0 +1,22 @@ +# Missing seqcolapi collections + +8 collections hosted on seqcolapi.databio.org are not in any RefgetStore. +These were loaded into the PostgreSQL-backed seqcolapi directly from +`fasta/pangenome_reference/` FASTAs that weren't included in the combined store build. + +## TODO + +Load these into the jungle store. 7 of 8 are confirmed in `$BRICK_ROOT/fasta/pangenome_reference/`: + +| Digest | Seqs | FASTA | +|---|---|---| +| `2WhejNO718T5jvB4DVTAz-A_JF03iIkz` | 25 | `GCA_009914755.4_CHM13_T2T_v2.0_genomic.fna.gz` | +| `6DfkalgYxFZiYAKpJf19dbpnS-dGzi4m` | 24 | `chm13.draft_v1.1.fasta.gz` | +| `Hve5dblWYLxu1p9Cp930NB8twHGCsf6X` | 640 | `GCA_000001405.28_GRCh38.p13_genomic.fa.gz` | +| `VDUOdAUYpXHUhvU-MNmOTgYQAl67yRMs` | 445 | `Homo_sapiens.GRCh38.dna.alt.fa.gz` | +| `WwIG41XDzO0BTmEpzT7nPXv6Dfx7h4ju` | 1 | `CM000663.2.fasta.gz` | +| `awlJ5Q7EPDVlwXWH8LPN93oJ5jY2uajW` | 24 | `T2T-CHM13v2.0.unmasked.fa.gz` | +| `qJ79liNTAD-LShR3j_2xntOEt-eC3vhM` | 639 | `Homo_sapiens.GRCh38.dna.toplevel.fa.gz` | +| `gHcfbUVnFzHv3QSqz2sSqVHdUQbDO8N5` | 3366 | Not in pangenome_reference. Likely `GRCh38_full_analysis_set_plus_decoy_hla.fa.gz` from `fasta/jungle/homo_sapiens/` | + +These are needed for seqcol compliance testing since they're currently served by the API. diff --git a/data_loaders/ref-genome-analysis/docs/pephubclient-issues.md b/data_loaders/ref-genome-analysis/docs/pephubclient-issues.md new file mode 100644 index 0000000..bdeb978 --- /dev/null +++ b/data_loaders/ref-genome-analysis/docs/pephubclient-issues.md @@ -0,0 +1,47 @@ +# PEPhub Client: Issues Encountered + +## 1. `--force` doesn't update samples on existing projects + +**Problem:** `phc push --force` and `phc.upload(force=True)` return success (202) but silently fail to update the sample table when the project already exists. The config/metadata may update, but samples remain unchanged. + +**Workaround:** Delete the project first, then push fresh: + +```python +import requests +from pephubclient import PEPHubClient + +phc = PEPHubClient() +jwt = phc._PEPHubClient__jwt_data +headers = {"Authorization": f"Bearer {jwt}"} + +requests.delete( + "https://pephub-api.databio.org/api/v1/projects/NAMESPACE/PROJECT", + params={"tag": "TAG"}, + headers=headers, +) +``` + +Then push normally with `phc push`. + +## 2. Bare CSV push fails with 400 + +**Problem:** The CLI help says `CFG` accepts "Project config file (YAML) or sample table (CSV/TSV)", but pushing a bare CSV fails with `Unexpected Response Error. 400`. + +**Workaround:** Always push a YAML config that references the CSV: + +```yaml +# project_config.yaml +pep_version: "2.1.0" +sample_table: samples.csv +name: my_project +``` + +```bash +phc push --namespace NS --name NAME --tag TAG project_config.yaml +``` + +## 3. 
`phc.upload()` with peppy Project reports success but uploads empty samples + +**Problem:** Loading a project with `phc.load_project()`, modifying `sample_table` in-place, then calling `phc.upload()` reports success but the server receives no samples. The `project.to_dict()` output is correct (verified locally), so the issue is server-side. + +**Workaround:** Write the modified sample table to a CSV, create a YAML config referencing it, and use `phc push` with the YAML. diff --git a/data_loaders/ref-genome-analysis/env/mutagen-setup.sh b/data_loaders/ref-genome-analysis/env/mutagen-setup.sh index e17922f..8a147eb 100755 --- a/data_loaders/ref-genome-analysis/env/mutagen-setup.sh +++ b/data_loaders/ref-genome-analysis/env/mutagen-setup.sh @@ -40,7 +40,7 @@ if [ -n "$DEPLOY_HOST" ] && [ -n "$DEPLOY_DIR" ]; then fi # refget — local source synced to remote deploy dir - REFGET_LOCAL="$HOME/Dropbox/workspaces/intervals/repos/refget" + REFGET_LOCAL="$HOME/Dropbox/workspaces/refgenie/repos/refget" if [ -d "$REFGET_LOCAL" ]; then mutagen sync create \ --name="deploy-refget" \ diff --git a/data_loaders/ref-genome-analysis/env/on-cluster.env b/data_loaders/ref-genome-analysis/env/on-cluster.env index 167b5e4..f00236d 100644 --- a/data_loaders/ref-genome-analysis/env/on-cluster.env +++ b/data_loaders/ref-genome-analysis/env/on-cluster.env @@ -4,3 +4,16 @@ export BRICK_ROOT=$BRICKYARD/datasets_downloaded/refgenomes_fasta export STORE_PATH=$BRICK_ROOT/refget_store export STAGING=$BRICK_ROOT/refget_staging export INVENTORY_CSV=$BRICK_ROOT/refgenomes_inventory.csv +export S3_BUCKET=s3://refgenie + +# vgp store +export VGP_STORE_PATH=$BRICK_ROOT/refget-store/vgp +export VGP_S3_PATH=$S3_BUCKET/refget-store/vgp + +# jungle store +export REF_STORE_PATH=$BRICK_ROOT/refget-store/jungle +export REF_S3_PATH=$S3_BUCKET/refget-store/jungle + +# pangenome store +export PANGENOME_STORE_PATH=$BRICK_ROOT/refget-store/pangenome +export PANGENOME_S3_PATH=$S3_BUCKET/refget-store/pangenome diff --git a/data_loaders/ref-genome-analysis/env/remote-hpc.env b/data_loaders/ref-genome-analysis/env/remote-hpc.env index 6504138..3ec463f 100644 --- a/data_loaders/ref-genome-analysis/env/remote-hpc.env +++ b/data_loaders/ref-genome-analysis/env/remote-hpc.env @@ -4,6 +4,21 @@ export BRICK_ROOT=$BRICKYARD/datasets_downloaded/refgenomes_fasta export STORE_PATH=$BRICK_ROOT/refget_store export STAGING=$BRICK_ROOT/refget_staging export INVENTORY_CSV=$BRICK_ROOT/refgenomes_inventory.csv -export SYNC_REMOTE=ns5bc@login.hpc.virginia.edu:/home/ns5bc/code/ref-genome-analysis -export DEPLOY_HOST=ns5bc@login.hpc.virginia.edu -export DEPLOY_DIR=/home/ns5bc/deploy +export S3_BUCKET=s3://refgenie + +# vgp store +export VGP_STORE_PATH=$BRICK_ROOT/refget-store/vgp +export VGP_S3_PATH=$S3_BUCKET/refget-store/vgp + +# jungle store +export REF_STORE_PATH=$BRICK_ROOT/refget-store/jungle +export REF_S3_PATH=$S3_BUCKET/refget-store/jungle + +# pangenome store +export PANGENOME_STORE_PATH=$BRICK_ROOT/refget-store/pangenome +export PANGENOME_S3_PATH=$S3_BUCKET/refget-store/pangenome + +# remote deployment +export SYNC_REMOTE=riva:~/code/ref-genome-analysis +export DEPLOY_HOST=riva +export DEPLOY_DIR=~/deploy diff --git a/data_loaders/ref-genome-analysis/src/02_aliases/register_aliases.sbatch b/data_loaders/ref-genome-analysis/src/02_aliases/register_aliases.sbatch index dc50581..6b831e2 100644 --- a/data_loaders/ref-genome-analysis/src/02_aliases/register_aliases.sbatch +++ b/data_loaders/ref-genome-analysis/src/02_aliases/register_aliases.sbatch @@ -10,7 
+10,7 @@ module load miniforge/24.3.0-py3.11 -cd /home/ns5bc/code/ref-genome-analysis +cd $HOME/code/ref-genome-analysis source env/on-cluster.env python src/02_aliases/register_ncbi_aliases.py diff --git a/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch b/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch index d4c042b..523f1f7 100644 --- a/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch +++ b/data_loaders/ref-genome-analysis/src/02_build/build_digest_map.sbatch @@ -10,7 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /home/ns5bc/code/ref-genome-analysis +cd $HOME/code/ref-genome-analysis source env/on-cluster.env python src/02_build/build_digest_map.py diff --git a/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.sbatch b/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.sbatch index d96a2c0..5d9e57e 100644 --- a/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_all.sbatch @@ -10,7 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /home/ns5bc/code/ref-genome-analysis +cd $HOME/code/ref-genome-analysis source env/on-cluster.env python src/05_profiling/profile_all.py diff --git a/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.sbatch b/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.sbatch index 0e70a7b..2eee915 100644 --- a/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_memory.sbatch @@ -10,7 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /home/ns5bc/code/ref-genome-analysis +cd $HOME/code/ref-genome-analysis source env/on-cluster.env python src/05_profiling/profile_memory.py 850 5 diff --git a/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.sbatch b/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.sbatch index ee37f56..3db0284 100644 --- a/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_newt.sbatch @@ -10,7 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /home/ns5bc/code/ref-genome-analysis +cd $HOME/code/ref-genome-analysis source env/on-cluster.env python src/05_profiling/profile_newt.py diff --git a/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.sbatch b/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.sbatch index dd7eb04..c5ba2ca 100644 --- a/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.sbatch +++ b/data_loaders/ref-genome-analysis/src/05_profiling/profile_normal.sbatch @@ -10,7 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /home/ns5bc/code/ref-genome-analysis +cd $HOME/code/ref-genome-analysis source env/on-cluster.env python src/05_profiling/profile_normal.py diff --git a/data_loaders/ref-genome-analysis/src/90_split_store.py b/data_loaders/ref-genome-analysis/src/90_split_store.py index 86a4d0c..7f87cdd 100644 --- a/data_loaders/ref-genome-analysis/src/90_split_store.py +++ b/data_loaders/ref-genome-analysis/src/90_split_store.py @@ -29,6 +29,17 @@ VGP_GROUPS = {"vertebrates"} +def _paginate(store): + """Yield pages of collection results from a store.""" + page = 0 + while True: + result = store.list_collections(page, 1000) + yield result["results"] + if len(result["results"]) < 1000: + break + page += 1 + + def load_digest_map(digest_map_path: str) -> dict[str, set[str]]: """Read digest_map.csv and return group -> set 
of digests.""" groups: dict[str, set[str]] = {} @@ -99,20 +110,24 @@ def split_store( # Import VGP collections print(f"\nCreating VGP store: {vgp_output}") vgp_store = RefgetStore.on_disk(vgp_output) - print(f"Importing {len(vgp_in_store)} VGP collections...") + existing_vgp = {c.digest for p in _paginate(vgp_store) for c in p} + to_import_vgp = sorted(vgp_in_store - existing_vgp) + print(f"VGP: {len(vgp_in_store)} total, {len(existing_vgp)} already imported, {len(to_import_vgp)} remaining") t0 = time.time() - for i, digest in enumerate(sorted(vgp_in_store), 1): - print(f" [{i}/{len(vgp_in_store)}] {digest}") + for i, digest in enumerate(to_import_vgp, 1): + print(f" [{i}/{len(to_import_vgp)}] {digest}") vgp_store.import_collection(source, digest) print(f"VGP import done in {time.time() - t0:.1f}s") # Import ref collections print(f"\nCreating ref store: {ref_output}") ref_store = RefgetStore.on_disk(ref_output) - print(f"Importing {len(ref_in_store)} ref genome collections...") + existing_ref = {c.digest for p in _paginate(ref_store) for c in p} + to_import_ref = sorted(ref_in_store - existing_ref) + print(f"Ref: {len(ref_in_store)} total, {len(existing_ref)} already imported, {len(to_import_ref)} remaining") t0 = time.time() - for i, digest in enumerate(sorted(ref_in_store), 1): - print(f" [{i}/{len(ref_in_store)}] {digest}") + for i, digest in enumerate(to_import_ref, 1): + print(f" [{i}/{len(to_import_ref)}] {digest}") ref_store.import_collection(source, digest) print(f"Ref import done in {time.time() - t0:.1f}s") diff --git a/data_loaders/ref-genome-analysis/src/90_split_store.sbatch b/data_loaders/ref-genome-analysis/src/90_split_store.sbatch index 83e19e4..eb83e26 100644 --- a/data_loaders/ref-genome-analysis/src/90_split_store.sbatch +++ b/data_loaders/ref-genome-analysis/src/90_split_store.sbatch @@ -10,7 +10,7 @@ module load miniforge/24.3.0-py3.11 -cd /home/ns5bc/code/ref-genome-analysis +cd $HOME/code/ref-genome-analysis source env/on-cluster.env python src/90_split_store.py diff --git a/data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.py b/data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.py new file mode 100644 index 0000000..0b95b75 --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +""" +Backfill sequence and collection aliases into a split store. + +Matches accessions to target store collections via digest_map (path join), +then registers aliases from the NCBI alias table by matching sequence names +in level2 data. Does NOT load any FASTAs — read-only against the target store +except for alias registration. 
+ +Usage: + source env/on-cluster.env + python src/backfill_sequence_aliases.py --target $VGP_STORE_PATH + python src/backfill_sequence_aliases.py --target $REF_STORE_PATH + python src/backfill_sequence_aliases.py --target $VGP_STORE_PATH --dry-run +""" + +import argparse +import csv +import os +import tempfile +import time +from collections import defaultdict + +BRICK_ROOT = os.environ["BRICK_ROOT"] +STAGING = os.environ.get("STAGING", os.path.join(BRICK_ROOT, "refget_staging")) +INVENTORY_CSV = os.environ.get("INVENTORY_CSV", os.path.join(BRICK_ROOT, "refgenomes_inventory.csv")) +ALIAS_TABLE_CSV = os.path.join(STAGING, "ncbi_alias_table.csv") +DIGEST_MAP_CSV = os.path.join(STAGING, "digest_map.csv") + + +def get_all_collection_digests(store): + digests = set() + page = 0 + while True: + result = store.list_collections(page, 1000) + for c in result["results"]: + digests.add(c.digest) + if len(result["results"]) < 1000: + break + page += 1 + return digests + + +def main(): + parser = argparse.ArgumentParser( + description="Backfill aliases into a split store from NCBI alias table." + ) + parser.add_argument("--target", required=True, help="Target RefgetStore path") + parser.add_argument("--alias-table", default=ALIAS_TABLE_CSV) + parser.add_argument("--inventory", default=INVENTORY_CSV) + parser.add_argument("--digest-map", default=DIGEST_MAP_CSV) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args() + + from refget.store import RefgetStore + + print(f"Target store: {args.target}") + print(f"Alias table: {args.alias_table}") + print(f"Inventory: {args.inventory}") + print(f"Digest map: {args.digest_map}") + print(f"Dry run: {args.dry_run}") + print() + + # Open target store (read-only for collection lookup, then alias writes) + store = RefgetStore.open_local(args.target) + target_digests = get_all_collection_digests(store) + print(f"Target has {len(target_digests)} collections") + + # Build path -> accession from inventory + path_to_accession = {} + with open(args.inventory, newline="") as f: + for row in csv.DictReader(f): + acc = row.get("accession", "").strip() + path = row.get("path", "").strip() + if acc and path: + path_to_accession[path] = acc + + # Build digest -> accession via digest_map (join on path) + digest_to_accession = {} + with open(args.digest_map, newline="") as f: + for row in csv.DictReader(f): + digest = row.get("digest", "").strip() + path = row.get("path", "").strip() + if digest and path and path in path_to_accession: + digest_to_accession[digest] = path_to_accession[path] + + # Filter to accessions whose digest is in the target store + target_acc_to_digest = {} + for digest in target_digests: + acc = digest_to_accession.get(digest) + if acc: + target_acc_to_digest[acc] = digest + + print(f"Accessions in target with alias data: {len(target_acc_to_digest)}") + + # Read alias table, filtered to target accessions + acc_to_rows = defaultdict(list) + with open(args.alias_table, newline="") as f: + for row in csv.DictReader(f): + acc = row.get("accession", "").strip() + if acc and acc in target_acc_to_digest: + acc_to_rows[acc].append(row) + + common = sorted(target_acc_to_digest.keys() & acc_to_rows.keys()) + print(f"Accessions with alias table entries: {len(common)}") + + # Re-open as on_disk for writing aliases + store = RefgetStore.on_disk(args.target) + store.set_quiet(True) + + seq_aliases = {"refseq": [], "insdc": [], "ucsc": []} + coll_aliases = {"refseq": [], "insdc": []} + n_matched = 0 + n_unmatched = 0 + t_start = time.time() + 
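+    # For each accession shared by the target store and the alias table:
+    # collect collection-level aliases from the assembly accessions, then map
+    # each alias row to a sequence digest by matching sequence_name (falling
+    # back to refseq_accn, genbank_accn, ucsc_name) against the collection's
+    # level2 names, checking sequence_length when it is present.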
+ for i, accession in enumerate(common, 1): + coll_digest = target_acc_to_digest[accession] + alias_rows = acc_to_rows[accession] + + print(f"[{i}/{len(common)}] {accession} ({len(alias_rows)} seqs)...", end=" ", flush=True) + + # Collection-level aliases + first_row = alias_rows[0] + genbank_acc = first_row.get("genbank_assembly_accn", "").strip() + refseq_acc = first_row.get("refseq_assembly_accn", "").strip() + if refseq_acc: + coll_aliases["refseq"].append((refseq_acc, coll_digest)) + if genbank_acc: + coll_aliases["insdc"].append((genbank_acc, coll_digest)) + + # Sequence-level aliases via name matching in level2 + level2 = store.get_collection_level2(coll_digest) + names = level2.get("names", []) + lengths = level2.get("lengths", []) + sequences = level2.get("sequences", []) + name_to_info = {n: (s, int(l)) for n, l, s in zip(names, lengths, sequences)} + + matched_this = 0 + for row in alias_rows: + seq_name = row.get("sequence_name", "").strip() + seq_length_str = row.get("sequence_length", "").strip() + refseq_accn = row.get("refseq_accn", "").strip() + genbank_accn = row.get("genbank_accn", "").strip() + ucsc_name = row.get("ucsc_name", "").strip() + seq_length = int(seq_length_str) if seq_length_str else None + + seq_digest = None + for candidate in [seq_name, refseq_accn, genbank_accn, ucsc_name]: + if candidate and candidate in name_to_info: + sd, sl = name_to_info[candidate] + if seq_length is None or sl == seq_length: + seq_digest = sd + break + + if seq_digest is None: + n_unmatched += 1 + continue + + matched_this += 1 + if refseq_accn: + seq_aliases["refseq"].append((refseq_accn, seq_digest)) + if genbank_accn: + seq_aliases["insdc"].append((genbank_accn, seq_digest)) + if ucsc_name: + seq_aliases["ucsc"].append((ucsc_name, seq_digest)) + + n_matched += matched_this + print(f"{matched_this}/{len(alias_rows)} matched") + + elapsed = time.time() - t_start + n_seq = sum(len(v) for v in seq_aliases.values()) + n_coll = sum(len(v) for v in coll_aliases.values()) + print(f"\nMatching done in {elapsed:.1f}s") + print(f" Matched: {n_matched}, unmatched: {n_unmatched}") + print(f" Seq aliases: {n_seq}, coll aliases: {n_coll}") + + if args.dry_run: + print("\n[DRY RUN] Skipping registration.") + return + + print("\nRegistering aliases...") + with tempfile.TemporaryDirectory() as tmpdir: + for ns, pairs in seq_aliases.items(): + if not pairs: + continue + tsv = os.path.join(tmpdir, f"seq_{ns}.tsv") + with open(tsv, "w") as f: + for alias, digest in pairs: + f.write(f"{alias}\t{digest}\n") + n = store.load_sequence_aliases(ns, tsv) + print(f" sequences/{ns}: {n} aliases loaded") + + for ns, pairs in coll_aliases.items(): + if not pairs: + continue + tsv = os.path.join(tmpdir, f"coll_{ns}.tsv") + with open(tsv, "w") as f: + for alias, digest in pairs: + f.write(f"{alias}\t{digest}\n") + n = store.load_collection_aliases(ns, tsv) + print(f" collections/{ns}: {n} aliases loaded") + + print(f"\nDone! 
Store stats: {store.stats()}") + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.sbatch b/data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.sbatch new file mode 100644 index 0000000..c6c976d --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/backfill_sequence_aliases.sbatch @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH --job-name=backfill_aliases +#SBATCH --output=backfill_aliases_%j.log +#SBATCH --error=backfill_aliases_%j.log +#SBATCH --partition=standard +#SBATCH --time=2:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=1 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd $HOME/code/ref-genome-analysis +source env/on-cluster.env + +# Backfill VGP store +python src/backfill_sequence_aliases.py --target $BRICK_ROOT/vgp_reference_store + +# Backfill ref store (if it exists) +if [ -d "$BRICK_ROOT/refgenome_jungle_store" ]; then + python src/backfill_sequence_aliases.py --target $BRICK_ROOT/refgenome_jungle_store +fi diff --git a/data_loaders/ref-genome-analysis/src/examples/test_20_genomes.py b/data_loaders/ref-genome-analysis/src/examples/test_20_genomes.py index 3b0eda7..3937965 100644 --- a/data_loaders/ref-genome-analysis/src/examples/test_20_genomes.py +++ b/data_loaders/ref-genome-analysis/src/examples/test_20_genomes.py @@ -20,7 +20,7 @@ INVENTORY_CSV = os.environ.get("INVENTORY_CSV", f"{BRICK_ROOT}/refgenomes_inventory.csv") STAGING = os.environ.get("STAGING", f"{BRICK_ROOT}/refget_staging") FHR_DIR = f"{STAGING}/fhr_metadata" -STORE_PATH = os.environ.get("STORE_PATH", "/scratch/ns5bc/test_refget_store_20") +STORE_PATH = os.environ.get("STORE_PATH", "/scratch/$USER/test_refget_store_20") def main(): diff --git a/data_loaders/ref-genome-analysis/src/push_to_s3.sh b/data_loaders/ref-genome-analysis/src/push_to_s3.sh new file mode 100644 index 0000000..920ccff --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/push_to_s3.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Push refget stores to S3 via Rivanna. +# +# Clears stale GPG socket, connects with agent forwarding, decrypts +# credentials, and runs aws s3 sync. +# +# Usage (from laptop): +# source env/remote-hpc.env +# bash src/push_to_s3.sh vgp +# bash src/push_to_s3.sh ref +# bash src/push_to_s3.sh pangenome +# bash src/push_to_s3.sh all +# bash src/push_to_s3.sh vgp --dry-run + +set -euo pipefail + +STORE=${1:-all} +DRYRUN_FLAG="${2:-}" + +: "${VGP_STORE_PATH:?Set VGP_STORE_PATH in env}" +: "${REF_STORE_PATH:?Set REF_STORE_PATH in env}" +: "${PANGENOME_STORE_PATH:?Set PANGENOME_STORE_PATH in env}" +: "${VGP_S3_PATH:?Set VGP_S3_PATH in env}" +: "${REF_S3_PATH:?Set REF_S3_PATH in env}" +: "${PANGENOME_S3_PATH:?Set PANGENOME_S3_PATH in env}" + +# Clear stale GPG socket, then connect with forwarding +ssh riva1 "rm -f /run/user/\$(id -u)/gnupg/S.gpg-agent" + +ssh riva1_gpg " + source /etc/profile.d/modules.sh + module load awscli + + export AWS_ACCESS_KEY_ID=\$(pass databio/refgenie/s3_access_key_id) + export AWS_SECRET_ACCESS_KEY=\$(pass databio/refgenie/s3_secret_access_key) + + if [ \"$STORE\" = \"vgp\" ] || [ \"$STORE\" = \"all\" ]; then + echo 'Pushing VGP store to $VGP_S3_PATH ...' + aws s3 sync '$VGP_STORE_PATH' '$VGP_S3_PATH' $DRYRUN_FLAG + echo 'VGP push complete.' + fi + + if [ \"$STORE\" = \"ref\" ] || [ \"$STORE\" = \"all\" ]; then + echo 'Pushing ref store to $REF_S3_PATH ...' + aws s3 sync '$REF_STORE_PATH' '$REF_S3_PATH' $DRYRUN_FLAG + echo 'Ref push complete.' 
+ fi + + if [ \"$STORE\" = \"pangenome\" ] || [ \"$STORE\" = \"all\" ]; then + echo 'Pushing pangenome store to $PANGENOME_S3_PATH ...' + aws s3 sync '$PANGENOME_STORE_PATH' '$PANGENOME_S3_PATH' $DRYRUN_FLAG + echo 'Pangenome push complete.' + fi + + echo 'Done!' +" diff --git a/data_loaders/ref-genome-analysis/src/validate_split_stores.py b/data_loaders/ref-genome-analysis/src/validate_split_stores.py new file mode 100644 index 0000000..085d7f4 --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/validate_split_stores.py @@ -0,0 +1,538 @@ +#!/usr/bin/env python3 +""" +Validate the VGP and ref genome stores produced by 90_split_store.py. + +Checks that the split stores are complete, internally consistent, and +that every collection from the source store ended up in exactly one +output store. + +Usage: + source env/on-cluster.env + python src/validate_split_stores.py # validate both + python src/validate_split_stores.py --store vgp # VGP only + python src/validate_split_stores.py --store ref # ref only + python src/validate_split_stores.py --thorough # deep checks (slow) +""" + +import argparse +import csv +import json +import os +import sys +import tempfile +import time + +BRICK_ROOT = os.environ["BRICK_ROOT"] +STAGING = os.environ.get("STAGING", os.path.join(BRICK_ROOT, "refget_staging")) +SOURCE_PATH = os.environ.get("STORE_PATH", os.path.join(BRICK_ROOT, "refget_store")) +VGP_PATH = os.path.join(BRICK_ROOT, "vgp_reference_store") +REF_PATH = os.path.join(BRICK_ROOT, "refgenome_jungle_store") +DIGEST_MAP = os.path.join(STAGING, "digest_map.csv") + +VGP_GROUPS = {"vertebrates"} + +results = [] + + +def check(name, passed, detail=""): + status = "PASS" if passed else "FAIL" + results.append({"name": name, "status": status, "detail": detail}) + print(f" [{'PASS' if passed else 'FAIL'}] {name}" + (f" -- {detail}" if detail else "")) + + +def load_digest_map(path): + """Return (group->set_of_digests, all_rows).""" + groups = {} + rows = [] + with open(path) as f: + for row in csv.DictReader(f): + rows.append(row) + digest = row.get("digest", "").strip() + group = row.get("group", "unknown").strip() + if digest: + groups.setdefault(group, set()).add(digest) + return groups, rows + + +def get_all_collection_digests(store): + """Paginate through list_collections to get all digests.""" + digests = set() + page = 0 + while True: + result = store.list_collections(page, 1000) + for c in result["results"]: + digests.add(c.digest) + if len(result["results"]) < 1000: + break + page += 1 + return digests + + +# ── Test 1: Store opens and basic stats ──────────────────────────────── + + +def test_store_opens(store_path, label): + """Verify store opens and has non-zero collections/sequences.""" + from refget.store import RefgetStore + + print(f"\n── {label}: Store opens and stats ──") + + try: + store = RefgetStore.open_local(store_path) + check(f"{label}_opens", True, f"path={store_path}") + except Exception as e: + check(f"{label}_opens", False, f"error={e}") + return None + + try: + stats = store.stats() + check(f"{label}_stats", True, f"stats={stats}") + except Exception as e: + check(f"{label}_stats", False, f"error={e}") + + digests = get_all_collection_digests(store) + check(f"{label}_has_collections", len(digests) > 0, f"n={len(digests)}") + + try: + seqs = store.list_sequences() + n_seqs = len(seqs) + check(f"{label}_has_sequences", n_seqs > 0, f"n={n_seqs}") + except Exception as e: + check(f"{label}_has_sequences", False, f"error={e}") + + return store + + +# ── Test 2: Collection counts 
match digest map ──────────────────────── + + +def test_collection_counts(store, label, expected_digests): + """Verify the store has exactly the expected collections.""" + print(f"\n── {label}: Collection count vs digest map ──") + + store_digests = get_all_collection_digests(store) + + check( + f"{label}_count_match", + len(store_digests) == len(expected_digests), + f"store={len(store_digests)}, expected={len(expected_digests)}", + ) + + missing = expected_digests - store_digests + extra = store_digests - expected_digests + + check( + f"{label}_no_missing", + len(missing) == 0, + f"missing={len(missing)}" + (f", sample={list(missing)[:3]}" if missing else ""), + ) + check( + f"{label}_no_extra", + len(extra) == 0, + f"extra={len(extra)}" + (f", sample={list(extra)[:3]}" if extra else ""), + ) + + return store_digests + + +# ── Test 3: Level2 integrity for all collections ────────────────────── + + +def test_level2_integrity(store, label, digests, limit=None): + """Verify level2 arrays are aligned and valid for every collection.""" + print(f"\n── {label}: Level2 data integrity ──") + + to_check = sorted(digests) + if limit: + to_check = to_check[:limit] + + ok_count = 0 + fail_count = 0 + fail_details = [] + + for digest in to_check: + try: + level2 = store.get_collection_level2(digest) + names = level2.get("names", []) + lengths = level2.get("lengths", []) + sequences = level2.get("sequences", []) + + arrays_aligned = len(names) == len(lengths) == len(sequences) and len(names) > 0 + lengths_positive = all(l > 0 for l in lengths) if lengths else False + seqs_nonempty = all(s and len(s) > 0 for s in sequences) if sequences else False + + if arrays_aligned and lengths_positive and seqs_nonempty: + ok_count += 1 + else: + fail_count += 1 + fail_details.append( + f"{digest[:16]}: names={len(names)} lengths={len(lengths)} " + f"seqs={len(sequences)} aligned={arrays_aligned} " + f"lengths_ok={lengths_positive} seqs_ok={seqs_nonempty}" + ) + except Exception as e: + fail_count += 1 + fail_details.append(f"{digest[:16]}: ERROR {e}") + + total = ok_count + fail_count + check( + f"{label}_level2_all_valid", + fail_count == 0, + f"ok={ok_count}/{total}" + (f", failures=[{'; '.join(fail_details[:5])}]" if fail_details else ""), + ) + + +# ── Test 4: Aliases were imported ───────────────────────────────────── + + +def test_aliases(store, label, digests): + """Check that alias namespaces exist and at least some collections have aliases.""" + print(f"\n── {label}: Alias integrity ──") + + # Check namespaces exist + try: + coll_ns = store.list_collection_alias_namespaces() + check(f"{label}_collection_alias_namespaces", len(coll_ns) > 0, f"namespaces={coll_ns}") + except Exception as e: + check(f"{label}_collection_alias_namespaces", False, f"error={e}") + coll_ns = [] + + try: + seq_ns = store.list_sequence_alias_namespaces() + check(f"{label}_sequence_alias_namespaces", len(seq_ns) > 0, f"namespaces={seq_ns}") + except Exception as e: + check(f"{label}_sequence_alias_namespaces", False, f"error={e}") + seq_ns = [] + + # Sample: check that some collections have aliases + sample = sorted(digests)[:20] + with_aliases = 0 + for digest in sample: + try: + aliases = store.get_aliases_for_collection(digest) + if aliases and len(aliases) > 0: + with_aliases += 1 + except Exception: + pass + + check( + f"{label}_collections_have_aliases", + with_aliases > 0, + f"with_aliases={with_aliases}/{len(sample)} (sampled)", + ) + + # For each namespace, count total aliases + for ns in coll_ns: + try: + aliases = 
store.list_collection_aliases(ns) + check(f"{label}_coll_alias_count_{ns}", len(aliases) > 0, f"n={len(aliases)}") + except Exception as e: + check(f"{label}_coll_alias_count_{ns}", False, f"error={e}") + + # Forward lookup: pick an alias and verify it resolves + for ns in coll_ns[:1]: # test first namespace + try: + aliases = store.list_collection_aliases(ns) + if aliases: + alias = aliases[0] + resolved = store.get_collection_by_alias(ns, alias) + check( + f"{label}_coll_alias_forward_lookup_{ns}", + resolved is not None, + f"alias={alias}, resolved={resolved.digest[:16] if resolved else None}", + ) + except Exception as e: + check(f"{label}_coll_alias_forward_lookup_{ns}", False, f"error={e}") + + # Sequence alias count proportionality check + for ns in seq_ns: + try: + aliases = store.list_sequence_aliases(ns) + n_aliases = len(aliases) if aliases else 0 + check(f"{label}_seq_alias_count_{ns}", n_aliases > 0, f"n={n_aliases}") + except Exception as e: + check(f"{label}_seq_alias_count_{ns}", False, f"error={e}") + + +# ── Test 5: FHR metadata was imported ───────────────────────────────── + + +def test_fhr_metadata(store, label, digests): + """Check that FHR metadata exists for collections.""" + print(f"\n── {label}: FHR metadata ──") + + try: + fhr_digests = store.list_fhr_metadata() + n_fhr = len(fhr_digests) + check(f"{label}_fhr_exists", n_fhr > 0, f"n_with_fhr={n_fhr}") + except Exception as e: + check(f"{label}_fhr_exists", False, f"error={e}") + return + + # Verify FHR digests are in this store + fhr_set = set(fhr_digests) + orphan_fhr = fhr_set - digests + check( + f"{label}_fhr_no_orphans", + len(orphan_fhr) == 0, + f"orphaned_fhr={len(orphan_fhr)}" + (f", sample={list(orphan_fhr)[:3]}" if orphan_fhr else ""), + ) + + # Sample: read a few FHR records + sample = list(fhr_set & digests)[:5] + readable = 0 + for digest in sample: + try: + fhr = store.get_fhr_metadata(digest) + if fhr is not None: + readable += 1 + except Exception: + pass + + check( + f"{label}_fhr_readable", + readable == len(sample), + f"readable={readable}/{len(sample)}", + ) + + +# ── Test 6: Sequence retrieval works ────────────────────────────────── + + +def test_sequence_retrieval(store, label, digests): + """Verify sequences can be retrieved for sampled collections.""" + print(f"\n── {label}: Sequence retrieval ──") + + sample = sorted(digests)[:5] + ok_count = 0 + fail_details = [] + + for coll_digest in sample: + try: + level2 = store.get_collection_level2(coll_digest) + seq_digests = level2.get("sequences", []) + lengths = level2.get("lengths", []) + if not seq_digests: + fail_details.append(f"{coll_digest[:16]}: no sequences") + continue + + # Test first sequence in collection + seq = store.get_sequence(seq_digests[0]) + if seq is not None: + ok_count += 1 + else: + fail_details.append(f"{coll_digest[:16]}: get_sequence returned None") + except Exception as e: + fail_details.append(f"{coll_digest[:16]}: {e}") + + check( + f"{label}_sequence_retrieval", + ok_count == len(sample), + f"ok={ok_count}/{len(sample)}" + (f", failures=[{'; '.join(fail_details[:3])}]" if fail_details else ""), + ) + + +# ── Test 7: No overlap between VGP and ref stores ──────────────────── + + +def test_no_overlap(vgp_store, ref_store): + """Verify no collection appears in both stores.""" + print("\n── Cross-store: No overlap ──") + + vgp_digests = get_all_collection_digests(vgp_store) + ref_digests = get_all_collection_digests(ref_store) + + overlap = vgp_digests & ref_digests + check( + "no_collection_overlap", + 
len(overlap) == 0, + f"overlap={len(overlap)}" + (f", sample={list(overlap)[:3]}" if overlap else ""), + ) + + +# ── Test 8: Full coverage — VGP + ref = source ─────────────────────── + + +def test_full_coverage(vgp_store, ref_store, source_store): + """Verify VGP + ref stores together contain all source collections.""" + print("\n── Cross-store: Full coverage ──") + + vgp_digests = get_all_collection_digests(vgp_store) + ref_digests = get_all_collection_digests(ref_store) + source_digests = get_all_collection_digests(source_store) + + combined = vgp_digests | ref_digests + missing = source_digests - combined + extra = combined - source_digests + + check( + "combined_equals_source", + len(missing) == 0 and len(extra) == 0, + f"source={len(source_digests)}, vgp={len(vgp_digests)}, ref={len(ref_digests)}, " + f"combined={len(combined)}, missing={len(missing)}, extra={len(extra)}", + ) + + +# ── Test 9: Roundtrip FASTA export ─────────────────────────────────── + + +def test_roundtrip_fasta(store, label, digests, limit=3): + """Export a few collections to FASTA and verify digest matches.""" + print(f"\n── {label}: Roundtrip FASTA export ──") + + try: + from gtars.refget import digest_fasta + except ImportError: + check(f"{label}_roundtrip", False, "gtars.refget.digest_fasta not available") + return + + sample = sorted(digests)[:limit] + ok_count = 0 + fail_details = [] + + for digest in sample: + fd, tmp_path = tempfile.mkstemp(suffix=".fa") + os.close(fd) + try: + store.export_fasta(digest, tmp_path, None, 80) + exported_sc = digest_fasta(tmp_path) + match = exported_sc.digest == digest + if match: + ok_count += 1 + else: + fail_details.append( + f"{digest[:16]}: exported={exported_sc.digest[:16]} != original" + ) + except Exception as e: + fail_details.append(f"{digest[:16]}: {e}") + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + + check( + f"{label}_roundtrip_fasta", + ok_count == len(sample), + f"ok={ok_count}/{len(sample)}" + (f", failures=[{'; '.join(fail_details)}]" if fail_details else ""), + ) + + +# ── Main ────────────────────────────────────────────────────────────── + + +def validate_store(store_path, label, expected_digests, thorough=False): + """Run all single-store validations.""" + from refget.store import RefgetStore + + store = test_store_opens(store_path, label) + if store is None: + return None + + store_digests = test_collection_counts(store, label, expected_digests) + + # Level2: check all in thorough mode, sample otherwise + limit = None if thorough else 20 + test_level2_integrity(store, label, store_digests, limit=limit) + + test_aliases(store, label, store_digests) + test_fhr_metadata(store, label, store_digests) + test_sequence_retrieval(store, label, store_digests) + + if thorough: + test_roundtrip_fasta(store, label, store_digests, limit=5) + + return store + + +def main(): + parser = argparse.ArgumentParser(description="Validate split RefgetStores") + parser.add_argument( + "--store", + choices=["vgp", "ref", "both"], + default="both", + help="Which store to validate (default: both)", + ) + parser.add_argument( + "--thorough", + action="store_true", + help="Run deep checks: all level2, roundtrip FASTA (slow)", + ) + parser.add_argument("--vgp-path", default=VGP_PATH) + parser.add_argument("--ref-path", default=REF_PATH) + parser.add_argument("--source-path", default=SOURCE_PATH) + parser.add_argument("--digest-map", default=DIGEST_MAP) + args = parser.parse_args() + + print(f"Validating split stores") + print(f" Source: {args.source_path}") + 
print(f" VGP: {args.vgp_path}") + print(f" Ref: {args.ref_path}") + print(f" Digest map: {args.digest_map}") + print(f" Thorough: {args.thorough}") + print("=" * 60) + + t_start = time.time() + + # Load digest map to compute expected sets + group_digests, dm_rows = load_digest_map(args.digest_map) + vgp_expected = set() + ref_expected = set() + for group, digests in group_digests.items(): + if group in VGP_GROUPS: + vgp_expected |= digests + else: + ref_expected |= digests + + print(f"\nDigest map: {len(dm_rows)} rows, " + f"VGP expected={len(vgp_expected)}, ref expected={len(ref_expected)}") + + vgp_store = None + ref_store = None + + if args.store in ("vgp", "both"): + vgp_store = validate_store(args.vgp_path, "vgp", vgp_expected, args.thorough) + + if args.store in ("ref", "both"): + ref_store = validate_store(args.ref_path, "ref", ref_expected, args.thorough) + + # Cross-store checks (only if both stores validated) + if vgp_store and ref_store: + test_no_overlap(vgp_store, ref_store) + + # Full coverage against source + from refget.store import RefgetStore + if RefgetStore.store_exists(args.source_path): + source_store = RefgetStore.open_local(args.source_path) + test_full_coverage(vgp_store, ref_store, source_store) + else: + check("full_coverage", False, f"source store not found: {args.source_path}") + + # Summary + elapsed = time.time() - t_start + print(f"\n{'=' * 60}") + print("VALIDATION SUMMARY") + print("=" * 60) + passed = sum(1 for r in results if r["status"] == "PASS") + failed = sum(1 for r in results if r["status"] == "FAIL") + print(f"Passed: {passed}") + print(f"Failed: {failed}") + print(f"Total: {passed + failed}") + print(f"Time: {elapsed:.1f}s") + + if failed > 0: + print("\nFailed checks:") + for r in results: + if r["status"] == "FAIL": + print(f" - {r['name']}: {r['detail']}") + + # Write JSON report + report_path = os.path.join(STAGING, "split_validation_report.json") + os.makedirs(STAGING, exist_ok=True) + with open(report_path, "w") as f: + json.dump({"results": results, "passed": passed, "failed": failed}, f, indent=2) + print(f"\nJSON report: {report_path}") + + sys.exit(1 if failed > 0 else 0) + + +if __name__ == "__main__": + main() diff --git a/data_loaders/ref-genome-analysis/src/validate_split_stores.sbatch b/data_loaders/ref-genome-analysis/src/validate_split_stores.sbatch new file mode 100644 index 0000000..d9c3b4c --- /dev/null +++ b/data_loaders/ref-genome-analysis/src/validate_split_stores.sbatch @@ -0,0 +1,16 @@ +#!/bin/bash +#SBATCH --job-name=validate_split +#SBATCH --output=validate_split_%j.log +#SBATCH --error=validate_split_%j.log +#SBATCH --partition=standard +#SBATCH --time=2:00:00 +#SBATCH --mem=16G +#SBATCH --cpus-per-task=1 +#SBATCH --account=shefflab + +module load miniforge/24.3.0-py3.11 + +cd $HOME/code/ref-genome-analysis +source env/on-cluster.env + +python src/validate_split_stores.py "$@" diff --git a/deployment/seqcolapi-store/production.env b/deployment/seqcolapi-store/production.env index fa32c22..ee36462 100644 --- a/deployment/seqcolapi-store/production.env +++ b/deployment/seqcolapi-store/production.env @@ -1,2 +1,2 @@ -export REFGET_STORE_URL="s3://seqcolapi-store/refget/" +export REFGET_STORE_URL="https://refgenie.s3.us-east-1.amazonaws.com/refget-store/vgp/" export SERVER_ENV="production" diff --git a/deployment/seqcolapi-store/task_def.json b/deployment/seqcolapi-store/task_def.json index 4a3d22a..7e74531 100644 --- a/deployment/seqcolapi-store/task_def.json +++ b/deployment/seqcolapi-store/task_def.json @@ -20,7 +20,7 
@@ "environment": [ { "name": "REFGET_STORE_URL", - "value": "s3://seqcolapi-store/refget/" + "value": "https://refgenie.s3.us-east-1.amazonaws.com/refget-store/vgp/" } ], "resourceRequirements": null, diff --git a/deployment/store_demo/store_demo.env b/deployment/store_demo/store_demo.env new file mode 100644 index 0000000..05ac069 --- /dev/null +++ b/deployment/store_demo/store_demo.env @@ -0,0 +1,3 @@ +export REFGET_STORE_PATH="/tmp/refget_demo_store" +export SEQCOLAPI_PORT="8100" +export SERVER_ENV="dev" diff --git a/deployment/store_demo_up.sh b/deployment/store_demo_up.sh new file mode 100755 index 0000000..7e3bb24 --- /dev/null +++ b/deployment/store_demo_up.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# This script starts a local store-backed demo of the SeqCol API service + +# Use local source instead of installed package +export PYTHONPATH="$(pwd):$PYTHONPATH" + +# Function to handle cleanup on Ctrl+C +cleanup() { + echo "Stopping uvicorn (PID: $PID)..." + kill -15 $PID 2>/dev/null + wait $PID 2>/dev/null + echo "Uvicorn stopped." + if [ -n "$STORE_HTTP_PID" ]; then + echo "Stopping store HTTP server (PID: $STORE_HTTP_PID)..." + kill -15 $STORE_HTTP_PID 2>/dev/null + wait $STORE_HTTP_PID 2>/dev/null + fi + echo "Cleaning up demo store at $REFGET_STORE_PATH..." + rm -rf "$REFGET_STORE_PATH" + exit 0 +} + +# Load environment variables +source deployment/store_demo/store_demo.env + +echo "Building demo store from test FASTA files..." +python data_loaders/demo_build_store.py test_fasta "$REFGET_STORE_PATH" + +STORE_HTTP_PORT=8200 +echo "Starting HTTP file server for store on port $STORE_HTTP_PORT..." +STORE_DIR="$REFGET_STORE_PATH" STORE_PORT="$STORE_HTTP_PORT" python -c ' +import http.server, socketserver, os + +class CORSHandler(http.server.SimpleHTTPRequestHandler): + def end_headers(self): + self.send_header("Access-Control-Allow-Origin", "*") + super().end_headers() + def __init__(self, *args, **kwargs): + super().__init__(*args, directory=os.environ["STORE_DIR"], **kwargs) + +socketserver.TCPServer(("", int(os.environ["STORE_PORT"])), CORSHandler).serve_forever() +' & +STORE_HTTP_PID=$! +export REFGET_STORE_HTTP_URL="http://localhost:$STORE_HTTP_PORT" + +echo "Running store-backed uvicorn API service..." +uvicorn seqcolapi.main:store_app --reload --port ${SEQCOLAPI_PORT:-8100} & +PID=$! + +echo "" +echo "Store-backed seqcolapi is running at http://localhost:${SEQCOLAPI_PORT:-8100}" +echo " API docs: http://localhost:${SEQCOLAPI_PORT:-8100}/docs" +echo " Service info: http://localhost:${SEQCOLAPI_PORT:-8100}/service-info" +echo " Store files: $REFGET_STORE_HTTP_URL" +echo "" + +# Set up cleanup on Ctrl+C +trap cleanup SIGINT EXIT + +# Wait indefinitely until Ctrl+C is pressed +wait $PID diff --git a/frontend/src/components/APINav.jsx b/frontend/src/components/APINav.jsx new file mode 100644 index 0000000..5ca752f --- /dev/null +++ b/frontend/src/components/APINav.jsx @@ -0,0 +1,50 @@ +import { Link } from 'react-router-dom'; +import { useApiExplorerStore } from '../stores/apiExplorerStore.js'; + +const APINav = ({ active }) => { + const { apiUrl } = useApiExplorerStore(); + const urlParam = apiUrl ? `?url=${encodeURIComponent(apiUrl)}` : ''; + + const items = [ + { key: 'collections', label: 'Collections', path: '/explore-api/collections', icon: 'bi-collection' }, + { key: 'compare', label: 'Compare (SCIM)', path: '/explore-api/compare', icon: 'bi-arrows-angle-contract' }, + ]; + + return ( +
    +
    +

    + + API Explorer +

    + + + Change API + +
    + + {apiUrl && ( +
    + + {apiUrl} +
    + )} + +
      + {items.map((item) => ( +
    • + + + {item.label} + +
    • + ))} +
    +
    + ); +}; + +export { APINav }; diff --git a/frontend/src/components/CompareTable.jsx b/frontend/src/components/CompareTable.jsx index 4b349b3..0ad9ca4 100644 --- a/frontend/src/components/CompareTable.jsx +++ b/frontend/src/components/CompareTable.jsx @@ -36,7 +36,7 @@ const CompareTable = ({ seqColDict }) => { '=' ) : ( { + const [copied, setCopied] = useState(false); + const handleCopy = (e) => { + e.stopPropagation(); + navigator.clipboard.writeText(value).then(() => { + setCopied(true); + setTimeout(() => setCopied(false), 1500); + }); + }; + return ( + + {value} + + + ); +}; + +export { CopyableDigest }; diff --git a/frontend/src/components/ExplorerNav.jsx b/frontend/src/components/ExplorerNav.jsx new file mode 100644 index 0000000..6562691 --- /dev/null +++ b/frontend/src/components/ExplorerNav.jsx @@ -0,0 +1,37 @@ +import { Link } from 'react-router-dom'; +import { useUnifiedStore } from '../stores/unifiedStore.js'; + +const ExplorerNav = ({ active }) => { + const { hasStore, hasAPI } = useUnifiedStore(); + + const items = [ + { key: 'collections', label: 'Collections', path: '/collections', icon: 'bi-collection' }, + { key: 'sequences', label: 'Sequences', path: '/sequences', icon: 'bi-list-ol', requireStore: true }, + { key: 'aliases', label: 'Aliases', path: '/aliases', icon: 'bi-tag', requireStore: true }, + { key: 'compare', label: 'Compare', path: '/compare', icon: 'bi-arrows-angle-contract', requireAPI: true }, + ]; + + const visibleItems = items.filter((item) => { + if (item.requireStore && !hasStore) return false; + if (item.requireAPI && !hasAPI) return false; + return true; + }); + + return ( +
      + {visibleItems.map((item) => ( +
    • + + + {item.label} + +
    • + ))} +
    + ); +}; + +export { ExplorerNav }; diff --git a/frontend/src/components/SequenceTable.jsx b/frontend/src/components/SequenceTable.jsx new file mode 100644 index 0000000..1df6700 --- /dev/null +++ b/frontend/src/components/SequenceTable.jsx @@ -0,0 +1,188 @@ +import { useState, useMemo } from 'react'; +import { CopyableDigest } from './CopyableDigest.jsx'; +import { CliCommand } from './CliSnippet.jsx'; + +const PAGE_SIZE = 50; + +/** + * Paginated sequence table with detail modal. + * + * Props: + * sequences: array of {name, length, sha512t24u, md5, alphabet, description} + * storeUrl: optional store URL for code snippets in modal + * sortable: if true, column headers are clickable to sort + */ +const SequenceTable = ({ sequences, storeUrl, sortable = false }) => { + const [page, setPage] = useState(0); + const [selectedSeq, setSelectedSeq] = useState(null); + const [codeTab, setCodeTab] = useState('cli'); + const [sortCol, setSortCol] = useState(null); + const [sortAsc, setSortAsc] = useState(true); + + const handleSort = (col) => { + if (!sortable) return; + if (sortCol === col) setSortAsc(!sortAsc); + else { setSortCol(col); setSortAsc(true); } + setPage(0); + }; + + const sorted = useMemo(() => { + if (!sortable || !sortCol) return sequences; + return [...sequences].sort((a, b) => { + const va = a[sortCol]; + const vb = b[sortCol]; + if (typeof va === 'number' && typeof vb === 'number') + return sortAsc ? va - vb : vb - va; + return sortAsc + ? String(va).localeCompare(String(vb)) + : String(vb).localeCompare(String(va)); + }); + }, [sequences, sortCol, sortAsc, sortable]); + + const totalPages = Math.ceil(sorted.length / PAGE_SIZE); + const paged = sorted.slice(page * PAGE_SIZE, (page + 1) * PAGE_SIZE); + + const SortIcon = ({ col }) => { + if (!sortable || sortCol !== col) return null; + return ; + }; + + const thStyle = sortable ? { cursor: 'pointer' } : {}; + + return ( + <> +
    + + + + + + + + + + + {paged.map((seq, i) => ( + + + + + + + ))} + +
    handleSort('name')}> + Name + handleSort('length')}> + Length + handleSort('sha512t24u')}> + SHA-512/24u +
    {seq.name}{seq.length.toLocaleString()} + +
    +
    + + {totalPages > 1 && ( +
    + +
    + )} + + {/* Sequence detail modal */} + {selectedSeq && ( + <> +
    setSelectedSeq(null)} /> +
    setSelectedSeq(null)}> +
    e.stopPropagation()}> +
    +
    +
    {selectedSeq.name}
    +
    +
    + + + + + + + + + + + + + + + + + + + {selectedSeq.description && ( + + + + + )} + +
    Length{selectedSeq.length.toLocaleString()}
    Alphabet{selectedSeq.alphabet}
    SHA-512/24u
    MD5
    Description{selectedSeq.description}
    + {storeUrl && ( + <> +
    Code
    +
      +
    • + +
    • +
    • + +
    • +
    + Get sequence + + + )} +
    +
    +
    +
    + + )} + + ); +}; + +export { SequenceTable }; diff --git a/frontend/src/components/StoreNav.jsx b/frontend/src/components/StoreNav.jsx index f24d50f..1ba2cd5 100644 --- a/frontend/src/components/StoreNav.jsx +++ b/frontend/src/components/StoreNav.jsx @@ -11,9 +11,9 @@ const StoreNav = ({ active, storeUrlParam, collectionDigest }) => { const remote = storeUrl || new URLSearchParams(storeUrlParam).get('url') || ''; const items = [ - { key: 'overview', label: 'Overview', path: '/explore/store', icon: 'bi-house' }, - { key: 'sequences', label: 'Sequences', path: '/explore/store/sequences', icon: 'bi-list-ol' }, - { key: 'aliases', label: 'Aliases', path: '/explore/store/aliases', icon: 'bi-tag' }, + { key: 'overview', label: 'Overview', path: '/explore-store/overview', icon: 'bi-house' }, + { key: 'sequences', label: 'Sequences', path: '/explore-store/sequences', icon: 'bi-list-ol' }, + { key: 'aliases', label: 'Aliases', path: '/explore-store/aliases', icon: 'bi-tag' }, ]; const snippetGroups = [ @@ -137,7 +137,7 @@ store.chrom_sizes("${collectionDigest}")`, Code - + Change Store diff --git a/frontend/src/main.jsx b/frontend/src/main.jsx index 5a6f613..db616d1 100644 --- a/frontend/src/main.jsx +++ b/frontend/src/main.jsx @@ -11,27 +11,43 @@ import 'bootstrap/dist/css/bootstrap.css'; import 'bootstrap/dist/js/bootstrap.bundle.js'; import 'bootstrap-icons/font/bootstrap-icons.css'; -import { CollectionView } from './pages/CollectionView.jsx'; +import { useUnifiedStore } from './stores/unifiedStore.js'; + +// Unified Explorer pages +import { LandingPage } from './pages/LandingPage.jsx'; +import { Explorer } from './pages/Explorer.jsx'; +import { ExplorerCollection } from './pages/ExplorerCollection.jsx'; +import { ExplorerSequences } from './pages/ExplorerSequences.jsx'; +import { ExplorerAliases } from './pages/ExplorerAliases.jsx'; + +// API Explorer pages +import { APIExplorer } from './pages/APIExplorer.jsx'; +import { APICollections } from './pages/APICollections.jsx'; +import { APICollectionView } from './pages/APICollectionView.jsx'; +import { APICompare } from './pages/APICompare.jsx'; +import { APICompliance } from './pages/APICompliance.jsx'; + +// Store Explorer pages +import { StoreExplorer } from './pages/StoreExplorer.jsx'; +import { StoreOverview } from './pages/StoreOverview.jsx'; +import { StoreSequences } from './pages/StoreSequences.jsx'; +import { StoreCollection } from './pages/StoreCollection.jsx'; +import { StoreAliases } from './pages/StoreAliases.jsx'; + +// Site-specific pages import { PangenomeView } from './pages/PangenomeView.jsx'; import { AttributeView } from './pages/AttributeView.jsx'; import { DemoPage } from './pages/DemoPage.jsx'; import { SCIM } from './pages/SCIM.jsx'; import { SCOM } from './pages/SCOM.jsx'; -import { HomePage } from './pages/HomePage.jsx'; import { HPRCGenomes } from './pages/HPRCGenomes.jsx'; import { HumanReferencesView } from './pages/HumanReferences.jsx'; import { DigestPage } from './pages/DigestPage.jsx'; import { CompliancePage } from './pages/CompliancePage.jsx'; -import { StoreExplorer } from './pages/StoreExplorer.jsx'; -import { StoreOverview } from './pages/StoreOverview.jsx'; -import { StoreSequences } from './pages/StoreSequences.jsx'; -import { StoreCollection } from './pages/StoreCollection.jsx'; -import { StoreAliases } from './pages/StoreAliases.jsx'; import { fetchServiceInfo, fetchPangenomeLevels, - fetchSeqColList, fetchAllSeqCols, fetchCollectionLevels, fetchComparison, @@ -52,9 +68,38 @@ import { import { 
API_BASE } from './utilities.jsx'; +const NavItem = ({ path, label, location, navigate, isDropdown }) => { + const active = path === '/' + ? location === '' + : location.startsWith(path.substring(1)); + + return ( +
  • + navigate(path)} + className={`nav-link cursor-pointer ${active ? 'fw-medium text-black' : 'fw-light'}`} + > + {label} + +
  • + ); +}; + const Nav = () => { const navigate = useNavigate(); const location = useLocation().pathname.substring(1) || ''; + const { serviceInfo } = useUnifiedStore(); + const scomEnabled = serviceInfo?.seqcol?.scom?.enabled; + + const navTo = (path) => { + navigate(path); + // Close any open Bootstrap dropdown + document.querySelectorAll('.dropdown-menu.show').forEach((el) => { + el.classList.remove('show'); + el.previousElementSibling?.classList.remove('show'); + el.previousElementSibling?.setAttribute('aria-expanded', 'false'); + }); + }; return (
    @@ -212,35 +247,47 @@ class ReactErrorBoundary extends React.Component { const App = () => { const loaderData = useLoaderData(); + const apiAvailable = loaderData != null; + const version = loaderData?.version; + return ( <>