From 94922e2c1668d1e1d1481d3cc9c0523dd9893643 Mon Sep 17 00:00:00 2001
From: jbutch <jbutch@uw.edu>
Date: Sat, 4 Apr 2026 21:23:54 -0700
Subject: [PATCH 1/5] Fix ligand res_id offset to match AF3 output convention

RFD3 was offsetting ligand res_id values from the protein max res_id,
causing (chain_id, res_id, atom_name) pairing to fail against AF3
predictions, which always start ligand res_id at 1. Replace the offset
with dense rank-based per-chain renumbering (1, 2, ...) and raise an
error when a ligand is on chain A, with an override option
(allow_ligand_on_chain_a).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../rfd3/src/rfd3/inference/input_parsing.py  | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/models/rfd3/src/rfd3/inference/input_parsing.py b/models/rfd3/src/rfd3/inference/input_parsing.py
index d97b3be3..30ff294b 100644
--- a/models/rfd3/src/rfd3/inference/input_parsing.py
+++ b/models/rfd3/src/rfd3/inference/input_parsing.py
@@ -136,6 +136,7 @@ class DesignInputSpecification(BaseModel):
     # Extra args:
     length:  Optional[str] = Field(None, description="Length range as 'min-max' or int. Constrains length of contig if provided")
     ligand:  Optional[str] = Field(None, description="Ligand name or index to include in design.")
+    allow_ligand_on_chain_a: bool = Field(False, description="If True, suppress the error when a ligand is on chain A (the protein chain). Use with caution — chain ID is leaked to the model.")
     cif_parser_args: Optional[Dict[str, Any]] = Field(None, description="CIF parser arguments")
     extra: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Extra metadata to include in output (useful for logging additional info in metadata)")
     dialect: int = Field(2, description="RFdiffusion3 input dialect. 1: legacy, 2: release.")
@@ -672,14 +673,26 @@ def _append_ligand(self, atom_array, atom_array_input_annotated):
                     + list(atom_array_input_annotated.get_annotation_categories())
                 ),
             )
-            # Offset ligand residue ids based on the original input to avoid clashes
-            # with any newly created residues (matches legacy behaviour).
-            ligand_array.res_id = (
-                ligand_array.res_id
-                - np.min(ligand_array.res_id)
-                + np.max(atom_array.res_id)
-                + 1
-            )
+            # Error if any ligand sits on chain A (the protein chain) unless
+            # explicitly overridden — chain ID is leaked to the model so this
+            # is a significant difference from the expected convention.
+            ligand_chains = np.unique(ligand_array.chain_id)
+            if "A" in ligand_chains and not self.allow_ligand_on_chain_a:
+                raise ValueError(
+                    f"Ligand found on chain A, which is reserved for the protein. "
+                    f"Ligand chain(s): {ligand_chains.tolist()}. "
+                    f"Place ligands on separate chains (B, C, D, ...) or set "
+                    f"'allow_ligand_on_chain_a: true' to override this check."
+                )
+            # Reset ligand res_id to start from 1 per chain, matching the
+            # convention AF3 uses in its output CIF files.  Use dense
+            # rank-based renumbering so gaps in the original numbering
+            # (e.g. res_id 850, 900) become sequential (1, 2).
+            for chain in ligand_chains:
+                mask = ligand_array.chain_id == chain
+                chain_res_ids = ligand_array.res_id[mask]
+                _, inverse = np.unique(chain_res_ids, return_inverse=True)
+                ligand_array.res_id[mask] = inverse + 1
             # Harmonize conditioning annotations before concatenation: biotite's
             # concatenate only preserves annotations present in ALL arrays (set
             # intersection), so mismatched optional conditioning annotations

From 96b2e5f882fcac106ad5f3bfc29196cad6b301b8 Mon Sep 17 00:00:00 2001
From: jbutch <jbutch@uw.edu>
Date: Sat, 4 Apr 2026 21:29:31 -0700
Subject: [PATCH 2/5] Generalize chain validation to all existing chains
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename allow_ligand_on_chain_a → allow_ligand_on_existing_chain and
check against all chains already present in the built atom array,
not just chain A.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../rfd3/src/rfd3/inference/input_parsing.py  | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/models/rfd3/src/rfd3/inference/input_parsing.py b/models/rfd3/src/rfd3/inference/input_parsing.py
index 30ff294b..f237e026 100644
--- a/models/rfd3/src/rfd3/inference/input_parsing.py
+++ b/models/rfd3/src/rfd3/inference/input_parsing.py
@@ -136,7 +136,7 @@ class DesignInputSpecification(BaseModel):
     # Extra args:
     length:  Optional[str] = Field(None, description="Length range as 'min-max' or int. Constrains length of contig if provided")
     ligand:  Optional[str] = Field(None, description="Ligand name or index to include in design.")
-    allow_ligand_on_chain_a: bool = Field(False, description="If True, suppress the error when a ligand is on chain A (the protein chain). Use with caution — chain ID is leaked to the model.")
+    allow_ligand_on_existing_chain: bool = Field(False, description="If True, suppress the error when a ligand shares a chain ID with the built atom array. Use with caution — chain ID is leaked to the model.")
     cif_parser_args: Optional[Dict[str, Any]] = Field(None, description="CIF parser arguments")
     extra: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Extra metadata to include in output (useful for logging additional info in metadata)")
     dialect: int = Field(2, description="RFdiffusion3 input dialect. 1: legacy, 2: release.")
@@ -673,16 +673,18 @@ def _append_ligand(self, atom_array, atom_array_input_annotated):
                     + list(atom_array_input_annotated.get_annotation_categories())
                 ),
             )
-            # Error if any ligand sits on chain A (the protein chain) unless
-            # explicitly overridden — chain ID is leaked to the model so this
-            # is a significant difference from the expected convention.
+            # Error if any ligand shares a chain ID with the already-built
+            # atom array — chain ID is leaked to the model so collisions
+            # represent a significant deviation from the expected convention.
             ligand_chains = np.unique(ligand_array.chain_id)
-            if "A" in ligand_chains and not self.allow_ligand_on_chain_a:
+            existing_chains = set(np.unique(atom_array.chain_id))
+            overlapping = sorted(existing_chains & set(ligand_chains))
+            if overlapping and not self.allow_ligand_on_existing_chain:
                 raise ValueError(
-                    f"Ligand found on chain A, which is reserved for the protein. "
-                    f"Ligand chain(s): {ligand_chains.tolist()}. "
-                    f"Place ligands on separate chains (B, C, D, ...) or set "
-                    f"'allow_ligand_on_chain_a: true' to override this check."
+                    f"Ligand chain(s) {overlapping} overlap with existing "
+                    f"chain(s) {sorted(existing_chains)}. Place ligands on "
+                    f"separate chains or set 'allow_ligand_on_existing_chain: "
+                    f"true' to override this check."
                 )
             # Reset ligand res_id to start from 1 per chain, matching the
             # convention AF3 uses in its output CIF files.  Use dense

From 9d4ea2cca4156ee3563a880d08d03ea7a79fcee2 Mon Sep 17 00:00:00 2001
From: jbutch <jbutch@uw.edu>
Date: Sat, 4 Apr 2026 21:46:02 -0700
Subject: [PATCH 3/5] Error on multiple ligands per chain; preserve gaps in
 override mode

When allow_ligand_on_existing_chain is False, raise an error if
multiple ligand residues share the same chain. Replace the dense
rank-based renumbering with a per-chain shift so that each ligand
chain's minimum res_id becomes 1, preserving relative gaps when
ligands share a chain (override mode).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../rfd3/src/rfd3/inference/input_parsing.py  | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/models/rfd3/src/rfd3/inference/input_parsing.py b/models/rfd3/src/rfd3/inference/input_parsing.py
index f237e026..9ad908cc 100644
--- a/models/rfd3/src/rfd3/inference/input_parsing.py
+++ b/models/rfd3/src/rfd3/inference/input_parsing.py
@@ -673,28 +673,36 @@ def _append_ligand(self, atom_array, atom_array_input_annotated):
                     + list(atom_array_input_annotated.get_annotation_categories())
                 ),
             )
-            # Error if any ligand shares a chain ID with the already-built
-            # atom array — chain ID is leaked to the model so collisions
-            # represent a significant deviation from the expected convention.
+            # Validate chain assignments — chain ID is leaked to the model
+            # so collisions are a significant deviation from convention.
             ligand_chains = np.unique(ligand_array.chain_id)
             existing_chains = set(np.unique(atom_array.chain_id))
             overlapping = sorted(existing_chains & set(ligand_chains))
-            if overlapping and not self.allow_ligand_on_existing_chain:
-                raise ValueError(
-                    f"Ligand chain(s) {overlapping} overlap with existing "
-                    f"chain(s) {sorted(existing_chains)}. Place ligands on "
-                    f"separate chains or set 'allow_ligand_on_existing_chain: "
-                    f"true' to override this check."
-                )
-            # Reset ligand res_id to start from 1 per chain, matching the
-            # convention AF3 uses in its output CIF files.  Use dense
-            # rank-based renumbering so gaps in the original numbering
-            # (e.g. res_id 850, 900) become sequential (1, 2).
+            if not self.allow_ligand_on_existing_chain:
+                if overlapping:
+                    raise ValueError(
+                        f"Ligand chain(s) {overlapping} overlap with existing "
+                        f"chain(s) {sorted(existing_chains)}. Place ligands on "
+                        f"separate chains or set 'allow_ligand_on_existing_chain: "
+                        f"true' to override this check."
+                    )
+                # Multiple ligands must each be on their own chain.
+                for chain in ligand_chains:
+                    n_residues = len(
+                        np.unique(ligand_array.res_id[ligand_array.chain_id == chain])
+                    )
+                    if n_residues > 1:
+                        raise ValueError(
+                            f"Multiple ligand residues on chain {chain}. Each "
+                            f"ligand must be on its own chain, or set "
+                            f"'allow_ligand_on_existing_chain: true' to override."
+                        )
+            # Reset ligand res_id to start from 1 per chain. When ligands
+            # share a chain (override mode), preserve relative gaps.
             for chain in ligand_chains:
                 mask = ligand_array.chain_id == chain
                 chain_res_ids = ligand_array.res_id[mask]
-                _, inverse = np.unique(chain_res_ids, return_inverse=True)
-                ligand_array.res_id[mask] = inverse + 1
+                ligand_array.res_id[mask] = chain_res_ids - np.min(chain_res_ids) + 1
             # Harmonize conditioning annotations before concatenation: biotite's
             # concatenate only preserves annotations present in ALL arrays (set
             # intersection), so mismatched optional conditioning annotations

From 01d92a950205a5fd01b1040a63e76770055919ef Mon Sep 17 00:00:00 2001
From: jbutch <jbutch@uw.edu>
Date: Sat, 4 Apr 2026 21:48:06 -0700
Subject: [PATCH 4/5] Use legacy res_id offset when
 allow_ligand_on_existing_chain is True

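The override path now matches the old behaviour (ligand res_id values
are offset to start just after the protein max res_id). The default
path (separate chains) sets every res_id on each ligand chain to 1,
which is safe because that path already rejects any chain holding more
than one ligand residue.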

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../rfd3/src/rfd3/inference/input_parsing.py  | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/models/rfd3/src/rfd3/inference/input_parsing.py b/models/rfd3/src/rfd3/inference/input_parsing.py
index 9ad908cc..96952147 100644
--- a/models/rfd3/src/rfd3/inference/input_parsing.py
+++ b/models/rfd3/src/rfd3/inference/input_parsing.py
@@ -697,12 +697,20 @@ def _append_ligand(self, atom_array, atom_array_input_annotated):
                             f"ligand must be on its own chain, or set "
                             f"'allow_ligand_on_existing_chain: true' to override."
                         )
-            # Reset ligand res_id to start from 1 per chain. When ligands
-            # share a chain (override mode), preserve relative gaps.
-            for chain in ligand_chains:
-                mask = ligand_array.chain_id == chain
-                chain_res_ids = ligand_array.res_id[mask]
-                ligand_array.res_id[mask] = chain_res_ids - np.min(chain_res_ids) + 1
+            if self.allow_ligand_on_existing_chain:
+                # Legacy behaviour: offset from protein max to avoid clashes.
+                ligand_array.res_id = (
+                    ligand_array.res_id
+                    - np.min(ligand_array.res_id)
+                    + np.max(atom_array.res_id)
+                    + 1
+                )
+            else:
+                # Reset ligand res_id to start from 1 per chain, matching
+                # the convention AF3 uses in its output CIF files.
+                for chain in ligand_chains:
+                    mask = ligand_array.chain_id == chain
+                    ligand_array.res_id[mask] = 1
             # Harmonize conditioning annotations before concatenation: biotite's
             # concatenate only preserves annotations present in ALL arrays (set
             # intersection), so mismatched optional conditioning annotations

From 860b8b8de58291fa743433acc37089da4fb36af0 Mon Sep 17 00:00:00 2001
From: jbutch <jbutch@uw.edu>
Date: Sat, 4 Apr 2026 21:49:07 -0700
Subject: [PATCH 5/5] Note in errors that override restores old behaviour

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 models/rfd3/src/rfd3/inference/input_parsing.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/models/rfd3/src/rfd3/inference/input_parsing.py b/models/rfd3/src/rfd3/inference/input_parsing.py
index 96952147..3d922466 100644
--- a/models/rfd3/src/rfd3/inference/input_parsing.py
+++ b/models/rfd3/src/rfd3/inference/input_parsing.py
@@ -684,7 +684,7 @@ def _append_ligand(self, atom_array, atom_array_input_annotated):
                         f"Ligand chain(s) {overlapping} overlap with existing "
                         f"chain(s) {sorted(existing_chains)}. Place ligands on "
                         f"separate chains or set 'allow_ligand_on_existing_chain: "
-                        f"true' to override this check."
+                        f"true' to restore the old behaviour."
                     )
                 # Multiple ligands must each be on their own chain.
                 for chain in ligand_chains:
@@ -695,7 +695,8 @@ def _append_ligand(self, atom_array, atom_array_input_annotated):
                         raise ValueError(
                             f"Multiple ligand residues on chain {chain}. Each "
                             f"ligand must be on its own chain, or set "
-                            f"'allow_ligand_on_existing_chain: true' to override."
+                            f"'allow_ligand_on_existing_chain: true' to restore "
+                            f"the old behaviour."
                         )
             if self.allow_ligand_on_existing_chain:
                 # Legacy behaviour: offset from protein max to avoid clashes.