RosettaCommons · timkartar · Jan 11, 2026 · Jan 6, 2026 · Jan 18, 2026 · Jan 20, 2026
diff --git a/.env b/.env
@@ -10,7 +10,7 @@
 # expected that you use the same saving conventions as the RCSB PDB, which means:
 #   `1a2b` --> /path/to/pdb_mirror/a2/1a2b.cif.gz
 # To set up a mirror, you can use tha atomworks commandline: `atomworks pdb sync /path/to/mirror`
-PDB_MIRROR_PATH=
+PDB_MIRROR_PATH=/projects/ml/frozen_pdb_copies/2024_12_01_pdb
 
 # The `CCD_MIRROR_PATH` is a path to a local mirror of the CCD database.
 # It's expected that you use the same saving conventions as the RCSB CCD, which means:
@@ -19,7 +19,7 @@ PDB_MIRROR_PATH=
 # If no mirror is provided, the internal biotite CCD will be used as a fallback. To provide a 
 # custom CCD for a ligand, you can place it in the in the CCD mirror path following the CCDs pattern.
 # Example: /path/to/ccd_mirror/M/MYLIGAND1/MYLIGAND1.cif
-CCD_MIRROR_PATH=
+CCD_MIRROR_PATH=/projects/ml/frozen_pdb_copies/2024_12_11_ccd
 
 # --- Local MSA directories ---
 LOCAL_MSA_DIRS=
@@ -29,14 +29,14 @@ LOCAL_MSA_DIRS=
 # The HBPLUS_PATH is a path to the hbplus tool, which is used for hydrogen bond calculation
 # during training and during metrics computation.
 # Example: /path/to/hbplus
-HBPLUS_PATH=
+HBPLUS_PATH=/projects/ml/hbplus
 
 # The `X3DNA_PATH` is a path to the x3dna tool, which is used for DNA structure analysis.
 # Example: /path/to/x3dna-v2.4
-X3DNA_PATH=
+X3DNA_PATH=/projects/ml/prot_dna/x3dna-v2.4
 
 # For secondary structure prediction (not currently used)
-DSSP_PATH=
+DSSP_PATH=/projects/ml/dssp/install/bin/mkdssp
 
 # The `HHFILTER_PATH` is a path to the hhfilter tool from the HH-suite, which is used for
 # filtering MSAs to reduce redundancy.

diff --git a/models/rf3/configs/model/components/rf3_net_with_confidence_head.yaml b/models/rf3/configs/model/components/rf3_net_with_confidence_head.yaml
@@ -42,4 +42,4 @@ confidence_head:
   n_bins_exp_resolved: 2
   use_Cb_distances: False
   use_af3_style_binning_and_final_layer_norms: True
-  symmetrize_Cb_logits: True
+  symmetrize_Cb_logits: True
diff --git a/models/rfd3/README.md b/models/rfd3/README.md
@@ -1,6 +1,6 @@
 # De novo Design of Biomolecular Interactions with RFdiffusion3
 
-RFdiffusion3 (RFD3) is a diffusion method that can design protein structures 
+RFdiffusion3 (RFD3) is a diffusion method that can design biopolymer structures 
 under complex constraints. 
 
 This repository contains both the training and inference code, and
@@ -62,6 +62,8 @@ For example, you can fix sequence and not structure (prediction-type task), fix
 
 For full details on how to specify inputs, see the [input specification documentation](./docs/input.md). You can also see `foundry/models/rfd3/configs/inference_engine/rfdiffusion3.yaml` for even more options.
 
+Nucleic acid design, alongside protein design, is also possible with RFD3 using the atom23 checkpoints. For full details, see the [atom23 design documentation](./docs/examples/atom23_design.md).
+
 ## Further example JSONs for different applications
 Additional examples are broken up by use case. If you have cloned the
 repository, matching `.json` files are in `foundry/models/rfd3/docs/examples`
@@ -75,27 +77,33 @@ you will need to change the path in the `.json` file(s) before running.
 <table>
   <tr>
     <td align="center">
-      <h3><a href="./docs/na_binder_design.md">Nucleic acid binder design</a></h3>
-      <img src="docs/.assets/dna.png" height="150" />
-    </td>
-    <td align="center">
-      <h3><a href="./docs/sm_binder_design.md">Small molecule binder design</a></h3>
+      <h3><a href="./docs/examples/sm_binder_design.md">Small molecule binder design</a></h3>
       <img src="docs/.assets/sm.png" height="150" />
     </td>
     <td align="center">
-      <h3><a href="./docs/protein_binder_design.md">Protein binder design</a></h3>
+      <h3><a href="./docs/examples/protein_binder_design.md">Protein binder design</a></h3>
       <img src="docs/.assets/ppi.png" height="150" />
     </td>
+     <td align="center">
+      <h3><a href="./docs/examples/na_binder_design.md">Nucleic acid binder design</a></h3>
+      <img src="docs/.assets/dna.png" height="150" />
+    </td>
+
   </tr>
   <tr>
     <td align="center">
-      <h3><a href="./docs/enzyme_design.md">Enzyme design</a></h3>
+      <h3><a href="./docs/examples/enzyme_design.md">Enzyme design</a></h3>
       <img src="docs/.assets/enzyme.png" height="150" />
     </td>
     <td align="center">
-      <h3><a href="./docs/symmetry.md">Symmetric design</a></h3>
+      <h3><a href="./docs/examples/symmetry.md">Symmetric design</a></h3>
       <img src="docs/.assets/symm.png" height="150" />
     </td>
+    <td align="center">
+      <h3><a href="./docs/examples/atom23_design.md">Multipolymer design</a></h3>
+      <img src="docs/.assets/multipolymer.png" height="150" />
+    </td>
+
   </tr>
 </table>
 

diff --git a/models/rfd3/configs/callbacks/design_callbacks.yaml b/models/rfd3/configs/callbacks/design_callbacks.yaml
@@ -1,5 +1,6 @@
 defaults:
   - train_logging
+  - metrics_logging
   - _self_
 
 log_learning_rate_callback:

diff --git a/models/rfd3/configs/datasets/design_base_rfd3na.yaml b/models/rfd3/configs/datasets/design_base_rfd3na.yaml
@@ -0,0 +1,105 @@
+# Base dataset config for training RFD3 design models with nucleic acids (atom14 variants):
+# PDB interface and pn_unit sub-datasets plus the RFD3 and RNA monomer distillation sets.
+
+defaults:  # Hydra defaults list; `_self_` is last, so values in this file override the composed configs
+  # Training datasets: PDB interface / pn_unit sub-datasets plus two distillation sets composed into `train`
+  - train/pdb/rfd3_train_interface@train.pdb.sub_datasets.interface
+  - train/pdb/rfd3_train_pn_unit@train.pdb.sub_datasets.pn_unit
+  - train/rfd3_monomer_distillation@train
+  - train/rna_monomer_distillation@train
+
+  # Customized validation datasets. NOTE(review): only pseudoknot is enabled; confirm the three disabled sets are intentional
+  #- val/unconditional@val.unconditional
+  #- val/unconditional_deep@val.unconditional_deep
+  #- val/indexed@val.indexed
+  - val/pseudoknot@val.pseudoknot
+
+  # Customized train masks: one condition config per key under global_transform_args.train_conditions
+  - conditions/unconditional@global_transform_args.train_conditions.unconditional
+  - conditions/island@global_transform_args.train_conditions.island
+  - conditions/tipatom@global_transform_args.train_conditions.tipatom
+  - conditions/sequence_design@global_transform_args.train_conditions.sequence_design
+  - conditions/ppi@global_transform_args.train_conditions.ppi
+
+  - _self_
+
+# Dotted path to the transform pipeline builder (atom14 base pipeline); presumably fed the transform arguments below - verify against caller
+pipeline_target: rfd3.transforms.pipelines.build_atom14_base_pipeline
+
+# Base config overrides (top-level values; crop_size and max_atoms_in_crop are read by dataset configs via ${datasets.*}):
+diffusion_batch_size_train: 32
+diffusion_batch_size_inference: 8
+crop_size: 384
+n_recycles_train: 2
+n_recycles_validation: 1
+max_atoms_in_crop: 3840  # 10x crop_size (384).
+
+# Global transform arguments: values shared between training and inference, referenced elsewhere as ${datasets.global_transform_args.*}
+global_transform_args:
+  n_atoms_per_token: 14  # NOTE(review): the README in this change points NA design at atom23 checkpoints - confirm 14 is intended here
+  central_atom: CB
+  sigma_perturb: 2.0
+  sigma_perturb_com: 1.0
+  association_scheme: dense
+  center_option: diffuse  # options are ["all", "motif", "diffuse"]
+
+  # Reference conformer policy
+  generate_conformers: True
+  generate_conformers_for_non_protein_only: True
+  provide_reference_conformer_when_unmasked: True
+  ground_truth_conformer_policy: IGNORE  # Other options: REPLACE, ADD, FALLBACK. See atomworks.enums for details
+  provide_elements_for_unindexed_components: True
+  use_element_for_atom_names_of_atomized_tokens: True  # TODO: correct name, implies unindexed do too
+
+  # PPI Cropping
+  keep_full_binder_in_spatial_crop: False
+  max_binder_length: 170
+
+  # PPI Hotspots
+  max_ppi_hotspots_frac_to_provide: 0.2
+  ppi_hotspot_max_distance: 4.5
+
+  # Secondary structure features (min/max island lengths are interpolated into the dataset transform configs)
+  max_ss_frac_to_provide: 0.4
+  min_ss_island_len: 1
+  max_ss_island_len: 10
+
+  # Nucleic acid features (interpolated into the dataset transform config as add_na_pair_features)
+  add_na_pair_features: false  # NOTE(review): lowercase here vs True/False elsewhere in this file; same boolean value
+
+  train_conditions:  # per-condition settings merged onto the condition configs composed in the defaults list
+    unconditional:
+      frequency: 5.0
+    sequence_design:
+      frequency: 2.0
+    island:
+      frequency: 1.0
+    tipatom:
+      frequency: 0.0  # presumably disables this condition - verify against the sampler
+    ppi:
+      frequency: 0.0  # presumably disables this condition - verify against the sampler
+
+  # Used to create simple boolean flags for downstream conditioning (each value is a probability in [0, 1])
+  meta_conditioning_probabilities:
+    p_is_nucleic_ss_example: 0.1
+    p_nucleic_ss_show_partial_feats: 0.7
+    calculate_NA_SS: 0.5
+    calculate_hbonds: 0.2
+    calculate_rasa: 0.6
+
+    keep_protein_motif_rasa: 0.1  # Small to prevent noisy input to model
+    hbond_subsample: 0.5
+
+    # fully indexed training
+    unindex_leak_global_index: 0.10
+    unindex_insert_random_break: 0.10
+    unindex_remove_random_break: 0.10
+
+    # Probability of adding 1d secondary structure conditioning
+    add_1d_ss_features: 0.1
+    featurize_plddt: 0.9  # Applied for monomer distillation only
+    add_global_is_non_loopy_feature: 0.99
+
+    # PPI. NOTE(review): the ppi train condition frequency is 0.0 above, so these presumably have no effect here - confirm
+    add_ppi_hotspots: 0.75
+    full_binder_crop: 0.75
diff --git a/models/rfd3/configs/datasets/train/pdb/base_transform_args.yaml b/models/rfd3/configs/datasets/train/pdb/base_transform_args.yaml
@@ -43,6 +43,9 @@ dataset:
     min_ss_island_len: ${datasets.global_transform_args.min_ss_island_len}
     max_ss_island_len: ${datasets.global_transform_args.max_ss_island_len}
 
+    # Nucleic acid features
+    add_na_pair_features: ${datasets.global_transform_args.add_na_pair_features}
+
     # Cropping
     crop_size: ${datasets.crop_size}
     max_atoms_in_crop: ${datasets.max_atoms_in_crop}
@@ -56,4 +59,5 @@ dataset:
 
     # Other dataset-specific parameters
     atom_1d_features: ${model.net.token_initializer.atom_1d_features}
-    token_1d_features: ${model.net.token_initializer.token_1d_features}
+    token_1d_features: ${model.net.token_initializer.token_1d_features}
+    token_2d_features: ${model.net.token_initializer.token_2d_features}
diff --git a/models/rfd3/configs/datasets/train/pdb/rfd3_train_interface.yaml b/models/rfd3/configs/datasets/train/pdb/rfd3_train_interface.yaml
@@ -12,11 +12,12 @@ dataset:
         # filters common across all PDB datasets
         - 'pdb_id not in ["7rte", "7m5w", "7n5u"]'
         - 'pdb_id not in ["3di3", "5o45", "1z92", "2gy5", "4zxb"]'
+        - 'pdb_id not in ["1drz", "2m8k", "2miy", "3q3z", "4oqu", "4plx", "4znp", "7kd1", "7kga", "7qr4"]'
         - "deposition_date < '2024-12-16'"
         - "resolution < 9.0"
         - "num_polymer_pn_units <= 300" 
         - "cluster.notnull()"
         # interface specific filters
         - "~(pn_unit_1_non_polymer_res_names.notnull() and pn_unit_1_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
         - "~(pn_unit_2_non_polymer_res_names.notnull() and pn_unit_2_non_polymer_res_names.str.contains('${resolve_import:atomworks.constants,AF3_EXCLUDED_LIGANDS_REGEX}', regex=True))"
-        - "is_inter_molecule"
+        - "is_inter_molecule"
diff --git a/models/rfd3/configs/datasets/train/pdb/rfd3_train_pn_unit.yaml b/models/rfd3/configs/datasets/train/pdb/rfd3_train_pn_unit.yaml
@@ -15,6 +15,7 @@ dataset:
         # filters common across all PDB datasets
       - 'pdb_id not in ["7rte", "7m5w", "7n5u"]'
       - 'pdb_id not in ["3di3", "5o45", "1z92", "2gy5", "4zxb"]'
+      - 'pdb_id not in ["1drz", "2m8k", "2miy", "3q3z", "4oqu", "4plx", "4znp", "7kd1", "7kga", "7qr4"]'
       - "deposition_date < '2024-12-16'"
       - "resolution < 9.0"
       - "num_polymer_pn_units <= 300" 

diff --git a/models/rfd3/configs/datasets/train/rna_monomer_distillation.yaml b/models/rfd3/configs/datasets/train/rna_monomer_distillation.yaml
@@ -0,0 +1,39 @@
+defaults:  # Hydra defaults list: shared transform args are composed under the rna_monomer_distillation package
+  - pdb/base_transform_args@rna_monomer_distillation
+  - _self_
+
+rna_monomer_distillation:
+  dataset:
+    _target_: atomworks.ml.datasets.StructuralDatasetWrapper
+    save_failed_examples_to_dir: ${paths.data.failed_examples_dir}
+
+    # cif parser arguments (no cache directory; cache reads and writes both disabled)
+    cif_parser_args:
+      cache_dir: null
+      load_from_cache: False
+      save_to_cache: False 
+
+    # metadata parser
+    dataset_parser:
+      _target_: atomworks.ml.datasets.parsers.GenericDFParser
+      pn_unit_iid_colnames: null
+
+    # metadata dataset (parquet table loaded through PandasDataset)
+    dataset:
+      _target_: atomworks.ml.datasets.PandasDataset
+      name: rna_monomer_distillation
+      id_column: example_id  # also listed in columns_to_load below
+      data: /projects/ml/afavor/rna_distillation/rna_distillation_filtered_df.parquet  # NOTE(review): hard-coded absolute site path; other entries use ${paths.data.*} interpolation - consider a paths entry
+      columns_to_load:
+        - example_id
+        - path
+        - cluster_id
+        - seq_hash
+        - overall_plddt
+        - overall_pde
+        - overall_pae
+
+    transform:  # overrides on top of the composed base transform args; the two crop probabilities sum to 1.0
+      crop_contiguous_probability: 0.67
+      crop_spatial_probability: 0.33
+
diff --git a/models/rfd3/configs/datasets/val/design_validation_base.yaml b/models/rfd3/configs/datasets/val/design_validation_base.yaml
@@ -37,4 +37,5 @@ dataset:
 
     # Other dataset-specific parameters
     atom_1d_features: ${model.net.token_initializer.atom_1d_features}
-    token_1d_features: ${model.net.token_initializer.token_1d_features}
+    token_1d_features: ${model.net.token_initializer.token_1d_features}
+    token_2d_features: ${model.net.token_initializer.token_2d_features}
diff --git a/models/rfd3/configs/datasets/val/pseudoknot.yaml b/models/rfd3/configs/datasets/val/pseudoknot.yaml
@@ -0,0 +1,9 @@
+# Validation dataset config for the pseudoknot benchmark; extends design_validation_base.
+defaults:
+  - design_validation_base
+  - _self_
+
+dataset:
+  name: pseudoknot
+  eval_every_n: 1  # evaluation cadence; the unit (epochs vs. steps) is not visible here - TODO confirm
+  data: ${paths.data.design_benchmark_data_dir}/pseudoknot.json  # benchmark JSON resolved under the configured benchmark data dir