From 06d6ccfa689dfead7080e4fed23ddf74d5d4f9fb Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Thu, 9 Apr 2026 14:23:06 -0700 Subject: [PATCH 01/31] added kwargs for jobflow library building --- src/rxn_ca/workflow/jobs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rxn_ca/workflow/jobs.py b/src/rxn_ca/workflow/jobs.py index fb1fc02..a28600b 100644 --- a/src/rxn_ca/workflow/jobs.py +++ b/src/rxn_ca/workflow/jobs.py @@ -20,6 +20,7 @@ def _build_reaction_library( ensure_phases: List[str] = None, metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, + **kwargs_entry ) -> tuple: """Build phase set and reaction library for a chemical system. @@ -46,6 +47,7 @@ def _build_reaction_library( metastability_cutoff=metastability_cutoff, ensure_phases=ensure_phases or [], exclude_theoretical_phases=exclude_theoretical, + **kwargs_entry ) print(f"Got {len(entries)} entries for {chemical_system}") if ensure_phases: From 3018d250ae440ddfcb7cc1bfbd4334a29c72373c Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Thu, 9 Apr 2026 14:28:54 -0700 Subject: [PATCH 02/31] fixed typo --- src/rxn_ca/workflow/jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rxn_ca/workflow/jobs.py b/src/rxn_ca/workflow/jobs.py index a28600b..99e03f0 100644 --- a/src/rxn_ca/workflow/jobs.py +++ b/src/rxn_ca/workflow/jobs.py @@ -20,7 +20,7 @@ def _build_reaction_library( ensure_phases: List[str] = None, metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, - **kwargs_entry + **entry_kwargs ) -> tuple: """Build phase set and reaction library for a chemical system. @@ -47,7 +47,7 @@ def _build_reaction_library( metastability_cutoff=metastability_cutoff, ensure_phases=ensure_phases or [], exclude_theoretical_phases=exclude_theoretical, - **kwargs_entry + **entry_kwargs ) print(f"Got {len(entries)} entries for {chemical_system}") if ensure_phases: From 4d7fb76ef7f0dcc218a47edbac419816c7f875cc Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Thu, 9 Apr 2026 14:33:01 -0700 Subject: [PATCH 03/31] added entry_kwargs to the @job decorated setup_reaction_library as well --- src/rxn_ca/workflow/jobs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/rxn_ca/workflow/jobs.py b/src/rxn_ca/workflow/jobs.py index 99e03f0..e8c67c6 100644 --- a/src/rxn_ca/workflow/jobs.py +++ b/src/rxn_ca/workflow/jobs.py @@ -84,6 +84,7 @@ def setup_reaction_library( metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, save_to_file: bool = True, + **entry_kwargs ) -> ReactionLibraryData: """Set up phase set and reaction library for a chemical system. @@ -111,6 +112,7 @@ def setup_reaction_library( ensure_phases, metastability_cutoff, exclude_theoretical, + **entry_kwargs ) # Optionally save reaction library to file From 9b849e8678f056dcf1f80727fb50614ea8345fcb Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Thu, 16 Apr 2026 15:33:10 -0700 Subject: [PATCH 04/31] Bayesian workflow jobs --- src/rxn_ca/optimization/search_space.py | 35 +++ src/rxn_ca/utilities/bayesian_helpers.py | 89 ++++++++ src/rxn_ca/workflow/__init__.py | 4 +- src/rxn_ca/workflow/jobs.py | 270 +++++++++++++++++++++-- 4 files changed, 382 insertions(+), 16 deletions(-) create mode 100644 src/rxn_ca/utilities/bayesian_helpers.py diff --git a/src/rxn_ca/optimization/search_space.py b/src/rxn_ca/optimization/search_space.py index c109d20..eb7d97e 100644 --- a/src/rxn_ca/optimization/search_space.py +++ b/src/rxn_ca/optimization/search_space.py @@ -295,6 +295,41 @@ def parameter_names(self) -> List[str]: """Get list of all parameter names.""" return [p.name for p in self.parameters] + def as_dict(self) -> dict: + """Serialize to a JSON-safe dict for passing between jobflow jobs.""" + params = [] + for p in self.parameters: + d: Dict[str, Any] = {"type": p.param_type.value, "name": p.name} + if isinstance(p, DiscreteParameter): + d["low"] = p.low + d["high"] = p.high + d["step"] = p.step + elif isinstance(p, ContinuousParameter): + d["low"] = p.low + d["high"] = p.high + elif isinstance(p, PrecursorSlotParameter): + d["candidates"] = p.candidates + elif isinstance(p, CategoricalParameter): + d["choices"] = p.choices + params.append(d) + return {"parameters": params} + + @classmethod + def from_dict(cls, d: dict) -> "SearchSpace": + """Reconstruct a SearchSpace from a serialized dict.""" + space = cls() + for p in d["parameters"]: + ptype = p["type"] + if ptype == "discrete": + space.add_discrete(p["name"], p["low"], p["high"], p["step"]) + elif ptype == "continuous": + space.add_continuous(p["name"], p["low"], p["high"]) + elif ptype == "precursor_slot": + space.add_precursor_slot(p["name"], p["candidates"]) + elif ptype == "categorical": + space.add_categorical(p["name"], p["choices"]) + return space + def __repr__(self) -> str: param_strs = [] for p in self.parameters: diff --git a/src/rxn_ca/utilities/bayesian_helpers.py b/src/rxn_ca/utilities/bayesian_helpers.py new file mode 100644 index 0000000..394487e --- /dev/null +++ b/src/rxn_ca/utilities/bayesian_helpers.py @@ -0,0 +1,89 @@ +"""Helper functions for Bayesian Optimization jobs. + +These are plain functions (not @job decorated) called from within +bo_trial_step in rxn_ca.workflow.jobs. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from rxn_ca.optimization import ( + AnalyzedResult, + FinalProductScorer, + MaximumProductScorer, + OptimizableRecipe, + get_result_analysis, +) + + +def build_recipe_from_params( + params: Dict[str, Any], + precursor_slot_names: List[str], + fixed_precursors: Optional[Dict[str, float]], + simulation_size: int, + num_realizations: int, +) -> OptimizableRecipe: + """Convert a BayBE parameter recommendation into an OptimizableRecipe. + + Args: + params: Parameter dict recommended by BayBE (may contain numpy scalars). + precursor_slot_names: Names of precursor slot parameters in the search + space (e.g. ["li_source", "si_source"]). Used to extract the + selected precursor formula for each slot from params. + fixed_precursors: If provided, use these formula→amount pairs directly + (precursor selection is not optimized). + simulation_size: CA grid size (NxN). + num_realizations: Number of simulation realizations to average. + + Returns: + Configured OptimizableRecipe ready for to_recipe(). + """ + # Normalize numpy scalar types that BayBE returns in its DataFrame + clean_params = { + k: (v.item() if hasattr(v, "item") else v) + for k, v in params.items() + } + + if fixed_precursors is not None: + return OptimizableRecipe( + precursors=fixed_precursors, + hold_temp=int(clean_params["hold_temp"]), + hold_time=int(clean_params["hold_time"]), + ramp_step_time=int(clean_params.get("ramp_step_time", 1)), + simulation_size=simulation_size, + num_simulations=num_realizations, + ) + + # Variable precursors: extract the selected formula for each slot from params. + # We pass actual string values (not None) so OptimizableRecipe.from_params + # populates precursors correctly via its first loop. + precursor_slots = {name: clean_params[name] for name in precursor_slot_names} + + return OptimizableRecipe.from_params( + params=clean_params, + precursor_slots=precursor_slots, + simulation_size=simulation_size, + num_simulations=num_realizations, + ) + + +def _score_result(result_doc: Any, target_phase: str, scorer_type: str) -> float: + """Score a simulation result document. + + Args: + result_doc: RxnCAResultDoc from the simulation. + target_phase: Chemical formula of the target product. + scorer_type: "final" (yield at end) or "maximum" (peak yield). + + Returns: + Float score in [0, 1], or 0.0 if scoring fails. + """ + try: + traces = get_result_analysis(result_doc) + analyzed = AnalyzedResult(traces) + scorer_cls = MaximumProductScorer if scorer_type == "maximum" else FinalProductScorer + return scorer_cls(target_phase).score(analyzed) + except Exception as e: + print(f"Warning: scoring failed ({type(e).__name__}: {e})") + return 0.0 diff --git a/src/rxn_ca/workflow/__init__.py b/src/rxn_ca/workflow/__init__.py index ea6fe1e..7da9725 100644 --- a/src/rxn_ca/workflow/__init__.py +++ b/src/rxn_ca/workflow/__init__.py @@ -8,7 +8,7 @@ """ from .schemas import SimulationOutput, ReactionLibraryData -from .jobs import setup_reaction_library, run_simulation +from .jobs import setup_reaction_library, run_simulation, init_bo_campaign, bo_trial_step from .flows import create_simulation_flow __all__ = [ @@ -17,4 +17,6 @@ "setup_reaction_library", "run_simulation", "create_simulation_flow", + "init_bo_campaign", + "bo_trial_step", ] diff --git a/src/rxn_ca/workflow/jobs.py b/src/rxn_ca/workflow/jobs.py index e8c67c6..49eb1d0 100644 --- a/src/rxn_ca/workflow/jobs.py +++ b/src/rxn_ca/workflow/jobs.py @@ -4,14 +4,20 @@ rxn-ca simulations in workflow frameworks. """ +from __future__ import annotations + +import csv +import json +from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional -import json -from jobflow import job -from monty.json import MontyEncoder, MontyDecoder +import pandas as pd +from jobflow import Response, job +from monty.json import MontyEncoder from .schemas import ReactionLibraryData, SimulationOutput +from ..utilities.bayesian_helpers import build_recipe_from_params, _score_result def _build_reaction_library( @@ -41,7 +47,6 @@ def _build_reaction_library( from rxn_network.enumerators.minimize import MinimizeGibbsEnumerator from rxn_network.enumerators.utils import run_enumerators - # Get entries from Materials Project entries = get_entries( chem_sys=chemical_system, metastability_cutoff=metastability_cutoff, @@ -53,18 +58,14 @@ def _build_reaction_library( if ensure_phases: print(f" (ensured inclusion of: {ensure_phases})") - # Create phase set phase_set = SolidPhaseSet.from_entry_set(entries) - # Enumerate reactions enumerators = [MinimizeGibbsEnumerator(), BasicEnumerator()] rxn_set = run_enumerators(enumerators, entries) print(f"Enumerated {len(rxn_set)} reactions") - # Compute reactions at all temperatures temp_rxn_mapping = rxn_set.compute_at_temperatures(temperatures) - # Score reactions reaction_lib = get_scored_rxns( rxn_set, temps=temperatures, @@ -115,7 +116,6 @@ def setup_reaction_library( **entry_kwargs ) - # Optionally save reaction library to file reaction_library_path = None if save_to_file: filename = f"reaction_library_{chemical_system.replace('-', '_')}.json" @@ -176,7 +176,6 @@ def run_simulation( recipe_dict = recipe.as_dict() - # Set up phase set and reaction library if reaction_library_data is not None: phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) reaction_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) @@ -192,7 +191,6 @@ def run_simulation( chem_sys = chemical_system reaction_library_path = None - # Run simulation if recipe.num_realizations > 1: result_doc = run_sim_parallel( recipe=recipe, @@ -210,7 +208,6 @@ def run_simulation( compress_freq=compress_freq, ) - # Optionally save result doc to file result_doc_path = None if save_to_file: filename = f"result_doc_{chem_sys.replace('-', '_')}.json" @@ -219,15 +216,12 @@ def run_simulation( json.dump(result_doc.as_dict(), f, cls=MontyEncoder) print(f"Saved result doc to {result_doc_path}") - # Analyze results analyzer = BulkReactionAnalyzer.from_result_doc(result_doc) - # Get final molar amounts final_molar_amounts = analyzer.get_all_absolute_molar_amounts( analyzer.last_loaded_step_idx ) - # Build trajectory: molar amounts at each step molar_trajectory: Dict[str, List[float]] = {} temp_trajectory: List[float] = [] step_indices: List[int] = list(analyzer.loaded_step_idxs) @@ -253,3 +247,249 @@ def run_simulation( num_realizations=recipe.num_realizations, metadata=metadata, ) + + +# --------------------------------------------------------------------------- +# Bayesian Optimization Jobs +# --------------------------------------------------------------------------- + +@job +def init_bo_campaign( + search_space_config: dict, + n_initial: int, + n_iterations: int, + target_name: str = "yield", + # precursor_smiles: Optional[Dict[str, Dict[str, str]]] = None, +) -> dict: + """Initialize a BayBE Campaign from a serialized SearchSpace. + + Builds the BayBE Campaign (search space, recommender, target) and returns + it as a JSON string so it can be passed between stateless HPC jobs without + losing the GP posterior. + + Args: + search_space_config: Serialized SearchSpace from SearchSpace.as_dict(). + n_initial: Number of random exploration trials before BO guidance. + n_iterations: Number of BO-guided trials after the initial phase. + target_name: Name of the optimization target (default "yield"). + + Returns: + {"campaign_json": } + """ + from rxn_ca.optimization import BayesianOptimizer, SearchSpace + from rxn_ca.optimization.objective import MockObjectiveFunction, ObjectiveConfig, ScorerType + + search_space = SearchSpace.from_dict(search_space_config) + + # MockObjectiveFunction is a stub — no simulations run here. + # BayesianOptimizer only needs it to satisfy the constructor signature; + # _build_campaign() does not call objective.evaluate(). + stub_objective = MockObjectiveFunction( + config=ObjectiveConfig( + target_phase="__stub__", + scorer_type=ScorerType.FINAL, + ), + score_fn=lambda p: 0.0, + ) + + optimizer = BayesianOptimizer( + search_space=search_space, + objective=stub_objective, + n_initial=n_initial, + n_iterations=n_iterations, + target_name=target_name, + # precursor_smiles=precursor_smiles or {}, + ) + + print(f"Initialized BayBE Campaign: {search_space}") + print(f" n_initial={n_initial}, n_iterations={n_iterations}, target='{target_name}'") + + return {"campaign_json": optimizer._campaign.to_json()} + + +@job +def bo_trial_step( + iteration: int, + total_iterations: int, + campaign_json: str, + reaction_library_data: ReactionLibraryData, + precursor_slot_names: List[str], + fixed_precursors: Optional[Dict[str, float]], + objective_config: dict, + output_dir: str, +) -> Response: + """Run one Bayesian optimization trial and chain the next. + + Each call: + 1. Restores the BayBE Campaign from JSON (GP posterior preserved). + 2. Calls campaign.recommend() for the next parameter configuration. + 3. Builds an OptimizableRecipe from those parameters. + 4. Runs the CA simulation using the pre-built reaction library. + 5. Scores the result and adds it to the Campaign. + 6. Saves per-trial JSON and appends to history.csv. + 7. Returns Response(addition=) or a summary on the final trial. + + Args: + iteration: Current 0-based trial index. + total_iterations: Total trials (n_initial + n_iterations). + campaign_json: Serialized BayBE Campaign from init_bo_campaign or the + previous trial. Contains the full GP posterior. + reaction_library_data: Output from setup_reaction_library. Passed + through every trial — MP enumeration is done once. + precursor_slot_names: Names of PrecursorSlotParameters in the search + space (e.g. ["li_source", "si_source"]). Used to extract the + selected precursor from BayBE's recommendation. + fixed_precursors: Formula → amount map when precursors are not + optimized. Set to None when using precursor slots. + objective_config: Dict with keys: + target_phase, scorer_type ("final"|"maximum"), + simulation_size, num_realizations, + live_compress (bool), compress_freq (int). + output_dir: Shared filesystem path for per-trial JSON, history.csv, + and best_result.json. Must be accessible from all worker nodes. + + Returns: + Response(addition=) if more trials remain, + otherwise a summary dict with best score and parameters. + """ + from baybe import Campaign + from rxn_ca.phases import SolidPhaseSet + from rxn_ca.reactions import ReactionLibrary + from rxn_ca.utilities.parallel_sim import run_sim_parallel + from rxn_ca.utilities.single_sim import run_single_sim + + target_phase = objective_config["target_phase"] + scorer_type = objective_config.get("scorer_type", "final") + simulation_size = objective_config["simulation_size"] + num_realizations = objective_config["num_realizations"] + live_compress = objective_config.get("live_compress", True) + compress_freq = objective_config.get("compress_freq", 50) + target_name = objective_config.get("target_name", "yield") + + output_path = Path(output_dir) + sim_dir = output_path / "simulations" + sim_dir.mkdir(parents=True, exist_ok=True) + + print(f"\n=== BO Trial {iteration + 1}/{total_iterations} ===") + + # --- Step 1: Restore BayBE Campaign (GP posterior preserved) --- + campaign = Campaign.from_json(campaign_json) + + # --- Step 2: Get next recommendation --- + recommendation = campaign.recommend(batch_size=1) + params = { + col: ( + recommendation.iloc[0][col].item() + if hasattr(recommendation.iloc[0][col], "item") + else recommendation.iloc[0][col] + ) + for col in recommendation.columns + } + print(f"Recommended params: {params}") + + # --- Step 3: Build recipe --- + opt_recipe = build_recipe_from_params( + params=params, + precursor_slot_names=precursor_slot_names, + fixed_precursors=fixed_precursors, + simulation_size=simulation_size, + num_realizations=num_realizations, + ) + recipe = opt_recipe.to_recipe() + print(f"Recipe: {opt_recipe}") + + # --- Step 4: Load reaction library --- + phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) + rxn_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) + + # --- Step 5: Run simulation --- + print("Running simulation...") + if num_realizations > 1: + result_doc = run_sim_parallel( + recipe, + reaction_lib=rxn_lib, + phase_set=phase_set, + live_compress=live_compress, + compress_freq=compress_freq, + ) + else: + result_doc = run_single_sim( + recipe, + reaction_lib=rxn_lib, + phase_set=phase_set, + live_compress=live_compress, + compress_freq=compress_freq, + ) + + # --- Step 6: Score --- + score = _score_result(result_doc, target_phase, scorer_type) + print(f"Score ({target_phase}, {scorer_type}): {score:.4f}") + + # --- Step 7: Tell Campaign --- + campaign.add_measurements(pd.DataFrame([{**params, target_name: score}])) + + # --- Step 8: Save per-trial outputs --- + trial_result = { + "iteration": iteration, + "params": params, + "score": score, + "timestamp": datetime.utcnow().isoformat(), + } + trial_path = sim_dir / f"trial_{iteration:03d}.json" + trial_path.write_text(json.dumps(trial_result, indent=2)) + + history_path = output_path / "history.csv" + write_header = not history_path.exists() + fieldnames = ["iteration", "score", *sorted(params.keys())] + with history_path.open("a", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + if write_header: + writer.writeheader() + writer.writerow({"iteration": iteration, "score": score, **params}) + + if result_doc is not None and hasattr(result_doc, "to_file"): + try: + result_doc.to_file(str(sim_dir / f"trial_{iteration:03d}_full.json")) + except Exception as e: + print(f"Warning: could not save full result doc: {e}") + + # --- Step 9: Chain next trial or finalize --- + new_campaign_json = campaign.to_json() + + if iteration + 1 < total_iterations: + next_job = bo_trial_step( + iteration=iteration + 1, + total_iterations=total_iterations, + campaign_json=new_campaign_json, + reaction_library_data=reaction_library_data, + precursor_slot_names=precursor_slot_names, + fixed_precursors=fixed_precursors, + objective_config=objective_config, + output_dir=output_dir, + ) + return Response(addition=next_job) + + # Final iteration: collect all trial results and write summary + history_rows: List[dict] = [] + for i in range(total_iterations): + trial_file = sim_dir / f"trial_{i:03d}.json" + if trial_file.exists(): + history_rows.append(json.loads(trial_file.read_text())) + + if history_rows: + best = max(history_rows, key=lambda r: r["score"]) + summary = { + "target_phase": target_phase, + "fixed_precursors": fixed_precursors, + "best_score": best["score"], + "best_params": best["params"], + "best_iteration": best["iteration"], + "total_evaluations": total_iterations, + } + (output_path / "best_result.json").write_text(json.dumps(summary, indent=2)) + print(f"\nOptimization complete.") + print(f"Best score: {best['score']:.4f} at iteration {best['iteration']}") + print(f"Best params: {best['params']}") + return summary + + return {"status": "complete", "iterations": total_iterations} From 40c89828ad069af4d83690c465efa438428deb65 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 13:22:08 -0700 Subject: [PATCH 05/31] added bayesian optimizer JobFlow flow file --- src/rxn_ca/workflow/__init__.py | 2 + src/rxn_ca/workflow/bayesian_flow_makers.py | 223 ++++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100644 src/rxn_ca/workflow/bayesian_flow_makers.py diff --git a/src/rxn_ca/workflow/__init__.py b/src/rxn_ca/workflow/__init__.py index 7da9725..e7501b0 100644 --- a/src/rxn_ca/workflow/__init__.py +++ b/src/rxn_ca/workflow/__init__.py @@ -10,6 +10,7 @@ from .schemas import SimulationOutput, ReactionLibraryData from .jobs import setup_reaction_library, run_simulation, init_bo_campaign, bo_trial_step from .flows import create_simulation_flow +from .bayesian_flow_makers import BOFlowMaker __all__ = [ "SimulationOutput", @@ -19,4 +20,5 @@ "create_simulation_flow", "init_bo_campaign", "bo_trial_step", + "BOFlowMaker", ] diff --git a/src/rxn_ca/workflow/bayesian_flow_makers.py b/src/rxn_ca/workflow/bayesian_flow_makers.py new file mode 100644 index 0000000..80794a6 --- /dev/null +++ b/src/rxn_ca/workflow/bayesian_flow_makers.py @@ -0,0 +1,223 @@ +"""Maker-based Bayesian Optimization flow for ReactCA synthesis. + +Provides BOFlowMaker, a jobflow Maker that builds the full BO loop Flow. +Makers store configuration as dataclass fields so the same instance can be +reused across different chemical systems, serialized with monty, and composed +into larger workflow Makers. + +Usage: + from rxn_ca.optimization import SearchSpace + from rxn_ca.workflow.bayesian_flow_makers import BOFlowMaker + + maker = BOFlowMaker( + n_initial=5, + n_iterations=15, + simulation_size=10, + num_realizations=3, + ) + + # Reuse the same maker for different chemical systems + flow_li = maker.make( + chemical_system="Li-Si-O-C", + target_phase="Li4SiO4", + search_space=search_space_li, + output_dir="/pscratch/sd/y/yoo/ReactCA/li4sio4_bo", + thermo_types=["R2SCAN"], + ) + flow_ba = maker.make( + chemical_system="Ba-Ti-O", + target_phase="BaTiO3", + search_space=search_space_ba, + output_dir="/pscratch/sd/y/yoo/ReactCA/batio3_bo", + thermo_types=["R2SCAN"], + ) +""" + +from __future__ import annotations + +import numpy as np +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +from jobflow import Flow, Maker + +from rxn_ca.optimization import SearchSpace +from .jobs import setup_reaction_library, init_bo_campaign, bo_trial_step + + +@dataclass +class BOFlowMaker(Maker): + """Maker for Bayesian Optimization flows over ReactCA simulations. + + Stores optimization configuration as dataclass fields so the same + Maker instance can be reused for multiple chemical systems without + re-specifying all parameters. Can be serialized with monty and + composed into larger workflow Makers. + + Wires together three job stages: + setup_reaction_library → init_bo_campaign → bo_trial_step[0] + ↓ Response.addition + bo_trial_step[1] → ... + + Attributes: + name: Name prefix for generated Flows. Full flow name is + "{name}_{target_phase}_{chemical_system}". + n_initial: Number of random exploration trials before BO guidance. + n_iterations: Number of BO-guided trials after the initial phase. + scorer_type: How to score each simulation result. + "final" — yield of target phase at the end of the simulation. + "maximum" — peak yield of target phase during the simulation. + simulation_size: CA grid size (NxN cells). + num_realizations: Number of independent simulation runs per trial. + Results are averaged for the BO score. + live_compress: Store full CA state snapshots at each compress_freq + step instead of diffs. Strongly recommended. + compress_freq: Interval (in simulation steps) between snapshots + when live_compress is True. + metastability_cutoff: Energy above the convex hull (eV/atom) below + which phases are included. + exclude_theoretical: If True, exclude phases without experimental + observations in the Materials Project database. + + Example — compose into a larger Maker: + @dataclass + class MySweepMaker(Maker): + name: str = "sweep" + bo_maker: BOFlowMaker = field(default_factory=BOFlowMaker) + + def make(self, systems: list, ...) -> Flow: + flows = [self.bo_maker.make(sys, ...) for sys in systems] + return Flow(flows, name=self.name) + """ + + name: str = "bo_flow" + n_initial: int = 5 + n_iterations: int = 15 + scorer_type: str = "final" + simulation_size: int = 10 + num_realizations: int = 3 + live_compress: bool = True + compress_freq: int = 50 + metastability_cutoff: float = 0.1 + exclude_theoretical: bool = True + + def make( + self, + chemical_system: str, + target_phase: str, + search_space: SearchSpace, + output_dir: str, + fixed_precursors: Optional[Dict[str, float]] = None, + ensure_phases: Optional[List[str]] = None, + **library_kwargs, + ) -> Flow: + """Build a Bayesian Optimization Flow for the given chemical system. + + Args: + chemical_system: Element system string, e.g. "Li-Si-O-C". + target_phase: Formula of the target product, e.g. "Li4SiO4". + search_space: Configured SearchSpace. Must contain a 'hold_temp' + parameter; precursor slots (if any) are derived automatically. + output_dir: Shared filesystem path written to by every trial job + (history.csv, best_result.json, simulations/). Must be + accessible from all HPC worker nodes. + fixed_precursors: Formula → molar amount map. When provided, + precursor selection is not optimized (only the thermal profile + is). Mutually exclusive with precursor slots in search_space. + ensure_phases: Phases that must be present in the reaction library. + Defaults to target_phase + all precursor candidates. + **library_kwargs: Forwarded to setup_reaction_library + (e.g. thermo_types=["R2SCAN"]). + + Returns: + Flow containing setup, init, and first trial jobs. Subsequent trial + jobs are added dynamically at runtime via Response(addition=...). + + Raises: + ValueError: If search_space has no 'hold_temp' parameter. + ValueError: If both fixed_precursors and precursor slots are given. + """ + flow_name = f"{self.name}_{target_phase}_{chemical_system}" + output_dir = str(Path(output_dir).expanduser().resolve()) + total_iterations = self.n_initial + self.n_iterations + + # --- Derive temperatures from search space hold_temp parameter --- + hold_temp_param = search_space.get_parameter("hold_temp") + if hold_temp_param is None: + raise ValueError( + "search_space must contain a 'hold_temp' parameter. " + "Add one with search_space.add_temperature_range(...)." + ) + temperatures = sorted( + set(float(t) for t in np.arange(300, hold_temp_param.high + 1, 100)) + ) + + # --- Derive precursor slot names from search space --- + precursor_slot_names = [p.name for p in search_space.precursor_parameters] + + if fixed_precursors is not None and precursor_slot_names: + raise ValueError( + "Provide either fixed_precursors or precursor slots in search_space, not both." + ) + + # --- Build ensure_phases if not explicitly provided --- + if ensure_phases is None: + ensure_phases = [target_phase] + if fixed_precursors: + ensure_phases.extend(fixed_precursors.keys()) + else: + for param in search_space.precursor_parameters: + ensure_phases.extend(param.candidates) + ensure_phases = list(dict.fromkeys(ensure_phases)) + + # --- Objective config passed to every trial job --- + objective_config = { + "target_phase": target_phase, + "scorer_type": self.scorer_type, + "simulation_size": self.simulation_size, + "num_realizations": self.num_realizations, + "live_compress": self.live_compress, + "compress_freq": self.compress_freq, + "target_name": "yield", + } + + # --- Job 1: Build reaction library (runs once, shared across all trials) --- + setup_job = setup_reaction_library( + chemical_system=chemical_system, + temperatures=temperatures, + ensure_phases=ensure_phases, + metastability_cutoff=self.metastability_cutoff, + exclude_theoretical=self.exclude_theoretical, + save_to_file=True, + **library_kwargs, + ) + setup_job.name = f"setup_{chemical_system}" + + # --- Job 2: Initialize BayBE Campaign --- + init_job = init_bo_campaign( + search_space_config=search_space.as_dict(), + n_initial=self.n_initial, + n_iterations=self.n_iterations, + ) + init_job.name = "init_bo_campaign" + + # --- Job 3: First BO trial --- + # campaign_json and reaction_library_data are job output references — + # jobflow resolves them at runtime after the upstream jobs complete. + first_trial = bo_trial_step( + iteration=0, + total_iterations=total_iterations, + campaign_json=init_job.output["campaign_json"], + reaction_library_data=setup_job.output, + precursor_slot_names=precursor_slot_names, + fixed_precursors=fixed_precursors, + objective_config=objective_config, + output_dir=output_dir, + ) + first_trial.name = "bo_trial_000" + + return Flow( + [setup_job, init_job, first_trial], + name=flow_name, + ) From f80e728f468eeacaa46b52f99ba6bd49684f3f99 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 13:36:23 -0700 Subject: [PATCH 06/31] Created jobs and flows folders, and separated job and flow files for standard ReactCA simulation and Bayesian Optimization simulation. --- src/rxn_ca/workflow/__init__.py | 5 +- src/rxn_ca/workflow/flows/__init__.py | 8 + .../bayesian.py} | 45 +-- .../workflow/{flows.py => flows/core.py} | 36 +-- src/rxn_ca/workflow/jobs/__init__.py | 9 + .../workflow/{jobs.py => jobs/bayesian.py} | 284 ++++-------------- src/rxn_ca/workflow/jobs/core.py | 241 +++++++++++++++ 7 files changed, 334 insertions(+), 294 deletions(-) create mode 100644 src/rxn_ca/workflow/flows/__init__.py rename src/rxn_ca/workflow/{bayesian_flow_makers.py => flows/bayesian.py} (86%) rename src/rxn_ca/workflow/{flows.py => flows/core.py} (78%) create mode 100644 src/rxn_ca/workflow/jobs/__init__.py rename src/rxn_ca/workflow/{jobs.py => jobs/bayesian.py} (50%) create mode 100644 src/rxn_ca/workflow/jobs/core.py diff --git a/src/rxn_ca/workflow/__init__.py b/src/rxn_ca/workflow/__init__.py index e7501b0..a41e6ab 100644 --- a/src/rxn_ca/workflow/__init__.py +++ b/src/rxn_ca/workflow/__init__.py @@ -9,16 +9,15 @@ from .schemas import SimulationOutput, ReactionLibraryData from .jobs import setup_reaction_library, run_simulation, init_bo_campaign, bo_trial_step -from .flows import create_simulation_flow -from .bayesian_flow_makers import BOFlowMaker +from .flows import create_simulation_flow, BOFlowMaker __all__ = [ "SimulationOutput", "ReactionLibraryData", "setup_reaction_library", "run_simulation", - "create_simulation_flow", "init_bo_campaign", "bo_trial_step", + "create_simulation_flow", "BOFlowMaker", ] diff --git a/src/rxn_ca/workflow/flows/__init__.py b/src/rxn_ca/workflow/flows/__init__.py new file mode 100644 index 0000000..b6f1069 --- /dev/null +++ b/src/rxn_ca/workflow/flows/__init__.py @@ -0,0 +1,8 @@ +from .core import create_simulation_flow, create_multi_simulation_flow +from .bayesian import BOFlowMaker + +__all__ = [ + "create_simulation_flow", + "create_multi_simulation_flow", + "BOFlowMaker", +] diff --git a/src/rxn_ca/workflow/bayesian_flow_makers.py b/src/rxn_ca/workflow/flows/bayesian.py similarity index 86% rename from src/rxn_ca/workflow/bayesian_flow_makers.py rename to src/rxn_ca/workflow/flows/bayesian.py index 80794a6..f55e923 100644 --- a/src/rxn_ca/workflow/bayesian_flow_makers.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -1,49 +1,17 @@ -"""Maker-based Bayesian Optimization flow for ReactCA synthesis. - -Provides BOFlowMaker, a jobflow Maker that builds the full BO loop Flow. -Makers store configuration as dataclass fields so the same instance can be -reused across different chemical systems, serialized with monty, and composed -into larger workflow Makers. - -Usage: - from rxn_ca.optimization import SearchSpace - from rxn_ca.workflow.bayesian_flow_makers import BOFlowMaker - - maker = BOFlowMaker( - n_initial=5, - n_iterations=15, - simulation_size=10, - num_realizations=3, - ) - - # Reuse the same maker for different chemical systems - flow_li = maker.make( - chemical_system="Li-Si-O-C", - target_phase="Li4SiO4", - search_space=search_space_li, - output_dir="/pscratch/sd/y/yoo/ReactCA/li4sio4_bo", - thermo_types=["R2SCAN"], - ) - flow_ba = maker.make( - chemical_system="Ba-Ti-O", - target_phase="BaTiO3", - search_space=search_space_ba, - output_dir="/pscratch/sd/y/yoo/ReactCA/batio3_bo", - thermo_types=["R2SCAN"], - ) -""" +"""Bayesian Optimization flow Maker for ReactCA synthesis.""" from __future__ import annotations import numpy as np from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional from jobflow import Flow, Maker from rxn_ca.optimization import SearchSpace -from .jobs import setup_reaction_library, init_bo_campaign, bo_trial_step +from ..jobs.core import setup_reaction_library +from ..jobs.bayesian import init_bo_campaign, bo_trial_step @dataclass @@ -80,6 +48,11 @@ class BOFlowMaker(Maker): exclude_theoretical: If True, exclude phases without experimental observations in the Materials Project database. + Example — reuse the same maker for different systems: + maker = BOFlowMaker(n_initial=5, n_iterations=15, simulation_size=10) + flow_li = maker.make("Li-Si-O-C", "Li4SiO4", search_space_li, output_dir_li) + flow_ba = maker.make("Ba-Ti-O", "BaTiO3", search_space_ba, output_dir_ba) + Example — compose into a larger Maker: @dataclass class MySweepMaker(Maker): diff --git a/src/rxn_ca/workflow/flows.py b/src/rxn_ca/workflow/flows/core.py similarity index 78% rename from src/rxn_ca/workflow/flows.py rename to src/rxn_ca/workflow/flows/core.py index 04af1f3..a7c9223 100644 --- a/src/rxn_ca/workflow/flows.py +++ b/src/rxn_ca/workflow/flows/core.py @@ -1,11 +1,12 @@ -"""Pre-built flows for common rxn-ca simulation workflows.""" +"""Core flows for rxn-ca simulations.""" + +from __future__ import annotations from typing import Any, Dict, List, Optional, Union from jobflow import Flow -from .jobs import setup_reaction_library, run_simulation -from .schemas import SimulationOutput +from ..jobs.core import setup_reaction_library, run_simulation def create_simulation_flow( @@ -40,39 +41,15 @@ def create_simulation_flow( Returns: Flow with setup and simulation jobs, outputting SimulationOutput - - Example: - >>> from rxn_ca.core.recipe import ReactionRecipe - >>> from rxn_ca.core.heating import HeatingSchedule, HeatingStep - >>> from rxn_ca.workflow import create_simulation_flow - >>> - >>> # Create a recipe - >>> heating_steps = HeatingStep.sweep(t0=298, tf=1273, stage_length=1, temp_step_size=50) - >>> heating_sched = HeatingSchedule.build(heating_steps) - >>> recipe = ReactionRecipe( - ... heating_schedule=heating_sched, - ... reactant_amounts={"BaCO3": 1.0, "TiO2": 1.0}, - ... simulation_size=15, - ... ) - >>> - >>> # Create and run the flow - >>> flow = create_simulation_flow( - ... recipe=recipe, - ... chemical_system="Ba-C-O-Ti", - ... ensure_phases=["BaTiO3", "BaCO3", "TiO2"], - ... ) """ - # Ensure we have a ReactionRecipe object from rxn_ca.core.recipe import ReactionRecipe if not isinstance(recipe, ReactionRecipe): recipe = ReactionRecipe.from_dict(recipe) - # Get temperatures from recipe if not provided if temperatures is None: temperatures = recipe.heating_schedule.all_temps - # Create setup job setup_job = setup_reaction_library( chemical_system=chemical_system, temperatures=temperatures, @@ -83,7 +60,6 @@ def create_simulation_flow( ) setup_job.name = f"setup_{chemical_system}" - # Create simulation job sim_job = run_simulation( recipe=recipe, reaction_library_data=setup_job.output, @@ -130,7 +106,6 @@ def create_multi_simulation_flow( """ from rxn_ca.core.recipe import ReactionRecipe - # Ensure all recipes are ReactionRecipe objects and collect temperatures recipe_objects = [] all_temps = set() @@ -140,11 +115,9 @@ def create_multi_simulation_flow( recipe_objects.append(r) all_temps.update(r.heating_schedule.all_temps) - # Use provided temperatures or collected ones if temperatures is None: temperatures = sorted(all_temps) - # Create setup job (shared across all simulations) setup_job = setup_reaction_library( chemical_system=chemical_system, temperatures=temperatures, @@ -158,7 +131,6 @@ def create_multi_simulation_flow( jobs = [setup_job] outputs = [] - # Create simulation jobs for i, recipe in enumerate(recipe_objects): metadata = metadata_list[i] if metadata_list else None diff --git a/src/rxn_ca/workflow/jobs/__init__.py b/src/rxn_ca/workflow/jobs/__init__.py new file mode 100644 index 0000000..6156f8d --- /dev/null +++ b/src/rxn_ca/workflow/jobs/__init__.py @@ -0,0 +1,9 @@ +from .core import setup_reaction_library, run_simulation +from .bayesian import init_bo_campaign, bo_trial_step + +__all__ = [ + "setup_reaction_library", + "run_simulation", + "init_bo_campaign", + "bo_trial_step", +] diff --git a/src/rxn_ca/workflow/jobs.py b/src/rxn_ca/workflow/jobs/bayesian.py similarity index 50% rename from src/rxn_ca/workflow/jobs.py rename to src/rxn_ca/workflow/jobs/bayesian.py index 49eb1d0..dea67e6 100644 --- a/src/rxn_ca/workflow/jobs.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -1,8 +1,4 @@ -"""Jobflow jobs for rxn-ca simulations. - -These jobs can be used standalone or composed into flows for running -rxn-ca simulations in workflow frameworks. -""" +"""Bayesian Optimization jobs for rxn-ca simulations.""" from __future__ import annotations @@ -14,243 +10,89 @@ import pandas as pd from jobflow import Response, job -from monty.json import MontyEncoder -from .schemas import ReactionLibraryData, SimulationOutput -from ..utilities.bayesian_helpers import build_recipe_from_params, _score_result +from ..schemas import ReactionLibraryData -def _build_reaction_library( - chemical_system: str, - temperatures: List[float], - ensure_phases: List[str] = None, - metastability_cutoff: float = 0.1, - exclude_theoretical: bool = True, - **entry_kwargs -) -> tuple: - """Build phase set and reaction library for a chemical system. +# --------------------------------------------------------------------------- +# Helpers (not @job — called inside bo_trial_step) +# --------------------------------------------------------------------------- + +def build_recipe_from_params( + params: Dict[str, Any], + precursor_slot_names: List[str], + fixed_precursors: Optional[Dict[str, float]], + simulation_size: int, + num_realizations: int, +) -> "OptimizableRecipe": + """Convert a BayBE parameter recommendation into an OptimizableRecipe. Args: - chemical_system: Chemical system string (e.g., "Ba-Ti-O") - temperatures: List of temperatures (K) to score reactions at - ensure_phases: List of phase formulas that MUST be included - metastability_cutoff: Energy above hull cutoff for phases - exclude_theoretical: Whether to exclude theoretical phases + params: Parameter dict recommended by BayBE (may contain numpy scalars). + precursor_slot_names: Names of precursor slot parameters in the search + space (e.g. ["li_source", "si_source"]). Used to extract the + selected precursor formula for each slot from params. + fixed_precursors: If provided, use these formula→amount pairs directly + (precursor selection is not optimized). + simulation_size: CA grid size (NxN). + num_realizations: Number of simulation realizations to average. Returns: - Tuple of (phase_set, reaction_lib) + Configured OptimizableRecipe ready for to_recipe(). """ - from rxn_ca.utilities.get_entries import get_entries - from rxn_ca.phases import SolidPhaseSet - from rxn_ca.utilities.get_scored_rxns import get_scored_rxns - from rxn_network.enumerators.basic import BasicEnumerator - from rxn_network.enumerators.minimize import MinimizeGibbsEnumerator - from rxn_network.enumerators.utils import run_enumerators - - entries = get_entries( - chem_sys=chemical_system, - metastability_cutoff=metastability_cutoff, - ensure_phases=ensure_phases or [], - exclude_theoretical_phases=exclude_theoretical, - **entry_kwargs - ) - print(f"Got {len(entries)} entries for {chemical_system}") - if ensure_phases: - print(f" (ensured inclusion of: {ensure_phases})") + from rxn_ca.optimization import OptimizableRecipe - phase_set = SolidPhaseSet.from_entry_set(entries) + # Normalize numpy scalar types that BayBE returns in its DataFrame + clean_params = { + k: (v.item() if hasattr(v, "item") else v) + for k, v in params.items() + } - enumerators = [MinimizeGibbsEnumerator(), BasicEnumerator()] - rxn_set = run_enumerators(enumerators, entries) - print(f"Enumerated {len(rxn_set)} reactions") + if fixed_precursors is not None: + return OptimizableRecipe( + precursors=fixed_precursors, + hold_temp=int(clean_params["hold_temp"]), + hold_time=int(clean_params["hold_time"]), + ramp_step_time=int(clean_params.get("ramp_step_time", 1)), + simulation_size=simulation_size, + num_simulations=num_realizations, + ) - temp_rxn_mapping = rxn_set.compute_at_temperatures(temperatures) + precursor_slots = {name: clean_params[name] for name in precursor_slot_names} - reaction_lib = get_scored_rxns( - rxn_set, - temps=temperatures, - phase_set=phase_set, - rxns_at_temps=temp_rxn_mapping, - parallel=True, + return OptimizableRecipe.from_params( + params=clean_params, + precursor_slots=precursor_slots, + simulation_size=simulation_size, + num_simulations=num_realizations, ) - return phase_set, reaction_lib - -@job -def setup_reaction_library( - chemical_system: str, - temperatures: List[float], - ensure_phases: List[str] = None, - metastability_cutoff: float = 0.1, - exclude_theoretical: bool = True, - save_to_file: bool = True, - **entry_kwargs -) -> ReactionLibraryData: - """Set up phase set and reaction library for a chemical system. - - This job fetches thermodynamic data from Materials Project, enumerates - possible reactions, and scores them at each temperature. This is typically - the most expensive step and can be shared across multiple simulations - in the same chemical system. +def _score_result(result_doc: Any, target_phase: str, scorer_type: str) -> float: + """Score a simulation result document. Args: - chemical_system: Chemical system string (e.g., "Ba-Ti-O") - temperatures: List of temperatures (K) to score reactions at - ensure_phases: List of phase formulas that MUST be included - even if they would otherwise be filtered out (e.g., phases - known to exist from experimental observations) - metastability_cutoff: Energy above hull cutoff for phases - exclude_theoretical: Whether to exclude theoretical phases - save_to_file: If True, save reaction library to a JSON file + result_doc: RxnCAResultDoc from the simulation. + target_phase: Chemical formula of the target product. + scorer_type: "final" (yield at end) or "maximum" (peak yield). Returns: - ReactionLibraryData with phase set, reaction library, and metadata + Float score in [0, 1], or 0.0 if scoring fails. """ - phase_set, reaction_lib = _build_reaction_library( - chemical_system, - temperatures, - ensure_phases, - metastability_cutoff, - exclude_theoretical, - **entry_kwargs - ) - - reaction_library_path = None - if save_to_file: - filename = f"reaction_library_{chemical_system.replace('-', '_')}.json" - reaction_library_path = str(Path.cwd() / filename) - with open(reaction_library_path, "w") as f: - json.dump(reaction_lib.as_dict(), f, cls=MontyEncoder) - print(f"Saved reaction library to {reaction_library_path}") - - return ReactionLibraryData( - phase_set_dict=phase_set.as_dict(), - reaction_library_dict=reaction_lib.as_dict(), - chemical_system=chemical_system, - temperatures=temperatures, - phases_available=list(phase_set.phases), - reaction_library_path=reaction_library_path, - ) + from rxn_ca.optimization import AnalyzedResult, FinalProductScorer, MaximumProductScorer, get_result_analysis - -@job -def run_simulation( - recipe: "ReactionRecipe", - reaction_library_data: ReactionLibraryData = None, - chemical_system: str = None, - ensure_phases: List[str] = None, - metastability_cutoff: float = 0.1, - save_to_file: bool = True, - metadata: Dict[str, Any] = None, - live_compress: bool = True, - compress_freq: int = 100, -) -> SimulationOutput: - """Run an rxn-ca simulation. - - Args: - recipe: ReactionRecipe specifying reactants, heating schedule, etc. - reaction_library_data: Pre-computed reaction library from setup_reaction_library. - If not provided, will build one from scratch (requires chemical_system). - chemical_system: Chemical system string, required if reaction_library_data - not provided - ensure_phases: Phases to ensure are included, used if building library - from scratch - metastability_cutoff: Energy above hull cutoff, used if building library - from scratch - save_to_file: If True, save the full result doc to a JSON file - metadata: Optional user-provided metadata for tagging/provenance - live_compress: If True, store full state snapshots at compress_freq - intervals instead of diffs. Avoids slow reconstruction during analysis. - compress_freq: Interval for storing frames when live_compress is True. - - Returns: - SimulationOutput with analyzed results and file references - """ - from rxn_ca.phases import SolidPhaseSet - from rxn_ca.core.recipe import ReactionRecipe - from rxn_ca.utilities.parallel_sim import run_sim_parallel - from rxn_ca.utilities.single_sim import run_single_sim - from rxn_ca.analysis import BulkReactionAnalyzer - from rxn_ca.reactions import ReactionLibrary - - recipe_dict = recipe.as_dict() - - if reaction_library_data is not None: - phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) - reaction_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) - chem_sys = reaction_library_data.chemical_system - reaction_library_path = reaction_library_data.reaction_library_path - else: - if chemical_system is None: - raise ValueError("chemical_system required when reaction_library_data not provided") - all_temps = recipe.heating_schedule.all_temps - phase_set, reaction_lib = _build_reaction_library( - chemical_system, all_temps, ensure_phases, metastability_cutoff - ) - chem_sys = chemical_system - reaction_library_path = None - - if recipe.num_realizations > 1: - result_doc = run_sim_parallel( - recipe=recipe, - reaction_lib=reaction_lib, - phase_set=phase_set, - live_compress=live_compress, - compress_freq=compress_freq, - ) - else: - result_doc = run_single_sim( - recipe=recipe, - reaction_lib=reaction_lib, - phase_set=phase_set, - live_compress=live_compress, - compress_freq=compress_freq, - ) - - result_doc_path = None - if save_to_file: - filename = f"result_doc_{chem_sys.replace('-', '_')}.json" - result_doc_path = str(Path.cwd() / filename) - with open(result_doc_path, "w") as f: - json.dump(result_doc.as_dict(), f, cls=MontyEncoder) - print(f"Saved result doc to {result_doc_path}") - - analyzer = BulkReactionAnalyzer.from_result_doc(result_doc) - - final_molar_amounts = analyzer.get_all_absolute_molar_amounts( - analyzer.last_loaded_step_idx - ) - - molar_trajectory: Dict[str, List[float]] = {} - temp_trajectory: List[float] = [] - step_indices: List[int] = list(analyzer.loaded_step_idxs) - - for step_idx in step_indices: - amounts = analyzer.get_all_absolute_molar_amounts(step_idx) - for phase, amount in amounts.items(): - if phase not in molar_trajectory: - molar_trajectory[phase] = [] - molar_trajectory[phase].append(amount) - temp_trajectory.append(recipe.heating_schedule.temp_at(step_idx)) - - return SimulationOutput( - final_molar_amounts=final_molar_amounts, - molar_amounts_trajectory=molar_trajectory, - temperature_trajectory=temp_trajectory, - step_indices=step_indices, - phase_set_dict=phase_set.as_dict(), - reaction_library_path=reaction_library_path, - result_doc_path=result_doc_path, - recipe_dict=recipe_dict, - chemical_system=chem_sys, - num_realizations=recipe.num_realizations, - metadata=metadata, - ) + try: + traces = get_result_analysis(result_doc) + analyzed = AnalyzedResult(traces) + scorer_cls = MaximumProductScorer if scorer_type == "maximum" else FinalProductScorer + return scorer_cls(target_phase).score(analyzed) + except Exception as e: + print(f"Warning: scoring failed ({type(e).__name__}: {e})") + return 0.0 # --------------------------------------------------------------------------- -# Bayesian Optimization Jobs +# Jobs # --------------------------------------------------------------------------- @job @@ -281,9 +123,6 @@ def init_bo_campaign( search_space = SearchSpace.from_dict(search_space_config) - # MockObjectiveFunction is a stub — no simulations run here. - # BayesianOptimizer only needs it to satisfy the constructor signature; - # _build_campaign() does not call objective.evaluate(). stub_objective = MockObjectiveFunction( config=ObjectiveConfig( target_phase="__stub__", @@ -337,8 +176,7 @@ def bo_trial_step( reaction_library_data: Output from setup_reaction_library. Passed through every trial — MP enumeration is done once. precursor_slot_names: Names of PrecursorSlotParameters in the search - space (e.g. ["li_source", "si_source"]). Used to extract the - selected precursor from BayBE's recommendation. + space (e.g. ["li_source", "si_source"]). fixed_precursors: Formula → amount map when precursors are not optimized. Set to None when using precursor slots. objective_config: Dict with keys: diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py new file mode 100644 index 0000000..1fc46bf --- /dev/null +++ b/src/rxn_ca/workflow/jobs/core.py @@ -0,0 +1,241 @@ +"""Core jobflow jobs for rxn-ca simulations.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict, List, Optional + +from jobflow import job +from monty.json import MontyEncoder + +from ..schemas import ReactionLibraryData, SimulationOutput + + +def _build_reaction_library( + chemical_system: str, + temperatures: List[float], + ensure_phases: List[str] = None, + metastability_cutoff: float = 0.1, + exclude_theoretical: bool = True, + **entry_kwargs +) -> tuple: + """Build phase set and reaction library for a chemical system. + + Args: + chemical_system: Chemical system string (e.g., "Ba-Ti-O") + temperatures: List of temperatures (K) to score reactions at + ensure_phases: List of phase formulas that MUST be included + metastability_cutoff: Energy above hull cutoff for phases + exclude_theoretical: Whether to exclude theoretical phases + + Returns: + Tuple of (phase_set, reaction_lib) + """ + from rxn_ca.utilities.get_entries import get_entries + from rxn_ca.phases import SolidPhaseSet + from rxn_ca.utilities.get_scored_rxns import get_scored_rxns + from rxn_network.enumerators.basic import BasicEnumerator + from rxn_network.enumerators.minimize import MinimizeGibbsEnumerator + from rxn_network.enumerators.utils import run_enumerators + + entries = get_entries( + chem_sys=chemical_system, + metastability_cutoff=metastability_cutoff, + ensure_phases=ensure_phases or [], + exclude_theoretical_phases=exclude_theoretical, + **entry_kwargs + ) + print(f"Got {len(entries)} entries for {chemical_system}") + if ensure_phases: + print(f" (ensured inclusion of: {ensure_phases})") + + phase_set = SolidPhaseSet.from_entry_set(entries) + + enumerators = [MinimizeGibbsEnumerator(), BasicEnumerator()] + rxn_set = run_enumerators(enumerators, entries) + print(f"Enumerated {len(rxn_set)} reactions") + + temp_rxn_mapping = rxn_set.compute_at_temperatures(temperatures) + + reaction_lib = get_scored_rxns( + rxn_set, + temps=temperatures, + phase_set=phase_set, + rxns_at_temps=temp_rxn_mapping, + parallel=True, + ) + + return phase_set, reaction_lib + + +@job +def setup_reaction_library( + chemical_system: str, + temperatures: List[float], + ensure_phases: List[str] = None, + metastability_cutoff: float = 0.1, + exclude_theoretical: bool = True, + save_to_file: bool = True, + **entry_kwargs +) -> ReactionLibraryData: + """Set up phase set and reaction library for a chemical system. + + This job fetches thermodynamic data from Materials Project, enumerates + possible reactions, and scores them at each temperature. This is typically + the most expensive step and can be shared across multiple simulations + in the same chemical system. + + Args: + chemical_system: Chemical system string (e.g., "Ba-Ti-O") + temperatures: List of temperatures (K) to score reactions at + ensure_phases: List of phase formulas that MUST be included + even if they would otherwise be filtered out (e.g., phases + known to exist from experimental observations) + metastability_cutoff: Energy above hull cutoff for phases + exclude_theoretical: Whether to exclude theoretical phases + save_to_file: If True, save reaction library to a JSON file + + Returns: + ReactionLibraryData with phase set, reaction library, and metadata + """ + phase_set, reaction_lib = _build_reaction_library( + chemical_system, + temperatures, + ensure_phases, + metastability_cutoff, + exclude_theoretical, + **entry_kwargs + ) + + reaction_library_path = None + if save_to_file: + filename = f"reaction_library_{chemical_system.replace('-', '_')}.json" + reaction_library_path = str(Path.cwd() / filename) + with open(reaction_library_path, "w") as f: + json.dump(reaction_lib.as_dict(), f, cls=MontyEncoder) + print(f"Saved reaction library to {reaction_library_path}") + + return ReactionLibraryData( + phase_set_dict=phase_set.as_dict(), + reaction_library_dict=reaction_lib.as_dict(), + chemical_system=chemical_system, + temperatures=temperatures, + phases_available=list(phase_set.phases), + reaction_library_path=reaction_library_path, + ) + + +@job +def run_simulation( + recipe: "ReactionRecipe", + reaction_library_data: ReactionLibraryData = None, + chemical_system: str = None, + ensure_phases: List[str] = None, + metastability_cutoff: float = 0.1, + save_to_file: bool = True, + metadata: Dict[str, Any] = None, + live_compress: bool = True, + compress_freq: int = 100, +) -> SimulationOutput: + """Run an rxn-ca simulation. + + Args: + recipe: ReactionRecipe specifying reactants, heating schedule, etc. + reaction_library_data: Pre-computed reaction library from setup_reaction_library. + If not provided, will build one from scratch (requires chemical_system). + chemical_system: Chemical system string, required if reaction_library_data + not provided + ensure_phases: Phases to ensure are included, used if building library + from scratch + metastability_cutoff: Energy above hull cutoff, used if building library + from scratch + save_to_file: If True, save the full result doc to a JSON file + metadata: Optional user-provided metadata for tagging/provenance + live_compress: If True, store full state snapshots at compress_freq + intervals instead of diffs. Avoids slow reconstruction during analysis. + compress_freq: Interval for storing frames when live_compress is True. + + Returns: + SimulationOutput with analyzed results and file references + """ + from rxn_ca.phases import SolidPhaseSet + from rxn_ca.core.recipe import ReactionRecipe + from rxn_ca.utilities.parallel_sim import run_sim_parallel + from rxn_ca.utilities.single_sim import run_single_sim + from rxn_ca.analysis import BulkReactionAnalyzer + from rxn_ca.reactions import ReactionLibrary + + recipe_dict = recipe.as_dict() + + if reaction_library_data is not None: + phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) + reaction_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) + chem_sys = reaction_library_data.chemical_system + reaction_library_path = reaction_library_data.reaction_library_path + else: + if chemical_system is None: + raise ValueError("chemical_system required when reaction_library_data not provided") + all_temps = recipe.heating_schedule.all_temps + phase_set, reaction_lib = _build_reaction_library( + chemical_system, all_temps, ensure_phases, metastability_cutoff + ) + chem_sys = chemical_system + reaction_library_path = None + + if recipe.num_realizations > 1: + result_doc = run_sim_parallel( + recipe=recipe, + reaction_lib=reaction_lib, + phase_set=phase_set, + live_compress=live_compress, + compress_freq=compress_freq, + ) + else: + result_doc = run_single_sim( + recipe=recipe, + reaction_lib=reaction_lib, + phase_set=phase_set, + live_compress=live_compress, + compress_freq=compress_freq, + ) + + result_doc_path = None + if save_to_file: + filename = f"result_doc_{chem_sys.replace('-', '_')}.json" + result_doc_path = str(Path.cwd() / filename) + with open(result_doc_path, "w") as f: + json.dump(result_doc.as_dict(), f, cls=MontyEncoder) + print(f"Saved result doc to {result_doc_path}") + + analyzer = BulkReactionAnalyzer.from_result_doc(result_doc) + + final_molar_amounts = analyzer.get_all_absolute_molar_amounts( + analyzer.last_loaded_step_idx + ) + + molar_trajectory: Dict[str, List[float]] = {} + temp_trajectory: List[float] = [] + step_indices: List[int] = list(analyzer.loaded_step_idxs) + + for step_idx in step_indices: + amounts = analyzer.get_all_absolute_molar_amounts(step_idx) + for phase, amount in amounts.items(): + if phase not in molar_trajectory: + molar_trajectory[phase] = [] + molar_trajectory[phase].append(amount) + temp_trajectory.append(recipe.heating_schedule.temp_at(step_idx)) + + return SimulationOutput( + final_molar_amounts=final_molar_amounts, + molar_amounts_trajectory=molar_trajectory, + temperature_trajectory=temp_trajectory, + step_indices=step_indices, + phase_set_dict=phase_set.as_dict(), + reaction_library_path=reaction_library_path, + result_doc_path=result_doc_path, + recipe_dict=recipe_dict, + chemical_system=chem_sys, + num_realizations=recipe.num_realizations, + metadata=metadata, + ) From 3e822266bfcb40414316b791fb16b3f36027cb3e Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 13:39:44 -0700 Subject: [PATCH 07/31] Revise docstring for core jobflow jobs Updated docstring to clarify jobflow usage. --- src/rxn_ca/workflow/jobs/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 1fc46bf..6c9cc8d 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -1,4 +1,8 @@ -"""Core jobflow jobs for rxn-ca simulations.""" +"""Jobflow jobs for rxn-ca simulations. + +These jobs can be used standalone or composed into flows for running +rxn-ca simulations in workflow frameworks. +""" from __future__ import annotations From b4c7955992114682b4abfa901fa6604f1c09bec9 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 13:40:51 -0700 Subject: [PATCH 08/31] Refactor imports and enhance reaction library setup Copied original jobs.py file to here --- src/rxn_ca/workflow/jobs/core.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 6c9cc8d..fb1fc02 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -4,16 +4,14 @@ rxn-ca simulations in workflow frameworks. """ -from __future__ import annotations - -import json from pathlib import Path from typing import Any, Dict, List, Optional +import json from jobflow import job -from monty.json import MontyEncoder +from monty.json import MontyEncoder, MontyDecoder -from ..schemas import ReactionLibraryData, SimulationOutput +from .schemas import ReactionLibraryData, SimulationOutput def _build_reaction_library( @@ -22,7 +20,6 @@ def _build_reaction_library( ensure_phases: List[str] = None, metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, - **entry_kwargs ) -> tuple: """Build phase set and reaction library for a chemical system. @@ -43,25 +40,29 @@ def _build_reaction_library( from rxn_network.enumerators.minimize import MinimizeGibbsEnumerator from rxn_network.enumerators.utils import run_enumerators + # Get entries from Materials Project entries = get_entries( chem_sys=chemical_system, metastability_cutoff=metastability_cutoff, ensure_phases=ensure_phases or [], exclude_theoretical_phases=exclude_theoretical, - **entry_kwargs ) print(f"Got {len(entries)} entries for {chemical_system}") if ensure_phases: print(f" (ensured inclusion of: {ensure_phases})") + # Create phase set phase_set = SolidPhaseSet.from_entry_set(entries) + # Enumerate reactions enumerators = [MinimizeGibbsEnumerator(), BasicEnumerator()] rxn_set = run_enumerators(enumerators, entries) print(f"Enumerated {len(rxn_set)} reactions") + # Compute reactions at all temperatures temp_rxn_mapping = rxn_set.compute_at_temperatures(temperatures) + # Score reactions reaction_lib = get_scored_rxns( rxn_set, temps=temperatures, @@ -81,7 +82,6 @@ def setup_reaction_library( metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, save_to_file: bool = True, - **entry_kwargs ) -> ReactionLibraryData: """Set up phase set and reaction library for a chemical system. @@ -109,9 +109,9 @@ def setup_reaction_library( ensure_phases, metastability_cutoff, exclude_theoretical, - **entry_kwargs ) + # Optionally save reaction library to file reaction_library_path = None if save_to_file: filename = f"reaction_library_{chemical_system.replace('-', '_')}.json" @@ -172,6 +172,7 @@ def run_simulation( recipe_dict = recipe.as_dict() + # Set up phase set and reaction library if reaction_library_data is not None: phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) reaction_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) @@ -187,6 +188,7 @@ def run_simulation( chem_sys = chemical_system reaction_library_path = None + # Run simulation if recipe.num_realizations > 1: result_doc = run_sim_parallel( recipe=recipe, @@ -204,6 +206,7 @@ def run_simulation( compress_freq=compress_freq, ) + # Optionally save result doc to file result_doc_path = None if save_to_file: filename = f"result_doc_{chem_sys.replace('-', '_')}.json" @@ -212,12 +215,15 @@ def run_simulation( json.dump(result_doc.as_dict(), f, cls=MontyEncoder) print(f"Saved result doc to {result_doc_path}") + # Analyze results analyzer = BulkReactionAnalyzer.from_result_doc(result_doc) + # Get final molar amounts final_molar_amounts = analyzer.get_all_absolute_molar_amounts( analyzer.last_loaded_step_idx ) + # Build trajectory: molar amounts at each step molar_trajectory: Dict[str, List[float]] = {} temp_trajectory: List[float] = [] step_indices: List[int] = list(analyzer.loaded_step_idxs) From b6d9f39a553758e5e5888a408bf38ce11e62eb78 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 13:43:53 -0700 Subject: [PATCH 09/31] Enhance docstring with example for create_simulation_flow Added example usage for creating a simulation flow in the docstring. --- src/rxn_ca/workflow/flows/core.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/rxn_ca/workflow/flows/core.py b/src/rxn_ca/workflow/flows/core.py index a7c9223..54f2f29 100644 --- a/src/rxn_ca/workflow/flows/core.py +++ b/src/rxn_ca/workflow/flows/core.py @@ -41,6 +41,27 @@ def create_simulation_flow( Returns: Flow with setup and simulation jobs, outputting SimulationOutput + + Example: + >>> from rxn_ca.core.recipe import ReactionRecipe + >>> from rxn_ca.core.heating import HeatingSchedule, HeatingStep + >>> from rxn_ca.workflow import create_simulation_flow + >>> + >>> # Create a recipe + >>> heating_steps = HeatingStep.sweep(t0=298, tf=1273, stage_length=1, temp_step_size=50) + >>> heating_sched = HeatingSchedule.build(heating_steps) + >>> recipe = ReactionRecipe( + ... heating_schedule=heating_sched, + ... reactant_amounts={"BaCO3": 1.0, "TiO2": 1.0}, + ... simulation_size=15, + ... ) + >>> + >>> # Create and run the flow + >>> flow = create_simulation_flow( + ... recipe=recipe, + ... chemical_system="Ba-C-O-Ti", + ... ensure_phases=["BaTiO3", "BaCO3", "TiO2"], + ... ) """ from rxn_ca.core.recipe import ReactionRecipe From 9969eace65d48c5f1d7cba6a72d943ab6fd76b80 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 16:55:57 -0700 Subject: [PATCH 10/31] fixed kwargs not correctly being added due to wrong branching --- src/rxn_ca/utilities/bayesian_helpers.py | 89 ------------------------ src/rxn_ca/workflow/flows/ | 88 +++++++++++++++++++++++ src/rxn_ca/workflow/flows/bayesian.py | 2 +- src/rxn_ca/workflow/jobs/core.py | 4 ++ 4 files changed, 93 insertions(+), 90 deletions(-) delete mode 100644 src/rxn_ca/utilities/bayesian_helpers.py create mode 100644 src/rxn_ca/workflow/flows/ diff --git a/src/rxn_ca/utilities/bayesian_helpers.py b/src/rxn_ca/utilities/bayesian_helpers.py deleted file mode 100644 index 394487e..0000000 --- a/src/rxn_ca/utilities/bayesian_helpers.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Helper functions for Bayesian Optimization jobs. - -These are plain functions (not @job decorated) called from within -bo_trial_step in rxn_ca.workflow.jobs. -""" - -from __future__ import annotations - -from typing import Any, Dict, List, Optional - -from rxn_ca.optimization import ( - AnalyzedResult, - FinalProductScorer, - MaximumProductScorer, - OptimizableRecipe, - get_result_analysis, -) - - -def build_recipe_from_params( - params: Dict[str, Any], - precursor_slot_names: List[str], - fixed_precursors: Optional[Dict[str, float]], - simulation_size: int, - num_realizations: int, -) -> OptimizableRecipe: - """Convert a BayBE parameter recommendation into an OptimizableRecipe. - - Args: - params: Parameter dict recommended by BayBE (may contain numpy scalars). - precursor_slot_names: Names of precursor slot parameters in the search - space (e.g. ["li_source", "si_source"]). Used to extract the - selected precursor formula for each slot from params. - fixed_precursors: If provided, use these formula→amount pairs directly - (precursor selection is not optimized). - simulation_size: CA grid size (NxN). - num_realizations: Number of simulation realizations to average. - - Returns: - Configured OptimizableRecipe ready for to_recipe(). - """ - # Normalize numpy scalar types that BayBE returns in its DataFrame - clean_params = { - k: (v.item() if hasattr(v, "item") else v) - for k, v in params.items() - } - - if fixed_precursors is not None: - return OptimizableRecipe( - precursors=fixed_precursors, - hold_temp=int(clean_params["hold_temp"]), - hold_time=int(clean_params["hold_time"]), - ramp_step_time=int(clean_params.get("ramp_step_time", 1)), - simulation_size=simulation_size, - num_simulations=num_realizations, - ) - - # Variable precursors: extract the selected formula for each slot from params. - # We pass actual string values (not None) so OptimizableRecipe.from_params - # populates precursors correctly via its first loop. - precursor_slots = {name: clean_params[name] for name in precursor_slot_names} - - return OptimizableRecipe.from_params( - params=clean_params, - precursor_slots=precursor_slots, - simulation_size=simulation_size, - num_simulations=num_realizations, - ) - - -def _score_result(result_doc: Any, target_phase: str, scorer_type: str) -> float: - """Score a simulation result document. - - Args: - result_doc: RxnCAResultDoc from the simulation. - target_phase: Chemical formula of the target product. - scorer_type: "final" (yield at end) or "maximum" (peak yield). - - Returns: - Float score in [0, 1], or 0.0 if scoring fails. - """ - try: - traces = get_result_analysis(result_doc) - analyzed = AnalyzedResult(traces) - scorer_cls = MaximumProductScorer if scorer_type == "maximum" else FinalProductScorer - return scorer_cls(target_phase).score(analyzed) - except Exception as e: - print(f"Warning: scoring failed ({type(e).__name__}: {e})") - return 0.0 diff --git a/src/rxn_ca/workflow/flows/ b/src/rxn_ca/workflow/flows/ new file mode 100644 index 0000000..b767d53 --- /dev/null +++ b/src/rxn_ca/workflow/flows/ @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +"""Quick local test for the BOFlowMaker jobflow. + +Runs 2 initial + 2 BO trials on a tiny 5x5 grid with 1 realization. +Uses run_locally() so no FireWorks/MongoDB needed. + +Usage: + python test_bo_flow.py + python test_bo_flow.py --dry-run # just build the flow, no simulation +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +from rxn_ca.optimization import SearchSpace +from rxn_ca.workflow import BOFlowMaker + + +def build_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--dry-run", + action="store_true", + help="Print flow structure without running simulations.", + ) + parser.add_argument( + "--output-dir", + default="./test_bo_output", + help="Directory for trial outputs.", + ) + return parser + + +def main(): + args = build_parser().parse_args() + + output_dir = Path(args.output_dir).expanduser().resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + # Minimal search space — small ranges so trials are fast + search_space = ( + SearchSpace() + .add_temperature_range(low=900, high=1100, step=100) + .add_hold_time_range(low=30, high=60) + .add_ramp_step_time_range(low=1, high=5) + ) + + maker = BOFlowMaker( + n_initial=2, + n_iterations=2, + simulation_size=5, # tiny grid + num_realizations=1, # single run per trial + live_compress=True, + compress_freq=50, + ) + + flow = maker.make( + chemical_system="Li-Si-O-C", + target_phase="Li4SiO4", + search_space=search_space, + output_dir=str(output_dir), + fixed_precursors={"Li2CO3": 2.0, "SiO2": 1.0}, + thermo_types=["R2SCAN"], + ) + + print(f"Flow: '{flow.name}'") + print(f"Jobs in flow:") + for j in flow.jobs: + print(f" - {j.name} ({j.uuid})") + + if args.dry_run: + print("\n[Dry run] Skipping execution.") + return + + from jobflow import run_locally + print("\nRunning flow locally...") + responses = run_locally(flow, create_folders=True) + + print("\nDone. Check outputs:") + print(f" {output_dir}/history.csv") + print(f" {output_dir}/best_result.json") + print(f" {output_dir}/simulations/") + + +if __name__ == "__main__": + main() diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index f55e923..01bad43 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -163,7 +163,7 @@ def make( metastability_cutoff=self.metastability_cutoff, exclude_theoretical=self.exclude_theoretical, save_to_file=True, - **library_kwargs, + entry_kwargs=library_kwargs.get("entry_kwargs", {}), ) setup_job.name = f"setup_{chemical_system}" diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index fb1fc02..a9736ff 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -20,6 +20,7 @@ def _build_reaction_library( ensure_phases: List[str] = None, metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, + **entry_kwargs, ) -> tuple: """Build phase set and reaction library for a chemical system. @@ -46,6 +47,7 @@ def _build_reaction_library( metastability_cutoff=metastability_cutoff, ensure_phases=ensure_phases or [], exclude_theoretical_phases=exclude_theoretical, + **entry_kwargs, ) print(f"Got {len(entries)} entries for {chemical_system}") if ensure_phases: @@ -82,6 +84,7 @@ def setup_reaction_library( metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, save_to_file: bool = True, + **library_kwargs, ) -> ReactionLibraryData: """Set up phase set and reaction library for a chemical system. @@ -109,6 +112,7 @@ def setup_reaction_library( ensure_phases, metastability_cutoff, exclude_theoretical, + **library_kwargs, ) # Optionally save reaction library to file From a4017c88e387d9d53f1a4ce66d8fe15ebb569849 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 16:58:36 -0700 Subject: [PATCH 11/31] fixed typo --- src/rxn_ca/workflow/jobs/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index a9736ff..6a3cd7a 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -11,7 +11,7 @@ from jobflow import job from monty.json import MontyEncoder, MontyDecoder -from .schemas import ReactionLibraryData, SimulationOutput +from ..schemas import ReactionLibraryData, SimulationOutput def _build_reaction_library( From aca9692326f7042a33337c5561ba22d7a7203e0b Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 17 Apr 2026 17:06:23 -0700 Subject: [PATCH 12/31] more kwargs error!!! --- src/rxn_ca/workflow/jobs/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 6a3cd7a..74486d5 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -47,7 +47,7 @@ def _build_reaction_library( metastability_cutoff=metastability_cutoff, ensure_phases=ensure_phases or [], exclude_theoretical_phases=exclude_theoretical, - **entry_kwargs, + thermo_types=entry_kwargs.get("thermo_types", ["GGA_GGA+U"]) ) print(f"Got {len(entries)} entries for {chemical_system}") if ensure_phases: From 8aa0d69136f7697f802bad2855d46baac931a30c Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Mon, 20 Apr 2026 13:57:02 -0700 Subject: [PATCH 13/31] fixed KeyError issue arising from using floats instead of ints when setting temperatures; rxn-network uses string instead of float/int so the decimal point caused KeyErrors. --- src/rxn_ca/workflow/flows/bayesian.py | 2 +- src/rxn_ca/workflow/jobs/core.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index 01bad43..c958f56 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -123,7 +123,7 @@ def make( "Add one with search_space.add_temperature_range(...)." ) temperatures = sorted( - set(float(t) for t in np.arange(300, hold_temp_param.high + 1, 100)) + set(int(t) for t in np.arange(300, hold_temp_param.high + 1, 100)) ) # --- Derive precursor slot names from search space --- diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 74486d5..46372ad 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -61,13 +61,17 @@ def _build_reaction_library( rxn_set = run_enumerators(enumerators, entries) print(f"Enumerated {len(rxn_set)} reactions") + # rxn_network's G_ELEMS uses integer string keys ('300', '400', ...), + # so temperatures must be ints — float 300.0 becomes '300.0' and KeyErrors. + int_temps = [int(t) for t in temperatures] + # Compute reactions at all temperatures - temp_rxn_mapping = rxn_set.compute_at_temperatures(temperatures) + temp_rxn_mapping = rxn_set.compute_at_temperatures(int_temps) # Score reactions reaction_lib = get_scored_rxns( rxn_set, - temps=temperatures, + temps=int_temps, phase_set=phase_set, rxns_at_temps=temp_rxn_mapping, parallel=True, From e479e7ac868f087607cc572342533afdb410dcac Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Mon, 20 Apr 2026 16:40:53 -0700 Subject: [PATCH 14/31] fixed mongoDB max memory issue, now passes the path instead of the whole json --- src/rxn_ca/workflow/jobs/bayesian.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index dea67e6..d52355a 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -12,12 +12,13 @@ from jobflow import Response, job from ..schemas import ReactionLibraryData - +import os # --------------------------------------------------------------------------- # Helpers (not @job — called inside bo_trial_step) # --------------------------------------------------------------------------- + def build_recipe_from_params( params: Dict[str, Any], precursor_slot_names: List[str], @@ -142,8 +143,9 @@ def init_bo_campaign( print(f"Initialized BayBE Campaign: {search_space}") print(f" n_initial={n_initial}, n_iterations={n_iterations}, target='{target_name}'") - - return {"campaign_json": optimizer._campaign.to_json()} + with open("campaign.json", "w") as f: + json.dump(optimizer._campaign.to_json(), f) + return {"campaign.json": os.getcwd() + "/campaign.json"} @job @@ -211,7 +213,9 @@ def bo_trial_step( print(f"\n=== BO Trial {iteration + 1}/{total_iterations} ===") # --- Step 1: Restore BayBE Campaign (GP posterior preserved) --- - campaign = Campaign.from_json(campaign_json) + with open(campaign_json, "r") as f: + campaign_jsonfile = json.load(f) + campaign = Campaign.from_json(campaign_jsonfile) # --- Step 2: Get next recommendation --- recommendation = campaign.recommend(batch_size=1) @@ -325,7 +329,7 @@ def bo_trial_step( "total_evaluations": total_iterations, } (output_path / "best_result.json").write_text(json.dumps(summary, indent=2)) - print(f"\nOptimization complete.") + print("\nOptimization complete.") print(f"Best score: {best['score']:.4f} at iteration {best['iteration']}") print(f"Best params: {best['params']}") return summary From 843add80d56328e6db162973918f0d8757fc604c Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Mon, 20 Apr 2026 16:51:05 -0700 Subject: [PATCH 15/31] fixed flows/bayesian.py as well --- src/rxn_ca/workflow/flows/bayesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index c958f56..28cd252 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -181,7 +181,7 @@ def make( first_trial = bo_trial_step( iteration=0, total_iterations=total_iterations, - campaign_json=init_job.output["campaign_json"], + campaign_json=init_job.output["campaign.json"], reaction_library_data=setup_job.output, precursor_slot_names=precursor_slot_names, fixed_precursors=fixed_precursors, From 10ca6bfbd3dc2a7322662b0638fe1a9b06c3c672 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Tue, 21 Apr 2026 09:59:08 -0700 Subject: [PATCH 16/31] made changes to kwargs logic to make sure everything is passed down correctly, while making sure the reaction library path is exported in BO workflow when it is first generated to prevent mongoDB memory errors. --- src/rxn_ca/workflow/flows/bayesian.py | 10 ++++++++- src/rxn_ca/workflow/jobs/bayesian.py | 12 ++++++++-- src/rxn_ca/workflow/jobs/core.py | 32 ++++++++++++++++++++++----- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index 28cd252..de916eb 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -156,6 +156,11 @@ def make( } # --- Job 1: Build reaction library (runs once, shared across all trials) --- + setup_kwargs = dict(library_kwargs) + library_dir = setup_kwargs.pop("library_dir", None) + include_library_dict = setup_kwargs.pop("include_library_dict", False) + entry_kwargs = setup_kwargs.pop("entry_kwargs", {}) + setup_job = setup_reaction_library( chemical_system=chemical_system, temperatures=temperatures, @@ -163,7 +168,10 @@ def make( metastability_cutoff=self.metastability_cutoff, exclude_theoretical=self.exclude_theoretical, save_to_file=True, - entry_kwargs=library_kwargs.get("entry_kwargs", {}), + library_dir=library_dir, + include_library_dict=include_library_dict, + entry_kwargs=entry_kwargs, + **setup_kwargs, ) setup_job.name = f"setup_{chemical_system}" diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index d52355a..f954d8c 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -241,8 +241,16 @@ def bo_trial_step( print(f"Recipe: {opt_recipe}") # --- Step 4: Load reaction library --- - phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) - rxn_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) + if reaction_library_data.reaction_library_path: + rxn_lib = ReactionLibrary.from_file(reaction_library_data.reaction_library_path) + phase_set = rxn_lib.phases + elif reaction_library_data.reaction_library_dict: + phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) + rxn_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) + else: + raise ValueError( + "reaction_library_data must include reaction_library_path or reaction_library_dict" + ) # --- Step 5: Run simulation --- print("Running simulation...") diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 46372ad..7eba093 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -88,6 +88,9 @@ def setup_reaction_library( metastability_cutoff: float = 0.1, exclude_theoretical: bool = True, save_to_file: bool = True, + library_dir: Optional[str] = None, + include_library_dict: bool = True, + entry_kwargs: Optional[Dict[str, Any]] = None, **library_kwargs, ) -> ReactionLibraryData: """Set up phase set and reaction library for a chemical system. @@ -106,31 +109,42 @@ def setup_reaction_library( metastability_cutoff: Energy above hull cutoff for phases exclude_theoretical: Whether to exclude theoretical phases save_to_file: If True, save reaction library to a JSON file + library_dir: Optional output directory for saved library JSON. + Defaults to current working directory when not provided. + include_library_dict: Whether to include the full serialized + reaction_library_dict in the returned payload. + entry_kwargs: Optional kwargs forwarded to entry/reaction enumeration. Returns: ReactionLibraryData with phase set, reaction library, and metadata """ + build_kwargs: Dict[str, Any] = {} + build_kwargs.update(entry_kwargs or {}) + build_kwargs.update(library_kwargs) + phase_set, reaction_lib = _build_reaction_library( chemical_system, temperatures, ensure_phases, metastability_cutoff, exclude_theoretical, - **library_kwargs, + **build_kwargs, ) # Optionally save reaction library to file reaction_library_path = None if save_to_file: + output_dir = Path(library_dir).expanduser() if library_dir else Path.cwd() + output_dir.mkdir(parents=True, exist_ok=True) filename = f"reaction_library_{chemical_system.replace('-', '_')}.json" - reaction_library_path = str(Path.cwd() / filename) + reaction_library_path = str((output_dir / filename).resolve()) with open(reaction_library_path, "w") as f: json.dump(reaction_lib.as_dict(), f, cls=MontyEncoder) print(f"Saved reaction library to {reaction_library_path}") return ReactionLibraryData( phase_set_dict=phase_set.as_dict(), - reaction_library_dict=reaction_lib.as_dict(), + reaction_library_dict=reaction_lib.as_dict() if include_library_dict else {}, chemical_system=chemical_system, temperatures=temperatures, phases_available=list(phase_set.phases), @@ -182,8 +196,16 @@ def run_simulation( # Set up phase set and reaction library if reaction_library_data is not None: - phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) - reaction_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) + if reaction_library_data.reaction_library_path: + reaction_lib = ReactionLibrary.from_file(reaction_library_data.reaction_library_path) + phase_set = reaction_lib.phases + elif reaction_library_data.reaction_library_dict: + phase_set = SolidPhaseSet.from_dict(reaction_library_data.phase_set_dict) + reaction_lib = ReactionLibrary.from_dict(reaction_library_data.reaction_library_dict) + else: + raise ValueError( + "reaction_library_data must include reaction_library_path or reaction_library_dict" + ) chem_sys = reaction_library_data.chemical_system reaction_library_path = reaction_library_data.reaction_library_path else: From db00955a2e3b0c698f379e89ad127e89ab3152c6 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 1 May 2026 15:27:52 -0700 Subject: [PATCH 17/31] fixed a bug where json from 1 trial to another was not writing correctly --- campaign.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 campaign.json diff --git a/campaign.json b/campaign.json new file mode 100644 index 0000000..91b62af --- /dev/null +++ b/campaign.json @@ -0,0 +1 @@ +"{\"type\": \"Campaign\", \"searchspace\": {\"discrete\": {\"parameters\": [{\"type\": \"NumericalDiscreteParameter\", \"name\": \"hold_temp\", \"metadata\": {\"description\": null, \"unit\": null}, \"values\": [900.0, 1000.0, 1100.0], \"tolerance\": 0.0}, {\"type\": \"NumericalDiscreteParameter\", \"name\": \"hold_time\", \"metadata\": {\"description\": null, \"unit\": null}, \"values\": [2.0, 3.0, 4.0], \"tolerance\": 0.0}, {\"type\": \"NumericalDiscreteParameter\", \"name\": \"ramp_step_time\", \"metadata\": {\"description\": null, \"unit\": null}, \"values\": [1.0, 2.0], \"tolerance\": 0.0}], \"exp_rep\": \"gASVwQQAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUjBZwYW5kYXMuX2xpYnMuaW50ZXJuYWxzlIwPX3VucGlja2xlX2Jsb2NrlJOUjBZudW1weS5fY29yZS5tdWx0aWFycmF5lIwMX3JlY29uc3RydWN0lJOUjAVudW1weZSMB25kYXJyYXmUk5RLAIWUQwFilIeUUpQoSwFLAUsShpRoD4wFZHR5cGWUk5SMAmY4lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKJQ5AAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAECUdJRijAhidWlsdGluc5SMBXNsaWNllJOUSwJLA0sBh5RSlEsCh5RSlGgLaA5oEUsAhZRoE4eUUpQoSwFLAUsShpRoG4lDkAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQJR0lGJoIksBSwJLAYeUUpRLAoeUUpRoC2gOaBFLAIWUaBOHlFKUKEsBSwFLEoaUaBuJQ5AAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUAAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUCUdJRiaCJLAEsBSwGHlFKUSwKHlFKUh5RdlCiMGHBhbmRhcy5jb3JlLmluZGV4ZXMuYmFzZZSMCl9uZXdfSW5kZXiUk5RoPYwFSW5kZXiUk5R9lCiMBGRhdGGUaA5oEUsAhZRoE4eUUpQoSwFLA4WUaBiMAk84lImIh5RSlChLA4wBfJROTk5K/////0r/////Sz90lGKJXZQojAlob2xkX3RlbXCUjAlob2xkX3RpbWWUjA5yYW1wX3N0ZXBfdGltZZRldJRijARuYW1llE51hpRSlGg/jBlwYW5kYXMuY29yZS5pbmRleGVzLnJhbmdllIwKUmFuZ2VJbmRleJSTlH2UKGhSTowFc3RhcnSUSwCMBHN0b3CUSxKMBHN0ZXCUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\", \"empty_encoding\": false, \"constraints\": [], \"comp_rep\": \"gASVwQQAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUjBZwYW5kYXMuX2xpYnMuaW50ZXJuYWxzlIwPX3VucGlja2xlX2Jsb2NrlJOUjBZudW1weS5fY29yZS5tdWx0aWFycmF5lIwMX3JlY29uc3RydWN0lJOUjAVudW1weZSMB25kYXJyYXmUk5RLAIWUQwFilIeUUpQoSwFLAUsShpRoD4wFZHR5cGWUk5SMAmY4lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKJQ5AAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUAAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUCUdJRijAhidWlsdGluc5SMBXNsaWNllJOUSwBLAUsBh5RSlEsCh5RSlGgLaA5oEUsAhZRoE4eUUpQoSwFLAUsShpRoG4lDkAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQJR0lGJoIksBSwJLAYeUUpRLAoeUUpRoC2gOaBFLAIWUaBOHlFKUKEsBSwFLEoaUaBuJQ5AAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAECUdJRiaCJLAksDSwGHlFKUSwKHlFKUh5RdlCiMGHBhbmRhcy5jb3JlLmluZGV4ZXMuYmFzZZSMCl9uZXdfSW5kZXiUk5RoPYwFSW5kZXiUk5R9lCiMBGRhdGGUaA5oEUsAhZRoE4eUUpQoSwFLA4WUaBiMAk84lImIh5RSlChLA4wBfJROTk5K/////0r/////Sz90lGKJXZQojAlob2xkX3RlbXCUjAlob2xkX3RpbWWUjA5yYW1wX3N0ZXBfdGltZZRldJRijARuYW1llE51hpRSlGg/jBlwYW5kYXMuY29yZS5pbmRleGVzLnJhbmdllIwKUmFuZ2VJbmRleJSTlH2UKGhSTowFc3RhcnSUSwCMBHN0b3CUSxKMBHN0ZXCUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\"}, \"continuous\": {\"parameters\": [], \"constraints_lin_eq\": [], \"constraints_lin_ineq\": [], \"constraints_nonlin\": []}}, \"objective\": {\"type\": \"SingleTargetObjective\", \"metadata\": {\"description\": null}, \"target\": {\"type\": \"NumericalTarget\", \"name\": \"yield\", \"metadata\": {\"description\": null, \"unit\": null}, \"transformation\": {\"type\": \"IdentityTransformation\"}, \"minimize\": false, \"constructor_info\": null}}, \"recommender\": {\"type\": \"TwoPhaseMetaRecommender\", \"initial_recommender\": {\"type\": \"RandomRecommender\"}, \"recommender\": {\"type\": \"BotorchRecommender\", \"surrogate_model\": {\"type\": \"CompositeSurrogate\", \"surrogates\": {\"type\": \"_ReplicationMapping\", \"template\": {\"type\": \"GaussianProcessSurrogate\", \"kernel_or_factory\": {\"type\": \"DefaultKernelFactory\"}}}}, \"acquisition_function\": null, \"sequential_continuous\": true, \"hybrid_sampler\": null, \"sampling_percentage\": 1.0, \"n_restarts\": 10, \"n_raw_samples\": 64, \"max_n_subspaces\": 10}, \"switch_after\": 1, \"remain_switched\": false, \"_has_switched\": false}, \"allow_recommending_already_measured\": true, \"allow_recommending_already_recommended\": false, \"allow_recommending_pending_experiments\": false, \"searchspace_metadata\": \"gASV2AIAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUjBZwYW5kYXMuX2xpYnMuaW50ZXJuYWxzlIwPX3VucGlja2xlX2Jsb2NrlJOUjBZudW1weS5fY29yZS5tdWx0aWFycmF5lIwMX3JlY29uc3RydWN0lJOUjAVudW1weZSMB25kYXJyYXmUk5RLAIWUQwFilIeUUpQoSwFLA0sShpRoD4wFZHR5cGWUk5SMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGKIQzYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACUdJRijAhidWlsdGluc5SMBXNsaWNllJOUSwBLA0sBh5RSlEsCh5RSlIWUXZQojBhwYW5kYXMuY29yZS5pbmRleGVzLmJhc2WUjApfbmV3X0luZGV4lJOUaCmMBUluZGV4lJOUfZQojARkYXRhlGgOaBFLAIWUaBOHlFKUKEsBSwOFlGgYjAJPOJSJiIeUUpQoSwNoHE5OTkr/////Sv////9LP3SUYoldlCiMC3JlY29tbWVuZGVklIwIbWVhc3VyZWSUjAhleGNsdWRlZJRldJRijARuYW1llE51hpRSlGgrjBlwYW5kYXMuY29yZS5pbmRleGVzLnJhbmdllIwKUmFuZ2VJbmRleJSTlH2UKGg9TowFc3RhcnSUSwCMBHN0b3CUSxKMBHN0ZXCUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\", \"n_batches_done\": 0, \"n_fits_done\": 0, \"measurements_exp\": \"gASVWwEAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUKV2UKIwYcGFuZGFzLmNvcmUuaW5kZXhlcy5iYXNllIwKX25ld19JbmRleJSTlIwZcGFuZGFzLmNvcmUuaW5kZXhlcy5yYW5nZZSMClJhbmdlSW5kZXiUk5R9lCiMBG5hbWWUTowFc3RhcnSUSwCMBHN0b3CUSwCMBHN0ZXCUSwF1hpRSlGgMaA99lChoEU5oEksAaBNLAGgUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\", \"cached_recommendation\": null, \"version\": \"0.14.3\"}" \ No newline at end of file From 575339ed33e262048e787b44acac2ba051e4c075 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 1 May 2026 15:34:56 -0700 Subject: [PATCH 18/31] deleted the weird json file --- campaign.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 campaign.json diff --git a/campaign.json b/campaign.json deleted file mode 100644 index 91b62af..0000000 --- a/campaign.json +++ /dev/null @@ -1 +0,0 @@ -"{\"type\": \"Campaign\", \"searchspace\": {\"discrete\": {\"parameters\": [{\"type\": \"NumericalDiscreteParameter\", \"name\": \"hold_temp\", \"metadata\": {\"description\": null, \"unit\": null}, \"values\": [900.0, 1000.0, 1100.0], \"tolerance\": 0.0}, {\"type\": \"NumericalDiscreteParameter\", \"name\": \"hold_time\", \"metadata\": {\"description\": null, \"unit\": null}, \"values\": [2.0, 3.0, 4.0], \"tolerance\": 0.0}, {\"type\": \"NumericalDiscreteParameter\", \"name\": \"ramp_step_time\", \"metadata\": {\"description\": null, \"unit\": null}, \"values\": [1.0, 2.0], \"tolerance\": 0.0}], \"exp_rep\": \"gASVwQQAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUjBZwYW5kYXMuX2xpYnMuaW50ZXJuYWxzlIwPX3VucGlja2xlX2Jsb2NrlJOUjBZudW1weS5fY29yZS5tdWx0aWFycmF5lIwMX3JlY29uc3RydWN0lJOUjAVudW1weZSMB25kYXJyYXmUk5RLAIWUQwFilIeUUpQoSwFLAUsShpRoD4wFZHR5cGWUk5SMAmY4lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKJQ5AAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAECUdJRijAhidWlsdGluc5SMBXNsaWNllJOUSwJLA0sBh5RSlEsCh5RSlGgLaA5oEUsAhZRoE4eUUpQoSwFLAUsShpRoG4lDkAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQJR0lGJoIksBSwJLAYeUUpRLAoeUUpRoC2gOaBFLAIWUaBOHlFKUKEsBSwFLEoaUaBuJQ5AAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUAAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUCUdJRiaCJLAEsBSwGHlFKUSwKHlFKUh5RdlCiMGHBhbmRhcy5jb3JlLmluZGV4ZXMuYmFzZZSMCl9uZXdfSW5kZXiUk5RoPYwFSW5kZXiUk5R9lCiMBGRhdGGUaA5oEUsAhZRoE4eUUpQoSwFLA4WUaBiMAk84lImIh5RSlChLA4wBfJROTk5K/////0r/////Sz90lGKJXZQojAlob2xkX3RlbXCUjAlob2xkX3RpbWWUjA5yYW1wX3N0ZXBfdGltZZRldJRijARuYW1llE51hpRSlGg/jBlwYW5kYXMuY29yZS5pbmRleGVzLnJhbmdllIwKUmFuZ2VJbmRleJSTlH2UKGhSTowFc3RhcnSUSwCMBHN0b3CUSxKMBHN0ZXCUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\", \"empty_encoding\": false, \"constraints\": [], \"comp_rep\": \"gASVwQQAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUjBZwYW5kYXMuX2xpYnMuaW50ZXJuYWxzlIwPX3VucGlja2xlX2Jsb2NrlJOUjBZudW1weS5fY29yZS5tdWx0aWFycmF5lIwMX3JlY29uc3RydWN0lJOUjAVudW1weZSMB25kYXJyYXmUk5RLAIWUQwFilIeUUpQoSwFLAUsShpRoD4wFZHR5cGWUk5SMAmY4lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKJQ5AAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAACCMQAAAAAAAIIxAAAAAAAAgjEAAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAAECPQAAAAAAAQI9AAAAAAABAj0AAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUAAAAAAADCRQAAAAAAAMJFAAAAAAAAwkUCUdJRijAhidWlsdGluc5SMBXNsaWNllJOUSwBLAUsBh5RSlEsCh5RSlGgLaA5oEUsAhZRoE4eUUpQoSwFLAUsShpRoG4lDkAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAIQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAQQJR0lGJoIksBSwJLAYeUUpRLAoeUUpRoC2gOaBFLAIWUaBOHlFKUKEsBSwFLEoaUaBuJQ5AAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAEAAAAAAAADwPwAAAAAAAABAAAAAAAAA8D8AAAAAAAAAQAAAAAAAAPA/AAAAAAAAAECUdJRiaCJLAksDSwGHlFKUSwKHlFKUh5RdlCiMGHBhbmRhcy5jb3JlLmluZGV4ZXMuYmFzZZSMCl9uZXdfSW5kZXiUk5RoPYwFSW5kZXiUk5R9lCiMBGRhdGGUaA5oEUsAhZRoE4eUUpQoSwFLA4WUaBiMAk84lImIh5RSlChLA4wBfJROTk5K/////0r/////Sz90lGKJXZQojAlob2xkX3RlbXCUjAlob2xkX3RpbWWUjA5yYW1wX3N0ZXBfdGltZZRldJRijARuYW1llE51hpRSlGg/jBlwYW5kYXMuY29yZS5pbmRleGVzLnJhbmdllIwKUmFuZ2VJbmRleJSTlH2UKGhSTowFc3RhcnSUSwCMBHN0b3CUSxKMBHN0ZXCUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\"}, \"continuous\": {\"parameters\": [], \"constraints_lin_eq\": [], \"constraints_lin_ineq\": [], \"constraints_nonlin\": []}}, \"objective\": {\"type\": \"SingleTargetObjective\", \"metadata\": {\"description\": null}, \"target\": {\"type\": \"NumericalTarget\", \"name\": \"yield\", \"metadata\": {\"description\": null, \"unit\": null}, \"transformation\": {\"type\": \"IdentityTransformation\"}, \"minimize\": false, \"constructor_info\": null}}, \"recommender\": {\"type\": \"TwoPhaseMetaRecommender\", \"initial_recommender\": {\"type\": \"RandomRecommender\"}, \"recommender\": {\"type\": \"BotorchRecommender\", \"surrogate_model\": {\"type\": \"CompositeSurrogate\", \"surrogates\": {\"type\": \"_ReplicationMapping\", \"template\": {\"type\": \"GaussianProcessSurrogate\", \"kernel_or_factory\": {\"type\": \"DefaultKernelFactory\"}}}}, \"acquisition_function\": null, \"sequential_continuous\": true, \"hybrid_sampler\": null, \"sampling_percentage\": 1.0, \"n_restarts\": 10, \"n_raw_samples\": 64, \"max_n_subspaces\": 10}, \"switch_after\": 1, \"remain_switched\": false, \"_has_switched\": false}, \"allow_recommending_already_measured\": true, \"allow_recommending_already_recommended\": false, \"allow_recommending_pending_experiments\": false, \"searchspace_metadata\": \"gASV2AIAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUjBZwYW5kYXMuX2xpYnMuaW50ZXJuYWxzlIwPX3VucGlja2xlX2Jsb2NrlJOUjBZudW1weS5fY29yZS5tdWx0aWFycmF5lIwMX3JlY29uc3RydWN0lJOUjAVudW1weZSMB25kYXJyYXmUk5RLAIWUQwFilIeUUpQoSwFLA0sShpRoD4wFZHR5cGWUk5SMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGKIQzYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACUdJRijAhidWlsdGluc5SMBXNsaWNllJOUSwBLA0sBh5RSlEsCh5RSlIWUXZQojBhwYW5kYXMuY29yZS5pbmRleGVzLmJhc2WUjApfbmV3X0luZGV4lJOUaCmMBUluZGV4lJOUfZQojARkYXRhlGgOaBFLAIWUaBOHlFKUKEsBSwOFlGgYjAJPOJSJiIeUUpQoSwNoHE5OTkr/////Sv////9LP3SUYoldlCiMC3JlY29tbWVuZGVklIwIbWVhc3VyZWSUjAhleGNsdWRlZJRldJRijARuYW1llE51hpRSlGgrjBlwYW5kYXMuY29yZS5pbmRleGVzLnJhbmdllIwKUmFuZ2VJbmRleJSTlH2UKGg9TowFc3RhcnSUSwCMBHN0b3CUSxKMBHN0ZXCUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\", \"n_batches_done\": 0, \"n_fits_done\": 0, \"measurements_exp\": \"gASVWwEAAAAAAACMEXBhbmRhcy5jb3JlLmZyYW1llIwJRGF0YUZyYW1llJOUKYGUfZQojARfbWdylIwecGFuZGFzLmNvcmUuaW50ZXJuYWxzLm1hbmFnZXJzlIwMQmxvY2tNYW5hZ2VylJOUKV2UKIwYcGFuZGFzLmNvcmUuaW5kZXhlcy5iYXNllIwKX25ld19JbmRleJSTlIwZcGFuZGFzLmNvcmUuaW5kZXhlcy5yYW5nZZSMClJhbmdlSW5kZXiUk5R9lCiMBG5hbWWUTowFc3RhcnSUSwCMBHN0b3CUSwCMBHN0ZXCUSwF1hpRSlGgMaA99lChoEU5oEksAaBNLAGgUSwF1hpRSlGWGlFKUjARfdHlwlIwJZGF0YWZyYW1llIwJX21ldGFkYXRhlF2UjAVhdHRyc5R9lIwGX2ZsYWdzlH2UjBdhbGxvd3NfZHVwbGljYXRlX2xhYmVsc5SIc3ViLg==\", \"cached_recommendation\": null, \"version\": \"0.14.3\"}" \ No newline at end of file From 9c4f68e98dd1d9fde196b313a075cf1770088d83 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 1 May 2026 15:36:32 -0700 Subject: [PATCH 19/31] Change campaign_json to file path for next trial Update campaign handling to write JSON to a file for next trial. --- src/rxn_ca/workflow/jobs/bayesian.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index f954d8c..b0fda32 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -304,13 +304,19 @@ def bo_trial_step( print(f"Warning: could not save full result doc: {e}") # --- Step 9: Chain next trial or finalize --- - new_campaign_json = campaign.to_json() + # Write the updated campaign to a shared-filesystem file so the next + # trial job (running on a different worker) can read it by path. + # Passing campaign.to_json() directly would give a raw JSON string, but + # bo_trial_step expects a file path (open(campaign_json, "r")). + campaign_path = str(output_path / f"campaign_iter_{iteration:03d}.json") + with open(campaign_path, "w") as f: + json.dump(campaign.to_json(), f) if iteration + 1 < total_iterations: next_job = bo_trial_step( iteration=iteration + 1, total_iterations=total_iterations, - campaign_json=new_campaign_json, + campaign_json=campaign_path, reaction_library_data=reaction_library_data, precursor_slot_names=precursor_slot_names, fixed_precursors=fixed_precursors, From 7db99516b3e87769be0ce89654c1cacf9ab816af Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Fri, 1 May 2026 15:57:20 -0700 Subject: [PATCH 20/31] cherry picked max's edits on precursor selection --- .../optimization/precursor_selection.py | 131 ++++++++++++------ 1 file changed, 89 insertions(+), 42 deletions(-) diff --git a/src/rxn_ca/optimization/precursor_selection.py b/src/rxn_ca/optimization/precursor_selection.py index e474230..bba72ea 100644 --- a/src/rxn_ca/optimization/precursor_selection.py +++ b/src/rxn_ca/optimization/precursor_selection.py @@ -65,11 +65,11 @@ def __post_init__(self): AnionType("phosphate", "PO4", -3, frozenset({"P", "O"})), ] -# Default anion types for typical solid-state synthesis -DEFAULT_PRECURSOR_ANIONS: List[str] = ["oxide", "carbonate", "hydroxide", "nitrate"] +# Default anion types for typical solid-state synthesis (by formula) +DEFAULT_PRECURSOR_ANIONS: List[str] = ["O", "CO3", "OH", "NO3"] -# Anion types useful for metathesis reactions -METATHESIS_ANIONS: List[str] = ["chloride", "bromide", "nitrate", "sulfate", "acetate"] +# Anion types useful for metathesis reactions (by formula) +METATHESIS_ANIONS: List[str] = ["Cl", "Br", "NO3", "SO4", "C2H3O2"] # Counter-cations for metathesis reactions (provide leaving groups) # Maps cation symbol to its oxidation state @@ -81,6 +81,9 @@ def __post_init__(self): "Cs": 1, } +# Build formula -> AnionType lookup (populated after COMMON_ANION_TYPES is defined) +_ANION_BY_FORMULA: Dict[str, "AnionType"] = {} + # ============================================================================= # Precursor formula generation @@ -164,22 +167,49 @@ def generate_precursor_formula( return cation_str + anion_str -def get_anion_by_name(name: str) -> AnionType: - """Get an AnionType by its name. +def _build_anion_lookup() -> None: + """Build the formula -> AnionType lookup dict.""" + for anion in COMMON_ANION_TYPES: + _ANION_BY_FORMULA[anion.formula] = anion + + +def get_anion(identifier: str) -> AnionType: + """Get an AnionType by its formula or name. Args: - name: Anion name (e.g., "oxide", "carbonate") + identifier: Anion formula (e.g., "CO3", "Cl") or name (e.g., "carbonate") Returns: Matching AnionType Raises: - ValueError: If anion name not found + ValueError: If anion not found + + Examples: + >>> get_anion("CO3") + AnionType(name='carbonate', formula='CO3', ...) + >>> get_anion("Cl") + AnionType(name='chloride', formula='Cl', ...) + >>> get_anion("carbonate") # name also works for backwards compat + AnionType(name='carbonate', formula='CO3', ...) """ + # Ensure lookup is populated + if not _ANION_BY_FORMULA: + _build_anion_lookup() + + # Try formula first + if identifier in _ANION_BY_FORMULA: + return _ANION_BY_FORMULA[identifier] + + # Fall back to name lookup for backwards compatibility for anion in COMMON_ANION_TYPES: - if anion.name == name: + if anion.name == identifier: return anion - raise ValueError(f"Unknown anion type: {name}") + + raise ValueError( + f"Unknown anion: '{identifier}'. " + f"Valid formulas: {list(_ANION_BY_FORMULA.keys())}" + ) def generate_practical_precursors( @@ -214,7 +244,7 @@ def generate_practical_precursors( if not oxidation_states: return [] - anions = [get_anion_by_name(name) for name in anion_types] + anions = [get_anion(a) for a in anion_types] precursors = [] seen = set() @@ -253,7 +283,7 @@ def generate_metathesis_sources( if counter_cations is None: counter_cations = ["Na", "K"] - anion = get_anion_by_name(target_anion) + anion = get_anion(target_anion) sources = [] for cation in counter_cations: @@ -266,21 +296,22 @@ def generate_metathesis_sources( return sources -def get_elements_from_anion_types(anion_types: Optional[List[str]] = None) -> Set[str]: - """Get all elements introduced by a set of anion types. +def get_elements_from_anions(anions: Optional[List[str]] = None) -> Set[str]: + """Get all elements introduced by a set of anions. Args: - anion_types: List of anion type names. Defaults to DEFAULT_PRECURSOR_ANIONS. + anions: List of anion formulas (e.g., ["CO3", "Cl"]). + Defaults to DEFAULT_PRECURSOR_ANIONS. Returns: Set of element symbols """ - if anion_types is None: - anion_types = DEFAULT_PRECURSOR_ANIONS + if anions is None: + anions = DEFAULT_PRECURSOR_ANIONS elements: Set[str] = set() - for name in anion_types: - anion = get_anion_by_name(name) + for identifier in anions: + anion = get_anion(identifier) elements.update(anion.elements) return elements @@ -288,20 +319,25 @@ def get_elements_from_anion_types(anion_types: Optional[List[str]] = None) -> Se def get_expanded_elements( target_phase: str, - anion_types: Optional[List[str]] = None, - include_metathesis: bool = True, + anions: Optional[List[str]] = None, + metathesis_anions: Optional[List[str]] = None, + counter_cations: Optional[List[str]] = None, ) -> Set[str]: """Get the full set of elements needed for precursor selection. This expands beyond the target phase elements to include elements from - common precursor anions (e.g., C from carbonates, N from nitrates). + precursor anions (e.g., C from CO3, N from NO3) and optionally metathesis + reagents. Use this to determine what elements to pass to get_entries(). Args: target_phase: Target product formula (e.g., "BaTiO3") - anion_types: Anion types to consider. Defaults to DEFAULT_PRECURSOR_ANIONS. - include_metathesis: If True, also include elements from metathesis anions. + anions: Base anion formulas for precursors. Defaults to ["O", "CO3", "OH", "NO3"]. + metathesis_anions: Additional anions for metathesis (e.g., ["Cl"]). + Defaults to None (no metathesis anions). + counter_cations: Counter-cations for metathesis (e.g., ["Na", "K"]). + Defaults to None (no counter-cations). Returns: Set of element symbols needed for get_entries() @@ -309,30 +345,41 @@ def get_expanded_elements( Examples: >>> get_expanded_elements("BaTiO3") {'Ba', 'Ti', 'O', 'C', 'N', 'H'} - >>> get_expanded_elements("BaTiO3", anion_types=["oxide"]) + >>> get_expanded_elements("BaTiO3", anions=["O"]) {'Ba', 'Ti', 'O'} + >>> get_expanded_elements("BaTiO3", metathesis_anions=["Cl"], counter_cations=["Na"]) + {'Ba', 'Ti', 'O', 'C', 'N', 'H', 'Cl', 'Na'} """ # Start with target phase elements target_comp = Composition(target_phase) elements = {str(el) for el in target_comp.elements} - # Add elements from precursor anions - if anion_types is None: - anion_types = list(DEFAULT_PRECURSOR_ANIONS) - - if include_metathesis: - # Add metathesis anion elements too - anion_types = list(set(anion_types + METATHESIS_ANIONS)) - - elements.update(get_elements_from_anion_types(anion_types)) - - # Add counter-cation elements if including metathesis - if include_metathesis: - elements.update(METATHESIS_COUNTER_CATIONS.keys()) - # Remove NH4 as it's not a real element, add N and H instead - elements.discard("NH4") - elements.add("N") - elements.add("H") + # Add elements from base precursor anions + if anions is None: + anions = list(DEFAULT_PRECURSOR_ANIONS) + + all_anions = list(anions) + + # Add metathesis anions if specified + if metathesis_anions: + all_anions = list(set(all_anions + metathesis_anions)) + + elements.update(get_elements_from_anions(all_anions)) + + # Add counter-cation elements if specified + if counter_cations: + for cation in counter_cations: + if cation == "NH4": + # NH4 is not a real element, add N and H instead + elements.add("N") + elements.add("H") + elif cation not in METATHESIS_COUNTER_CATIONS: + raise ValueError( + f"Unknown counter-cation: '{cation}'. " + f"Valid options: {list(METATHESIS_COUNTER_CATIONS.keys())}" + ) + else: + elements.add(cation) return elements From 99488a864fd93e6043b9a5d5bb5d5884c3214b7f Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Mon, 4 May 2026 16:10:03 -0700 Subject: [PATCH 21/31] accounting for when job walltime hits and need to re-launch on a new hpc job --- src/rxn_ca/workflow/jobs/bayesian.py | 85 ++++++++++++++++------------ 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index b0fda32..fab606c 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -252,50 +252,63 @@ def bo_trial_step( "reaction_library_data must include reaction_library_path or reaction_library_dict" ) - # --- Step 5: Run simulation --- - print("Running simulation...") - if num_realizations > 1: - result_doc = run_sim_parallel( - recipe, - reaction_lib=rxn_lib, - phase_set=phase_set, - live_compress=live_compress, - compress_freq=compress_freq, - ) + # --- Steps 5-6: Run simulation and score --- + # If trial_path already exists this Firework is being re-run after a + # walltime kill that occurred after the simulation completed (e.g. during + # campaign save or Response chaining). Skip the expensive simulation and + # reuse the cached score so the GP posterior stays consistent. + trial_path = sim_dir / f"trial_{iteration:03d}.json" + if trial_path.exists(): + cached = json.loads(trial_path.read_text()) + score = cached["score"] + result_doc = None + print(f"Re-run detected: reusing cached score {score:.4f} from {trial_path.name}") else: - result_doc = run_single_sim( - recipe, - reaction_lib=rxn_lib, - phase_set=phase_set, - live_compress=live_compress, - compress_freq=compress_freq, - ) - - # --- Step 6: Score --- - score = _score_result(result_doc, target_phase, scorer_type) - print(f"Score ({target_phase}, {scorer_type}): {score:.4f}") + print("Running simulation...") + if num_realizations > 1: + result_doc = run_sim_parallel( + recipe, + reaction_lib=rxn_lib, + phase_set=phase_set, + live_compress=live_compress, + compress_freq=compress_freq, + ) + else: + result_doc = run_single_sim( + recipe, + reaction_lib=rxn_lib, + phase_set=phase_set, + live_compress=live_compress, + compress_freq=compress_freq, + ) + score = _score_result(result_doc, target_phase, scorer_type) + print(f"Score ({target_phase}, {scorer_type}): {score:.4f}") # --- Step 7: Tell Campaign --- campaign.add_measurements(pd.DataFrame([{**params, target_name: score}])) # --- Step 8: Save per-trial outputs --- - trial_result = { - "iteration": iteration, - "params": params, - "score": score, - "timestamp": datetime.utcnow().isoformat(), - } - trial_path = sim_dir / f"trial_{iteration:03d}.json" - trial_path.write_text(json.dumps(trial_result, indent=2)) + if not trial_path.exists(): + trial_path.write_text(json.dumps({ + "iteration": iteration, + "params": params, + "score": score, + "timestamp": datetime.utcnow().isoformat(), + }, indent=2)) history_path = output_path / "history.csv" - write_header = not history_path.exists() - fieldnames = ["iteration", "score", *sorted(params.keys())] - with history_path.open("a", newline="") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - if write_header: - writer.writeheader() - writer.writerow({"iteration": iteration, "score": score, **params}) + already_logged = history_path.exists() and any( + int(row["iteration"]) == iteration + for row in csv.DictReader(history_path.open()) + ) + if not already_logged: + write_header = not history_path.exists() + fieldnames = ["iteration", "score", *sorted(params.keys())] + with history_path.open("a", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + if write_header: + writer.writeheader() + writer.writerow({"iteration": iteration, "score": score, **params}) if result_doc is not None and hasattr(result_doc, "to_file"): try: From 8b07d00a0d0d7bd72f39e5a3fcd891217db3b2b7 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Tue, 5 May 2026 18:15:43 -0700 Subject: [PATCH 22/31] Enable optimization of precursor amounts with ratios Added functionality to optimize individual precursor amounts using ratio parameters. --- src/rxn_ca/workflow/jobs/bayesian.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index fab606c..e19e414 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -50,8 +50,15 @@ def build_recipe_from_params( } if fixed_precursors is not None: + # Allow BayBE to optimize individual precursor amounts via + # '{formula}_ratio' parameters in the search space. Formulas without + # a ratio parameter keep their base amount from fixed_precursors. + scaled = { + formula: float(clean_params.get(f"{formula}_ratio", base_amount)) + for formula, base_amount in fixed_precursors.items() + } return OptimizableRecipe( - precursors=fixed_precursors, + precursors=scaled, hold_temp=int(clean_params["hold_temp"]), hold_time=int(clean_params["hold_time"]), ramp_step_time=int(clean_params.get("ramp_step_time", 1)), From 36955a646260d68cf0dd24eda00d7c86697cff9a Mon Sep 17 00:00:00 2001 From: Jaemin Yoo Date: Thu, 7 May 2026 15:03:45 -0700 Subject: [PATCH 23/31] fixed fireworks saving launcher files to wrong directories --- src/rxn_ca/cli.py | 30 ++++++++++++++++- src/rxn_ca/optimization/search_space.py | 15 ++++++--- src/rxn_ca/workflow/flows/bayesian.py | 10 ++++++ src/rxn_ca/workflow/jobs/bayesian.py | 44 ++++++++++++++++++++++++- 4 files changed, 93 insertions(+), 6 deletions(-) diff --git a/src/rxn_ca/cli.py b/src/rxn_ca/cli.py index 3947b38..fbb25eb 100644 --- a/src/rxn_ca/cli.py +++ b/src/rxn_ca/cli.py @@ -293,15 +293,43 @@ def suggest_precursors(): action='store_true', help='Output as JSON instead of human-readable format', ) + parser.add_argument( + '--anions', + type=str, + default=None, + help='Comma-separated anion formulas for precursors (default: O,CO3,OH,NO3)', + ) + parser.add_argument( + '--metathesis', + type=str, + default=None, + help='Comma-separated metathesis anion formulas (e.g., Cl or Cl,Br)', + ) + parser.add_argument( + '--counter-cations', + type=str, + default=None, + help='Comma-separated counter-cations for metathesis (e.g., Na,K)', + ) args = parser.parse_args() target = args.target print(f"Finding precursor combinations for: {target}", file=sys.stderr) + # Parse anion/cation arguments + anions = args.anions.split(',') if args.anions else None + metathesis_anions = args.metathesis.split(',') if args.metathesis else None + counter_cations = args.counter_cations.split(',') if args.counter_cations else None + # Expand elements to include precursor anions (C, N, H, etc.) print("Expanding element set for precursor phases...", file=sys.stderr) - elements = get_expanded_elements(target) + elements = get_expanded_elements( + target, + anions=anions, + metathesis_anions=metathesis_anions, + counter_cations=counter_cations, + ) print(f" Elements: {', '.join(sorted(elements))}", file=sys.stderr) # Fetch entries from Materials Project diff --git a/src/rxn_ca/optimization/search_space.py b/src/rxn_ca/optimization/search_space.py index eb7d97e..d6ad2b0 100644 --- a/src/rxn_ca/optimization/search_space.py +++ b/src/rxn_ca/optimization/search_space.py @@ -124,19 +124,26 @@ def add_precursor_ratio( name: str, low: float, high: float, + step: float = 0.05, ) -> "SearchSpace": - """Add a precursor ratio parameter. + """Add a precursor ratio parameter (discrete with given step size). + + Discrete rather than continuous: BayBE enumerates all candidate values + and selects via Thompson Sampling, avoiding the L-BFGS-B boundary-hang + that occurs when a continuous parameter's optimum sits at its lower bound + (e.g. the stoichiometric ratio is also the minimum of the search range). Args: name: Parameter name (should match a precursor slot name + "_ratio") - low: Minimum ratio (typically 0-1) - high: Maximum ratio (typically 0-1) + low: Minimum ratio value + high: Maximum ratio value + step: Grid spacing between ratio values (default 0.05) Returns: self for method chaining """ ratio_name = f"{name}_ratio" if not name.endswith("_ratio") else name - param = ContinuousParameter(name=ratio_name, low=low, high=high) + param = DiscreteParameter(name=ratio_name, low=low, high=high, step=step) return self._add_parameter(param) def add_continuous( diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index de916eb..c00947a 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -83,6 +83,7 @@ def make( output_dir: str, fixed_precursors: Optional[Dict[str, float]] = None, ensure_phases: Optional[List[str]] = None, + fw_category: Optional[str] = None, **library_kwargs, ) -> Flow: """Build a Bayesian Optimization Flow for the given chemical system. @@ -100,6 +101,12 @@ def make( is). Mutually exclusive with precursor slots in search_space. ensure_phases: Phases that must be present in the reaction library. Defaults to target_phase + all precursor candidates. + fw_category: FireWorks _category tag. When provided, every trial + job (including dynamically-added ones) carries this tag so + workers launched with `rlaunch --category ` only run + Fireworks belonging to this workflow. Generated automatically + by optimize_synthesis_general.py and written to + {output_dir}/fw_category.txt. **library_kwargs: Forwarded to setup_reaction_library (e.g. thermo_types=["R2SCAN"]). @@ -195,7 +202,10 @@ def make( fixed_precursors=fixed_precursors, objective_config=objective_config, output_dir=output_dir, + fw_category=fw_category, ) + if fw_category: + first_trial.update_config({"manager_config": {"_category": fw_category}}) first_trial.name = "bo_trial_000" return Flow( diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index e19e414..f0ed44b 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -4,6 +4,7 @@ import csv import json +import signal from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional @@ -11,6 +12,16 @@ import pandas as pd from jobflow import Response, job +_RECOMMEND_TIMEOUT_SECONDS = 300 # fall back to random after 5 min + + +class _RecommendTimeout(Exception): + pass + + +def _alarm_handler(signum, frame): + raise _RecommendTimeout() + from ..schemas import ReactionLibraryData import os @@ -165,6 +176,7 @@ def bo_trial_step( fixed_precursors: Optional[Dict[str, float]], objective_config: dict, output_dir: str, + fw_category: Optional[str] = None, ) -> Response: """Run one Bayesian optimization trial and chain the next. @@ -194,6 +206,9 @@ def bo_trial_step( live_compress (bool), compress_freq (int). output_dir: Shared filesystem path for per-trial JSON, history.csv, and best_result.json. Must be accessible from all worker nodes. + fw_category: FireWorks _category tag for this workflow. When set, + propagated to each dynamically-added next trial so workers + filtered with `rlaunch --category ` only run this workflow. Returns: Response(addition=) if more trials remain, @@ -225,7 +240,31 @@ def bo_trial_step( campaign = Campaign.from_json(campaign_jsonfile) # --- Step 2: Get next recommendation --- - recommendation = campaign.recommend(batch_size=1) + # Guard against the acquisition-function optimiser hanging at a continuous + # parameter boundary (L-BFGS-B can spin forever when the GP posterior peak + # sits exactly on the lower/upper bound). After _RECOMMEND_TIMEOUT_SECONDS + # we fall back to RandomRecommender so the trial can still run. + signal.signal(signal.SIGALRM, _alarm_handler) + signal.alarm(_RECOMMEND_TIMEOUT_SECONDS) + try: + recommendation = campaign.recommend(batch_size=1) + signal.alarm(0) + except _RecommendTimeout: + signal.alarm(0) + print( + f"Warning: campaign.recommend() timed out after " + f"{_RECOMMEND_TIMEOUT_SECONDS}s. " + "Falling back to random recommendation. " + "Consider re-registering the workflow with discrete ratio parameters " + "to prevent this (see search_space.add_precursor_ratio)." + ) + from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender + recommendation = RandomRecommender().recommend( + batch_size=1, + searchspace=campaign.searchspace, + objective=campaign.objective, + measurements=campaign.measurements, + ) params = { col: ( recommendation.iloc[0][col].item() @@ -342,7 +381,10 @@ def bo_trial_step( fixed_precursors=fixed_precursors, objective_config=objective_config, output_dir=output_dir, + fw_category=fw_category, ) + if fw_category: + next_job.update_config({"manager_config": {"_category": fw_category}}) return Response(addition=next_job) # Final iteration: collect all trial results and write summary From 7b2456590bffdb453c24021ccb4bc1ca54a05ae3 Mon Sep 17 00:00:00 2001 From: Jaemin Yoo Date: Sun, 24 May 2026 15:31:56 -0700 Subject: [PATCH 24/31] changes to make sure the correct job is picked up by fireworks; modified workflow to build 1 common reaction library and share --- src/rxn_ca/utilities/get_scored_rxns.py | 13 +++- src/rxn_ca/workflow/flows/bayesian.py | 81 +++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/src/rxn_ca/utilities/get_scored_rxns.py b/src/rxn_ca/utilities/get_scored_rxns.py index d305c84..f0e4e22 100644 --- a/src/rxn_ca/utilities/get_scored_rxns.py +++ b/src/rxn_ca/utilities/get_scored_rxns.py @@ -1,3 +1,7 @@ +import os +import multiprocessing as mp +from typing import List + from rxn_network.reactions.reaction_set import ReactionSet from ..core import HeatingSchedule @@ -6,11 +10,14 @@ from ..reactions import ReactionLibrary, ScoredReaction, ScoredReactionSet, score_rxns from ..reactions.scorers import BasicScore, TammanScore -from typing import List +_scoring_globals = {} -import multiprocessing as mp -_scoring_globals = {} +def _pool_initializer(data: dict): + """Populate worker-process globals (called by forkserver/spawn pool workers).""" + global _scoring_globals + _scoring_globals.update(data) + def fn(temp): score_class = _scoring_globals.get('score_class') diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index c00947a..fc8de56 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -212,3 +212,84 @@ def make( [setup_job, init_job, first_trial], name=flow_name, ) + + def make_campaign( + self, + chemical_system: str, + target_phase: str, + search_space: SearchSpace, + output_dir: str, + reaction_library_data, + fixed_precursors: Optional[Dict[str, float]] = None, + fw_category: Optional[str] = None, + ) -> Flow: + """Build a BO campaign Flow using a pre-built reaction library. + + Like make(), but accepts an existing reaction_library_data (or a + jobflow OutputReference to one) instead of creating a + setup_reaction_library job internally. Use this when sharing one + reaction library across multiple campaigns for the same chemical system. + + Args: + chemical_system: Element system string, e.g. "Na-Mo-Cd-N-O". + target_phase: Formula of the target product. + search_space: Configured SearchSpace. + output_dir: Shared filesystem path for trial outputs + (history.csv, best_result.json, simulations/). + reaction_library_data: A ReactionLibraryData object or a jobflow + OutputReference (e.g. setup_job.output) that resolves to one. + fixed_precursors: Formula → molar amount map. When provided, + precursor selection is not optimized. + fw_category: FireWorks _category tag. When provided, every job in + this campaign carries the tag so workers with a matching + fworker.yaml only run this campaign's Fireworks. + + Returns: + Flow containing init and first trial jobs only (no setup job). + """ + flow_name = f"{self.name}_{target_phase}_{chemical_system}" + output_dir = str(Path(output_dir).expanduser().resolve()) + total_iterations = self.n_initial + self.n_iterations + + precursor_slot_names = [p.name for p in search_space.precursor_parameters] + + if fixed_precursors is not None and precursor_slot_names: + raise ValueError( + "Provide either fixed_precursors or precursor slots in search_space, not both." + ) + + objective_config = { + "target_phase": target_phase, + "scorer_type": self.scorer_type, + "simulation_size": self.simulation_size, + "num_realizations": self.num_realizations, + "live_compress": self.live_compress, + "compress_freq": self.compress_freq, + "target_name": "yield", + } + + init_job = init_bo_campaign( + search_space_config=search_space.as_dict(), + n_initial=self.n_initial, + n_iterations=self.n_iterations, + ) + init_job.name = "init_bo_campaign" + if fw_category: + init_job.update_config({"manager_config": {"_category": fw_category}}) + + first_trial = bo_trial_step( + iteration=0, + total_iterations=total_iterations, + campaign_json=init_job.output["campaign.json"], + reaction_library_data=reaction_library_data, + precursor_slot_names=precursor_slot_names, + fixed_precursors=fixed_precursors, + objective_config=objective_config, + output_dir=output_dir, + fw_category=fw_category, + ) + if fw_category: + first_trial.update_config({"manager_config": {"_category": fw_category}}) + first_trial.name = "bo_trial_000" + + return Flow([init_job, first_trial], name=flow_name) From e3e71209ecbe2b29c00b0acb9b6620b178409fcf Mon Sep 17 00:00:00 2001 From: Jaemin Yoo Date: Tue, 26 May 2026 10:13:31 -0700 Subject: [PATCH 25/31] jobflow job naming convention fixes --- src/rxn_ca/workflow/flows/bayesian.py | 29 +++++++++++++++++++++++---- src/rxn_ca/workflow/jobs/bayesian.py | 5 +++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index fc8de56..6ffaebb 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -84,6 +84,7 @@ def make( fixed_precursors: Optional[Dict[str, float]] = None, ensure_phases: Optional[List[str]] = None, fw_category: Optional[str] = None, + metadata: Optional[Dict] = None, **library_kwargs, ) -> Flow: """Build a Bayesian Optimization Flow for the given chemical system. @@ -190,6 +191,13 @@ def make( ) init_job.name = "init_bo_campaign" + # --- Merge user metadata with auto-derived fields --- + _metadata = { + "target_phase": target_phase, + "chemical_system": chemical_system, + **(metadata or {}), + } + # --- Job 3: First BO trial --- # campaign_json and reaction_library_data are job output references — # jobflow resolves them at runtime after the upstream jobs complete. @@ -203,15 +211,18 @@ def make( objective_config=objective_config, output_dir=output_dir, fw_category=fw_category, + metadata=_metadata, ) if fw_category: first_trial.update_config({"manager_config": {"_category": fw_category}}) - first_trial.name = "bo_trial_000" + first_trial.name = "bo_trial_step_000" - return Flow( + flow = Flow( [setup_job, init_job, first_trial], name=flow_name, ) + flow.update_metadata(_metadata) + return flow def make_campaign( self, @@ -222,6 +233,7 @@ def make_campaign( reaction_library_data, fixed_precursors: Optional[Dict[str, float]] = None, fw_category: Optional[str] = None, + metadata: Optional[Dict] = None, ) -> Flow: """Build a BO campaign Flow using a pre-built reaction library. @@ -277,6 +289,12 @@ def make_campaign( if fw_category: init_job.update_config({"manager_config": {"_category": fw_category}}) + _metadata = { + "target_phase": target_phase, + "chemical_system": chemical_system, + **(metadata or {}), + } + first_trial = bo_trial_step( iteration=0, total_iterations=total_iterations, @@ -287,9 +305,12 @@ def make_campaign( objective_config=objective_config, output_dir=output_dir, fw_category=fw_category, + metadata=_metadata, ) if fw_category: first_trial.update_config({"manager_config": {"_category": fw_category}}) - first_trial.name = "bo_trial_000" + first_trial.name = "bo_trial_step_000" - return Flow([init_job, first_trial], name=flow_name) + flow = Flow([init_job, first_trial], name=flow_name) + flow.update_metadata(_metadata) + return flow diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index f0ed44b..c3aff5a 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -177,6 +177,7 @@ def bo_trial_step( objective_config: dict, output_dir: str, fw_category: Optional[str] = None, + metadata: Optional[Dict] = None, ) -> Response: """Run one Bayesian optimization trial and chain the next. @@ -382,9 +383,13 @@ def bo_trial_step( objective_config=objective_config, output_dir=output_dir, fw_category=fw_category, + metadata=metadata, ) if fw_category: next_job.update_config({"manager_config": {"_category": fw_category}}) + next_job.name = f"bo_trial_step_{iteration + 1:03d}" + if metadata: + next_job.update_metadata(metadata) return Response(addition=next_job) # Final iteration: collect all trial results and write summary From a1463e49d746e46e3a44862b6333d372a910e2a7 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Tue, 26 May 2026 15:20:28 -0700 Subject: [PATCH 26/31] Fix Ray over-subscription deadlock on SLURM shared nodes Ray auto-detects full node CPU count (e.g. 256) instead of the SLURM- allocated CPUs, causing it to spawn task workers that hang on startup. Initialize Ray with SLURM_NTASKS * SLURM_CPUS_PER_TASK before run_enumerators / get_scored_rxns to keep tasks within the allocated pool. --- src/rxn_ca/workflow/jobs/core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 7eba093..5614ad4 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -56,6 +56,18 @@ def _build_reaction_library( # Create phase set phase_set = SolidPhaseSet.from_entry_set(entries) + # Initialize Ray with the actually-allocated SLURM CPUs before dispatching + # any Ray tasks. Without this, Ray detects the full node CPU count and tries + # to spawn hundreds of new task workers that hang on a shared Perlmutter node. + import os + import ray + if not ray.is_initialized(): + num_cpus = ( + int(os.environ.get("SLURM_NTASKS", 1)) + * int(os.environ.get("SLURM_CPUS_PER_TASK", 1)) + ) + ray.init(num_cpus=num_cpus, ignore_reinit_error=True) + # Enumerate reactions enumerators = [MinimizeGibbsEnumerator(), BasicEnumerator()] rxn_set = run_enumerators(enumerators, entries) From cefba4ea6a23b6505bd8e9d974504d7b80708058 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Wed, 27 May 2026 14:26:48 -0700 Subject: [PATCH 27/31] changed default compress_freq to 500, instead of 1, for BO workflows to improve speed. --- src/rxn_ca/optimization/objective.py | 2 +- src/rxn_ca/workflow/flows/ | 88 --------------------------- src/rxn_ca/workflow/flows/bayesian.py | 2 +- src/rxn_ca/workflow/jobs/bayesian.py | 2 +- src/rxn_ca/workflow/jobs/core.py | 2 +- 5 files changed, 4 insertions(+), 92 deletions(-) delete mode 100644 src/rxn_ca/workflow/flows/ diff --git a/src/rxn_ca/optimization/objective.py b/src/rxn_ca/optimization/objective.py index 305a0e4..fd2efb6 100644 --- a/src/rxn_ca/optimization/objective.py +++ b/src/rxn_ca/optimization/objective.py @@ -49,7 +49,7 @@ class ObjectiveConfig: num_realizations: int = 3 cache_results: bool = True live_compress: bool = True - compress_freq: int = 50 + compress_freq: int = 500 class ObjectiveFunction: diff --git a/src/rxn_ca/workflow/flows/ b/src/rxn_ca/workflow/flows/ deleted file mode 100644 index b767d53..0000000 --- a/src/rxn_ca/workflow/flows/ +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python3 -"""Quick local test for the BOFlowMaker jobflow. - -Runs 2 initial + 2 BO trials on a tiny 5x5 grid with 1 realization. -Uses run_locally() so no FireWorks/MongoDB needed. - -Usage: - python test_bo_flow.py - python test_bo_flow.py --dry-run # just build the flow, no simulation -""" - -from __future__ import annotations - -import argparse -from pathlib import Path - -from rxn_ca.optimization import SearchSpace -from rxn_ca.workflow import BOFlowMaker - - -def build_parser(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--dry-run", - action="store_true", - help="Print flow structure without running simulations.", - ) - parser.add_argument( - "--output-dir", - default="./test_bo_output", - help="Directory for trial outputs.", - ) - return parser - - -def main(): - args = build_parser().parse_args() - - output_dir = Path(args.output_dir).expanduser().resolve() - output_dir.mkdir(parents=True, exist_ok=True) - - # Minimal search space — small ranges so trials are fast - search_space = ( - SearchSpace() - .add_temperature_range(low=900, high=1100, step=100) - .add_hold_time_range(low=30, high=60) - .add_ramp_step_time_range(low=1, high=5) - ) - - maker = BOFlowMaker( - n_initial=2, - n_iterations=2, - simulation_size=5, # tiny grid - num_realizations=1, # single run per trial - live_compress=True, - compress_freq=50, - ) - - flow = maker.make( - chemical_system="Li-Si-O-C", - target_phase="Li4SiO4", - search_space=search_space, - output_dir=str(output_dir), - fixed_precursors={"Li2CO3": 2.0, "SiO2": 1.0}, - thermo_types=["R2SCAN"], - ) - - print(f"Flow: '{flow.name}'") - print(f"Jobs in flow:") - for j in flow.jobs: - print(f" - {j.name} ({j.uuid})") - - if args.dry_run: - print("\n[Dry run] Skipping execution.") - return - - from jobflow import run_locally - print("\nRunning flow locally...") - responses = run_locally(flow, create_folders=True) - - print("\nDone. Check outputs:") - print(f" {output_dir}/history.csv") - print(f" {output_dir}/best_result.json") - print(f" {output_dir}/simulations/") - - -if __name__ == "__main__": - main() diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index 6ffaebb..a273956 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -71,7 +71,7 @@ def make(self, systems: list, ...) -> Flow: simulation_size: int = 10 num_realizations: int = 3 live_compress: bool = True - compress_freq: int = 50 + compress_freq: int = 500 metastability_cutoff: float = 0.1 exclude_theoretical: bool = True diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index c3aff5a..decc373 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -226,7 +226,7 @@ def bo_trial_step( simulation_size = objective_config["simulation_size"] num_realizations = objective_config["num_realizations"] live_compress = objective_config.get("live_compress", True) - compress_freq = objective_config.get("compress_freq", 50) + compress_freq = objective_config.get("compress_freq", 500) target_name = objective_config.get("target_name", "yield") output_path = Path(output_dir) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 5614ad4..7f0571f 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -174,7 +174,7 @@ def run_simulation( save_to_file: bool = True, metadata: Dict[str, Any] = None, live_compress: bool = True, - compress_freq: int = 100, + compress_freq: int = 500, ) -> SimulationOutput: """Run an rxn-ca simulation. From 15bc4c3f217949e3c06ecde0b3ae731e106354d2 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Thu, 28 May 2026 19:11:14 -0700 Subject: [PATCH 28/31] changed default metastability cutoff from 0.1 to 0.03 --- src/rxn_ca/workflow/flows/bayesian.py | 2 +- src/rxn_ca/workflow/jobs/core.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rxn_ca/workflow/flows/bayesian.py b/src/rxn_ca/workflow/flows/bayesian.py index a273956..484f542 100644 --- a/src/rxn_ca/workflow/flows/bayesian.py +++ b/src/rxn_ca/workflow/flows/bayesian.py @@ -72,7 +72,7 @@ def make(self, systems: list, ...) -> Flow: num_realizations: int = 3 live_compress: bool = True compress_freq: int = 500 - metastability_cutoff: float = 0.1 + metastability_cutoff: float = 0.03 exclude_theoretical: bool = True def make( diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 7f0571f..6810278 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -97,7 +97,7 @@ def setup_reaction_library( chemical_system: str, temperatures: List[float], ensure_phases: List[str] = None, - metastability_cutoff: float = 0.1, + metastability_cutoff: float = 0.03, exclude_theoretical: bool = True, save_to_file: bool = True, library_dir: Optional[str] = None, @@ -170,7 +170,7 @@ def run_simulation( reaction_library_data: ReactionLibraryData = None, chemical_system: str = None, ensure_phases: List[str] = None, - metastability_cutoff: float = 0.1, + metastability_cutoff: float = 0.03, save_to_file: bool = True, metadata: Dict[str, Any] = None, live_compress: bool = True, From 6cca6099a23a29d439804d63cd74c154845f409b Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Tue, 2 Jun 2026 13:46:15 -0700 Subject: [PATCH 29/31] Removed the Ray initialization This was added previously but it's nolonger needed and is causing errors. Therefore reverting it back. --- src/rxn_ca/workflow/jobs/core.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index 5614ad4..eb712ed 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -59,14 +59,14 @@ def _build_reaction_library( # Initialize Ray with the actually-allocated SLURM CPUs before dispatching # any Ray tasks. Without this, Ray detects the full node CPU count and tries # to spawn hundreds of new task workers that hang on a shared Perlmutter node. - import os - import ray - if not ray.is_initialized(): - num_cpus = ( - int(os.environ.get("SLURM_NTASKS", 1)) - * int(os.environ.get("SLURM_CPUS_PER_TASK", 1)) - ) - ray.init(num_cpus=num_cpus, ignore_reinit_error=True) +# import os +# import ray +# if not ray.is_initialized(): +# num_cpus = ( +# int(os.environ.get("SLURM_NTASKS", 1)) +# * int(os.environ.get("SLURM_CPUS_PER_TASK", 1)) +# ) +# ray.init(num_cpus=num_cpus, ignore_reinit_error=True) # Enumerate reactions enumerators = [MinimizeGibbsEnumerator(), BasicEnumerator()] From b7237e167dbd8cdb926cb213314c618da3043511 Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Tue, 2 Jun 2026 15:25:20 -0700 Subject: [PATCH 30/31] Removed Ray Initialization line as it's not functional --- src/rxn_ca/workflow/jobs/core.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/rxn_ca/workflow/jobs/core.py b/src/rxn_ca/workflow/jobs/core.py index eb712ed..7eba093 100644 --- a/src/rxn_ca/workflow/jobs/core.py +++ b/src/rxn_ca/workflow/jobs/core.py @@ -56,18 +56,6 @@ def _build_reaction_library( # Create phase set phase_set = SolidPhaseSet.from_entry_set(entries) - # Initialize Ray with the actually-allocated SLURM CPUs before dispatching - # any Ray tasks. Without this, Ray detects the full node CPU count and tries - # to spawn hundreds of new task workers that hang on a shared Perlmutter node. -# import os -# import ray -# if not ray.is_initialized(): -# num_cpus = ( -# int(os.environ.get("SLURM_NTASKS", 1)) -# * int(os.environ.get("SLURM_CPUS_PER_TASK", 1)) -# ) -# ray.init(num_cpus=num_cpus, ignore_reinit_error=True) - # Enumerate reactions enumerators = [MinimizeGibbsEnumerator(), BasicEnumerator()] rxn_set = run_enumerators(enumerators, entries) From 74bcf328ab85367cb5ebbb8b7017e93e445575fe Mon Sep 17 00:00:00 2001 From: jaeminy00 Date: Tue, 2 Jun 2026 15:39:38 -0700 Subject: [PATCH 31/31] test --- src/rxn_ca/workflow/jobs/bayesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rxn_ca/workflow/jobs/bayesian.py b/src/rxn_ca/workflow/jobs/bayesian.py index decc373..281a330 100644 --- a/src/rxn_ca/workflow/jobs/bayesian.py +++ b/src/rxn_ca/workflow/jobs/bayesian.py @@ -299,7 +299,7 @@ def bo_trial_step( "reaction_library_data must include reaction_library_path or reaction_library_dict" ) - # --- Steps 5-6: Run simulation and score --- + # --- Steps 5&6: Run simulation and score --- # If trial_path already exists this Firework is being re-run after a # walltime kill that occurred after the simulation completed (e.g. during # campaign save or Response chaining). Skip the expensive simulation and