From 23436e2c758c173ebb4ca495383dde2d428362b9 Mon Sep 17 00:00:00 2001 From: Bill Hlavacek Date: Mon, 11 May 2026 13:51:30 -0600 Subject: [PATCH 1/4] fix: preserve seeded trajectories with connected updates --- src/NFcore/moleculeType.cpp | 57 +++++++++---------- src/NFcore/reactionClass.cpp | 47 ++++++++++++++- src/NFcore/templateMolecule.cpp | 22 +++++-- .../transformations/transformationSet.cpp | 30 ++++------ validate/validate.py | 20 +++++++ 5 files changed, 117 insertions(+), 59 deletions(-) diff --git a/src/NFcore/moleculeType.cpp b/src/NFcore/moleculeType.cpp index af59eae3..8d4a6fa0 100644 --- a/src/NFcore/moleculeType.cpp +++ b/src/NFcore/moleculeType.cpp @@ -572,35 +572,33 @@ void MoleculeType::updateRxnMembership(Molecule * m) void MoleculeType::updateConnectedRxnMembership(Molecule * m, ReactionClass * firedReaction) { - // Replace the iteration over all reactions for the MoleculeType in - // MoleculeType::updateRxnMembership by only the - // connectedReactions for the fired Reaction. This is a much smaller loop - // and skips moleculetypes that are not the TemplateMolecule of the reactant - // in the connected reaction right away. 
- // Arvind Rasi Subramaniam - // - for (int r=0; rgetNumConnectedRxns(); r++) { - rxn = firedReaction->getconnectedRxn(r); - for (int pos=0; posgetNumOfReactants(); pos++) { - if (rxn->getMoleculeTypeOfReactantTemplate(pos) != this) continue; - double oldA = rxn->get_a(); - double oldAwithTotal = rxn->update_a(); - rxn->tryToAdd(m, pos); - double newA = rxn->update_a(); - this->system->update_A_tot(rxn,oldA,newA); - // Used for debugging to see which reaction rates changed - // upon updating molecule membership - // Arvind Rasi Subramaniam Nov 21, 2018 - if (!this->system->getTrackConnected()) continue; - if (oldAwithTotal != newA) { - this->system->getConnectedRxnFileStream() << - this->system->getGlobalEventCounter() << "\t" << - firedReaction->getName() << "\t" << - m->getMoleculeTypeName() << "\t" << - m->getUniqueID() << "\t" << - rxn->getName() << "\t" << - oldAwithTotal << "\t" << newA << endl; - } + // Preserve the MoleculeType's native reaction order so the connectivity path + // mutates reactant containers in the same sequence as a full membership + // refresh, while still using the precomputed connectivity matrix. 
+ for (unsigned int r=0; rsystem->areReactionsConnected( + firedReaction->getRxnId(), rxn->getRxnId())) { + continue; + } + int pos = reactionPositions.at(r); + double oldA = rxn->get_a(); + double oldAwithTotal = rxn->update_a(); + rxn->tryToAdd(m, pos); + double newA = rxn->update_a(); + this->system->update_A_tot(rxn,oldA,newA); + // Used for debugging to see which reaction rates changed + // upon updating molecule membership + // Arvind Rasi Subramaniam Nov 21, 2018 + if (!this->system->getTrackConnected()) continue; + if (oldAwithTotal != newA) { + this->system->getConnectedRxnFileStream() << + this->system->getGlobalEventCounter() << "\t" << + firedReaction->getName() << "\t" << + m->getMoleculeTypeName() << "\t" << + m->getUniqueID() << "\t" << + rxn->getName() << "\t" << + oldAwithTotal << "\t" << newA << endl; } } } @@ -828,4 +826,3 @@ void MoleculeType::printDetails() const // } - diff --git a/src/NFcore/reactionClass.cpp b/src/NFcore/reactionClass.cpp index 6cc92f0b..e0e0ac75 100755 --- a/src/NFcore/reactionClass.cpp +++ b/src/NFcore/reactionClass.cpp @@ -273,7 +273,19 @@ void ReactionClass::appendConnectedRxn(ReactionClass * rxn) { bool ReactionClass::isReactionConnected(ReactionClass * rxn) { // First check if any of the operations share MoleculeType and components with // one of the reactant templates of rxn. - return this->transformationSet->checkConnection(rxn); + if (this->transformationSet->checkConnection(rxn)) return true; + + // Full membership refresh revisits every explicit reactant/product template in + // the fired rule, not only templates that carry direct transformations. Treat + // any compatible explicit template as connected so the fast path preserves the + // same reachable update set. 
+ for (unsigned int i=0; iisTemplateCompatible(allReactantTemplates[i])) return true; + } + for (unsigned int i=0; iisTemplateCompatible(allProductTemplates[i])) return true; + } + return false; } ReactionClass::~ReactionClass() @@ -491,6 +503,30 @@ string ReactionClass::fire(double random_A_number, bool track) { // Add newly created molecules to the list of products this->transformationSet->getListOfAddedMolecules(mappingSet,products,traversalLimit); + // Track molecules that were explicitly mapped by this firing. Products added + // through bonded-neighborhood traversal must use the full updater to preserve + // the same membership mutation order as the non-connectivity path. + std::unordered_set directProductSet; + for (unsigned int msIndex=0; msIndexgetNumOfMappings(); mapIndex++) { + Mapping *mapping = ms->get(mapIndex); + if (mapping==0) continue; + Molecule *directMol = mapping->getMolecule(); + if (directMol!=0) directProductSet.insert(directMol); + } + } + bool hasIndirectProducts = false; + for (molIter = products.begin(); molIter != products.end(); molIter++) { + Molecule *mol = *molIter; + if (!mol->isAlive()) continue; + if (directProductSet.find(mol)==directProductSet.end()) { + hasIndirectProducts = true; + break; + } + } + // if complex bookkeeping is on, find all product complexes // (this is useful for updating Species Observables and TypeII functions, so keep the info handy). // NOTE: this is a brute force approach: check complex of each molecule. there may be a more @@ -558,8 +594,13 @@ string ReactionClass::fire(double random_A_number, bool track) { //Update this molcule's reaction membership // NOTE: as a side-effect, DORreactions that depend on molecule-scoped local functions // (typeI relationship) will be updated as long as UTL is set appropriately. 
- if ( mol->isAlive() ) - mol->updateRxnMembership(this, useConnectivity); + if ( mol->isAlive() ) { + bool useConnectedUpdate = + useConnectivity && + !hasIndirectProducts && + directProductSet.find(mol)!=directProductSet.end(); + mol->updateRxnMembership(this, useConnectedUpdate); + } } // update complex-scoped local functions for typeII dependencies diff --git a/src/NFcore/templateMolecule.cpp b/src/NFcore/templateMolecule.cpp index 89a433c8..5283351c 100644 --- a/src/NFcore/templateMolecule.cpp +++ b/src/NFcore/templateMolecule.cpp @@ -2148,22 +2148,32 @@ bool TemplateMolecule::checkSymmetryAroundBond(TemplateMolecule *tm1, TemplateMo bool TemplateMolecule::isMoleculeTypeAndComponentPresent(MoleculeType * mt, int cIndex) { if (this->getMoleculeType() != mt) return false; + + auto matchesComponent = [mt, cIndex](int templateComponent) { + if (templateComponent == cIndex) return true; + if (!mt->isEquivalentComponent(templateComponent) || + !mt->isEquivalentComponent(cIndex)) { + return false; + } + return mt->getEquivalenceClassNumber(templateComponent) == + mt->getEquivalenceClassNumber(cIndex); + }; // First make a joint vector of components specified in the TemplateMolecule for(int i=0; iemptyComps[i] == cIndex) return true; + if (matchesComponent(this->emptyComps[i])) return true; } for(int i=0; ioccupiedComps[i] == cIndex) return true; + if (matchesComponent(this->occupiedComps[i])) return true; } for(int i=0; ibondComp[i] == cIndex) return true; + if (matchesComponent(this->bondComp[i])) return true; } for(int i=0; icompStateConstraint_Comp[i] == cIndex) return true; + if (matchesComponent(this->compStateConstraint_Comp[i])) return true; } for(int i=0; icompStateExclusion_Comp[i] == cIndex) return true; + if (matchesComponent(this->compStateExclusion_Comp[i])) return true; } for(int c=0; cgetTemplateMolecule(); if (!t1) continue; - mt1 = t1->getMoleculeType(); // AS2023 - if this is not a removal, track connections, removal // doesn't give any reaction 
connections, so skip that if (transfn->getType()!=(int)TransformationFactory::REMOVE) { - c1 = transfn->getComponentIndex(); - // If the moleculetype or component is not present in the other reaction, - // it is not connected - if (!rxn->areMoleculeTypeAndComponentPresent(mt1, c1)) continue; - - // If the TemplateMolecule is 'incompatible' with any of the reactants - // or products, then the reaction is not connected - if (!rxn->isTemplateCompatible(t1)) continue; + bool isCompatible = rxn->isTemplateCompatible(t1); + if (!isCompatible) continue; + // Full membership refresh still removes/re-adds compatible mappings + // even when the changed component is outside the target pattern, + // which can change ReactantList/ReactantTree ordering. // Both checks passed for one op so return true return true; } else { @@ -1058,15 +1052,11 @@ bool TransformationSet::checkConnection(ReactionClass * rxn) { if (!t1) continue; t1 = t1->getMappedPartner(); if (!t1) continue; - mt1 = t1->getMoleculeType(); - c1 = transfn->getComponentIndex(); - // If the moleculetype or component is present in the other reaction, - // it is not connected - if (!rxn->areMoleculeTypeAndComponentPresent(mt1, c1)) continue; - - // If the TemplateMolecule is 'incompatible' with any of the reactants - // or products, then the reaction is not connected - if (!rxn->isTemplateCompatible(t1)) continue; + bool isCompatible = rxn->isTemplateCompatible(t1); + if (!isCompatible) continue; + // See note above: compatibility alone is enough to require a + // connected update if the fast path is to preserve full-update + // membership ordering. 
// Both checks passed for one op so return true return true; } diff --git a/validate/validate.py b/validate/validate.py index 47c9b06e..f25464ef 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -5,6 +5,7 @@ import re import fnmatch import sys +import tempfile import bionetgen nIterations=15 @@ -202,6 +203,25 @@ def _run_nfsim(self, outputDirectory, fileNumber, runOptions): expect_success=True, ) + def test_connectivity_preserves_seeded_tlbr_trajectory(self): + xmlPath = os.path.join(nfsimPrePath, 'test', 'tlbr', 'tlbr.xml') + with tempfile.TemporaryDirectory() as tmpdir: + offPath = os.path.join(tmpdir, 'tlbr_off.gdat') + onPath = os.path.join(tmpdir, 'tlbr_on.gdat') + + self._run_nfsim_xml(xmlPath, offPath, '-sim 1 -oSteps 100 -seed 1') + self._run_nfsim_xml(xmlPath, onPath, '-sim 1 -oSteps 100 -seed 1 -connect') + + offHeaders, offData = self._load_gdat(offPath) + onHeaders, onData = self._load_gdat(onPath) + + self.assertEqual(offHeaders, onHeaders, 'Connectivity regression changed TLBR output columns') + self.assertEqual(offData.shape, onData.shape, 'Connectivity regression changed TLBR output shape') + self.assertTrue( + np.array_equal(offData, onData), + 'Connectivity regression changed the same-seed TLBR trajectory' + ) + def test_issue48_ring_unbinding_requires_disconnection(self): outputDirectory = mfolder fileNumber = '37' From 124f8b7d7faa36a6ea0d4804420898f790d6bf7b Mon Sep 17 00:00:00 2001 From: Bill Hlavacek Date: Mon, 11 May 2026 14:44:48 -0600 Subject: [PATCH 2/4] fix: narrow connectivity reachability for add-only paths --- src/NFcore/reactionClass.cpp | 15 +++++++++------ src/NFcore/templateMolecule.cpp | 20 +++++--------------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/NFcore/reactionClass.cpp b/src/NFcore/reactionClass.cpp index e0e0ac75..b5ed4a0a 100755 --- a/src/NFcore/reactionClass.cpp +++ b/src/NFcore/reactionClass.cpp @@ -275,15 +275,18 @@ bool ReactionClass::isReactionConnected(ReactionClass * rxn) { 
// one of the reactant templates of rxn. if (this->transformationSet->checkConnection(rxn)) return true; - // Full membership refresh revisits every explicit reactant/product template in - // the fired rule, not only templates that carry direct transformations. Treat - // any compatible explicit template as connected so the fast path preserves the - // same reachable update set. + // Full membership refresh revisits every explicit reactant template in the + // fired rule, not only templates that carry direct transformations. for (unsigned int i=0; iisTemplateCompatible(allReactantTemplates[i])) return true; } - for (unsigned int i=0; iisTemplateCompatible(allProductTemplates[i])) return true; + + // Product templates can also create new compatible mappings, but avoid + // broadening pure-synthesis rules where this over-connects add-only paths. + if (n_reactants > 0) { + for (unsigned int i=0; iisTemplateCompatible(allProductTemplates[i])) return true; + } } return false; } diff --git a/src/NFcore/templateMolecule.cpp b/src/NFcore/templateMolecule.cpp index 5283351c..6307f950 100644 --- a/src/NFcore/templateMolecule.cpp +++ b/src/NFcore/templateMolecule.cpp @@ -2148,32 +2148,22 @@ bool TemplateMolecule::checkSymmetryAroundBond(TemplateMolecule *tm1, TemplateMo bool TemplateMolecule::isMoleculeTypeAndComponentPresent(MoleculeType * mt, int cIndex) { if (this->getMoleculeType() != mt) return false; - - auto matchesComponent = [mt, cIndex](int templateComponent) { - if (templateComponent == cIndex) return true; - if (!mt->isEquivalentComponent(templateComponent) || - !mt->isEquivalentComponent(cIndex)) { - return false; - } - return mt->getEquivalenceClassNumber(templateComponent) == - mt->getEquivalenceClassNumber(cIndex); - }; // First make a joint vector of components specified in the TemplateMolecule for(int i=0; iemptyComps[i])) return true; + if (this->emptyComps[i] == cIndex) return true; } for(int i=0; ioccupiedComps[i])) return true; + if 
(this->occupiedComps[i] == cIndex) return true; } for(int i=0; ibondComp[i])) return true; + if (this->bondComp[i] == cIndex) return true; } for(int i=0; icompStateConstraint_Comp[i])) return true; + if (this->compStateConstraint_Comp[i] == cIndex) return true; } for(int i=0; icompStateExclusion_Comp[i])) return true; + if (this->compStateExclusion_Comp[i] == cIndex) return true; } for(int c=0; c Date: Mon, 11 May 2026 15:44:25 -0600 Subject: [PATCH 3/4] Add RuleMonkey validation harness and reports --- .../basicmodels_smoke_standard.md | 41 ++++ .../corpus_smoke_connect.md | 44 ++++ .../corpus_smoke_standard.md | 44 ++++ .../feature_coverage_connect_subset.md | 34 +++ .../feature_coverage_standard_subset.md | 34 +++ .../tutorial_example_connect.md | 37 +++ .../tutorial_example_standard.md | 37 +++ validate/rulemonkey_nfsim_driver.py | 217 ++++++++++++++++++ 8 files changed, 488 insertions(+) create mode 100644 validate/results/rulemonkey_harness/basicmodels_smoke_standard.md create mode 100644 validate/results/rulemonkey_harness/corpus_smoke_connect.md create mode 100644 validate/results/rulemonkey_harness/corpus_smoke_standard.md create mode 100644 validate/results/rulemonkey_harness/feature_coverage_connect_subset.md create mode 100644 validate/results/rulemonkey_harness/feature_coverage_standard_subset.md create mode 100644 validate/results/rulemonkey_harness/tutorial_example_connect.md create mode 100644 validate/results/rulemonkey_harness/tutorial_example_standard.md create mode 100755 validate/rulemonkey_nfsim_driver.py diff --git a/validate/results/rulemonkey_harness/basicmodels_smoke_standard.md b/validate/results/rulemonkey_harness/basicmodels_smoke_standard.md new file mode 100644 index 00000000..0a77bdb4 --- /dev/null +++ b/validate/results/rulemonkey_harness/basicmodels_smoke_standard.md @@ -0,0 +1,41 @@ +# RuleMonkey Benchmark Report + +**Date:** 2026-05-11 +**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` 
+**Reps per model:** 10 +**NFsim reference:** 100-rep ensemble + +## Correctness + +- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. +- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. +- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. +- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. +- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. + +## Efficiency + +- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). +- **rm_s**: RM mean wall time (10-rep average). +- **ev/s**: SSA events per wall-second (throughput). +- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. 
+ +## Results + +| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | +|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| +| r01 | — | 1.9 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 10.22 | 12.66 | 5.00 | 2.00 | Xp_free | FAIL | +| r05 | — | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 7.83 | 8.51 | 5.00 | 1.86 | Complex | FAIL | +| r20 | — | 2.1 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 10.22 | 12.66 | 5.00 | 2.00 | Xp_free | FAIL | +| r22 | — | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 6.87 | 10.28 | 5.00 | 1.00 | rib_elong | FAIL | +| r32 | — | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 6.22 | 9.56 | 5.00 | 2.00 | Mintra | FAIL | + +## Summary + +| Metric | Count | +|--------|------:| +| PASS | 0 | +| FAIL | 5 | +| TIMEOUT | 0 | +| SKIP | 0 | +| **Total** | **5** | diff --git a/validate/results/rulemonkey_harness/corpus_smoke_connect.md b/validate/results/rulemonkey_harness/corpus_smoke_connect.md new file mode 100644 index 00000000..25157067 --- /dev/null +++ b/validate/results/rulemonkey_harness/corpus_smoke_connect.md @@ -0,0 +1,44 @@ +# RuleMonkey Benchmark Report + +**Date:** 2026-05-11 +**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` +**Reps per model:** 2 +**NFsim reference:** 100-rep ensemble + +## Correctness + +- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. +- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. 
Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. +- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. +- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. +- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. + +## Efficiency + +- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). +- **rm_s**: RM mean wall time (2-rep average). +- **ev/s**: SSA events per wall-second (throughput). +- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. + +## Results + +| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | +|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| +| Tutorial_Example | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 5.54 | 65.07 | 17.67 | 2.24 | A_phos_1 | FAIL | +| A_plus_A | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 217.39 | 2675.74 | 14.52 | 2.00 | AA_1 | FAIL | +| nfsim_ring_closure_polymer | 0.1 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 1.16 | 9.24 | 12.82 | 1.00 | Linear_Dimers | PASS | +| ANx_noActivity | 0.2 | 0.4 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.00 | 11.86 | 15.18 | 2.00 | RD_R | PASS | +| isingspin_localfcn | 0.3 | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 2.79 | 8.46 | 16.64 | 2.00 | M_spUp | PASS | +| BLBR | 0.9 | 0.4 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 95021.47 | 3014.83 | 19.16 | 31.66 | R1 | FAIL | +| e1 | 3.0 | 2.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 
5.25 | 16.73 | 18.30 | 2.00 | Efree | PASS | +| fceri_ji | 18.9 | 4.0 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.35 | 4.43 | 18.27 | 3.40 | RecMon_1 | PASS | + +## Summary + +| Metric | Count | +|--------|------:| +| PASS | 5 | +| FAIL | 3 | +| TIMEOUT | 0 | +| SKIP | 0 | +| **Total** | **8** | diff --git a/validate/results/rulemonkey_harness/corpus_smoke_standard.md b/validate/results/rulemonkey_harness/corpus_smoke_standard.md new file mode 100644 index 00000000..8b36fa83 --- /dev/null +++ b/validate/results/rulemonkey_harness/corpus_smoke_standard.md @@ -0,0 +1,44 @@ +# RuleMonkey Benchmark Report + +**Date:** 2026-05-11 +**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` +**Reps per model:** 2 +**NFsim reference:** 100-rep ensemble + +## Correctness + +- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. +- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. +- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. +- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. +- **verdict**: PASS if `tz_max < T`, FAIL otherwise. 
Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. + +## Efficiency + +- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). +- **rm_s**: RM mean wall time (2-rep average). +- **ev/s**: SSA events per wall-second (throughput). +- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. + +## Results + +| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | +|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| +| Tutorial_Example | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 3.69 | 4.04 | 17.67 | 2.00 | R_dim_1 | PASS | +| A_plus_A | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 217.39 | 2675.74 | 14.52 | 2.00 | AA_1 | FAIL | +| nfsim_ring_closure_polymer | 0.1 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 1.16 | 9.24 | 12.82 | 1.00 | Linear_Dimers | PASS | +| ANx_noActivity | 0.2 | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.00 | 11.86 | 15.18 | 2.00 | RD_R | PASS | +| isingspin_localfcn | 0.3 | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 2.79 | 8.46 | 16.64 | 2.00 | M_spUp | PASS | +| BLBR | 0.9 | 0.4 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 95021.47 | 3014.83 | 19.16 | 31.66 | R1 | FAIL | +| e1 | 3.0 | 2.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 5.25 | 16.73 | 18.30 | 2.00 | Efree | PASS | +| fceri_ji | 18.9 | 5.7 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.35 | 4.43 | 18.27 | 3.40 | RecMon_1 | PASS | + +## Summary + +| Metric | Count | +|--------|------:| +| PASS | 6 | +| FAIL | 2 | +| TIMEOUT | 0 | +| SKIP | 0 | +| **Total** | **8** | diff --git a/validate/results/rulemonkey_harness/feature_coverage_connect_subset.md b/validate/results/rulemonkey_harness/feature_coverage_connect_subset.md new file mode 100644 index 00000000..0f88998d --- /dev/null +++ b/validate/results/rulemonkey_harness/feature_coverage_connect_subset.md @@ -0,0 +1,34 @@ +# Feature Coverage Benchmark Report + +Generated: 
2026-05-11 15:28:57 + +**Summary: 1 PASS / 2 FAIL / 0 SKIP** + +## Feature Coverage + +| Model | Tier | Features | RM vs NFsim | RM vs ODE | Verdict | +|-------|------|----------|-------------|-----------|--------| +| combo_synth_degrade_equilibrium | combinations | Feature combination: synthesis + degradation + bin | FAIL | - | **FAIL** | +| ft_delete_molecules | base | Feature: DeleteMolecules keyword on degradation ru | PASS | - | PASS | +| ft_multi_product | base | Feature: rule producing multiple new molecules (on | FAIL | - | **FAIL** | + +## Detailed Results + +### combo_synth_degrade_equilibrium +- Tier: combinations +- RM reps: 5, wall time: 0.293s +- vs NFsim: max_z=17.33 (AB), tz_max=17.22 — **FAIL** +- **Overall: FAIL** + +### ft_delete_molecules +- Tier: base +- RM reps: 5, wall time: 0.288s +- vs NFsim: max_z=3.31 (B_free), tz_max=4.83 — **PASS** +- **Overall: PASS** + +### ft_multi_product +- Tier: base +- RM reps: 5, wall time: 0.265s +- vs NFsim: max_z=25.97 (BC), tz_max=2329.70 — **FAIL** +- **Overall: FAIL** + diff --git a/validate/results/rulemonkey_harness/feature_coverage_standard_subset.md b/validate/results/rulemonkey_harness/feature_coverage_standard_subset.md new file mode 100644 index 00000000..27756938 --- /dev/null +++ b/validate/results/rulemonkey_harness/feature_coverage_standard_subset.md @@ -0,0 +1,34 @@ +# Feature Coverage Benchmark Report + +Generated: 2026-05-11 15:29:02 + +**Summary: 1 PASS / 2 FAIL / 0 SKIP** + +## Feature Coverage + +| Model | Tier | Features | RM vs NFsim | RM vs ODE | Verdict | +|-------|------|----------|-------------|-----------|--------| +| combo_synth_degrade_equilibrium | combinations | Feature combination: synthesis + degradation + bin | FAIL | - | **FAIL** | +| ft_delete_molecules | base | Feature: DeleteMolecules keyword on degradation ru | PASS | - | PASS | +| ft_multi_product | base | Feature: rule producing multiple new molecules (on | FAIL | - | **FAIL** | + +## Detailed Results + +### 
combo_synth_degrade_equilibrium +- Tier: combinations +- RM reps: 5, wall time: 0.318s +- vs NFsim: max_z=9.79 (A_free), tz_max=5.94 — **FAIL** +- **Overall: FAIL** + +### ft_delete_molecules +- Tier: base +- RM reps: 5, wall time: 0.284s +- vs NFsim: max_z=3.62 (A_total), tz_max=4.82 — **PASS** +- **Overall: PASS** + +### ft_multi_product +- Tier: base +- RM reps: 5, wall time: 0.285s +- vs NFsim: max_z=7.42 (BC), tz_max=516.40 — **FAIL** +- **Overall: FAIL** + diff --git a/validate/results/rulemonkey_harness/tutorial_example_connect.md b/validate/results/rulemonkey_harness/tutorial_example_connect.md new file mode 100644 index 00000000..4b5809ab --- /dev/null +++ b/validate/results/rulemonkey_harness/tutorial_example_connect.md @@ -0,0 +1,37 @@ +# RuleMonkey Benchmark Report + +**Date:** 2026-05-11 +**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` +**Reps per model:** 10 +**NFsim reference:** 100-rep ensemble + +## Correctness + +- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. +- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. +- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. 
+- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. +- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. + +## Efficiency + +- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). +- **rm_s**: RM mean wall time (10-rep average). +- **ev/s**: SSA events per wall-second (throughput). +- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. + +## Results + +| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | +|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| +| Tutorial_Example | 0.0 | 0.5 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 12.40 | 65.07 | 5.00 | 2.00 | A_phos_1 | FAIL | + +## Summary + +| Metric | Count | +|--------|------:| +| PASS | 0 | +| FAIL | 1 | +| TIMEOUT | 0 | +| SKIP | 0 | +| **Total** | **1** | diff --git a/validate/results/rulemonkey_harness/tutorial_example_standard.md b/validate/results/rulemonkey_harness/tutorial_example_standard.md new file mode 100644 index 00000000..8e3a20d4 --- /dev/null +++ b/validate/results/rulemonkey_harness/tutorial_example_standard.md @@ -0,0 +1,37 @@ +# RuleMonkey Benchmark Report + +**Date:** 2026-05-11 +**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` +**Reps per model:** 10 +**NFsim reference:** 100-rep ensemble + +## Correctness + +- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. 
+- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. +- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. +- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. +- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. + +## Efficiency + +- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). +- **rm_s**: RM mean wall time (10-rep average). +- **ev/s**: SSA events per wall-second (throughput). +- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. + +## Results + +| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | +|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| +| Tutorial_Example | 0.0 | 0.5 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 7.52 | 9.14 | 5.00 | 2.00 | R_phos_1 | FAIL | + +## Summary + +| Metric | Count | +|--------|------:| +| PASS | 0 | +| FAIL | 1 | +| TIMEOUT | 0 | +| SKIP | 0 | +| **Total** | **1** | diff --git a/validate/rulemonkey_nfsim_driver.py b/validate/rulemonkey_nfsim_driver.py new file mode 100755 index 00000000..5aa343da --- /dev/null +++ b/validate/rulemonkey_nfsim_driver.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +"""Adapt NFsim CLI to RuleMonkey's rm_driver interface. 
+ +This lets RuleMonkey's Python harnesses validate an NFsim binary directly +against the vendored NFsim reference ensembles. + +Expected rm_driver CLI: + rulemonkey_nfsim_driver.py <xml_path> <t_end> <n_steps> <seed> [rm_flags...] + +Useful environment variables: + NFSIM_BIN Path to NFsim executable (default: <repo>/build/NFsim) + NFSIM_SIM_PARAMS Path to RuleMonkey sim_params.tsv for model-specific flags + NFSIM_EXTRA_FLAGS Extra NFsim CLI flags, e.g. "-connect" +""" + +from __future__ import annotations + +import csv +import os +import shlex +import subprocess +import sys +import tempfile +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[1] +DEFAULT_NFSIM_BIN = REPO_ROOT / "build" / "NFsim" + + +def _usage() -> str: + return ( + "Usage: rulemonkey_nfsim_driver.py <xml_path> <t_end> <n_steps> <seed> " + "[rm_flags...]" + ) + + +def _strip_header_hash(fieldnames: list[str] | None) -> list[str] | None: + if fieldnames and fieldnames[0].startswith("#"): + fieldnames = list(fieldnames) + fieldnames[0] = fieldnames[0].lstrip("#") + return fieldnames + + +def _load_model_flags(sim_params_path: Path | None, model_name: str) -> list[str]: + if sim_params_path is None or not sim_params_path.exists(): + return [] + + with sim_params_path.open(newline="") as f: + reader = csv.DictReader(f, delimiter="\t") + reader.fieldnames = _strip_header_hash(reader.fieldnames) + for row in reader: + model = (row.get("model") or "").strip() + if not model or model.startswith("#"): + continue + if model != model_name: + continue + raw_flags = (row.get("nfsim_flags") or "").strip() + return _normalize_nfsim_flags(shlex.split(raw_flags)) + return [] + + +def _normalize_nfsim_flags(tokens: list[str]) -> list[str]: + """Drop flags that the harness already supplies explicitly. + + Keep model-specific behavior flags such as -cb, -bscb, -gml, and -utl. 
+ """ + + drop_with_value = { + "-xml", + "-o", + "-sim", + "-eq", + "-oSteps", + "-oTimes", + "-seed", + "-ss", + "-rxnlog", + "-logbuffer", + "-maxcputime", + } + + keep_with_value = { + "-gml", + "-utl", + } + + out: list[str] = [] + i = 0 + while i < len(tokens): + tok = tokens[i] + if tok in keep_with_value: + if i + 1 < len(tokens): + out.extend([tok, tokens[i + 1]]) + i += 2 + continue + if tok in drop_with_value: + i += 2 + continue + out.append(tok) + i += 1 + return out + + +def _dedupe_flags(tokens: list[str]) -> list[str]: + """Keep first occurrence of standalone flags; last occurrence of value flags.""" + + value_flags = {"-gml", "-utl"} + standalone_seen: set[str] = set() + value_map: dict[str, str] = {} + ordered_values: list[str] = [] + out: list[str] = [] + + i = 0 + while i < len(tokens): + tok = tokens[i] + if tok in value_flags: + if i + 1 < len(tokens): + if tok not in value_map: + ordered_values.append(tok) + value_map[tok] = tokens[i + 1] + i += 2 + continue + if tok not in standalone_seen: + standalone_seen.add(tok) + out.append(tok) + i += 1 + + for tok in ordered_values: + out.extend([tok, value_map[tok]]) + return out + + +def _normalize_gdat_text(raw_text: str) -> str: + """Rewrite NFsim gdat text as clean tab-separated output.""" + + out_lines: list[str] = [] + for raw_line in raw_text.splitlines(): + line = raw_line.strip() + if not line: + continue + if line.startswith("#"): + parts = line.lstrip("#").strip().split() + out_lines.append("#" + "\t".join(parts)) + continue + parts = line.split() + out_lines.append("\t".join(parts)) + return "\n".join(out_lines) + ("\n" if out_lines else "") + + +def main() -> int: + if len(sys.argv) < 5: + print(_usage(), file=sys.stderr) + return 2 + + xml_path = Path(sys.argv[1]).resolve() + t_end = sys.argv[2] + n_steps = sys.argv[3] + seed = sys.argv[4] + passthrough_flags = sys.argv[5:] + + nfsim_bin = Path(os.environ.get("NFSIM_BIN", str(DEFAULT_NFSIM_BIN))).resolve() + sim_params_env = 
os.environ.get("NFSIM_SIM_PARAMS") + sim_params_path = Path(sim_params_env).resolve() if sim_params_env else None + extra_flags = shlex.split(os.environ.get("NFSIM_EXTRA_FLAGS", "")) + + if not nfsim_bin.exists(): + print(f"NFsim binary not found: {nfsim_bin}", file=sys.stderr) + return 2 + if not xml_path.exists(): + print(f"XML not found: {xml_path}", file=sys.stderr) + return 2 + + model_name = xml_path.stem + model_flags = _load_model_flags(sim_params_path, model_name) + merged_flags = _dedupe_flags(model_flags + passthrough_flags + extra_flags) + + with tempfile.TemporaryDirectory(prefix=f"rm_nfsim_{model_name}_") as td_raw: + out_gdat = Path(td_raw) / f"{model_name}.gdat" + cmd = [ + str(nfsim_bin), + "-xml", + str(xml_path), + "-sim", + str(t_end), + "-oSteps", + str(n_steps), + "-seed", + str(seed), + *merged_flags, + "-o", + str(out_gdat), + ] + result = subprocess.run( + cmd, + cwd=str(xml_path.parent), + capture_output=True, + text=True, + ) + + if result.returncode != 0 or not out_gdat.exists(): + print("NFsim driver wrapper failed.", file=sys.stderr) + print(f"Command: {' '.join(shlex.quote(x) for x in cmd)}", file=sys.stderr) + if result.stdout.strip(): + print("--- stdout ---", file=sys.stderr) + print(result.stdout.strip(), file=sys.stderr) + if result.stderr.strip(): + print("--- stderr ---", file=sys.stderr) + print(result.stderr.strip(), file=sys.stderr) + return 1 + + sys.stdout.write(_normalize_gdat_text(out_gdat.read_text())) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 841c27ef015528ef120b016e19b0698dbd12544a Mon Sep 17 00:00:00 2001 From: Bill Hlavacek Date: Tue, 12 May 2026 11:33:53 -0600 Subject: [PATCH 4/4] test: tighten connectivity regression coverage --- .../basicmodels_smoke_standard.md | 41 ---- .../corpus_smoke_connect.md | 44 ---- .../corpus_smoke_standard.md | 44 ---- .../feature_coverage_connect_subset.md | 34 --- .../feature_coverage_standard_subset.md | 34 --- .../tutorial_example_connect.md | 37 
--- .../tutorial_example_standard.md | 37 --- validate/rulemonkey_nfsim_driver.py | 217 ------------------ validate/validate.py | 43 +++- 9 files changed, 33 insertions(+), 498 deletions(-) delete mode 100644 validate/results/rulemonkey_harness/basicmodels_smoke_standard.md delete mode 100644 validate/results/rulemonkey_harness/corpus_smoke_connect.md delete mode 100644 validate/results/rulemonkey_harness/corpus_smoke_standard.md delete mode 100644 validate/results/rulemonkey_harness/feature_coverage_connect_subset.md delete mode 100644 validate/results/rulemonkey_harness/feature_coverage_standard_subset.md delete mode 100644 validate/results/rulemonkey_harness/tutorial_example_connect.md delete mode 100644 validate/results/rulemonkey_harness/tutorial_example_standard.md delete mode 100755 validate/rulemonkey_nfsim_driver.py diff --git a/validate/results/rulemonkey_harness/basicmodels_smoke_standard.md b/validate/results/rulemonkey_harness/basicmodels_smoke_standard.md deleted file mode 100644 index 0a77bdb4..00000000 --- a/validate/results/rulemonkey_harness/basicmodels_smoke_standard.md +++ /dev/null @@ -1,41 +0,0 @@ -# RuleMonkey Benchmark Report - -**Date:** 2026-05-11 -**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` -**Reps per model:** 10 -**NFsim reference:** 100-rep ensemble - -## Correctness - -- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. -- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. 
-- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. -- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. -- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. - -## Efficiency - -- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). -- **rm_s**: RM mean wall time (10-rep average). -- **ev/s**: SSA events per wall-second (throughput). -- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. - -## Results - -| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | -|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| -| r01 | — | 1.9 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 10.22 | 12.66 | 5.00 | 2.00 | Xp_free | FAIL | -| r05 | — | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 7.83 | 8.51 | 5.00 | 1.86 | Complex | FAIL | -| r20 | — | 2.1 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 10.22 | 12.66 | 5.00 | 2.00 | Xp_free | FAIL | -| r22 | — | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 6.87 | 10.28 | 5.00 | 1.00 | rib_elong | FAIL | -| r32 | — | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 6.22 | 9.56 | 5.00 | 2.00 | Mintra | FAIL | - -## Summary - -| Metric | Count | -|--------|------:| -| PASS | 0 | -| FAIL | 5 | -| TIMEOUT | 0 | -| SKIP | 0 | -| **Total** | **5** | diff --git a/validate/results/rulemonkey_harness/corpus_smoke_connect.md b/validate/results/rulemonkey_harness/corpus_smoke_connect.md deleted file mode 100644 index 25157067..00000000 --- 
a/validate/results/rulemonkey_harness/corpus_smoke_connect.md +++ /dev/null @@ -1,44 +0,0 @@ -# RuleMonkey Benchmark Report - -**Date:** 2026-05-11 -**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` -**Reps per model:** 2 -**NFsim reference:** 100-rep ensemble - -## Correctness - -- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. -- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. -- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. -- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. -- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. - -## Efficiency - -- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). -- **rm_s**: RM mean wall time (2-rep average). -- **ev/s**: SSA events per wall-second (throughput). -- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. 
- -## Results - -| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | -|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| -| Tutorial_Example | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 5.54 | 65.07 | 17.67 | 2.24 | A_phos_1 | FAIL | -| A_plus_A | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 217.39 | 2675.74 | 14.52 | 2.00 | AA_1 | FAIL | -| nfsim_ring_closure_polymer | 0.1 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 1.16 | 9.24 | 12.82 | 1.00 | Linear_Dimers | PASS | -| ANx_noActivity | 0.2 | 0.4 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.00 | 11.86 | 15.18 | 2.00 | RD_R | PASS | -| isingspin_localfcn | 0.3 | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 2.79 | 8.46 | 16.64 | 2.00 | M_spUp | PASS | -| BLBR | 0.9 | 0.4 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 95021.47 | 3014.83 | 19.16 | 31.66 | R1 | FAIL | -| e1 | 3.0 | 2.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 5.25 | 16.73 | 18.30 | 2.00 | Efree | PASS | -| fceri_ji | 18.9 | 4.0 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.35 | 4.43 | 18.27 | 3.40 | RecMon_1 | PASS | - -## Summary - -| Metric | Count | -|--------|------:| -| PASS | 5 | -| FAIL | 3 | -| TIMEOUT | 0 | -| SKIP | 0 | -| **Total** | **8** | diff --git a/validate/results/rulemonkey_harness/corpus_smoke_standard.md b/validate/results/rulemonkey_harness/corpus_smoke_standard.md deleted file mode 100644 index 8b36fa83..00000000 --- a/validate/results/rulemonkey_harness/corpus_smoke_standard.md +++ /dev/null @@ -1,44 +0,0 @@ -# RuleMonkey Benchmark Report - -**Date:** 2026-05-11 -**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` -**Reps per model:** 2 -**NFsim reference:** 100-rep ensemble - -## Correctness - -- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. 
Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. -- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. -- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. -- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. -- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. - -## Efficiency - -- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). -- **rm_s**: RM mean wall time (2-rep average). -- **ev/s**: SSA events per wall-second (throughput). -- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. 
- -## Results - -| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | -|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| -| Tutorial_Example | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 3.69 | 4.04 | 17.67 | 2.00 | R_dim_1 | PASS | -| A_plus_A | 0.0 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 217.39 | 2675.74 | 14.52 | 2.00 | AA_1 | FAIL | -| nfsim_ring_closure_polymer | 0.1 | 0.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 1.16 | 9.24 | 12.82 | 1.00 | Linear_Dimers | PASS | -| ANx_noActivity | 0.2 | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.00 | 11.86 | 15.18 | 2.00 | RD_R | PASS | -| isingspin_localfcn | 0.3 | 0.3 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 2.79 | 8.46 | 16.64 | 2.00 | M_spUp | PASS | -| BLBR | 0.9 | 0.4 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 95021.47 | 3014.83 | 19.16 | 31.66 | R1 | FAIL | -| e1 | 3.0 | 2.2 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 5.25 | 16.73 | 18.30 | 2.00 | Efree | PASS | -| fceri_ji | 18.9 | 5.7 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 4.35 | 4.43 | 18.27 | 3.40 | RecMon_1 | PASS | - -## Summary - -| Metric | Count | -|--------|------:| -| PASS | 6 | -| FAIL | 2 | -| TIMEOUT | 0 | -| SKIP | 0 | -| **Total** | **8** | diff --git a/validate/results/rulemonkey_harness/feature_coverage_connect_subset.md b/validate/results/rulemonkey_harness/feature_coverage_connect_subset.md deleted file mode 100644 index 0f88998d..00000000 --- a/validate/results/rulemonkey_harness/feature_coverage_connect_subset.md +++ /dev/null @@ -1,34 +0,0 @@ -# Feature Coverage Benchmark Report - -Generated: 2026-05-11 15:28:57 - -**Summary: 1 PASS / 2 FAIL / 0 SKIP** - -## Feature Coverage - -| Model | Tier | Features | RM vs NFsim | RM vs ODE | Verdict | -|-------|------|----------|-------------|-----------|--------| -| combo_synth_degrade_equilibrium | combinations | Feature combination: synthesis + degradation + bin | FAIL | - | **FAIL** | -| 
ft_delete_molecules | base | Feature: DeleteMolecules keyword on degradation ru | PASS | - | PASS | -| ft_multi_product | base | Feature: rule producing multiple new molecules (on | FAIL | - | **FAIL** | - -## Detailed Results - -### combo_synth_degrade_equilibrium -- Tier: combinations -- RM reps: 5, wall time: 0.293s -- vs NFsim: max_z=17.33 (AB), tz_max=17.22 — **FAIL** -- **Overall: FAIL** - -### ft_delete_molecules -- Tier: base -- RM reps: 5, wall time: 0.288s -- vs NFsim: max_z=3.31 (B_free), tz_max=4.83 — **PASS** -- **Overall: PASS** - -### ft_multi_product -- Tier: base -- RM reps: 5, wall time: 0.265s -- vs NFsim: max_z=25.97 (BC), tz_max=2329.70 — **FAIL** -- **Overall: FAIL** - diff --git a/validate/results/rulemonkey_harness/feature_coverage_standard_subset.md b/validate/results/rulemonkey_harness/feature_coverage_standard_subset.md deleted file mode 100644 index 27756938..00000000 --- a/validate/results/rulemonkey_harness/feature_coverage_standard_subset.md +++ /dev/null @@ -1,34 +0,0 @@ -# Feature Coverage Benchmark Report - -Generated: 2026-05-11 15:29:02 - -**Summary: 1 PASS / 2 FAIL / 0 SKIP** - -## Feature Coverage - -| Model | Tier | Features | RM vs NFsim | RM vs ODE | Verdict | -|-------|------|----------|-------------|-----------|--------| -| combo_synth_degrade_equilibrium | combinations | Feature combination: synthesis + degradation + bin | FAIL | - | **FAIL** | -| ft_delete_molecules | base | Feature: DeleteMolecules keyword on degradation ru | PASS | - | PASS | -| ft_multi_product | base | Feature: rule producing multiple new molecules (on | FAIL | - | **FAIL** | - -## Detailed Results - -### combo_synth_degrade_equilibrium -- Tier: combinations -- RM reps: 5, wall time: 0.318s -- vs NFsim: max_z=9.79 (A_free), tz_max=5.94 — **FAIL** -- **Overall: FAIL** - -### ft_delete_molecules -- Tier: base -- RM reps: 5, wall time: 0.284s -- vs NFsim: max_z=3.62 (A_total), tz_max=4.82 — **PASS** -- **Overall: PASS** - -### ft_multi_product -- Tier: 
base -- RM reps: 5, wall time: 0.285s -- vs NFsim: max_z=7.42 (BC), tz_max=516.40 — **FAIL** -- **Overall: FAIL** - diff --git a/validate/results/rulemonkey_harness/tutorial_example_connect.md b/validate/results/rulemonkey_harness/tutorial_example_connect.md deleted file mode 100644 index 4b5809ab..00000000 --- a/validate/results/rulemonkey_harness/tutorial_example_connect.md +++ /dev/null @@ -1,37 +0,0 @@ -# RuleMonkey Benchmark Report - -**Date:** 2026-05-11 -**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` -**Reps per model:** 10 -**NFsim reference:** 100-rep ensemble - -## Correctness - -- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. -- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. -- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. -- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. -- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. - -## Efficiency - -- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). 
-- **rm_s**: RM mean wall time (10-rep average). -- **ev/s**: SSA events per wall-second (throughput). -- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. - -## Results - -| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | -|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| -| Tutorial_Example | 0.0 | 0.5 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 12.40 | 65.07 | 5.00 | 2.00 | A_phos_1 | FAIL | - -## Summary - -| Metric | Count | -|--------|------:| -| PASS | 0 | -| FAIL | 1 | -| TIMEOUT | 0 | -| SKIP | 0 | -| **Total** | **1** | diff --git a/validate/results/rulemonkey_harness/tutorial_example_standard.md b/validate/results/rulemonkey_harness/tutorial_example_standard.md deleted file mode 100644 index 8e3a20d4..00000000 --- a/validate/results/rulemonkey_harness/tutorial_example_standard.md +++ /dev/null @@ -1,37 +0,0 @@ -# RuleMonkey Benchmark Report - -**Date:** 2026-05-11 -**Commit:** `6d7f240 docs(readme,quickstart): document the cancellation hook for embedders` -**Reps per model:** 10 -**NFsim reference:** 100-rep ensemble - -## Correctness - -- **screen**: max |RM_mean - NFsim_mean| / (NFsim_std / sqrt(n_reps)) over all (time, obs) pairs. Fast early-warning metric. Flags values ≥ 5.0 as "suspicious" but does not determine verdict. Historical benchmarks used this as the pass/fail criterion; for models with many rare-event Size_N observables it is statistically unreliable. -- **tz_max**: max over observables of the z-score of the per-rep trapezoidal time integral, computed against precomputed NFsim stats in `ensemble/{model}.tint.tsv`. Collapses each 1001-point trajectory to one number per observable per rep, eliminating single-time-point coincidences. 
-- **T**: per-model verdict threshold = max(5.0, 1.2 × tz_p99), where tz_p99 is the 99th percentile of `tz_max` from self-splitting the NFsim replicates at n=10 (see `tests/reference/nfsim/noise_floor.tsv`; provenance and regen recipe in the same directory's `PROVENANCE.md`). Adaptive to each model's intrinsic rare-event noise floor. -- **std_ratio**: max(RM_std / NFsim_std) across observables with nontrivial variance. Diagnostic for variance consistency; not part of verdict. -- **verdict**: PASS if `tz_max < T`, FAIL otherwise. Degenerate-observable mismatches (both stds zero, values differ) fail unconditionally. - -## Efficiency - -- **nfsim_s**: NFsim mean wall time (100-rep, from reference campaign). -- **rm_s**: RM mean wall time (10-rep average). -- **ev/s**: SSA events per wall-second (throughput). -- **sel/fire/obs/upd**: Phase breakdown as % of RM engine time. - -## Results - -| model | nfsim_s | rm_s | events | ev/s | sel% | fire% | obs% | upd% | screen | tz_max | T | std_ratio | worst_obs | verdict | -|-------|--------:|-----:|-------:|-----:|-----:|------:|-----:|-----:|-------:|-------:|----:|----------:|-----------|---------| -| Tutorial_Example | 0.0 | 0.5 | — | — | 0.0 | 0.0 | 0.0 | 0.0 | 7.52 | 9.14 | 5.00 | 2.00 | R_phos_1 | FAIL | - -## Summary - -| Metric | Count | -|--------|------:| -| PASS | 0 | -| FAIL | 1 | -| TIMEOUT | 0 | -| SKIP | 0 | -| **Total** | **1** | diff --git a/validate/rulemonkey_nfsim_driver.py b/validate/rulemonkey_nfsim_driver.py deleted file mode 100755 index 5aa343da..00000000 --- a/validate/rulemonkey_nfsim_driver.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env python3 -"""Adapt NFsim CLI to RuleMonkey's rm_driver interface. - -This lets RuleMonkey's Python harnesses validate an NFsim binary directly -against the vendored NFsim reference ensembles. - -Expected rm_driver CLI: - rulemonkey_nfsim_driver.py <xml_path> <t_end> <n_steps> <seed> [rm_flags...] 
- -Useful environment variables: - NFSIM_BIN Path to NFsim executable (default: <repo>/build/NFsim) - NFSIM_SIM_PARAMS Path to RuleMonkey sim_params.tsv for model-specific flags - NFSIM_EXTRA_FLAGS Extra NFsim CLI flags, e.g. "-connect" -""" - -from __future__ import annotations - -import csv -import os -import shlex -import subprocess -import sys -import tempfile -from pathlib import Path - - -REPO_ROOT = Path(__file__).resolve().parents[1] -DEFAULT_NFSIM_BIN = REPO_ROOT / "build" / "NFsim" - - -def _usage() -> str: - return ( - "Usage: rulemonkey_nfsim_driver.py <xml_path> <t_end> <n_steps> <seed> " - "[rm_flags...]" - ) - - -def _strip_header_hash(fieldnames: list[str] | None) -> list[str] | None: - if fieldnames and fieldnames[0].startswith("#"): - fieldnames = list(fieldnames) - fieldnames[0] = fieldnames[0].lstrip("#") - return fieldnames - - -def _load_model_flags(sim_params_path: Path | None, model_name: str) -> list[str]: - if sim_params_path is None or not sim_params_path.exists(): - return [] - - with sim_params_path.open(newline="") as f: - reader = csv.DictReader(f, delimiter="\t") - reader.fieldnames = _strip_header_hash(reader.fieldnames) - for row in reader: - model = (row.get("model") or "").strip() - if not model or model.startswith("#"): - continue - if model != model_name: - continue - raw_flags = (row.get("nfsim_flags") or "").strip() - return _normalize_nfsim_flags(shlex.split(raw_flags)) - return [] - - -def _normalize_nfsim_flags(tokens: list[str]) -> list[str]: - """Drop flags that the harness already supplies explicitly. - - Keep model-specific behavior flags such as -cb, -bscb, -gml, and -utl. 
- """ - - drop_with_value = { - "-xml", - "-o", - "-sim", - "-eq", - "-oSteps", - "-oTimes", - "-seed", - "-ss", - "-rxnlog", - "-logbuffer", - "-maxcputime", - } - - keep_with_value = { - "-gml", - "-utl", - } - - out: list[str] = [] - i = 0 - while i < len(tokens): - tok = tokens[i] - if tok in keep_with_value: - if i + 1 < len(tokens): - out.extend([tok, tokens[i + 1]]) - i += 2 - continue - if tok in drop_with_value: - i += 2 - continue - out.append(tok) - i += 1 - return out - - -def _dedupe_flags(tokens: list[str]) -> list[str]: - """Keep first occurrence of standalone flags; last occurrence of value flags.""" - - value_flags = {"-gml", "-utl"} - standalone_seen: set[str] = set() - value_map: dict[str, str] = {} - ordered_values: list[str] = [] - out: list[str] = [] - - i = 0 - while i < len(tokens): - tok = tokens[i] - if tok in value_flags: - if i + 1 < len(tokens): - if tok not in value_map: - ordered_values.append(tok) - value_map[tok] = tokens[i + 1] - i += 2 - continue - if tok not in standalone_seen: - standalone_seen.add(tok) - out.append(tok) - i += 1 - - for tok in ordered_values: - out.extend([tok, value_map[tok]]) - return out - - -def _normalize_gdat_text(raw_text: str) -> str: - """Rewrite NFsim gdat text as clean tab-separated output.""" - - out_lines: list[str] = [] - for raw_line in raw_text.splitlines(): - line = raw_line.strip() - if not line: - continue - if line.startswith("#"): - parts = line.lstrip("#").strip().split() - out_lines.append("#" + "\t".join(parts)) - continue - parts = line.split() - out_lines.append("\t".join(parts)) - return "\n".join(out_lines) + ("\n" if out_lines else "") - - -def main() -> int: - if len(sys.argv) < 5: - print(_usage(), file=sys.stderr) - return 2 - - xml_path = Path(sys.argv[1]).resolve() - t_end = sys.argv[2] - n_steps = sys.argv[3] - seed = sys.argv[4] - passthrough_flags = sys.argv[5:] - - nfsim_bin = Path(os.environ.get("NFSIM_BIN", str(DEFAULT_NFSIM_BIN))).resolve() - sim_params_env = 
os.environ.get("NFSIM_SIM_PARAMS") - sim_params_path = Path(sim_params_env).resolve() if sim_params_env else None - extra_flags = shlex.split(os.environ.get("NFSIM_EXTRA_FLAGS", "")) - - if not nfsim_bin.exists(): - print(f"NFsim binary not found: {nfsim_bin}", file=sys.stderr) - return 2 - if not xml_path.exists(): - print(f"XML not found: {xml_path}", file=sys.stderr) - return 2 - - model_name = xml_path.stem - model_flags = _load_model_flags(sim_params_path, model_name) - merged_flags = _dedupe_flags(model_flags + passthrough_flags + extra_flags) - - with tempfile.TemporaryDirectory(prefix=f"rm_nfsim_{model_name}_") as td_raw: - out_gdat = Path(td_raw) / f"{model_name}.gdat" - cmd = [ - str(nfsim_bin), - "-xml", - str(xml_path), - "-sim", - str(t_end), - "-oSteps", - str(n_steps), - "-seed", - str(seed), - *merged_flags, - "-o", - str(out_gdat), - ] - result = subprocess.run( - cmd, - cwd=str(xml_path.parent), - capture_output=True, - text=True, - ) - - if result.returncode != 0 or not out_gdat.exists(): - print("NFsim driver wrapper failed.", file=sys.stderr) - print(f"Command: {' '.join(shlex.quote(x) for x in cmd)}", file=sys.stderr) - if result.stdout.strip(): - print("--- stdout ---", file=sys.stderr) - print(result.stdout.strip(), file=sys.stderr) - if result.stderr.strip(): - print("--- stderr ---", file=sys.stderr) - print(result.stderr.strip(), file=sys.stderr) - return 1 - - sys.stdout.write(_normalize_gdat_text(out_gdat.read_text())) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/validate/validate.py b/validate/validate.py index f25464ef..40ab5fad 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -6,7 +6,10 @@ import fnmatch import sys import tempfile -import bionetgen +try: + import bionetgen +except ImportError: + bionetgen = None nIterations=15 nfsimPrePath='..' 
@@ -172,6 +175,9 @@ def _bng_generate(self, outputDirectory, fileNumber): # already generated, no need to rerun BNG return + if bionetgen is None: + self.fail('bionetgen Python package is required to generate XML fixtures') + bngFileName = os.path.join(outputDirectory, 'v{0}.bngl'.format(fileNumber)) bionetgen.run(bngFileName, out=outputDirectory, suppress=True) @@ -203,25 +209,42 @@ def _run_nfsim(self, outputDirectory, fileNumber, runOptions): expect_success=True, ) - def test_connectivity_preserves_seeded_tlbr_trajectory(self): - xmlPath = os.path.join(nfsimPrePath, 'test', 'tlbr', 'tlbr.xml') + def _assert_same_seed_connectivity_parity(self, xmlPath, runOptions, label): with tempfile.TemporaryDirectory() as tmpdir: - offPath = os.path.join(tmpdir, 'tlbr_off.gdat') - onPath = os.path.join(tmpdir, 'tlbr_on.gdat') + offPath = os.path.join(tmpdir, 'off.gdat') + onPath = os.path.join(tmpdir, 'on.gdat') - self._run_nfsim_xml(xmlPath, offPath, '-sim 1 -oSteps 100 -seed 1') - self._run_nfsim_xml(xmlPath, onPath, '-sim 1 -oSteps 100 -seed 1 -connect') + connectOptions = f'{runOptions} -connect'.strip() + self._run_nfsim_xml(xmlPath, offPath, runOptions) + self._run_nfsim_xml(xmlPath, onPath, connectOptions) offHeaders, offData = self._load_gdat(offPath) onHeaders, onData = self._load_gdat(onPath) - self.assertEqual(offHeaders, onHeaders, 'Connectivity regression changed TLBR output columns') - self.assertEqual(offData.shape, onData.shape, 'Connectivity regression changed TLBR output shape') + self.assertEqual(offHeaders, onHeaders, f'Connectivity regression changed {label} output columns') + self.assertEqual(offData.shape, onData.shape, f'Connectivity regression changed {label} output shape') self.assertTrue( np.array_equal(offData, onData), - 'Connectivity regression changed the same-seed TLBR trajectory' + f'Connectivity regression changed the same-seed {label} trajectory' ) + def test_connectivity_preserves_seeded_tlbr_trajectory(self): + 
self._assert_same_seed_connectivity_parity( + os.path.join(nfsimPrePath, 'test', 'tlbr', 'tlbr.xml'), + '-sim 1 -oSteps 100 -seed 1', + 'TLBR' + ) + + def test_connectivity_preserves_seeded_local_function_trajectory(self): + # testSuite/t3 exercises local-function membership updates on a much + # smaller model than AN_chemotaxis while still reproducing the + # master-vs-connect divergence fixed by this branch. + self._assert_same_seed_connectivity_parity( + os.path.join(nfsimPrePath, 'test', 'testSuite', 't3.xml'), + '-sim 1 -oSteps 20 -seed 1', + 'testSuite t3' + ) + def test_issue48_ring_unbinding_requires_disconnection(self): outputDirectory = mfolder fileNumber = '37'