From e6aa27f45617bd3f60872a5610edefea8b14b391 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 11 May 2026 11:35:24 +0100 Subject: [PATCH 1/3] perf(components): regenerate container configs once per top-level install/update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `install` and `update` were re-running `nextflow inspect` (via `try_generate_container_configs`) once for every transitive module dependency instead of once per command. Each call is JVM startup plus pipeline parse (~17 s), so a subworkflow with 18 module deps spent ~5 minutes in container-config regeneration alone. - `install.py`: gate the call on `not silent`. Recursive installs from `install_included_components` pass `silent=True`. Also fixes top-level subworkflow installs, which previously skipped regeneration entirely because of the `component_type == "modules"` guard. - `update.py`: hoist the call out of the per-component loop to after it, and skip when `--save-diff` is in effect (no changes are applied to the working tree). Measured against nf-core/rnaseq `pr-1680`, warm cache: subworkflows install --force bam_dedup_umi 5m27s → 39s subworkflows update --update-deps --no-preview ... 4m 4s → 44s Repeating "Could not find meta.yml" warning lines 36 → 2 Two new tests mock `try_generate_container_configs` and assert it is invoked exactly once for an install/update that touches multiple module dependencies. Co-Authored-By: Claude Opus 4.7 (1M context) --- nf_core/components/install.py | 12 +++++++++--- nf_core/components/update.py | 9 ++++++--- tests/subworkflows/test_install.py | 13 +++++++++++++ tests/subworkflows/test_update.py | 15 +++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/nf_core/components/install.py b/nf_core/components/install.py index 8350159320..5323d22c85 100644 --- a/nf_core/components/install.py +++ b/nf_core/components/install.py @@ -169,9 +169,15 @@ def install(self, component: str | dict[str, str], silent: bool = False) -> bool # Install included modules and subworkflows self.install_included_components(component_dir) - # Regenerate container configuration files for the pipeline when modules are installed - if self.component_type == "modules": - try_generate_container_configs(self.directory, component_dir, component) + # Regenerate container configuration files for the pipeline once per + # top-level install. Recursive installs of dependent modules pass + # silent=True and would otherwise re-run `nextflow inspect` for every + # transitive dependency. + if not silent: + if self.component_type == "modules": + try_generate_container_configs(self.directory, component_dir, component) + else: + try_generate_container_configs(self.directory) if not silent: modules_json.load() diff --git a/nf_core/components/update.py b/nf_core/components/update.py index e679643735..2a0f04e991 100644 --- a/nf_core/components/update.py +++ b/nf_core/components/update.py @@ -309,9 +309,6 @@ def update(self, component=None, silent=False, updated=None, check_diff_exist=Tr self.modules_json.update(self.component_type, modules_repo, component, version, installed_by=None) updated.append(component) - # Regenerate container configuration files for the pipeline when modules are updated - if self.component_type == "modules": - try_generate_container_configs(self.directory) recursive_update = True modules_to_update, subworkflows_to_update = self.get_components_to_update(component) if not silent and len(modules_to_update + subworkflows_to_update) > 0 and not self.update_all: @@ -356,6 +353,12 @@ def update(self, component=None, silent=False, updated=None, check_diff_exist=Tr self.modules_json.load() self.modules_json.dump(run_prettier=True) + # Regenerate container configuration files for the pipeline once per + # top-level update. The previous in-loop call ran `nextflow inspect` + # for every transitive linked component, which dominated wall time. + if not silent and updated and not self.save_diff_fn: + try_generate_container_configs(self.directory) + return exit_value def get_single_component_info(self, component): diff --git a/tests/subworkflows/test_install.py b/tests/subworkflows/test_install.py index 91263d2847..7812bfcd01 100644 --- a/tests/subworkflows/test_install.py +++ b/tests/subworkflows/test_install.py @@ -1,4 +1,5 @@ from pathlib import Path +from unittest import mock import pytest @@ -191,6 +192,18 @@ def test_subworkflows_install_tracking_added_super_subworkflow(self): ]["installed_by"] ) == sorted(["subworkflows", "bam_sort_stats_samtools"]) + @mock.patch("nf_core.components.install.try_generate_container_configs") + def test_subworkflows_install_regenerates_container_configs_once(self, mock_gen): + """Container config regeneration runs once for a top-level subworkflow install, + not once per transitive module dependency. ``bam_sort_stats_samtools`` pulls + in five samtools modules plus the ``bam_stats_samtools`` sub-subworkflow, so + the previous per-component behaviour would have called this many times.""" + assert self.subworkflow_install.install("bam_sort_stats_samtools") is not False + assert mock_gen.call_count == 1, ( + f"Expected try_generate_container_configs to run once per top-level " + f"install, got {mock_gen.call_count} calls" + ) + def test_subworkflows_install_alternate_remote(self): """Test installing a module from a different remote with the same organization path""" install_obj = SubworkflowInstall( diff --git a/tests/subworkflows/test_update.py b/tests/subworkflows/test_update.py index 936b61c782..a1cb36838f 100644 --- a/tests/subworkflows/test_update.py +++ b/tests/subworkflows/test_update.py @@ -32,6 +32,21 @@ def test_install_and_update(self): assert update_obj.update("bam_stats_samtools") is True assert cmp_component(tmpdir, sw_path) is True + @mock.patch("nf_core.components.update.try_generate_container_configs") + def test_subworkflow_update_regenerates_container_configs_once(self, mock_gen): + """Container config regeneration runs once for a top-level update, not once + per linked component. Updating ``fastq_align_bowtie2`` from an old SHA with + ``--update-deps`` cascades to several samtools modules; before this fix that + triggered ``nextflow inspect`` per dependency.""" + assert self.subworkflow_install_old.install("fastq_align_bowtie2") + mock_gen.reset_mock() + update_obj = SubworkflowUpdate(self.pipeline_dir, show_diff=False, update_deps=True) + assert update_obj.update("fastq_align_bowtie2") is True + assert mock_gen.call_count == 1, ( + f"Expected try_generate_container_configs to run once per top-level " + f"update, got {mock_gen.call_count} calls" + ) + def test_install_at_hash_and_update(self): """Installs an old version of a subworkflow in the pipeline and updates it""" assert self.subworkflow_install_old.install("fastq_align_bowtie2") From 027c7e1df68d70ce5897981c3dd17f538a7c9116 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 11 May 2026 11:40:47 +0100 Subject: [PATCH 2/3] style: ruff format test_update.py Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/subworkflows/test_update.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/subworkflows/test_update.py b/tests/subworkflows/test_update.py index a1cb36838f..57dbc6766b 100644 --- a/tests/subworkflows/test_update.py +++ b/tests/subworkflows/test_update.py @@ -43,8 +43,7 @@ def test_subworkflow_update_regenerates_container_configs_once(self, mock_gen): update_obj = SubworkflowUpdate(self.pipeline_dir, show_diff=False, update_deps=True) assert update_obj.update("fastq_align_bowtie2") is True assert mock_gen.call_count == 1, ( - f"Expected try_generate_container_configs to run once per top-level " - f"update, got {mock_gen.call_count} calls" + f"Expected try_generate_container_configs to run once per top-level update, got {mock_gen.call_count} calls" ) def test_install_at_hash_and_update(self): From 750507b7ec5e11de3e058b5e00fd55565f0c1370 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 11 May 2026 11:47:12 +0100 Subject: [PATCH 3/3] test+style: address review (more regression tests, terser comments) - Add a regression test for `nf-core modules install`: regen runs once for a single top-level module install. - Add a regression test for `--save-diff`: regen must be skipped during a dry-run update so the pipeline's containers_*.config files don't drift while the user is still reviewing the patch. - Shorten the in-source comments at the two call sites. Co-Authored-By: Claude Opus 4.7 (1M context) --- nf_core/components/install.py | 6 ++---- nf_core/components/update.py | 5 ++--- tests/modules/test_install.py | 7 +++++++ tests/subworkflows/test_install.py | 10 ++-------- tests/subworkflows/test_update.py | 19 ++++++++++++------- 5 files changed, 25 insertions(+), 22 deletions(-) diff --git a/nf_core/components/install.py b/nf_core/components/install.py index 5323d22c85..282ce58b22 100644 --- a/nf_core/components/install.py +++ b/nf_core/components/install.py @@ -169,10 +169,8 @@ def install(self, component: str | dict[str, str], silent: bool = False) -> bool # Install included modules and subworkflows self.install_included_components(component_dir) - # Regenerate container configuration files for the pipeline once per - # top-level install. Recursive installs of dependent modules pass - # silent=True and would otherwise re-run `nextflow inspect` for every - # transitive dependency. + # Regenerate container configs once per top-level invocation + # (recursive installs pass silent=True). if not silent: if self.component_type == "modules": try_generate_container_configs(self.directory, component_dir, component) diff --git a/nf_core/components/update.py b/nf_core/components/update.py index 2a0f04e991..6357982fd1 100644 --- a/nf_core/components/update.py +++ b/nf_core/components/update.py @@ -353,9 +353,8 @@ def update(self, component=None, silent=False, updated=None, check_diff_exist=Tr self.modules_json.load() self.modules_json.dump(run_prettier=True) - # Regenerate container configuration files for the pipeline once per - # top-level update. The previous in-loop call ran `nextflow inspect` - # for every transitive linked component, which dominated wall time. + # Regenerate container configs once per top-level invocation + # (recursive updates pass silent=True; skip in --save-diff dry runs). if not silent and updated and not self.save_diff_fn: try_generate_container_configs(self.directory) diff --git a/tests/modules/test_install.py b/tests/modules/test_install.py index 92d30a494f..94cf913acb 100644 --- a/tests/modules/test_install.py +++ b/tests/modules/test_install.py @@ -1,4 +1,5 @@ from pathlib import Path +from unittest import mock import pytest @@ -44,6 +45,12 @@ def test_modules_install_trimgalore_twice(self): self.mods_install.install("trimgalore") assert self.mods_install.install("trimgalore") is True + @mock.patch("nf_core.components.install.try_generate_container_configs") + def test_modules_install_regenerates_container_configs_once(self, mock_gen): + """Container config regen runs exactly once for a top-level module install.""" + assert self.mods_install.install("trimgalore") is not False + assert mock_gen.call_count == 1 + def test_modules_install_from_gitlab(self): """Test installing a module from GitLab""" assert self.mods_install_gitlab.install("fastqc") is True diff --git a/tests/subworkflows/test_install.py b/tests/subworkflows/test_install.py index 7812bfcd01..2f25e2553e 100644 --- a/tests/subworkflows/test_install.py +++ b/tests/subworkflows/test_install.py @@ -194,15 +194,9 @@ def test_subworkflows_install_tracking_added_super_subworkflow(self): @mock.patch("nf_core.components.install.try_generate_container_configs") def test_subworkflows_install_regenerates_container_configs_once(self, mock_gen): - """Container config regeneration runs once for a top-level subworkflow install, - not once per transitive module dependency. ``bam_sort_stats_samtools`` pulls - in five samtools modules plus the ``bam_stats_samtools`` sub-subworkflow, so - the previous per-component behaviour would have called this many times.""" + """Container config regen runs once for a top-level subworkflow install with module deps.""" assert self.subworkflow_install.install("bam_sort_stats_samtools") is not False - assert mock_gen.call_count == 1, ( - f"Expected try_generate_container_configs to run once per top-level " - f"install, got {mock_gen.call_count} calls" - ) + assert mock_gen.call_count == 1 def test_subworkflows_install_alternate_remote(self): """Test installing a module from a different remote with the same organization path""" diff --git a/tests/subworkflows/test_update.py b/tests/subworkflows/test_update.py index 57dbc6766b..b5dc1e96c4 100644 --- a/tests/subworkflows/test_update.py +++ b/tests/subworkflows/test_update.py @@ -34,17 +34,22 @@ def test_install_and_update(self): @mock.patch("nf_core.components.update.try_generate_container_configs") def test_subworkflow_update_regenerates_container_configs_once(self, mock_gen): - """Container config regeneration runs once for a top-level update, not once - per linked component. Updating ``fastq_align_bowtie2`` from an old SHA with - ``--update-deps`` cascades to several samtools modules; before this fix that - triggered ``nextflow inspect`` per dependency.""" + """Container config regen runs once for a top-level update, not per linked component.""" assert self.subworkflow_install_old.install("fastq_align_bowtie2") mock_gen.reset_mock() update_obj = SubworkflowUpdate(self.pipeline_dir, show_diff=False, update_deps=True) assert update_obj.update("fastq_align_bowtie2") is True - assert mock_gen.call_count == 1, ( - f"Expected try_generate_container_configs to run once per top-level update, got {mock_gen.call_count} calls" - ) + assert mock_gen.call_count == 1 + + @mock.patch("nf_core.components.update.try_generate_container_configs") + def test_subworkflow_update_save_diff_skips_container_configs(self, mock_gen): + """``--save-diff`` is a dry run; container configs must not be regenerated.""" + assert self.subworkflow_install_old.install("fastq_align_bowtie2") + mock_gen.reset_mock() + patch_path = Path(self.pipeline_dir, "fastq_align_bowtie2.patch") + update_obj = SubworkflowUpdate(self.pipeline_dir, save_diff_fn=patch_path, update_deps=True) + assert update_obj.update("fastq_align_bowtie2") is True + assert mock_gen.call_count == 0 def test_install_at_hash_and_update(self): """Installs an old version of a subworkflow in the pipeline and updates it"""