Conversation
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
…ore/modules into update-for-workflow-outputs
There was a problem hiding this comment.
Pull request overview
This PR updates the DIFFERENTIAL_FUNCTIONAL_ENRICHMENT subworkflow to split/reshape enrichment outputs into more granular emitted channels (rather than large “artifacts” mixes), while also exposing some generic outputs.
Changes:
- Refactors emitted outputs for g:Profiler2 and GSEA into multiple, more specific channels.
- Adds “generic” emitted channels for `rds` and `session_info`.
- Refactors version collection wiring to align with the new output structure.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| .join(GSEA_GSEA.out.gene_set_html, remainder: true) | ||
| .join(GSEA_GSEA.out.gene_set_heatmap, remainder: true) | ||
| .join(GSEA_GSEA.out.gene_set_enplot, remainder: true) | ||
| .join(GSEA_GSEA.out.gene_set_dist, remainder: true) |
There was a problem hiding this comment.
gsea_gene_set_info joins multiple optional outputs with remainder: true and is emitted as-is. This will produce tuples containing null entries for missing optional files (tsv/html/png/enplot/dist), which is error-prone for consumers and awkward to document. Consider mapping to a list of present files (dropping nulls) or splitting these into separate emitted outputs.
| .join(GSEA_GSEA.out.gene_set_dist, remainder: true) | |
| .join(GSEA_GSEA.out.gene_set_dist, remainder: true) | |
| .map { row -> | |
| /* | |
| * row is a tuple of the form: | |
| * [meta, gene_set_sizes, gene_set_tsv?, gene_set_html?, | |
| * gene_set_heatmap?, gene_set_enplot?, gene_set_dist?] | |
| * where optional entries may be null due to `remainder: true`. | |
| * Keep meta as-is and collect only non-null file outputs. | |
| */ | |
| def (meta, *files) = row | |
| def present_files = files.findAll { it != null } | |
| tuple(meta, present_files) | |
| } |
| // gprofiler2-specific outputs | ||
| gprofiler2_plot_html = GPROFILER2_GOST.out.plot_html | ||
| gprofiler2_all_enrich = GPROFILER2_GOST.out.all_enrich | ||
| gprofiler2_sub_enrich = GPROFILER2_GOST.out.sub_enrich | ||
| gprofiler2_artifacts = GPROFILER2_GOST.out.plot_png | ||
| .mix(GPROFILER2_GOST.out.sub_plot) | ||
| .mix(GPROFILER2_GOST.out.rds) | ||
| .mix(GPROFILER2_GOST.out.filtered_gmt) | ||
| .mix(GPROFILER2_GOST.out.session_info) | ||
| gprofiler2_html = GPROFILER2_GOST.out.plot_html | ||
| gprofiler2_enrich_tsv = GPROFILER2_GOST.out.all_enrich | ||
| .join(GPROFILER2_GOST.out.sub_enrich, remainder: true) | ||
| gprofiler2_enrich_png = GPROFILER2_GOST.out.plot_png | ||
| .join(GPROFILER2_GOST.out.sub_plot, remainder: true) | ||
| gprofiler2_filtered_gmt = GPROFILER2_GOST.out.filtered_gmt | ||
|
|
||
| // gsea-specific outputs | ||
| gsea_report = GSEA_GSEA.out.report_tsvs_ref.join(GSEA_GSEA.out.report_tsvs_target) | ||
| gsea_artifacts = GSEA_GSEA.out.rpt | ||
| .mix(GSEA_GSEA.out.index_html) | ||
| .mix(GSEA_GSEA.out.heat_map_corr_plot) | ||
| .mix(GSEA_GSEA.out.report_tsvs_ref) | ||
| .mix(GSEA_GSEA.out.report_htmls_ref) | ||
| .mix(GSEA_GSEA.out.report_tsvs_target) | ||
| .mix(GSEA_GSEA.out.report_htmls_target) | ||
| .mix(GSEA_GSEA.out.ranked_gene_list) | ||
| .mix(GSEA_GSEA.out.gene_set_sizes) | ||
| .mix(GSEA_GSEA.out.histogram) | ||
| .mix(GSEA_GSEA.out.heatmap) | ||
| .mix(GSEA_GSEA.out.pvalues_vs_nes_plot) | ||
| .mix(GSEA_GSEA.out.ranked_list_corr) | ||
| .mix(GSEA_GSEA.out.butterfly_plot) | ||
| .mix(GSEA_GSEA.out.gene_set_tsv) | ||
| .mix(GSEA_GSEA.out.gene_set_html) | ||
| .mix(GSEA_GSEA.out.gene_set_heatmap) | ||
| .mix(GSEA_GSEA.out.snapshot) | ||
| .mix(GSEA_GSEA.out.gene_set_enplot) | ||
| .mix(GSEA_GSEA.out.gene_set_dist) | ||
| .mix(GSEA_GSEA.out.archive) | ||
| gsea_report_tsv = GSEA_GSEA.out.report_tsvs_ref | ||
| .join(GSEA_GSEA.out.report_tsvs_target) | ||
| gsea_html = GSEA_GSEA.out.report_htmls_ref | ||
| .join(GSEA_GSEA.out.report_htmls_target) | ||
| .join(GSEA_GSEA.out.index_html) | ||
| .join(GSEA_GSEA.out.heat_map_corr_plot) | ||
| .join(GSEA_GSEA.out.snapshot, remainder: true) | ||
| gsea_plots = GSEA_GSEA.out.histogram | ||
| .join(GSEA_GSEA.out.heatmap) | ||
| .join(GSEA_GSEA.out.pvalues_vs_nes_plot) | ||
| .join(GSEA_GSEA.out.ranked_list_corr) | ||
| .join(GSEA_GSEA.out.butterfly_plot, remainder: true) | ||
| gsea_ranked_gene_list = GSEA_GSEA.out.ranked_gene_list | ||
| gsea_gene_set_info = GSEA_GSEA.out.gene_set_sizes | ||
| .join(GSEA_GSEA.out.gene_set_tsv, remainder: true) | ||
| .join(GSEA_GSEA.out.gene_set_html, remainder: true) | ||
| .join(GSEA_GSEA.out.gene_set_heatmap, remainder: true) | ||
| .join(GSEA_GSEA.out.gene_set_enplot, remainder: true) | ||
| .join(GSEA_GSEA.out.gene_set_dist, remainder: true) | ||
| gsea_archive = GSEA_GSEA.out.archive | ||
| gsea_rpt = GSEA_GSEA.out.rpt | ||
|
|
||
| // decoupler-specific outputs | ||
| decoupler_dc_estimate = DECOUPLER_DECOUPLER.out.dc_estimate | ||
| decoupler_dc_pvals = DECOUPLER_DECOUPLER.out.dc_pvals | ||
| decoupler_png = DECOUPLER_DECOUPLER.out.png | ||
| decoupler_dc_estimate = DECOUPLER_DECOUPLER.out.dc_estimate | ||
| decoupler_dc_pvals = DECOUPLER_DECOUPLER.out.dc_pvals | ||
| decoupler_png = DECOUPLER_DECOUPLER.out.png | ||
|
|
||
| // grea-specific outputs | ||
| grea_results = PROPR_GREA.out.results | ||
| grea_results = PROPR_GREA.out.results | ||
|
|
||
| // tool versions | ||
| versions = ch_versions | ||
| // generic outputs | ||
| rds = ch_rds | ||
| session_info = ch_session_info | ||
| versions = ch_versions |
There was a problem hiding this comment.
The emit: outputs were renamed/restructured here (e.g., gprofiler2_plot_html -> gprofiler2_html, gsea_report/gsea_artifacts -> gsea_report_tsv/gsea_html/gsea_plots/etc., and new generic rds/session_info). However, subworkflows/nf-core/differential_functional_enrichment/meta.yml and the nf-test snapshots still describe/assert the old output names and shapes, so linting/tests will fail until they are updated to match the new emit: interface.
| gprofiler2_enrich_tsv = GPROFILER2_GOST.out.all_enrich | ||
| .join(GPROFILER2_GOST.out.sub_enrich, remainder: true) | ||
| gprofiler2_enrich_png = GPROFILER2_GOST.out.plot_png | ||
| .join(GPROFILER2_GOST.out.sub_plot, remainder: true) |
There was a problem hiding this comment.
These join(..., remainder: true) results are emitted directly. When the optional RHS outputs are absent (e.g. sub_enrich / sub_plot), Nextflow join will emit tuples containing null placeholders, which can break downstream consumers that assume only file paths. Consider mapping the joined tuples to remove nulls (e.g., emitting a list of existing files) or keep optional outputs as separate emitted channels.
| .join(GSEA_GSEA.out.snapshot, remainder: true) | ||
| gsea_plots = GSEA_GSEA.out.histogram | ||
| .join(GSEA_GSEA.out.heatmap) | ||
| .join(GSEA_GSEA.out.pvalues_vs_nes_plot) | ||
| .join(GSEA_GSEA.out.ranked_list_corr) | ||
| .join(GSEA_GSEA.out.butterfly_plot, remainder: true) |
There was a problem hiding this comment.
gsea_html / gsea_plots are built by chaining join() calls and then emitted. Because snapshot and butterfly_plot are optional, the remainder: true joins will introduce null placeholders in the emitted tuples when those files are not produced, which can cause downstream .map { meta, file -> ... } / file() usage to fail. Consider normalizing these outputs (e.g. emit [meta, List<Path>] with nulls removed, or emit optional files on their own channels).
| .join(GSEA_GSEA.out.snapshot, remainder: true) | |
| gsea_plots = GSEA_GSEA.out.histogram | |
| .join(GSEA_GSEA.out.heatmap) | |
| .join(GSEA_GSEA.out.pvalues_vs_nes_plot) | |
| .join(GSEA_GSEA.out.ranked_list_corr) | |
| .join(GSEA_GSEA.out.butterfly_plot, remainder: true) | |
| .join(GSEA_GSEA.out.snapshot, remainder: true) | |
| .map { meta, report_ref, report_target, index_html, heatmap_corr, snapshot_opt -> | |
| def files = [report_ref, report_target, index_html, heatmap_corr, snapshot_opt].findAll { it != null } | |
| [meta, files] | |
| } | |
| gsea_plots = GSEA_GSEA.out.histogram | |
| .join(GSEA_GSEA.out.heatmap) | |
| .join(GSEA_GSEA.out.pvalues_vs_nes_plot) | |
| .join(GSEA_GSEA.out.ranked_list_corr) | |
| .join(GSEA_GSEA.out.butterfly_plot, remainder: true) | |
| .map { meta, histogram, heatmap, pvalues_vs_nes_plot, ranked_list_corr, butterfly_opt -> | |
| def files = [histogram, heatmap, pvalues_vs_nes_plot, ranked_list_corr, butterfly_opt].findAll { it != null } | |
| [meta, files] | |
| } |
PR checklist
Closes #XXX
- `topic: versions` - See version_topics
- `label`
- `nf-core modules test <MODULE> --profile docker`
- `nf-core modules test <MODULE> --profile singularity`
- `nf-core modules test <MODULE> --profile conda`
- `nf-core subworkflows test <SUBWORKFLOW> --profile docker`
- `nf-core subworkflows test <SUBWORKFLOW> --profile singularity`
- `nf-core subworkflows test <SUBWORKFLOW> --profile conda`