From 992c5bfe24388e8c92a646520891da83d508e8b3 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 14 Apr 2025 17:51:47 -0400 Subject: [PATCH 1/4] chore: use ccbr_tools@v0.2.3 for CLI implementation --- pyproject.toml | 2 +- src/__main__.py | 118 ++++++++++++++++++--------- src/util.py | 208 ------------------------------------------------ 3 files changed, 82 insertions(+), 246 deletions(-) delete mode 100644 src/util.py diff --git a/pyproject.toml b/pyproject.toml index 3ba17e0..f9f1da2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "ccbr_tools@git+https://github.com/CCBR/Tools", + "ccbr_tools@git+https://github.com/CCBR/Tools@v0.2", "cffconvert >= 2.0.0", "Click >= 8.1.3", "pyyaml >= 6.0" diff --git a/src/__main__.py b/src/__main__.py index 0614e60..1cd478a 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -1,40 +1,44 @@ """ Entrypoint for LOGAN CLI - -Check out the wiki for a detailed look at customizing this file: -https://github.com/beardymcjohnface/Snaketool/wiki/Customising-your-Snaketool """ -import os import click -from .util import ( - nek_base, - get_version, - copy_config, - OrderedCommands, - run_nextflow, - print_citation, -) +import os +import pathlib + +import ccbr_tools.pkg_util +import ccbr_tools.pipeline.util +import ccbr_tools.pipeline.nextflow -def common_options(func): - """Common options decorator for use with click commands.""" - options = [ - click.argument("nextflow_args", nargs=-1), - ] - for option in reversed(options): - func = option(func) - return func +def repo_base(*paths): + basedir = pathlib.Path(__file__).absolute().parent.parent + return basedir.joinpath(*paths) + + +def print_citation_flag(ctx, param, value): + if not value or ctx.resilient_parsing: + return + ccbr_tools.pkg_util.print_citation( + citation_file=repo_base("CITATION.cff"), output_format="bibtex" + ) + ctx.exit() @click.group( - cls=OrderedCommands, 
context_settings=dict(help_option_names=["-h", "--help"]) + cls=ccbr_tools.pkg_util.CustomClickGroup, + context_settings=dict(help_option_names=["-h", "--help"]), +) +@click.version_option( + ccbr_tools.pkg_util.get_version(repo_base=repo_base), + "-v", + "--version", + is_flag=True, ) -@click.version_option(get_version(), "-v", "--version", is_flag=True) @click.option( "--citation", is_flag=True, - callback=print_citation, + callback=print_citation_flag, expose_value=False, is_eager=True, help="Print the citation in bibtex format and exit.", @@ -63,6 +67,8 @@ def cli(): """ +# DEVELOPER NOTE: cannot use single-hyphen options e.g. -m, -o or else it may clash with nextflow's cli options +# e.g. -profile clashed with -o (--output) and caused the command to be parsed as "-pr -o file" @click.command( epilog=help_msg_extra, context_settings=dict( @@ -74,9 +80,16 @@ def cli(): "main_path", help="Path to the logan main.nf file or the GitHub repo (CCBR/LOGAN). Defaults to the version installed in the $PATH.", type=str, - default=nek_base(os.path.join("main.nf")), + default=repo_base("main.nf"), show_default=True, ) +@click.option( + "--output", + help="Output directory path for logan init & run. Equivalient to nextflow launchDir. Defaults to your current working directory.", + type=click.Path(file_okay=False, dir_okay=True, writable=True), + default=pathlib.Path.cwd(), + show_default=False, +) @click.option( "--mode", "_mode", @@ -85,8 +98,17 @@ def cli(): default="local", show_default=True, ) -@common_options -def run(main_path, _mode, **kwargs): +@click.option( + "--forceall", + "-F", + "force_all", + help="Force all processes to run (i.e. 
do not use nextflow -resume)", + is_flag=True, + default=False, + show_default=True, +) +@click.argument("nextflow_args", nargs=-1) +def run(main_path, output, _mode, force_all, **kwargs): """Run the workflow""" if ( # this is the only acceptable github repo option for logan main_path != "CCBR/LOGAN" @@ -96,21 +118,41 @@ def run(main_path, _mode, **kwargs): raise FileNotFoundError( f"Path to the logan main.nf file not found: {main_path}" ) - - run_nextflow( - nextfile_path=main_path, - mode=_mode, - **kwargs, - ) + output_dir = output if isinstance(output, pathlib.Path) else pathlib.Path(output) + ccbr_tools.pkg_util.msg_box("Output Directory", errmsg=str(output_dir)) + if not output_dir.is_dir() or not (output_dir / "nextflow.config").exists(): + raise FileNotFoundError( + f"output directory not initialized: {output_dir}. Hint: you must initialize the output directory with `logan init --output {output_dir}`" + ) + current_wd = os.getcwd() + try: + os.chdir(output_dir) + ccbr_tools.pipeline.nextflow.run( + nextfile_path=main_path, + mode=_mode, + force_all=force_all, + pipeline_name="LOGAN", + **kwargs, + ) + finally: + os.chdir(current_wd) @click.command() -def init(**kwargs): - """Initialize the working directory by copying the system default config files""" +@click.option( + "--output", + help="Output directory path for logan init & run. Equivalient to nextflow launchDir. 
Defaults to your current working directory.", + type=click.Path(file_okay=False, dir_okay=True, writable=True), + default=pathlib.Path.cwd(), + show_default=False, +) +def init(output): + """Initialize the launch directory by copying the system default config files""" + output_dir = output if isinstance(output, pathlib.Path) else pathlib.Path(output) + ccbr_tools.pkg_util.msg_box(f"Initializing LOGAN in {output_dir}") + (output_dir / "log/").mkdir(parents=True, exist_ok=True) paths = ("nextflow.config", "conf/", "assets/") - copy_config(paths) - if not os.path.exists("log/"): - os.mkdir("log/") + ccbr_tools.pipeline.util.copy_config(paths, repo_base=repo_base, outdir=output_dir) cli.add_command(run) @@ -121,5 +163,7 @@ def main(): cli() +cli(prog_name="logan") + if __name__ == "__main__": main() diff --git a/src/util.py b/src/util.py deleted file mode 100644 index 0df6bb3..0000000 --- a/src/util.py +++ /dev/null @@ -1,208 +0,0 @@ -from cffconvert.cli.create_citation import create_citation -from cffconvert.cli.validate_or_write_output import validate_or_write_output -from time import localtime, strftime - -import click -import collections.abc -import os -import pprint -import shutil -import stat -import subprocess -import sys -import yaml - - -def nek_base(rel_path): - basedir = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0] - return os.path.join(basedir, rel_path) - - -def get_version(): - with open(nek_base("VERSION"), "r") as f: - version = f.readline() - return version - - -def print_citation(context, param, value): - if not value or context.resilient_parsing: - return - citation = create_citation(nek_base("CITATION.cff"), None) - # click.echo(citation._implementation.cffobj['message']) - validate_or_write_output(None, "bibtex", False, citation) - context.exit() - - -def msg(err_message): - tstamp = strftime("[%Y:%m:%d %H:%M:%S] ", localtime()) - click.echo(tstamp + err_message, err=True) - - -def msg_box(splash, errmsg=None): - msg("-" * 
(len(splash) + 4)) - msg(f"| {splash} |") - msg(("-" * (len(splash) + 4))) - if errmsg: - click.echo("\n" + errmsg, err=True) - - -def append_config_block(nf_config="nextflow.config", scope=None, **kwargs): - with open(nf_config, "a") as f: - f.write(scope.rstrip() + "{" + "\n") - for k in kwargs: - f.write(f"{k} = {kwargs[k]}\n") - f.write("}\n") - - -def copy_config(config_paths, overwrite=True): - msg(f"Copying default config files to current working directory") - for local_config in config_paths: - system_config = nek_base(local_config) - if os.path.isfile(system_config): - shutil.copyfile(system_config, local_config) - elif os.path.isdir(system_config): - shutil.copytree(system_config, local_config, dirs_exist_ok=overwrite) - else: - raise FileNotFoundError(f"Cannot copy {system_config} to {local_config}") - - -def read_config(file): - with open(file, "r") as stream: - _config = yaml.safe_load(stream) - return _config - - -def update_config(config, overwrite_config): - def _update(d, u): - for key, value in u.items(): - if isinstance(value, collections.abc.Mapping): - d[key] = _update(d.get(key, {}), value) - else: - d[key] = value - return d - - _update(config, overwrite_config) - - -def write_config(_config, file): - msg(f"Writing runtime config file to {file}") - with open(file, "w") as stream: - yaml.dump(_config, stream) - - -def chmod_bins_exec(): - """Ensure that all files in bin/ are executable. - - It appears that setuptools strips executable permissions from package_data files, - yet post-install scripts are not possible with the pyproject.toml format. - So this function will run when `run()` is called. - Without this hack, nextflow processes that call scripts in bin/ fail. 
- - https://stackoverflow.com/questions/18409296/package-data-files-with-executable-permissions - https://github.com/pypa/setuptools/issues/2041 - https://stackoverflow.com/questions/76320274/post-install-script-for-pyproject-toml-projects - """ - bin_dir = nek_base("bin/") - for filename in os.listdir(bin_dir): - bin_path = os.path.join(bin_dir, filename) - if os.path.isfile(bin_path): - file_stat = os.stat(bin_path) - # below is equivalent to `chmod +x` - os.chmod( - bin_path, file_stat.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH - ) - - -class OrderedCommands(click.Group): - """Preserve the order of subcommands when printing --help""" - - def list_commands(self, ctx: click.Context): - return list(self.commands) - - -def scontrol_show(): - scontrol_dict = dict() - scontrol_out = subprocess.run( - "scontrol show config", shell=True, capture_output=True, text=True - ).stdout - if len(scontrol_out) > 0: - for line in scontrol_out.split("\n"): - line_split = line.split("=") - if len(line_split) > 1: - scontrol_dict[line_split[0].strip()] = line_split[1].strip() - return scontrol_dict - - -hpc_options = { - "biowulf": {"profile": "biowulf", "slurm": "assets/slurm_header_biowulf.sh"}, - "fnlcr": { - "profile": "frce", - "slurm": "assets/slurm_header_frce.sh", - }, -} - - -def get_hpc(): - scontrol_out = scontrol_show() - if "ClusterName" in scontrol_out.keys(): - hpc = scontrol_out["ClusterName"] - else: - hpc = None - return hpc - - -def run_nextflow( - nextfile_path=None, - merge_config=None, - threads=None, - nextflow_args=None, - mode="local", -): - """Run a Nextflow workflow""" - nextflow_command = ["nextflow", "run", nextfile_path] - - hpc = get_hpc() - if mode == "slurm" and not hpc: - raise ValueError("mode is 'slurm' but no HPC environment was detected") - # add any additional Nextflow commands - args_dict = dict() - prev_arg = "" - for arg in nextflow_args: - if arg.startswith("-"): - args_dict[arg] = "" - elif prev_arg.startswith("-"): - 
args_dict[prev_arg] = arg - prev_arg = arg - # make sure profile matches biowulf or frce - profiles = ( - set(args_dict["-profile"].split(",")) - if "-profile" in args_dict.keys() - else set() - ) - if mode == "slurm": - profiles.add("slurm") - if hpc: - profiles.add(hpc_options[hpc]["profile"]) - args_dict["-profile"] = ",".join(sorted(profiles)) - nextflow_command += list(f"{k} {v}" for k, v in args_dict.items()) - - # Print nextflow command - nextflow_command = " ".join(str(nf) for nf in nextflow_command) - msg_box("Nextflow command", errmsg=nextflow_command) - - if mode == "slurm": - slurm_filename = "submit_slurm.sh" - with open(slurm_filename, "w") as sbatch_file: - with open(nek_base(hpc_options[hpc]["slurm"]), "r") as template: - sbatch_file.writelines(template.readlines()) - sbatch_file.write(nextflow_command) - run_command = f"sbatch {slurm_filename}" - msg_box("Slurm batch job", errmsg=run_command) - elif mode == "local": - if hpc: - nextflow_command = f'bash -c "module load nextflow && {nextflow_command}"' - run_command = nextflow_command - else: - raise ValueError(f"mode {mode} not recognized") - # Run Nextflow!!! 
- subprocess.run(run_command, shell=True, check=True) From bcebdd4164a69b4b949c126ebba78d6a1837e3a9 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 14 Apr 2025 17:54:08 -0400 Subject: [PATCH 2/4] chore: update spooker usage --- lib/Utils.groovy | 4 ++-- nextflow.config | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 3fe6464..4854446 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -2,14 +2,14 @@ class Utils { // run spooker for the workflow public static String spooker(workflow) { def pipeline_name = "${workflow.manifest.name.tokenize('/')[-1]}" - def command_string = "spooker ${workflow.launchDir} ${pipeline_name}" def out = new StringBuilder() def err = new StringBuilder() def spooker_in_path = check_command_in_path("spooker") if (spooker_in_path) { try { println "Running spooker" - def command = command_string.execute() + def spooker_command = "spooker ${workflow.launchDir} ${pipeline_name} ${workflow.manifest.version}" + def command = spooker_command.execute() command.consumeProcessOutput(out, err) command.waitFor() } catch(IOException e) { diff --git a/nextflow.config b/nextflow.config index d740955..b0f6ac9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -34,7 +34,7 @@ params { lofreq_convert = "${projectDir}/bin/add_gt_lofreq.sh" strelka_convert = "${projectDir}/bin/strelka_convert.py" split_regions = "70" //Number of regions to split - + vep_cache = "/fdb/VEP/102/cache" //SUB WORKFLOWS to SPLIT @@ -45,16 +45,16 @@ params { snv = null sv = null structural = null - cnv = null + cnv = null copynumber = null - + bam=null align=null indelrealign=null no_tonly=null ffpe=null exome=null - + //Set all Inputs to null sample_sheet=null fastq_file_input=null @@ -64,7 +64,7 @@ params { fastq_input=null bam_input=null BAMINPUT=null - + split_fastq = 0 //Default to no split for now but use 200000000 in future callers = 
"mutect2,deepsomatic,octopus,strelka,lofreq,muse,vardict,varscan" tonlycallers = "mutect2,deepsomatic,octopus,vardict,varscan" @@ -155,6 +155,18 @@ report { file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } +String pipeline_version = new File("${projectDir}/VERSION").text + +manifest { + name = "CCBR/LOGAN" + author = "CCR Collaborative Bioinformatics Resource" + homePage = "https://github.com/CCBR/LOGAN" + description = "whoLe genOme-sequencinG Analysis pipeliNe" + mainScript = "main.nf" + defaultBranch = "main" + version = "${pipeline_version}" +} + // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { From 0e3ac4f235a3409d27abe04885b39a6577df230b Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 14 Apr 2025 17:57:09 -0400 Subject: [PATCH 3/4] chore: sync default nxf options w/ other pipelines --- conf/biowulf.config | 5 +---- nextflow.config | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/conf/biowulf.config b/conf/biowulf.config index 371a41c..9e93c91 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -32,8 +32,7 @@ process { scratch = '/lscratch/$SLURM_JOBID' stageInMode = 'symlink' stageOutMode = 'rsync' - // for running pipeline on group sharing data directory, this can avoid inconsistent files timestamps - cache = 'lenient' + cache = 'deep' //Additional Process for BWAMEM2 withName:bwamem2 { @@ -65,5 +64,3 @@ process { } } - - diff --git a/nextflow.config b/nextflow.config index b0f6ac9..617315d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,8 +71,8 @@ params { cnvcallers = "purple,sequenza,freec,ascat,cnvkit" svcallers = "manta,gridss,svaba" intervals = null - publish_dir_mode = 'symlink' - outdir = 'results' + publish_dir_mode = 'link' + outdir = 'output' } From b8c301ff69dcad0d7a37ca8ea519bb1153c205ee Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 14 Apr 2025 17:59:52 -0400 Subject: [PATCH 4/4] chore: 
update CHANGELOG.md --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e567cce..073b976 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ - Fix resources for qualimap bamqc. (#79, @dnousome) - Now using the readthedocs theme for the docs website. (#84, @kelly-sovacool) - LOGAN is now archived in Zenodo with DOI `10.5281/zenodo.14907169`. (#87, @kelly-sovacool) +- CLI updates: (#93, @kelly-sovacool) + - Use `nextflow run -resume` by default, or turn it off with `logan run --forceall`. + - Add `--output` argument for `logan init` and `logan run`. + - If not provided, commands are run in the current working directory. + - This is equivalent to the nextflow `$launchDir` constant. + - The nextflow preview is printed before launching the actual run. + - Set the `publish_dir_mode` nextflow option to `link` by default. + - On biowulf, set the `process.cache` nextflow option to `deep` by default rather than `lenient`. ## LOGAN 0.2.0