From 5d33d7be51f941d3bb9536dea7e9b24553d9b177 Mon Sep 17 00:00:00 2001 From: James Gilbert Date: Thu, 29 Jan 2026 14:30:45 +0000 Subject: [PATCH 1/3] Optionally write JSON report file alongside kmerpairs.smu file --- src/smudgeplot/cli.py | 15 ++++++++++++- src/smudgeplot/smudgeplot.py | 42 ++++++++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/smudgeplot/cli.py b/src/smudgeplot/cli.py index b12a4e1..7601ef4 100755 --- a/src/smudgeplot/cli.py +++ b/src/smudgeplot/cli.py @@ -99,6 +99,7 @@ def __init__(self): default=False, help="Print the version and exit.", ) + # print version is a special case if len(sys.argv) > 1: if sys.argv[1] in ["-v", "--version"]: @@ -149,6 +150,7 @@ def hetmers(self): "-L", help="Count threshold below which k-mers are considered erroneous.", type=int, + required=True, ) argparser.add_argument("-t", help="Number of threads (default 4).", type=int, default=4) argparser.add_argument( @@ -162,6 +164,13 @@ def hetmers(self): default=".", ) argparser.add_argument("--verbose", action="store_true", default=False, help="Verbose mode.") + argparser.add_argument( + "--json_report", + action="store_true", + default=False, + help="Write a JSON format report recording the selected parameters (default False)", + ) + self.arguments = argparser.parse_args(sys.argv[2:]) def peak_aggregation(self): @@ -337,6 +346,7 @@ def main(): fin() if _parser.task == "hetmers": + hetmer_args = [ f"-o{args.o}", f"-e{args.L}", @@ -350,6 +360,9 @@ def main(): run_binary("hetmers", hetmer_args) + if args.json_report: + smg.save_hetmers_json_report(args.o, input_params=vars(args)) + fin() if _parser.task == "extract": @@ -368,7 +381,7 @@ def main(): fin() - title = args.title or str(Path(args.infile).with_suffix("").name) + title = args.title or Path(args.infile).stem if _parser.task == "plot": smudge_tab = smg.read_csv(args.smudgefile, sep="\t", names=["structure", "size", "rel_size"]) diff --git a/src/smudgeplot/smudgeplot.py b/src/smudgeplot/smudgeplot.py index 59a123d..5d7a553 100644 --- a/src/smudgeplot/smudgeplot.py +++ b/src/smudgeplot/smudgeplot.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 import json +import shlex import sys from collections import defaultdict from importlib.metadata import version from math import ceil, log +from pathlib import Path from statistics import fmean import matplotlib as mpl @@ -12,7 +14,7 @@ import numpy as np from matplotlib.collections import PatchCollection from numpy import arange, argmin, concatenate -from pandas import DataFrame, Series, concat, read_csv # type: ignore +from pandas import DataFrame, Series, concat, read_csv class Coverages: @@ -376,11 +378,14 @@ def generate_plots( if json_report: write_json_report(smudgeplot_data, input_params) + def write_json_report(smg_data, input_params=None, min_size=0.03): + hetmers_report = read_hetmers_report_json(input_params["infile"]) report = { "version": version("smudgeplot"), - "commandline_arguments": sys.argv[1:], + "commandline_arguments": shlex.join(sys.argv[1:]), "input_parameters": input_params, + "hetmers": hetmers_report, "haploid_coverage": float(f"{smg_data.cov:.3f}"), "error_fraction": smg_data.error_fraction, "top_smudges": [ @@ -400,8 +405,37 @@ def write_json_report(smg_data, input_params=None, min_size=0.03): for row in smg_data.smudge_tab.itertuples(index=False) ], } - with open(smg_data.json_report_file, "w") as fh: - fh.write(json.dumps(report, indent=2) + "\n") + write_json_file(smg_data.json_report_file, report) + + +def save_hetmers_json_report(outfile, input_params=None): + report = { + "version": version("smudgeplot"), + "commandline_arguments": shlex.join(sys.argv[1:]), + "input_parameters": input_params, + } + write_json_file(f"{outfile}_report.json", report) + + +def write_json_file(filename: str, data): + Path(filename).write_text(json.dumps(data, indent=2) + "\n") + + +def read_hetmers_report_json(hetmers: str): + """ + Returns the parsed contents of the hetmers report JSON file if it exists + and its modification time is the same as or more recent than the hetmers + file itself. + """ + hetmers_file = Path(hetmers) + report_file = Path(f"{hetmers_file.stem}_report.json") + + if ( + report_file.exists() + and report_file.stat().st_mtime >= hetmers_file.stat().st_mtime + ): + return json.loads(report_file.read_text()) + return None def prepare_smudgeplot_data_for_plotting(smudgeplot_data, output, title, fmt=None, upper_ylim=None): From 2c7d74ca7535139cdfa951ddbaf11ecda1c19467 Mon Sep 17 00:00:00 2001 From: James Gilbert Date: Thu, 29 Jan 2026 15:42:18 +0000 Subject: [PATCH 2/3] Rename JSON report hetmers key to hetmers_input --- src/smudgeplot/smudgeplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smudgeplot/smudgeplot.py b/src/smudgeplot/smudgeplot.py index 5d7a553..f7aa7b5 100644 --- a/src/smudgeplot/smudgeplot.py +++ b/src/smudgeplot/smudgeplot.py @@ -385,7 +385,7 @@ def write_json_report(smg_data, input_params=None, min_size=0.03): "version": version("smudgeplot"), "commandline_arguments": shlex.join(sys.argv[1:]), "input_parameters": input_params, - "hetmers": hetmers_report, + "hetmers_input": hetmers_report, "haploid_coverage": float(f"{smg_data.cov:.3f}"), "error_fraction": smg_data.error_fraction, "top_smudges": [ From 8a71b8ec99ab31368947c9701f720b642917617f Mon Sep 17 00:00:00 2001 From: James Gilbert Date: Fri, 30 Jan 2026 10:03:32 +0000 Subject: [PATCH 3/3] Fix coverages typo --- playground/alternative_fitting/pair_clustering.py | 2 +- src/smudgeplot/cli.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/playground/alternative_fitting/pair_clustering.py b/playground/alternative_fitting/pair_clustering.py index 1cd3eb3..059b40c 100644 --- a/playground/alternative_fitting/pair_clustering.py +++ b/playground/alternative_fitting/pair_clustering.py @@ -13,7 +13,7 @@ #### parser = argparse.ArgumentParser() -parser.add_argument('infile', nargs='?', help='name of the input tsv file with covarages and frequencies.') +parser.add_argument('infile', nargs='?', help='name of the input tsv file with coverages and frequencies.') parser.add_argument('-nf', '-noise_filter', help='Do not agregate into smudge k-mer pairs with frequency lower than this parameter', type=int, default=50) parser.add_argument('-d', '-distance', help='Manthattan distance of k-mer pairs that are considered neioboring for the local agregation purposes.', type=int, default=5) parser.add_argument('--mask_errors', help='instead of reporting assignments to individual smudges, just remove all monotonically decreasing points from the error line', action="store_true", default = False) diff --git a/src/smudgeplot/cli.py b/src/smudgeplot/cli.py index 7601ef4..30f0cec 100755 --- a/src/smudgeplot/cli.py +++ b/src/smudgeplot/cli.py @@ -182,7 +182,7 @@ def peak_aggregation(self): description="Aggregates smudges using local aggregation algorithm.") argparser.add_argument( "infile", - help="Name of the input smu file with covarages and frequencies.", + help="Name of the input smu file with coverages and frequencies.", ) argparser.add_argument( "-nf", @@ -261,7 +261,7 @@ def all(self): description="Runs all the steps (with default options).") argparser.add_argument( "infile", - help="Name of the input tsv file with covarages and frequencies.", + help="Name of the input tsv file with coverages and frequencies.", ) argparser.add_argument( "-o",