From 5d33d7be51f941d3bb9536dea7e9b24553d9b177 Mon Sep 17 00:00:00 2001
From: James Gilbert <jgrg@sanger.ac.uk>
Date: Thu, 29 Jan 2026 14:30:45 +0000
Subject: [PATCH 1/3] Optionally write JSON report file alongside kmerpairs.smu
 file

---
 src/smudgeplot/cli.py        | 15 ++++++++++++-
 src/smudgeplot/smudgeplot.py | 42 ++++++++++++++++++++++++++++++++----
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/src/smudgeplot/cli.py b/src/smudgeplot/cli.py
index b12a4e1..7601ef4 100755
--- a/src/smudgeplot/cli.py
+++ b/src/smudgeplot/cli.py
@@ -99,6 +99,7 @@ def __init__(self):
             default=False,
             help="Print the version and exit.",
         )
+
         # print version is a special case
         if len(sys.argv) > 1:
             if sys.argv[1] in ["-v", "--version"]:
@@ -149,6 +150,7 @@ def hetmers(self):
             "-L",
             help="Count threshold below which k-mers are considered erroneous.",
             type=int,
+            required=True,
         )
         argparser.add_argument("-t", help="Number of threads (default 4).", type=int, default=4)
         argparser.add_argument(
@@ -162,6 +164,13 @@ def hetmers(self):
             default=".",
         )
         argparser.add_argument("--verbose", action="store_true", default=False, help="Verbose mode.")
+        argparser.add_argument(
+            "--json_report",
+            action="store_true",
+            default=False,
+            help="Write a JSON format report recording the selected parameters (default False)",
+        )
+
         self.arguments = argparser.parse_args(sys.argv[2:])
 
     def peak_aggregation(self):
@@ -337,6 +346,7 @@ def main():
         fin()
 
     if _parser.task == "hetmers":
+
         hetmer_args = [
             f"-o{args.o}",
             f"-e{args.L}",
@@ -350,6 +360,9 @@ def main():
 
         run_binary("hetmers", hetmer_args)
 
+        if args.json_report:
+            smg.save_hetmers_json_report(args.o, input_params=vars(args))
+
         fin()
 
     if _parser.task == "extract":
@@ -368,7 +381,7 @@ def main():
 
         fin()
 
-    title = args.title or str(Path(args.infile).with_suffix("").name)
+    title = args.title or Path(args.infile).stem
 
     if _parser.task == "plot":
         smudge_tab = smg.read_csv(args.smudgefile, sep="\t", names=["structure", "size", "rel_size"])
diff --git a/src/smudgeplot/smudgeplot.py b/src/smudgeplot/smudgeplot.py
index 59a123d..5d7a553 100644
--- a/src/smudgeplot/smudgeplot.py
+++ b/src/smudgeplot/smudgeplot.py
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3
 
 import json
+import shlex
 import sys
 from collections import defaultdict
 from importlib.metadata import version
 from math import ceil, log
+from pathlib import Path
 from statistics import fmean
 
 import matplotlib as mpl
@@ -12,7 +14,7 @@
 import numpy as np
 from matplotlib.collections import PatchCollection
 from numpy import arange, argmin, concatenate
-from pandas import DataFrame, Series, concat, read_csv # type: ignore
+from pandas import DataFrame, Series, concat, read_csv
 
 
 class Coverages:
@@ -376,11 +378,14 @@ def generate_plots(
     if json_report:
         write_json_report(smudgeplot_data, input_params)
 
+
 def write_json_report(smg_data, input_params=None, min_size=0.03):
+    hetmers_report = read_hetmers_report_json(input_params["infile"])
     report = {
         "version": version("smudgeplot"),
-        "commandline_arguments": sys.argv[1:],
+        "commandline_arguments": shlex.join(sys.argv[1:]),
         "input_parameters": input_params,
+        "hetmers": hetmers_report,
         "haploid_coverage": float(f"{smg_data.cov:.3f}"),
         "error_fraction": smg_data.error_fraction,
         "top_smudges": [
@@ -400,8 +405,37 @@ def write_json_report(smg_data, input_params=None, min_size=0.03):
             for row in smg_data.smudge_tab.itertuples(index=False)
         ],
     }
-    with open(smg_data.json_report_file, "w") as fh:
-        fh.write(json.dumps(report, indent=2) + "\n")
+    write_json_file(smg_data.json_report_file, report)
+
+
+def save_hetmers_json_report(outfile, input_params=None):
+    """Write version, command line and parameters to `<outfile>_report.json`."""
+    write_json_file(f"{outfile}_report.json", {
+        "version": version("smudgeplot"),
+        "commandline_arguments": shlex.join(sys.argv[1:]),
+        "input_parameters": input_params,
+    })
+
+
+def write_json_file(filename: str, data):  # dump `data` as 2-space-indented JSON plus newline
+    Path(filename).write_text(f"{json.dumps(data, indent=2)}\n")
+
+
+def read_hetmers_report_json(hetmers: str):
+    """
+    Return the parsed hetmers report JSON (`<stem>_report.json`, looked up in
+    the same directory as the hetmers file) if it exists and its modification
+    time is the same as or more recent than the hetmers file; otherwise None.
+    """
+    hetmers_file = Path(hetmers)
+    report_file = hetmers_file.parent / f"{hetmers_file.stem}_report.json"
+
+    if (
+        report_file.exists()
+        and report_file.stat().st_mtime >= hetmers_file.stat().st_mtime
+    ):
+        return json.loads(report_file.read_text())
+    return None
 
 
 def prepare_smudgeplot_data_for_plotting(smudgeplot_data, output, title, fmt=None, upper_ylim=None):

From 2c7d74ca7535139cdfa951ddbaf11ecda1c19467 Mon Sep 17 00:00:00 2001
From: James Gilbert <jgrg@sanger.ac.uk>
Date: Thu, 29 Jan 2026 15:42:18 +0000
Subject: [PATCH 2/3] Rename JSON report hetmers key to hetmers_input

---
 src/smudgeplot/smudgeplot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/smudgeplot/smudgeplot.py b/src/smudgeplot/smudgeplot.py
index 5d7a553..f7aa7b5 100644
--- a/src/smudgeplot/smudgeplot.py
+++ b/src/smudgeplot/smudgeplot.py
@@ -385,7 +385,7 @@ def write_json_report(smg_data, input_params=None, min_size=0.03):
         "version": version("smudgeplot"),
         "commandline_arguments": shlex.join(sys.argv[1:]),
         "input_parameters": input_params,
-        "hetmers": hetmers_report,
+        "hetmers_input": hetmers_report,
         "haploid_coverage": float(f"{smg_data.cov:.3f}"),
         "error_fraction": smg_data.error_fraction,
         "top_smudges": [

From 8a71b8ec99ab31368947c9701f720b642917617f Mon Sep 17 00:00:00 2001
From: James Gilbert <jgrg@sanger.ac.uk>
Date: Fri, 30 Jan 2026 10:03:32 +0000
Subject: [PATCH 3/3] Fix coverages typo

---
 playground/alternative_fitting/pair_clustering.py | 2 +-
 src/smudgeplot/cli.py                             | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/playground/alternative_fitting/pair_clustering.py b/playground/alternative_fitting/pair_clustering.py
index 1cd3eb3..059b40c 100644
--- a/playground/alternative_fitting/pair_clustering.py
+++ b/playground/alternative_fitting/pair_clustering.py
@@ -13,7 +13,7 @@
 ####
 
 parser = argparse.ArgumentParser()
-parser.add_argument('infile', nargs='?', help='name of the input tsv file with covarages and frequencies.')
+parser.add_argument('infile', nargs='?', help='name of the input tsv file with coverages and frequencies.')
 parser.add_argument('-nf', '-noise_filter', help='Do not agregate into smudge k-mer pairs with frequency lower than this parameter', type=int, default=50)
 parser.add_argument('-d', '-distance', help='Manthattan distance of k-mer pairs that are considered neioboring for the local agregation purposes.', type=int, default=5)
 parser.add_argument('--mask_errors', help='instead of reporting assignments to individual smudges, just remove all monotonically decreasing points from the error line', action="store_true", default = False)
diff --git a/src/smudgeplot/cli.py b/src/smudgeplot/cli.py
index 7601ef4..30f0cec 100755
--- a/src/smudgeplot/cli.py
+++ b/src/smudgeplot/cli.py
@@ -182,7 +182,7 @@ def peak_aggregation(self):
             description="Aggregates smudges using local aggregation algorithm.")
         argparser.add_argument(
             "infile",
-            help="Name of the input smu file with covarages and frequencies.",
+            help="Name of the input smu file with coverages and frequencies.",
         )
         argparser.add_argument(
             "-nf",
@@ -261,7 +261,7 @@ def all(self):
             description="Runs all the steps (with default options).")
         argparser.add_argument(
             "infile",
-            help="Name of the input tsv file with covarages and frequencies.",
+            help="Name of the input tsv file with coverages and frequencies.",
         )
         argparser.add_argument(
             "-o",