Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion playground/alternative_fitting/pair_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
####

parser = argparse.ArgumentParser()
parser.add_argument('infile', nargs='?', help='name of the input tsv file with covarages and frequencies.')
parser.add_argument('infile', nargs='?', help='name of the input tsv file with coverages and frequencies.')
parser.add_argument('-nf', '-noise_filter', help='Do not agregate into smudge k-mer pairs with frequency lower than this parameter', type=int, default=50)
parser.add_argument('-d', '-distance', help='Manthattan distance of k-mer pairs that are considered neioboring for the local agregation purposes.', type=int, default=5)
parser.add_argument('--mask_errors', help='instead of reporting assignments to individual smudges, just remove all monotonically decreasing points from the error line', action="store_true", default = False)
Expand Down
19 changes: 16 additions & 3 deletions src/smudgeplot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def __init__(self):
default=False,
help="Print the version and exit.",
)

# print version is a special case
if len(sys.argv) > 1:
if sys.argv[1] in ["-v", "--version"]:
Expand Down Expand Up @@ -149,6 +150,7 @@ def hetmers(self):
"-L",
help="Count threshold below which k-mers are considered erroneous.",
type=int,
required=True,
)
argparser.add_argument("-t", help="Number of threads (default 4).", type=int, default=4)
argparser.add_argument(
Expand All @@ -162,6 +164,13 @@ def hetmers(self):
default=".",
)
argparser.add_argument("--verbose", action="store_true", default=False, help="Verbose mode.")
argparser.add_argument(
"--json_report",
action="store_true",
default=False,
help="Write a JSON format report recording the selected parameters (default False)",
)

self.arguments = argparser.parse_args(sys.argv[2:])

def peak_aggregation(self):
Expand All @@ -173,7 +182,7 @@ def peak_aggregation(self):
description="Aggregates smudges using local aggregation algorithm.")
argparser.add_argument(
"infile",
help="Name of the input smu file with covarages and frequencies.",
help="Name of the input smu file with coverages and frequencies.",
)
argparser.add_argument(
"-nf",
Expand Down Expand Up @@ -252,7 +261,7 @@ def all(self):
description="Runs all the steps (with default options).")
argparser.add_argument(
"infile",
help="Name of the input tsv file with covarages and frequencies.",
help="Name of the input tsv file with coverages and frequencies.",
)
argparser.add_argument(
"-o",
Expand Down Expand Up @@ -337,6 +346,7 @@ def main():
fin()

if _parser.task == "hetmers":

hetmer_args = [
f"-o{args.o}",
f"-e{args.L}",
Expand All @@ -350,6 +360,9 @@ def main():

run_binary("hetmers", hetmer_args)

if args.json_report:
smg.save_hetmers_json_report(args.o, input_params=vars(args))

fin()

if _parser.task == "extract":
Expand All @@ -368,7 +381,7 @@ def main():

fin()

title = args.title or str(Path(args.infile).with_suffix("").name)
title = args.title or Path(args.infile).stem

if _parser.task == "plot":
smudge_tab = smg.read_csv(args.smudgefile, sep="\t", names=["structure", "size", "rel_size"])
Expand Down
42 changes: 38 additions & 4 deletions src/smudgeplot/smudgeplot.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
#!/usr/bin/env python3

import json
import shlex
import sys
from collections import defaultdict
from importlib.metadata import version
from math import ceil, log
from pathlib import Path
from statistics import fmean

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import PatchCollection
from numpy import arange, argmin, concatenate
from pandas import DataFrame, Series, concat, read_csv # type: ignore
from pandas import DataFrame, Series, concat, read_csv


class Coverages:
Expand Down Expand Up @@ -376,11 +378,14 @@ def generate_plots(
if json_report:
write_json_report(smudgeplot_data, input_params)


def write_json_report(smg_data, input_params=None, min_size=0.03):
hetmers_report = read_hetmers_report_json(input_params["infile"])
report = {
"version": version("smudgeplot"),
"commandline_arguments": sys.argv[1:],
"commandline_arguments": shlex.join(sys.argv[1:]),
"input_parameters": input_params,
"hetmers_input": hetmers_report,
"haploid_coverage": float(f"{smg_data.cov:.3f}"),
"error_fraction": smg_data.error_fraction,
"top_smudges": [
Expand All @@ -400,8 +405,37 @@ def write_json_report(smg_data, input_params=None, min_size=0.03):
for row in smg_data.smudge_tab.itertuples(index=False)
],
}
with open(smg_data.json_report_file, "w") as fh:
fh.write(json.dumps(report, indent=2) + "\n")
write_json_file(smg_data.json_report_file, report)


def save_hetmers_json_report(outfile, input_params=None):
    """Write a JSON report describing a hetmers run.

    The report records the installed smudgeplot version, the command line
    (everything after the program name, shell-quoted into one string) and the
    parameters that were passed in.

    Args:
        outfile: Output prefix; the report is written to
            ``<outfile>_report.json``.
        input_params: Parameters to record under ``input_parameters``.
    """
    report = dict(
        version=version("smudgeplot"),
        commandline_arguments=shlex.join(sys.argv[1:]),
        input_parameters=input_params,
    )
    report_path = f"{outfile}_report.json"
    write_json_file(report_path, report)


def write_json_file(filename: str, data):
    """Serialise ``data`` as JSON and write it to ``filename``.

    The output is indented by two spaces and ends with a newline.
    Serialisation happens before the file is opened, so a value that cannot
    be encoded leaves any existing file untouched.

    Args:
        filename: Path of the file to create or overwrite.
        data: Any JSON-serialisable object.
    """
    serialised = json.dumps(data, indent=2)
    target = Path(filename)
    target.write_text(f"{serialised}\n")


def read_hetmers_report_json(hetmers: str):
    """Return the parsed hetmers report that accompanies a hetmers file.

    The report is expected next to the hetmers file, in the same directory,
    under the name ``<stem>_report.json`` where ``<stem>`` is the hetmers
    file name without its final suffix.

    Args:
        hetmers: Path to the hetmers file.

    Returns:
        The decoded JSON content of the report, or ``None`` when the report
        does not exist or is older than the hetmers file.

    Raises:
        FileNotFoundError: If the report exists but the hetmers file itself
            does not.
        json.JSONDecodeError: If the report is not valid JSON.
    """
    hetmers_file = Path(hetmers)
    # Keep the parent directory: building the path from the stem alone would
    # resolve the report relative to the current working directory.
    report_file = hetmers_file.with_name(f"{hetmers_file.stem}_report.json")

    # A single stat() both checks existence and fetches the mtime.
    try:
        report_mtime = report_file.stat().st_mtime
    except FileNotFoundError:
        return None

    # A report older than the hetmers file describes an earlier run.
    if report_mtime < hetmers_file.stat().st_mtime:
        return None
    return json.loads(report_file.read_text())


def prepare_smudgeplot_data_for_plotting(smudgeplot_data, output, title, fmt=None, upper_ylim=None):
Expand Down
Loading