From 32312f7c1e01f4a5c068e6775372afa3219d69cf Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Thu, 31 Jul 2025 10:59:21 -0700 Subject: [PATCH 01/16] initial working cli --- basicrta/cli.py | 89 +++++++++++++++++++++++++++++ basicrta/combine.py | 132 ++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 3 + 3 files changed, 219 insertions(+), 5 deletions(-) create mode 100644 basicrta/cli.py diff --git a/basicrta/cli.py b/basicrta/cli.py new file mode 100644 index 0000000..1731678 --- /dev/null +++ b/basicrta/cli.py @@ -0,0 +1,89 @@ +""" +basicrta +A package to extract binding kinetics from molecular dynamics simulations +""" + +# Add imports here +from importlib.metadata import version +from basicrta import * +import argparse +import subprocess + +__version__ = version("basicrta") + +commands = ['contacts', 'cluster', 'combine', 'kinetics', 'gibbs'] + +def main(): + parser = argparse.ArgumentParser(prog='basicrta') + #parser.add_argument('command', help='Step in workflow to execute', nargs='+') + subparsers = parser.add_subparsers(dest='command') + + + parserA = subparsers.add_parser('contacts', help='Ahelp') + parserA.add_argument('--top', type=str, help='Topology') + parserA.add_argument('--traj', type=str, help='Trajectory') + parserA.add_argument('--sel1', type=str, help='First selection (group for' + 'which tau is to be calculated)') + parserA.add_argument('--sel2', type=str, help='Second selection (group of' + 'interest in interactions with first selection)') + parserA.add_argument('--cutoff', type=float, help='Value to use (in A) for' + 'the maximum separation distance that constitutes a' + 'contact.') + parserA.add_argument('--nproc', type=int, default=1, help='Number of' + 'processes to use') + parserA.add_argument('--nslices', type=int, default=100, help='Number of' + 'trajectory segments to use (if encountering a' + 'memoryerror, try using a greater value)') + + parserB = subparsers.add_parser('combine', add_help=True, help='Bhelp') + parserB.add_argument('--contacts', nargs='+', required=True, + help="List of contact pickle files to combine (e.g.," + "contacts_7.0.pkl from different runs)") + parserB.add_argument( '--output', type=str, default='combined_contacts.pkl', + help="Output filename for combined contacts (default:" + "combined_contacts.pkl)") + parserB.add_argument( '--no-validate', action='store_true', help="Skip" + "compatibility validation (use with caution)") + + parserC = subparsers.add_parser('cluster', help='Chelp') + parserC.add_argument('--nproc', type=int, default=1) + parserC.add_argument('--cutoff', type=float) + parserC.add_argument('--niter', type=int, default=110000) + parserC.add_argument('--prot', type=str, default=None, nargs='?') + parserC.add_argument('--label-cutoff', type=float, default=3, + dest='label_cutoff', + help='Only label residues with tau > ' + 'LABEL-CUTOFF * . ') + parserC.add_argument('--structure', type=str, nargs='?') + # use for default values + parserC.add_argument('--gskip', type=int, default=1000, + help='Gibbs skip parameter for decorrelated samples;' + 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') + parserC.add_argument('--burnin', type=int, default=10000, + help='Burn-in parameter, drop first N samples as equilibration;' + 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') + + parserD = subparsers.add_parser('gibbs', help='Dhelp') + parserD.add_argument('--contacts') + parserD.add_argument('--resid', type=int, default=None) + parserD.add_argument('--nproc', type=int, default=1) + parserD.add_argument('--niter', type=int, default=110000) + parserD.add_argument('--ncomp', type=int, default=15) + + parserE = subparsers.add_parser('kinetics', help='Ehelp') + parserE.add_argument("--gibbs", type=str) + parserE.add_argument("--contacts", type=str) + parserE.add_argument("--top_n", type=int, nargs='?', default=None) + parserE.add_argument("--step", type=int, nargs='?', default=1) + parserE.add_argument("--wdensity", action='store_true') + + args = parser.parse_args() + keys, values = vars(args).keys(), vars(args).values() + inarr = [[f"--{key}", f"{value}"] for key, value in zip(keys, values) if key!='command'] + inlist = [aset for alist in inarr for aset in alist] + subprocess.run(['python', + f'/home/r2/opt/basicrta/basicrta/{args.command}.py'] + + inlist) + +if __name__ == "__main__": + main() diff --git a/basicrta/combine.py b/basicrta/combine.py index 826b159..52f691d 100644 --- a/basicrta/combine.py +++ b/basicrta/combine.py @@ -9,10 +9,135 @@ import os import argparse -from basicrta.contacts import CombineContacts +class CombineContacts(object): + """Class to combine contact timeseries from multiple repeat runs. + + This class enables pooling data from multiple trajectory repeats and + calculating posteriors from all data together, rather than analyzing + each run separately. + + :param contact_files: List of contact pickle files to combine + :type contact_files: list of str + :param output_name: Name for the combined output file (default: 'combined_contacts.pkl') + :type output_name: str, optional + :param validate_compatibility: Whether to validate that files are compatible (default: True) + :type validate_compatibility: bool, optional + """ + + def __init__(self, contact_files, output_name='combined_contacts.pkl', + validate_compatibility=True): + self.contact_files = contact_files + self.output_name = output_name + self.validate_compatibility = validate_compatibility + + if len(contact_files) < 2: + raise ValueError("At least 2 contact files are required for combining") + + def _load_contact_file(self, filename): + """Load a contact pickle file and return data and metadata.""" + if not os.path.exists(filename): + raise FileNotFoundError(f"Contact file not found: {filename}") + + with open(filename, 'rb') as f: + contacts = pickle.load(f) + + metadata = contacts.dtype.metadata + return contacts, metadata + + def _validate_compatibility(self, metadatas): + """Validate that contact files are compatible for combining.""" + reference = metadatas[0] + + # Check that all files have the same atom groups + for i, meta in enumerate(metadatas[1:], 1): + # Compare cutoff + if meta['cutoff'] != reference['cutoff']: + raise ValueError(f"Incompatible cutoffs: file 0 has {reference['cutoff']}, " + f"file {i} has {meta['cutoff']}") + + # Compare atom group selections by checking if resids match + ref_ag1_resids = set(reference['ag1'].residues.resids) + ref_ag2_resids = set(reference['ag2'].residues.resids) + meta_ag1_resids = set(meta['ag1'].residues.resids) + meta_ag2_resids = set(meta['ag2'].residues.resids) + + if ref_ag1_resids != meta_ag1_resids: + raise ValueError(f"Incompatible ag1 residues between file 0 and file {i}") + if ref_ag2_resids != meta_ag2_resids: + raise ValueError(f"Incompatible ag2 residues between file 0 and file {i}") + + # Check timesteps and warn if different + timesteps = [meta['ts'] for meta in metadatas] + if not all(abs(ts - timesteps[0]) < 1e-6 for ts in timesteps): + print("WARNING: Different timesteps detected across runs:") + for i, (filename, ts) in enumerate(zip(self.contact_files, timesteps)): + print(f" File {i} ({filename}): dt = {ts} ns") + print("This may affect residence time estimates, especially for fast events.") + + def run(self): + """Combine contact files and save the result.""" + print(f"Combining {len(self.contact_files)} contact files...") + + all_contacts = [] + all_metadatas = [] + + # Load all contact files + for i, filename in enumerate(self.contact_files): + print(f"Loading file {i+1}/{len(self.contact_files)}: {filename}") + contacts, metadata = self._load_contact_file(filename) + all_contacts.append(contacts) + all_metadatas.append(metadata) + + # Validate compatibility if requested + if self.validate_compatibility: + print("Validating file compatibility...") + self._validate_compatibility(all_metadatas) + + # Combine contact data + print("Combining contact data...") + + # Calculate total size and create combined array + total_size = sum(len(contacts) for contacts in all_contacts) + reference_metadata = all_metadatas[0].copy() + + # Extend metadata to include trajectory source information + reference_metadata['source_files'] = self.contact_files + reference_metadata['n_trajectories'] = len(self.contact_files) + + # Determine number of columns (5 for raw contacts, 4 for processed) + n_cols = all_contacts[0].shape[1] + + # Create dtype with extended metadata + combined_dtype = np.dtype(np.float64, metadata=reference_metadata) + + # Add trajectory source column (will be last column) + combined_contacts = np.zeros((total_size, n_cols + 1), dtype=np.float64) + + # Combine data and add trajectory source information + offset = 0 + for traj_idx, contacts in enumerate(all_contacts): + n_contacts = len(contacts) + # Copy original contact data + combined_contacts[offset:offset+n_contacts, :n_cols] = contacts[:] + # Add trajectory source index + combined_contacts[offset:offset+n_contacts, n_cols] = traj_idx + offset += n_contacts + + # Create final memmap with proper dtype + final_contacts = combined_contacts.view(combined_dtype) + + # Save combined contacts + print(f"Saving combined contacts to {self.output_name}...") + final_contacts.dump(self.output_name, protocol=5) + + print(f"Successfully combined {len(self.contact_files)} files into {self.output_name}") + print(f"Total contacts: {total_size}") + print(f"Added trajectory source column (index {n_cols}) for kinetic clustering support") + + return self.output_name -def main(): +if __name__ == "__main__": """Main function for combining contact files.""" parser = argparse.ArgumentParser( description="Combine contact timeseries from multiple repeat runs. " @@ -82,6 +207,3 @@ def main(): print(f"ERROR: {e}") return 1 - -if __name__ == '__main__': - exit(main()) diff --git a/pyproject.toml b/pyproject.toml index 33b8c1c..f3991e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,9 @@ doc = [ source = "https://github.com/becksteinlab/basicrta" documentation = "https://basicrta.readthedocs.io" +[project.scripts] +basicrta = "basicrta.cli:main" + [tool.setuptools] py-modules = [] From 0f0fedec75f3720f5847207a705e6f4202df45bb Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Sat, 2 Aug 2025 18:34:53 -0700 Subject: [PATCH 02/16] taking parser from submodules --- basicrta/cli.py | 114 +++++++++++++++++++++---------------------- basicrta/contacts.py | 40 +++++++++------ 2 files changed, 81 insertions(+), 73 deletions(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index 1731678..e4bacff 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -19,63 +19,63 @@ def main(): subparsers = parser.add_subparsers(dest='command') - parserA = subparsers.add_parser('contacts', help='Ahelp') - parserA.add_argument('--top', type=str, help='Topology') - parserA.add_argument('--traj', type=str, help='Trajectory') - parserA.add_argument('--sel1', type=str, help='First selection (group for' - 'which tau is to be calculated)') - parserA.add_argument('--sel2', type=str, help='Second selection (group of' - 'interest in interactions with first selection)') - parserA.add_argument('--cutoff', type=float, help='Value to use (in A) for' - 'the maximum separation distance that constitutes a' - 'contact.') - parserA.add_argument('--nproc', type=int, default=1, help='Number of' - 'processes to use') - parserA.add_argument('--nslices', type=int, default=100, help='Number of' - 'trajectory segments to use (if encountering a' - 'memoryerror, try using a greater value)') - - parserB = subparsers.add_parser('combine', add_help=True, help='Bhelp') - parserB.add_argument('--contacts', nargs='+', required=True, - help="List of contact pickle files to combine (e.g.," - "contacts_7.0.pkl from different runs)") - parserB.add_argument( '--output', type=str, default='combined_contacts.pkl', - help="Output filename for combined contacts (default:" - "combined_contacts.pkl)") - parserB.add_argument( '--no-validate', action='store_true', help="Skip" - "compatibility validation (use with caution)") - - parserC = subparsers.add_parser('cluster', help='Chelp') - parserC.add_argument('--nproc', type=int, default=1) - parserC.add_argument('--cutoff', type=float) - parserC.add_argument('--niter', type=int, default=110000) - parserC.add_argument('--prot', type=str, default=None, nargs='?') - parserC.add_argument('--label-cutoff', type=float, default=3, - dest='label_cutoff', - help='Only label residues with tau > ' - 'LABEL-CUTOFF * . ') - parserC.add_argument('--structure', type=str, nargs='?') - # use for default values - parserC.add_argument('--gskip', type=int, default=1000, - help='Gibbs skip parameter for decorrelated samples;' - 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') - parserC.add_argument('--burnin', type=int, default=10000, - help='Burn-in parameter, drop first N samples as equilibration;' - 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') - - parserD = subparsers.add_parser('gibbs', help='Dhelp') - parserD.add_argument('--contacts') - parserD.add_argument('--resid', type=int, default=None) - parserD.add_argument('--nproc', type=int, default=1) - parserD.add_argument('--niter', type=int, default=110000) - parserD.add_argument('--ncomp', type=int, default=15) - - parserE = subparsers.add_parser('kinetics', help='Ehelp') - parserE.add_argument("--gibbs", type=str) - parserE.add_argument("--contacts", type=str) - parserE.add_argument("--top_n", type=int, nargs='?', default=None) - parserE.add_argument("--step", type=int, nargs='?', default=1) - parserE.add_argument("--wdensity", action='store_true') +# parserA = subparsers.add_parser('contacts', help='Ahelp') +# parserA.add_argument('--top', type=str, help='Topology') +# parserA.add_argument('--traj', type=str, help='Trajectory') +# parserA.add_argument('--sel1', type=str, help='First selection (group for' +# 'which tau is to be calculated)') +# parserA.add_argument('--sel2', type=str, help='Second selection (group of' +# 'interest in interactions with first selection)') +# parserA.add_argument('--cutoff', type=float, help='Value to use (in A) for' +# 'the maximum separation distance that constitutes a' +# 'contact.') +# parserA.add_argument('--nproc', type=int, default=1, help='Number of' +# 'processes to use') +# parserA.add_argument('--nslices', type=int, default=100, help='Number of' +# 'trajectory segments to use (if encountering a' +# 'memoryerror, try using a greater value)') +# +# parserB = subparsers.add_parser('combine', add_help=True, help='Bhelp') +# parserB.add_argument('--contacts', nargs='+', required=True, +# help="List of contact pickle files to combine (e.g.," +# "contacts_7.0.pkl from different runs)") +# parserB.add_argument( '--output', type=str, default='combined_contacts.pkl', +# help="Output filename for combined contacts (default:" +# "combined_contacts.pkl)") +# parserB.add_argument( '--no-validate', action='store_true', help="Skip" +# "compatibility validation (use with caution)") +# +# parserC = subparsers.add_parser('cluster', help='Chelp') +# parserC.add_argument('--nproc', type=int, default=1) +# parserC.add_argument('--cutoff', type=float) +# parserC.add_argument('--niter', type=int, default=110000) +# parserC.add_argument('--prot', type=str, default=None, nargs='?') +# parserC.add_argument('--label-cutoff', type=float, default=3, +# dest='label_cutoff', +# help='Only label residues with tau > ' +# 'LABEL-CUTOFF * . ') +# parserC.add_argument('--structure', type=str, nargs='?') +# # use for default values +# parserC.add_argument('--gskip', type=int, default=1000, +# help='Gibbs skip parameter for decorrelated samples;' +# 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') +# parserC.add_argument('--burnin', type=int, default=10000, +# help='Burn-in parameter, drop first N samples as equilibration;' +# 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') +# +# parserD = subparsers.add_parser('gibbs', help='Dhelp') +# parserD.add_argument('--contacts') +# parserD.add_argument('--resid', type=int, default=None) +# parserD.add_argument('--nproc', type=int, default=1) +# parserD.add_argument('--niter', type=int, default=110000) +# parserD.add_argument('--ncomp', type=int, default=15) +# +# parserE = subparsers.add_parser('kinetics', help='Ehelp') +# parserE.add_argument("--gibbs", type=str) +# parserE.add_argument("--contacts", type=str) +# parserE.add_argument("--top_n", type=int, nargs='?', default=None) +# parserE.add_argument("--step", type=int, nargs='?', default=1) +# parserE.add_argument("--wdensity", action='store_true') args = parser.parse_args() keys, values = vars(args).keys(), vars(args).values() diff --git a/basicrta/contacts.py b/basicrta/contacts.py index 2117c31..4a61dde 100644 --- a/basicrta/contacts.py +++ b/basicrta/contacts.py @@ -364,23 +364,9 @@ def run(self): return self.output_name - -if __name__ == '__main__': - """DOCSSS - """ - import argparse - parser = argparse.ArgumentParser(description="Create the primary contact \ - map and collect contacts based on the \ - desired cutoff distance") - parser.add_argument('--top', type=str, help="Topology") - parser.add_argument('--traj', type=str) - parser.add_argument('--sel1', type=str) - parser.add_argument('--sel2', type=str) - parser.add_argument('--cutoff', type=float) - parser.add_argument('--nproc', type=int, default=1) - parser.add_argument('--nslices', type=int, default=100) +def main(): + parser = get_parser() args = parser.parse_args() - u = mda.Universe(args.top, args.traj) cutoff, nproc, nslices = args.cutoff, args.nproc, args.nslices ag1 = u.select_atoms(args.sel1) @@ -396,3 +382,25 @@ def run(self): ProcessContacts(cutoff, mapname, nproc=nproc).run() + +def get_parser(): + import argparse + parser = argparse.ArgumentParser(description="Create the primary contact \ + map and collect contacts based on the \ + desired cutoff distance") + parser.add_argument('--top', type=str, help="Topology") + parser.add_argument('--traj', type=str) + parser.add_argument('--sel1', type=str) + parser.add_argument('--sel2', type=str) + parser.add_argument('--cutoff', type=float) + parser.add_argument('--nproc', type=int, default=1) + parser.add_argument('--nslices', type=int, default=100) + return parser + + +if __name__ == '__main__': + exit(main()) + """DOCSSS + """ + + From 2bf2b216c94b63577f8f08fa190ed01d2fd144bc Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Fri, 14 Nov 2025 16:26:26 -0600 Subject: [PATCH 03/16] in-progress cli --- basicrta/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index e4bacff..6289abc 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -14,10 +14,11 @@ commands = ['contacts', 'cluster', 'combine', 'kinetics', 'gibbs'] def main(): + from basicrta.contacts import get_parser parser = argparse.ArgumentParser(prog='basicrta') #parser.add_argument('command', help='Step in workflow to execute', nargs='+') subparsers = parser.add_subparsers(dest='command') - + subparsers.add_parser(get_parser) # parserA = subparsers.add_parser('contacts', help='Ahelp') # parserA.add_argument('--top', type=str, help='Topology') From 66e947242e9c374d2058b39ad7f8bdb1accbc745 Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Wed, 19 Nov 2025 15:28:47 -0600 Subject: [PATCH 04/16] working cli that takes parser from each script --- basicrta/cli.py | 83 +++++++------------------------------------- basicrta/cluster.py | 15 +++++--- basicrta/combine.py | 9 +++-- basicrta/gibbs.py | 12 +++++-- basicrta/kinetics.py | 12 +++++-- 5 files changed, 48 insertions(+), 83 deletions(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index 6289abc..cb4bce3 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -3,88 +3,29 @@ A package to extract binding kinetics from molecular dynamics simulations """ -# Add imports here from importlib.metadata import version from basicrta import * import argparse import subprocess +import importlib +import sys __version__ = version("basicrta") commands = ['contacts', 'cluster', 'combine', 'kinetics', 'gibbs'] def main(): - from basicrta.contacts import get_parser - parser = argparse.ArgumentParser(prog='basicrta') - #parser.add_argument('command', help='Step in workflow to execute', nargs='+') - subparsers = parser.add_subparsers(dest='command') - subparsers.add_parser(get_parser) + parser = argparse.ArgumentParser(prog='basicrta', add_help=True) + subparsers = parser.add_subparsers() -# parserA = subparsers.add_parser('contacts', help='Ahelp') -# parserA.add_argument('--top', type=str, help='Topology') -# parserA.add_argument('--traj', type=str, help='Trajectory') -# parserA.add_argument('--sel1', type=str, help='First selection (group for' -# 'which tau is to be calculated)') -# parserA.add_argument('--sel2', type=str, help='Second selection (group of' -# 'interest in interactions with first selection)') -# parserA.add_argument('--cutoff', type=float, help='Value to use (in A) for' -# 'the maximum separation distance that constitutes a' -# 'contact.') -# parserA.add_argument('--nproc', type=int, default=1, help='Number of' -# 'processes to use') -# parserA.add_argument('--nslices', type=int, default=100, help='Number of' -# 'trajectory segments to use (if encountering a' -# 'memoryerror, try using a greater value)') -# -# parserB = subparsers.add_parser('combine', add_help=True, help='Bhelp') -# parserB.add_argument('--contacts', nargs='+', required=True, -# help="List of contact pickle files to combine (e.g.," -# "contacts_7.0.pkl from different runs)") -# parserB.add_argument( '--output', type=str, default='combined_contacts.pkl', -# help="Output filename for combined contacts (default:" -# "combined_contacts.pkl)") -# parserB.add_argument( '--no-validate', action='store_true', help="Skip" -# "compatibility validation (use with caution)") -# -# parserC = subparsers.add_parser('cluster', help='Chelp') -# parserC.add_argument('--nproc', type=int, default=1) -# parserC.add_argument('--cutoff', type=float) -# parserC.add_argument('--niter', type=int, default=110000) -# parserC.add_argument('--prot', type=str, default=None, nargs='?') -# parserC.add_argument('--label-cutoff', type=float, default=3, -# dest='label_cutoff', -# help='Only label residues with tau > ' -# 'LABEL-CUTOFF * . ') -# parserC.add_argument('--structure', type=str, nargs='?') -# # use for default values -# parserC.add_argument('--gskip', type=int, default=1000, -# help='Gibbs skip parameter for decorrelated samples;' -# 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') -# parserC.add_argument('--burnin', type=int, default=10000, -# help='Burn-in parameter, drop first N samples as equilibration;' -# 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') -# -# parserD = subparsers.add_parser('gibbs', help='Dhelp') -# parserD.add_argument('--contacts') -# parserD.add_argument('--resid', type=int, default=None) -# parserD.add_argument('--nproc', type=int, default=1) -# parserD.add_argument('--niter', type=int, default=110000) -# parserD.add_argument('--ncomp', type=int, default=15) -# -# parserE = subparsers.add_parser('kinetics', help='Ehelp') -# parserE.add_argument("--gibbs", type=str) -# parserE.add_argument("--contacts", type=str) -# parserE.add_argument("--top_n", type=int, nargs='?', default=None) -# parserE.add_argument("--step", type=int, nargs='?', default=1) -# parserE.add_argument("--wdensity", action='store_true') - - args = parser.parse_args() - keys, values = vars(args).keys(), vars(args).values() - inarr = [[f"--{key}", f"{value}"] for key, value in zip(keys, values) if key!='command'] - inlist = [aset for alist in inarr for aset in alist] - subprocess.run(['python', - f'/home/r2/opt/basicrta/basicrta/{args.command}.py'] + - inlist) + for command in commands: + subparser = importlib.import_module(f"basicrta.{command}").get_parser() + subparsers.add_parser(f'{command}', parents=[subparser], add_help=False) + + args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) + + if len(sys.argv) == 2: + subparsers.choices[f'{sys.argv[1]}'].print_help() if __name__ == "__main__": main() diff --git a/basicrta/cluster.py b/basicrta/cluster.py index 3c48999..8acbb47 100644 --- a/basicrta/cluster.py +++ b/basicrta/cluster.py @@ -212,10 +212,7 @@ def b_color_structure(self, structure): u.select_atoms('protein').write('tau_bcolored.pdb') - -if __name__ == "__main__": #pragma: no cover - # the script is tested in the test_cluster.py but cannot be accounted for - # in the coverage report +def get_parser(): import argparse parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--nproc', type=int, default=1) @@ -234,7 +231,10 @@ def b_color_structure(self, structure): parser.add_argument('--burnin', type=int, default=10000, help='Burn-in parameter, drop first N samples as equilibration;' 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') + return parser +def main(): + parser = get_parser() args = parser.parse_args() pp = ProcessProtein(args.niter, args.prot, args.cutoff, @@ -243,3 +243,10 @@ def b_color_structure(self, structure): pp.get_taus() pp.write_data() pp.plot_protein(label_cutoff=args.label_cutoff) + + +if __name__ == "__main__": #pragma: no cover + # the script is tested in the test_cluster.py but cannot be accounted for + # in the coverage report + exit(main()) + diff --git a/basicrta/combine.py b/basicrta/combine.py index 52f691d..40dd350 100644 --- a/basicrta/combine.py +++ b/basicrta/combine.py @@ -137,7 +137,7 @@ def run(self): return self.output_name -if __name__ == "__main__": +def get_parser(): """Main function for combining contact files.""" parser = argparse.ArgumentParser( description="Combine contact timeseries from multiple repeat runs. " @@ -164,7 +164,10 @@ def run(self): action='store_true', help="Skip compatibility validation (use with caution)" ) - + return parser + +def main(): + parser = get_parser() args = parser.parse_args() # Validate input files exist @@ -207,3 +210,5 @@ def run(self): print(f"ERROR: {e}") return 1 +if __name__ == "__main__": + exit(main()) diff --git a/basicrta/gibbs.py b/basicrta/gibbs.py index bbdc4d8..3fdc483 100644 --- a/basicrta/gibbs.py +++ b/basicrta/gibbs.py @@ -287,7 +287,7 @@ def cluster(self, method="GaussianMixture", **kwargs): setattr(self.processed_results, 'indicator', pindicator) setattr(self.processed_results, 'labels', all_labels) - def process_gibbs(self, show=True): + def process_gibbs(self, show=False): r""" Process the samples collected from the Gibbs sampler. :meth:`process_gibbs` can be called multiple times to check the @@ -804,8 +804,7 @@ def plot_surv(self, scale=1, remove_noise=False, save=False, xlim=None, 's_vs_t.pdf', bbox_inches='tight') plt.show() - -if __name__ == '__main__': +def get_parser(): import argparse parser = argparse.ArgumentParser() parser.add_argument('--contacts') @@ -813,6 +812,10 @@ def plot_surv(self, scale=1, remove_noise=False, save=False, xlim=None, parser.add_argument('--nproc', type=int, default=1) parser.add_argument('--niter', type=int, default=110000) parser.add_argument('--ncomp', type=int, default=15) + return parser + +def main(): + parser = get_parser() args = parser.parse_args() contact_path = os.path.abspath(args.contacts) @@ -820,3 +823,6 @@ def plot_surv(self, scale=1, remove_noise=False, save=False, xlim=None, ParallelGibbs(contact_path, nproc=args.nproc, ncomp=args.ncomp, niter=args.niter).run(run_resids=args.resid) + +if __name__ == '__main__': + exit(main()) diff --git a/basicrta/kinetics.py b/basicrta/kinetics.py index 8613092..d2a651e 100644 --- a/basicrta/kinetics.py +++ b/basicrta/kinetics.py @@ -203,9 +203,7 @@ def weighted_densities(self, step=1, top_n=None, filterP=0): d.results.density.export(outname) - -if __name__ == "__main__": - from basicrta.gibbs import Gibbs +def get_parser(): import argparse parser = argparse.ArgumentParser() parser.add_argument("--gibbs", type=str) @@ -213,6 +211,11 @@ def weighted_densities(self, step=1, top_n=None, filterP=0): parser.add_argument("--top_n", type=int, nargs='?', default=None) parser.add_argument("--step", type=int, nargs='?', default=1) parser.add_argument("--wdensity", action='store_true') + return parser + +def main(): + from basicrta.gibbs import Gibbs + parser = get_parser() args = parser.parse_args() g = Gibbs().load(args.gibbs) @@ -220,3 +223,6 @@ def weighted_densities(self, step=1, top_n=None, filterP=0): mk.create_traj(top_n=args.top_n) if args.wdensity: mk.weighted_densities(step=args.step, top_n=args.top_n) + +if __name__ == "__main__": + exit(main()) From 7c18872785cd7c5e5a8bd12c054f9e77b0e82ff3 Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Wed, 19 Nov 2025 17:12:13 -0600 Subject: [PATCH 05/16] forgot to add script execution, should be working now --- basicrta/cli.py | 13 +++++++++++++ basicrta/cluster.py | 8 ++++---- basicrta/gibbs.py | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index cb4bce3..d2ea640 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -26,6 +26,19 @@ def main(): if len(sys.argv) == 2: subparsers.choices[f'{sys.argv[1]}'].print_help() + sys.exit() + + print(sys.argv[2:]) + #keys, values = vars(args).keys(), vars(args).values() + #inarr = [[f"--{key}", f"{value}"] for key, value in zip(keys, values) if + # value is not None] + #inlist = [aset for alist in inarr for aset in alist] + #subprocess.run(['python', + # f'/home/r2/opt/basicrta/basicrta/{sys.argv[1]}.py'] + + # inlist) + subprocess.run(['python', + f'/home/r2/opt/basicrta/basicrta/{sys.argv[1]}.py'] + + sys.argv[2:]) if __name__ == "__main__": main() diff --git a/basicrta/cluster.py b/basicrta/cluster.py index 8acbb47..595b1b7 100644 --- a/basicrta/cluster.py +++ b/basicrta/cluster.py @@ -219,10 +219,10 @@ def get_parser(): parser.add_argument('--cutoff', type=float) parser.add_argument('--niter', type=int, default=110000) parser.add_argument('--prot', type=str, default=None, nargs='?') - parser.add_argument('--label-cutoff', type=float, default=3, - dest='label_cutoff', - help='Only label residues with tau > ' - 'LABEL-CUTOFF * . ') + parser.add_argument('--label_cutoff', type=float, default=3, + dest='label_cutoff', + help='Only label residues with tau > ' + 'LABEL-CUTOFF * . ') parser.add_argument('--structure', type=str, nargs='?') # use for default values parser.add_argument('--gskip', type=int, default=1000, diff --git a/basicrta/gibbs.py b/basicrta/gibbs.py index 3fdc483..9fa6939 100644 --- a/basicrta/gibbs.py +++ b/basicrta/gibbs.py @@ -808,7 +808,7 @@ def get_parser(): import argparse parser = argparse.ArgumentParser() parser.add_argument('--contacts') - parser.add_argument('--resid', type=int, default=None) + parser.add_argument('--resid', type=int) parser.add_argument('--nproc', type=int, default=1) parser.add_argument('--niter', type=int, default=110000) parser.add_argument('--ncomp', type=int, default=15) From 807266aed83cca3a4be568dd64e63099b28cb245 Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Thu, 20 Nov 2025 07:14:20 -0600 Subject: [PATCH 06/16] removed subprocess call --- basicrta/cli.py | 14 ++------------ basicrta/cluster.py | 2 ++ basicrta/combine.py | 2 ++ basicrta/contacts.py | 3 +++ basicrta/gibbs.py | 2 ++ basicrta/kinetics.py | 2 ++ 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index d2ea640..d351e44 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -27,18 +27,8 @@ def main(): if len(sys.argv) == 2: subparsers.choices[f'{sys.argv[1]}'].print_help() sys.exit() - - print(sys.argv[2:]) - #keys, values = vars(args).keys(), vars(args).values() - #inarr = [[f"--{key}", f"{value}"] for key, value in zip(keys, values) if - # value is not None] - #inlist = [aset for alist in inarr for aset in alist] - #subprocess.run(['python', - # f'/home/r2/opt/basicrta/basicrta/{sys.argv[1]}.py'] + - # inlist) - subprocess.run(['python', - f'/home/r2/opt/basicrta/basicrta/{sys.argv[1]}.py'] + - sys.argv[2:]) + + importlib.import_module(f"basicrta.{sys.argv[1]}").main() if __name__ == "__main__": main() diff --git a/basicrta/cluster.py b/basicrta/cluster.py index e4942e0..aa093f4 100644 --- a/basicrta/cluster.py +++ b/basicrta/cluster.py @@ -256,6 +256,8 @@ def get_parser(): parser.add_argument('--burnin', type=int, default=10000, help='Burn-in parameter, drop first N samples as equilibration;' 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') + # this is to make the cli work, should be just a temporary solution + parser.add_argument('cluster', nargs='?') return parser def main(): diff --git a/basicrta/combine.py b/basicrta/combine.py index 40dd350..7bf3851 100644 --- a/basicrta/combine.py +++ b/basicrta/combine.py @@ -164,6 +164,8 @@ def get_parser(): action='store_true', help="Skip compatibility validation (use with caution)" ) + # this is to make the cli work, should be just a temporary solution + parser.add_argument('combine', nargs='?') return parser def main(): diff --git a/basicrta/contacts.py b/basicrta/contacts.py index 4a61dde..cac62c0 100644 --- a/basicrta/contacts.py +++ b/basicrta/contacts.py @@ -367,6 +367,7 @@ def run(self): def main(): parser = get_parser() args = parser.parse_args() + print(args) u = mda.Universe(args.top, args.traj) cutoff, nproc, nslices = args.cutoff, args.nproc, args.nslices ag1 = u.select_atoms(args.sel1) @@ -395,6 +396,8 @@ def get_parser(): parser.add_argument('--cutoff', type=float) parser.add_argument('--nproc', type=int, default=1) parser.add_argument('--nslices', type=int, default=100) + # this is to make the cli work, should be just a temporary solution + parser.add_argument('contacts', nargs='?') return parser diff --git a/basicrta/gibbs.py b/basicrta/gibbs.py index 41b8b53..c73a2bd 100644 --- a/basicrta/gibbs.py +++ b/basicrta/gibbs.py @@ -859,6 +859,8 @@ def get_parser(): parser.add_argument('--nproc', type=int, default=1) parser.add_argument('--niter', type=int, default=110000) parser.add_argument('--ncomp', type=int, default=15) + # this is to make the cli work, should be just a temporary solution + parser.add_argument('gibbs', nargs='?') return parser def main(): diff --git a/basicrta/kinetics.py b/basicrta/kinetics.py index d2a651e..b0d6e69 100644 --- a/basicrta/kinetics.py +++ b/basicrta/kinetics.py @@ -211,6 +211,8 @@ def get_parser(): parser.add_argument("--top_n", type=int, nargs='?', default=None) parser.add_argument("--step", type=int, nargs='?', default=1) parser.add_argument("--wdensity", action='store_true') + # this is to make the cli work, should be just a temporary solution + parser.add_argument('kinetics', nargs='?') return parser def main(): From 5fc67850fe9116c6c12006b076732f876a6d1d2b Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Thu, 20 Nov 2025 11:50:40 -0600 Subject: [PATCH 07/16] removed unnecessary print statement --- basicrta/contacts.py | 1 - 1 file changed, 1 deletion(-) diff --git a/basicrta/contacts.py b/basicrta/contacts.py index cac62c0..c922c9d 100644 --- a/basicrta/contacts.py +++ b/basicrta/contacts.py @@ -367,7 +367,6 @@ def run(self): def main(): parser = get_parser() args = parser.parse_args() - print(args) u = mda.Universe(args.top, args.traj) cutoff, nproc, nslices = args.cutoff, args.nproc, args.nslices ag1 = u.select_atoms(args.sel1) From 688a3ef48ebe0e8102ebee6d4e6d66864ece176b Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Mon, 24 Nov 2025 10:37:13 -0600 Subject: [PATCH 08/16] added help strings/subcommand description to cli --- basicrta/cli.py | 7 ++++--- basicrta/contacts.py | 23 +++++++++++++++++------ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index d351e44..5a3404b 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -15,12 +15,13 @@ commands = ['contacts', 'cluster', 'combine', 'kinetics', 'gibbs'] def main(): - parser = argparse.ArgumentParser(prog='basicrta', add_help=True) - subparsers = parser.add_subparsers() + parser = argparse.ArgumentParser(prog='basicrta', add_help=False) + subparsers = parser.add_subparsers(help='Step in the basicrta workflow to execute.') for command in commands: subparser = importlib.import_module(f"basicrta.{command}").get_parser() - subparsers.add_parser(f'{command}', parents=[subparser], add_help=False) + subparsers.add_parser(f'{command}', parents=[subparser, parser], add_help=True, + description=subparser.description, conflict_handler='resolve') args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) diff --git a/basicrta/contacts.py b/basicrta/contacts.py index c922c9d..4725da6 100644 --- a/basicrta/contacts.py +++ b/basicrta/contacts.py @@ -389,12 +389,23 @@ def get_parser(): map and collect contacts based on the \ desired cutoff distance") parser.add_argument('--top', type=str, help="Topology") - parser.add_argument('--traj', type=str) - parser.add_argument('--sel1', type=str) - parser.add_argument('--sel2', type=str) - parser.add_argument('--cutoff', type=float) - parser.add_argument('--nproc', type=int, default=1) - parser.add_argument('--nslices', type=int, default=100) + parser.add_argument('--traj', type=str, help="Trajectory") + parser.add_argument('--sel1', type=str, help="Primary atom selection, based \ + on MDAnalysis atom selection. basicrta will produce \ + tau for each residue in this atom group.") + parser.add_argument('--sel2', type=str, help="Secondary atom selection, \ + based on MDAnalysis atom selection. basicrta will \ + collect contacts between each residue of this group \ + with each residue of `sel1`.") + parser.add_argument('--cutoff', type=float, help="""Value to use for defining + a contact (in Angstrom). Any atom of `sel2` that is at + a distance less than or equal to `cutoff` of any atom + in `sel1` will be considered in contact.""") + parser.add_argument('--nproc', type=int, default=1, help="""Number of + processes to use in multiprocessing""") + parser.add_argument('--nslices', type=int, default=100, help="""Number of + slices to break the trajectory into. Increase this to + reduce the amount of memory needed for each process.""") # this is to make the cli work, should be just a temporary solution parser.add_argument('contacts', nargs='?') return parser From 7ad82028489032c12804445787ad62fb81f20edd Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Mon, 24 Nov 2025 11:17:26 -0600 Subject: [PATCH 09/16] added argument requirements --- basicrta/contacts.py | 2 +- basicrta/gibbs.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/basicrta/contacts.py b/basicrta/contacts.py index 4725da6..bd75759 100644 --- a/basicrta/contacts.py +++ b/basicrta/contacts.py @@ -400,7 +400,7 @@ def get_parser(): parser.add_argument('--cutoff', type=float, help="""Value to use for defining a contact (in Angstrom). Any atom of `sel2` that is at a distance less than or equal to `cutoff` of any atom - in `sel1` will be considered in contact.""") + in `sel1` will be considered in contact.""", required=True) parser.add_argument('--nproc', type=int, default=1, help="""Number of processes to use in multiprocessing""") parser.add_argument('--nslices', type=int, default=100, help="""Number of diff --git a/basicrta/gibbs.py b/basicrta/gibbs.py index c73a2bd..b5b2e56 100644 --- a/basicrta/gibbs.py +++ b/basicrta/gibbs.py @@ -854,11 +854,13 @@ def plot_surv(self, scale=1, remove_noise=False, save=False, xlim=None, def get_parser(): import argparse parser = argparse.ArgumentParser() - parser.add_argument('--contacts') - parser.add_argument('--resid', type=int) - parser.add_argument('--nproc', type=int, default=1) - parser.add_argument('--niter', type=int, default=110000) - parser.add_argument('--ncomp', type=int, default=15) + required = parser.add_argument_group('required arguments') + optional = parser.add_argument_group('optional arguments') + required.add_argument('--contacts', required=True) + optional.add_argument('--resid', type=int) + optional.add_argument('--nproc', type=int, default=1) + optional.add_argument('--niter', type=int, default=110000) + optional.add_argument('--ncomp', type=int, default=15) # this is to make the cli work, should be just a temporary solution parser.add_argument('gibbs', nargs='?') return parser From 873a5017592eb108cd3031ddd19ad159e22468a0 Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Mon, 24 Nov 2025 15:00:32 -0600 Subject: [PATCH 10/16] added help --- basicrta/cli.py | 25 +++++++++++++++---------- basicrta/cluster.py | 30 +++++++++++++++++++++--------- basicrta/combine.py | 8 +++++--- basicrta/contacts.py | 20 +++++++++++--------- basicrta/gibbs.py | 27 +++++++++++++++++++-------- basicrta/kinetics.py | 23 ++++++++++++++++------- 6 files changed, 87 insertions(+), 46 deletions(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index 5a3404b..d9ad3c4 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -12,23 +12,28 @@ __version__ = version("basicrta") -commands = ['contacts', 'cluster', 'combine', 'kinetics', 'gibbs'] +commands = ['contacts', 'gibbs', 'cluster', 'combine', 'kinetics'] +parser_help = ''' +Step in the basicrta workflow to execute. +''' def main(): - parser = argparse.ArgumentParser(prog='basicrta', add_help=False) - subparsers = parser.add_subparsers(help='Step in the basicrta workflow to execute.') + parser = argparse.ArgumentParser(prog='basicrta', add_help=True) + subparsers = parser.add_subparsers(help=parser_help) for command in commands: subparser = importlib.import_module(f"basicrta.{command}").get_parser() - subparsers.add_parser(f'{command}', parents=[subparser, parser], add_help=True, - description=subparser.description, conflict_handler='resolve') - - args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) - - if len(sys.argv) == 2: + subparsers.add_parser(f'{command}', parents=[subparser], add_help=True, + description=subparser.description, + conflict_handler='resolve', + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + help=subparser.description) + + if len(sys.argv) == 2 and sys.argv[1] in commands: subparsers.choices[f'{sys.argv[1]}'].print_help() sys.exit() - + + parser.parse_args(args=None if sys.argv[1:] else ['--help']) importlib.import_module(f"basicrta.{sys.argv[1]}").main() if __name__ == "__main__": diff --git a/basicrta/cluster.py b/basicrta/cluster.py index aa093f4..fde7f9f 100644 --- a/basicrta/cluster.py +++ b/basicrta/cluster.py @@ -239,16 +239,28 @@ def b_color_structure(self, structure): def get_parser(): import argparse - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--nproc', type=int, default=1) - parser.add_argument('--cutoff', type=float) - parser.add_argument('--niter', type=int, default=110000) - parser.add_argument('--prot', type=str, default=None, nargs='?') + parser = argparse.ArgumentParser(description="""perform clustering for each + residue located in basicrta-{cutoff}/""", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + required = parser.add_argument_group('required arguments') + + required.add_argument('--cutoff', required=True, type=float, help="""cutoff + used in contact analysis, will cluster results in + basicrta-{cutoff}/""") + parser.add_argument('--nproc', type=int, default=1, help="""number of + processes to use in multiprocessing""") + parser.add_argument('--niter', type=int, default=110000, help="""number of + iterations used in the gibbs sampler, used to load + gibbs_{niter}.pkl""") + parser.add_argument('--prot', type=str, nargs='?', help="""name of protein + in tm_dict.txt, used to draw TM bars in tau vs resid + plot""") parser.add_argument('--label_cutoff', type=float, default=3, dest='label_cutoff', - help='Only label residues with tau > ' - 'LABEL-CUTOFF * . ') - parser.add_argument('--structure', type=str, nargs='?') + help="""Only label residues with tau > + LABEL-CUTOFF * .""") + parser.add_argument('--structure', type=str, nargs='?', help="""will add tau + as bfactors to the structure if provided""") # use for default values parser.add_argument('--gskip', type=int, default=100, help='Gibbs skip parameter for decorrelated samples;' @@ -257,7 +269,7 @@ def get_parser(): help='Burn-in parameter, drop first N samples as equilibration;' 'default from https://pubs.acs.org/doi/10.1021/acs.jctc.4c01522') # this is to make the cli work, should be just a temporary solution - parser.add_argument('cluster', nargs='?') + parser.add_argument('cluster', nargs='?', help=argparse.SUPPRESS) return parser def main(): diff --git a/basicrta/combine.py b/basicrta/combine.py index 7bf3851..6d86799 100644 --- a/basicrta/combine.py +++ b/basicrta/combine.py @@ -145,11 +145,13 @@ def get_parser(): "and calculating posteriors from all data together." ) - parser.add_argument( + required = parser.add_argument_group('required arguments') + required.add_argument( '--contacts', nargs='+', required=True, - help="List of contact pickle files to combine (e.g., contacts_7.0.pkl from different runs)" + help="""List of contact pickle files to combine (e.g., contacts_7.0.pkl + from different runs)""", ) parser.add_argument( @@ -165,7 +167,7 @@ def get_parser(): help="Skip compatibility validation (use with caution)" ) # this is to make the cli work, should be just a temporary solution - parser.add_argument('combine', nargs='?') + parser.add_argument('combine', nargs='?', help=argparse.SUPPRESS) return parser def main(): diff --git a/basicrta/contacts.py b/basicrta/contacts.py index bd75759..807bef6 100644 --- a/basicrta/contacts.py +++ b/basicrta/contacts.py @@ -385,19 +385,21 @@ def main(): def get_parser(): import argparse - parser = argparse.ArgumentParser(description="Create the primary contact \ - map and collect contacts based on the \ - desired cutoff distance") - parser.add_argument('--top', type=str, help="Topology") - parser.add_argument('--traj', type=str, help="Trajectory") - parser.add_argument('--sel1', type=str, help="Primary atom selection, based \ + parser = argparse.ArgumentParser(description="""Create the initial contact + map and process it using a + prescribed cutoff""") + required = parser.add_argument_group('required arguments') + + required.add_argument('--top', type=str, help="Topology") + required.add_argument('--traj', type=str, help="Trajectory") + required.add_argument('--sel1', type=str, help="Primary atom selection, based \ on MDAnalysis atom selection. basicrta will produce \ tau for each residue in this atom group.") - parser.add_argument('--sel2', type=str, help="Secondary atom selection, \ + required.add_argument('--sel2', type=str, help="Secondary atom selection, \ based on MDAnalysis atom selection. basicrta will \ collect contacts between each residue of this group \ with each residue of `sel1`.") - parser.add_argument('--cutoff', type=float, help="""Value to use for defining + required.add_argument('--cutoff', type=float, help="""Value to use for defining a contact (in Angstrom). Any atom of `sel2` that is at a distance less than or equal to `cutoff` of any atom in `sel1` will be considered in contact.""", required=True) @@ -407,7 +409,7 @@ def get_parser(): slices to break the trajectory into. Increase this to reduce the amount of memory needed for each process.""") # this is to make the cli work, should be just a temporary solution - parser.add_argument('contacts', nargs='?') + parser.add_argument('contacts', nargs='?', help=argparse.SUPPRESS) return parser diff --git a/basicrta/gibbs.py b/basicrta/gibbs.py index b5b2e56..860ae44 100644 --- a/basicrta/gibbs.py +++ b/basicrta/gibbs.py @@ -853,16 +853,27 @@ def plot_surv(self, scale=1, remove_noise=False, save=False, xlim=None, def get_parser(): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(description="""run gibbs samplers for all + or a specified residue present in the + contact map""", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) required = parser.add_argument_group('required arguments') - optional = parser.add_argument_group('optional arguments') - required.add_argument('--contacts', required=True) - optional.add_argument('--resid', type=int) - optional.add_argument('--nproc', type=int, default=1) - optional.add_argument('--niter', type=int, default=110000) - optional.add_argument('--ncomp', type=int, default=15) + + required.add_argument('--contacts', required=True, help="""Contact file + produced from `basicrta contacts`, default is + contacts_{cutoff}.pkl""") + parser.add_argument('--resid', type=int, help="""run gibbs sampler for + this residue. Will collect cutoff from contact file + name.""") + parser.add_argument('--nproc', type=int, default=1, help="""number of + processes to use in multiprocessing""") + parser.add_argument('--niter', type=int, default=110000, help="""number of + iterations to use for the gibbs sampler""") + parser.add_argument('--ncomp', type=int, default=15, help="""number of + components to use for the exponential mixture + model""") # this is to make the cli work, should be just a temporary solution - parser.add_argument('gibbs', nargs='?') + parser.add_argument('gibbs', nargs='?', help=argparse.SUPPRESS) return parser def main(): diff --git a/basicrta/kinetics.py b/basicrta/kinetics.py index b0d6e69..06c0975 100644 --- a/basicrta/kinetics.py +++ b/basicrta/kinetics.py @@ -205,14 +205,23 @@ def weighted_densities(self, step=1, top_n=None, filterP=0): def get_parser(): import argparse - parser = argparse.ArgumentParser() - parser.add_argument("--gibbs", type=str) - parser.add_argument("--contacts", type=str) - parser.add_argument("--top_n", type=int, nargs='?', default=None) - parser.add_argument("--step", type=int, nargs='?', default=1) - parser.add_argument("--wdensity", action='store_true') + parser = argparse.ArgumentParser(description="""map kinetics from clustered + results onto trajectory, create weighted + densities if flag is used""") + required = parser.add_argument_group('required arguments') + required.add_argument("--gibbs", type=str, required=True, help="""gibbs pickle + file to use for creating kinetic trajectories and + densities""") + required.add_argument("--contacts", type=str, required=True, help="""contacts + file used in creation of the gibbs sampler data""") + parser.add_argument("--top_n", type=int, nargs='?', help="""use the `top_n` + most likely frames to create trajectory or densities""") + parser.add_argument("--step", type=int, nargs='?', default=1, help="""write + out frame if frame%%step=0""") + parser.add_argument("--wdensity", action='store_true', help="""create + weighted densities""") # this is to make the cli work, should be just a temporary solution - parser.add_argument('kinetics', nargs='?') + parser.add_argument('kinetics', nargs='?', help=argparse.SUPPRESS) return parser def main(): From 53695fe19c44c99b75c1993c7e9b54bfec795e8e Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Mon, 24 Nov 2025 15:57:18 -0600 Subject: [PATCH 11/16] added cli addition to CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2d7d6f..159b868 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The rules for this file: ### Authors * @orbeckst +* @rsexton2 ### Fixed * Have cluster.ProcessProtein.reprocess() record "no result" if @@ -26,6 +27,9 @@ The rules for this file: number of samples. Otherwise `python -m cluster` fails to process whole proteins. +### Added +* Added command-line interface for basicrta workflow + ## [1.1.3] - 2025-09-11 ### Authors From dbf1977ed7e5f5c514461c1e637ed6381bd3461d Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Mon, 24 Nov 2025 15:57:46 -0600 Subject: [PATCH 12/16] added documentation --- basicrta/cli.py | 37 ++++++++++++++++++++++++----- basicrta/cluster.py | 7 +++--- basicrta/combine.py | 11 +++++++-- basicrta/contacts.py | 10 ++++++++ basicrta/gibbs.py | 8 +++++++ basicrta/kinetics.py | 7 ++++++ docs/source/api.rst | 3 ++- docs/source/autosummary/cli.rst | 6 +++++ docs/source/autosummary/combine.rst | 6 +++++ 9 files changed, 83 insertions(+), 12 deletions(-) create mode 100644 docs/source/autosummary/cli.rst create mode 100644 docs/source/autosummary/combine.rst diff --git a/basicrta/cli.py b/basicrta/cli.py index d9ad3c4..a045694 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -1,6 +1,13 @@ """ -basicrta -A package to extract binding kinetics from molecular dynamics simulations +Command line functionality of basicrta. + +The `main()` function of this module gets the argument parser from each of the +scripts below and executes the `main()` function of the module called. The +function also collects help from the subparsers and provides it at the command +line. + +Modules callable from the cli: contacts.py, gibbs.py, cluster.py, kinetics.py, +combine.py. """ from importlib.metadata import version @@ -12,15 +19,29 @@ __version__ = version("basicrta") +# define which scripts can be ran from cli +# can easily add functionality to cli as modules are added commands = ['contacts', 'gibbs', 'cluster', 'combine', 'kinetics'] -parser_help = ''' -Step in the basicrta workflow to execute. -''' def main(): + """ This module provides the functionality for a command line interface for + basicrta scripts. The scripts available to the cli are: + + * contacts.py + * gibbs.py + * cluster.py + * combine.py + * kinetics.py + + Each script is called and ran using the `main()` function of each module and + the parser is passed to the cli using the `get_parser()` function. Any + module added to the cli needs to have both functions. + """ parser = argparse.ArgumentParser(prog='basicrta', add_help=True) - subparsers = parser.add_subparsers(help=parser_help) + subparsers = parser.add_subparsers(help="""step in the basicrta workflow to + execute""") + # collect parser from each script in `commands` for command in commands: subparser = importlib.import_module(f"basicrta.{command}").get_parser() subparsers.add_parser(f'{command}', parents=[subparser], add_help=True, @@ -29,11 +50,15 @@ def main(): formatter_class=argparse.ArgumentDefaultsHelpFormatter, help=subparser.description) + # print subparser help if no arguments given if len(sys.argv) == 2 and sys.argv[1] in commands: subparsers.choices[f'{sys.argv[1]}'].print_help() sys.exit() + # print basicrta help if no subcommand given parser.parse_args(args=None if sys.argv[1:] else ['--help']) + + # execute basicrta script importlib.import_module(f"basicrta.{sys.argv[1]}").main() if __name__ == "__main__": diff --git a/basicrta/cluster.py b/basicrta/cluster.py index fde7f9f..83d4ed3 100644 --- a/basicrta/cluster.py +++ b/basicrta/cluster.py @@ -1,3 +1,7 @@ +"""This module provides the ProcessProtein class, which collects and processes +Gibbs sampler data. +""" + import os import gc import warnings @@ -11,9 +15,6 @@ from basicrta.gibbs import Gibbs gc.enable() -"""This module provides the ProcessProtein class, which collects and processes -Gibbs sampler data. -""" class ProcessProtein(object): r"""ProcessProtein is the class that collects and processes Gibbs sampler diff --git a/basicrta/combine.py b/basicrta/combine.py index 6d86799..8d27924 100644 --- a/basicrta/combine.py +++ b/basicrta/combine.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ -Command-line interface for combining contact timeseries from multiple repeat runs. +Combine contact timeseries from multiple repeat runs. This module provides functionality to combine contact files from multiple trajectory repeats, enabling pooled analysis of binding kinetics. @@ -138,7 +138,12 @@ def run(self): return self.output_name def get_parser(): - """Main function for combining contact files.""" + """Create parser, parse command line arguments, and return ArgumentParser + object. + + :return: An ArgumentParser instance with command line arguments stored. + :rtype: `ArgumentParser` object + """ parser = argparse.ArgumentParser( description="Combine contact timeseries from multiple repeat runs. " "This enables pooling data from multiple trajectory repeats " @@ -171,6 +176,8 @@ def get_parser(): return parser def main(): + """Execute this function when this script is called from the command line. + """ parser = get_parser() args = parser.parse_args() diff --git a/basicrta/contacts.py b/basicrta/contacts.py index 807bef6..f17503b 100644 --- a/basicrta/contacts.py +++ b/basicrta/contacts.py @@ -1,3 +1,13 @@ +""" +Create contact maps between two atom groups. + +This module provides the `MapContacts` class, which creates the initial contact +map between the two atom groups using a maximum cutoff (`max_cutoff`), which +provides for quicker processing if creating results for multiple cutoffs. The +`ProcessContacts` class takes the initial contact map and creates the processed +contact map based on the prescribed cutoff. +""" + from tqdm import tqdm from MDAnalysis.lib import distances from multiprocessing import Pool, Lock diff --git a/basicrta/gibbs.py b/basicrta/gibbs.py index 860ae44..0c49382 100644 --- a/basicrta/gibbs.py +++ b/basicrta/gibbs.py @@ -1,3 +1,11 @@ +""" +Perform Gibbs samplers and process data. + +This module provides the `ParallelGibbs` class, which parallelizes the creation +of Gibbs samplers for each residue in the contact map. This module also provides +the `Gibbs` class, which allows for the loading and processing of the gibbs +sampler data, as well as plotting and saving processed results. +""" import os import gc import pickle diff --git a/basicrta/kinetics.py b/basicrta/kinetics.py index 06c0975..818c390 100644 --- a/basicrta/kinetics.py +++ b/basicrta/kinetics.py @@ -1,3 +1,10 @@ +""" +Map kinetics from gibbs data to md trajectory. + +This module provides the `MapKinetics` class, which creates trajectories and +weighted densities based on the clustered gibbs data and original trajectory. +""" + from tqdm import tqdm from basicrta.util import get_start_stop_frames import numpy as np diff --git a/docs/source/api.rst b/docs/source/api.rst index ea1acdb..2580a39 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -9,5 +9,6 @@ API Documentation contacts cluster kinetics + combine util - + cli diff --git a/docs/source/autosummary/cli.rst b/docs/source/autosummary/cli.rst new file mode 100644 index 0000000..4a04c36 --- /dev/null +++ b/docs/source/autosummary/cli.rst @@ -0,0 +1,6 @@ +cli +=== + +.. automodule:: cli + :members: + :undoc-members: \ No newline at end of file diff --git a/docs/source/autosummary/combine.rst b/docs/source/autosummary/combine.rst new file mode 100644 index 0000000..21eb381 --- /dev/null +++ b/docs/source/autosummary/combine.rst @@ -0,0 +1,6 @@ +combine +======= + +.. automodule:: combine + :members: + :undoc-members: \ No newline at end of file From dea8eb5ee36ab960bf68d8014efe733bb3d60819 Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Wed, 26 Nov 2025 14:20:37 -0600 Subject: [PATCH 13/16] added a few tests --- basicrta/tests/test_cli.py | 61 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 basicrta/tests/test_cli.py diff --git a/basicrta/tests/test_cli.py b/basicrta/tests/test_cli.py new file mode 100644 index 0000000..c7623f8 --- /dev/null +++ b/basicrta/tests/test_cli.py @@ -0,0 +1,61 @@ +""" +Tests for combining contact timeseries from multiple repeat runs. +""" +import basicrta +import os +import pytest +import numpy as np +import pickle +import subprocess +import basicrta.cli +import importlib +from basicrta.contacts import CombineContacts + + +class TestCLI: + """Test class for cli.py functionality.""" + modules = basicrta.cli.commands + + def test_cli_modules(self): + """Test cli as module""" + for module in self.modules: + #help successfully printed + help_ret = subprocess.run(['python', '-m', f'basicrta.{module}', + '--help']) + assert help_ret.returncode == 0 + + # error if required arguments not given + nohelp = subprocess.run(['python', '-m', f'basicrta.{module}']) + assert nohelp.returncode == 2 + + def test_cli_entrypoint(self): + # print general help if no command given + assert subprocess.run('basicrta').returncode == 0 + + for module in self.modules: + #help successfully printed + help_ret = subprocess.run(['basicrta', f'{module}', '--help']) + assert help_ret.returncode == 0 + + # help is printed if no arguments given + nohelp = subprocess.run(['basicrta', f'{module}']) + assert nohelp.returncode == 0 + + def test_get_parser(self): + import importlib + import argparse + for module in self.modules: + parser = importlib.import_module(f"basicrta.{module}").get_parser() + assert type(parser) == argparse.ArgumentParser + + def test_call_main_empty(self): + for module in self.modules: + with pytest.raises(SystemExit): + importlib.import_module(f"basicrta.{module}").main() + +# def test_call_main(self): +# +# for module in self.modules: +# with pytest.raises(SystemExit): +# importlib.import_module(f"basicrta.{module}").main() + From d50d4a1c00add1c26816fa64c18b7be85bc2140c Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Wed, 26 Nov 2025 14:56:24 -0600 Subject: [PATCH 14/16] added test --- basicrta/cli.py | 2 +- basicrta/tests/test_cli.py | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/basicrta/cli.py b/basicrta/cli.py index a045694..b9e6822 100644 --- a/basicrta/cli.py +++ b/basicrta/cli.py @@ -11,7 +11,7 @@ """ from importlib.metadata import version -from basicrta import * +import basicrta import argparse import subprocess import importlib diff --git a/basicrta/tests/test_cli.py b/basicrta/tests/test_cli.py index c7623f8..e6e09b2 100644 --- a/basicrta/tests/test_cli.py +++ b/basicrta/tests/test_cli.py @@ -9,6 +9,7 @@ import subprocess import basicrta.cli import importlib +import argparse from basicrta.contacts import CombineContacts @@ -41,9 +42,7 @@ def test_cli_entrypoint(self): nohelp = subprocess.run(['basicrta', f'{module}']) assert nohelp.returncode == 0 - def test_get_parser(self): - import importlib - import argparse + def test_get_module_parsers(self): for module in self.modules: parser = importlib.import_module(f"basicrta.{module}").get_parser() assert type(parser) == argparse.ArgumentParser @@ -53,9 +52,19 @@ def test_call_main_empty(self): with pytest.raises(SystemExit): importlib.import_module(f"basicrta.{module}").main() -# def test_call_main(self): -# -# for module in self.modules: -# with pytest.raises(SystemExit): -# importlib.import_module(f"basicrta.{module}").main() + def test_cli_script_call(self): + #help successfully printed + help_ret = subprocess.run(['python', '-m', 'basicrta.cli', + '--help']) + assert help_ret.returncode == 0 + + # error if required arguments not given + nohelp = subprocess.run(['python', '-m', 'basicrta.cli']) + assert nohelp.returncode == 2 + + +# def test_call_main_args(self): +# with mock.patch('sys.argv', ['cluster', '--cutoff', '6.9']): +# importlib.import_module(f"basicrta.cluster").main() + From 3c5b372c1b3b08dfe1516aca032b2679eed9bc98 Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Wed, 26 Nov 2025 15:04:51 -0600 Subject: [PATCH 15/16] fixed test --- basicrta/tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basicrta/tests/test_cli.py b/basicrta/tests/test_cli.py index e6e09b2..e466240 100644 --- a/basicrta/tests/test_cli.py +++ b/basicrta/tests/test_cli.py @@ -58,9 +58,9 @@ def test_cli_script_call(self): '--help']) assert help_ret.returncode == 0 - # error if required arguments not given + # print help if arguments not given nohelp = subprocess.run(['python', '-m', 'basicrta.cli']) - assert nohelp.returncode == 2 + assert nohelp.returncode == 0 # def test_call_main_args(self): From 143c0e05d216260d0b98f649a2f5b6b2f34edb74 Mon Sep 17 00:00:00 2001 From: Ricky Sexton Date: Wed, 26 Nov 2025 15:13:26 -0600 Subject: [PATCH 16/16] referenced issue in CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 159b868..fb2dcb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ The rules for this file: whole proteins. ### Added -* Added command-line interface for basicrta workflow +* Added command-line interface for basicrta workflow (Issue #20) ## [1.1.3] - 2025-09-11