Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions scripts/ClusterMSA.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def plot_landscape(x, y, df, query_, plot_type):
p.add_argument('--eps_val', action='store', type=float, help="Use single value for eps instead of scanning.")
p.add_argument('--resample', action='store_true', help='If included, will resample the original MSA with replacement before writing.')
p.add_argument("--gap_cutoff", action='store', type=float, default=0.25, help='Remove sequences with gaps representing more than this frac of seq.')
p.add_argument('--min_eps', action='store',default=3, help='Min epsilon value to scan for DBSCAN (Default 3).')
p.add_argument('--max_eps', action='store',default=20, help='Max epsilon value to scan for DBSCAN (Default 20).')
p.add_argument('--eps_step', action='store',default=.5, help='step for epsilon scan for DBSCAN (Default 0.5).')
p.add_argument('--min_samples', action='store',default=3, help='Default min_samples for DBSCAN (Default 3, recommended no lower than that).')
p.add_argument('--min_eps', action='store',default=3, type=int, help='Min epsilon value to scan for DBSCAN (Default 3).')
p.add_argument('--max_eps', action='store',default=20, type=int, help='Max epsilon value to scan for DBSCAN (Default 20).')
p.add_argument('--eps_step', action='store',default=.5, type=float, help='step for epsilon scan for DBSCAN (Default 0.5).')
p.add_argument('--min_samples', action='store',default=3, type=int, help='Default min_samples for DBSCAN (Default 3, recommended no lower than that).')

p.add_argument('--run_PCA', action='store_true', help='Run PCA on one-hot embedding of sequences and store in output_cluster_metadata.tsv')
p.add_argument('--run_TSNE', action='store_true', help='Run TSNE on one-hot embedding of sequences and store in output_cluster_metadata.tsv')
Expand Down Expand Up @@ -197,7 +197,7 @@ def plot_landscape(x, y, df, query_, plot_type):

if args.run_TSNE:
lprint('Running TSNE ...',f)
ohe_vecs = encode_seqs(df.sequence.tolist()+[query_.sequence.tolist()], max_len=L)
ohe_vecs = encode_seqs(df.sequence.tolist()+query_.sequence.tolist(), max_len=L)
# different than PCA because tSNE doesn't have .transform attribute

mdl = TSNE()
Expand Down