Skip to content

Commit e790626

Browse files
committed
Added basic molecular formula searching
1 parent f678f21 commit e790626

4 files changed

Lines changed: 128 additions & 45 deletions

File tree

Cargo.lock

Lines changed: 74 additions & 41 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,14 @@ path = "src/main.rs"
1515

1616
[dependencies]
1717
clap = { version = "4.5", features = ["derive", "cargo"] }
18-
colored = "2"
19-
itertools = "0.13"
18+
colored = "3"
19+
itertools = "0.14"
2020
rayon = "1.10"
2121
rustyms = "0.9.0"
2222

2323
[patch.crates-io]
2424
# rustyms = { git = "https://github.com/snijderlab/rustyms", branch = "main" }
25-
#rustyms = { path = "../rustyms/rustyms" }
25+
rustyms = { path = "../rustyms/rustyms" }
2626

2727
[profile.release]
2828
debug = true

src/cli.rs

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,11 @@ pub struct Cli {
170170
/// Multiple positions can be specified by using this argument multiple times.
171171
#[arg(long, value_parser=positions_parser)]
172172
pub positions: Option<Vec<(Vec<AminoAcid>, Position)>>,
173+
174+
/// Search for a fitting molecular formula for this mass.
175+
// Contains the mass and number of digits
176+
#[arg(long = "formula", value_parser=formula_parser)]
177+
pub formula_target: Option<(Mass, usize)>,
173178
}
174179

175180
impl Cli {
@@ -189,6 +194,23 @@ impl Cli {
189194
}
190195
}
191196

197+
fn formula_parser(value: &str) -> Result<(Mass, usize), String> {
198+
let target = Mass::new::<rustyms::system::dalton>(value.parse::<f64>().map_err(|err| {
199+
format!("Given target mass for formula search is not a valid number: {err}")
200+
})?);
201+
Ok((
202+
target,
203+
if let Some((_, tail)) = value.split_once('.') {
204+
tail.to_lowercase()
205+
.split_once('e')
206+
.map_or(tail, |(t, _)| t)
207+
.len()
208+
} else {
209+
0
210+
},
211+
))
212+
}
213+
192214
fn positions_parser(value: &str) -> Result<(Vec<AminoAcid>, Position), String> {
193215
value
194216
.split_once('@')

src/main.rs

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ use itertools::Itertools;
55
use rayon::prelude::*;
66
use rustyms::align::par_consecutive_align;
77
use rustyms::imgt::Selection;
8-
use rustyms::system::Mass;
8+
use rustyms::system::{dalton, Mass};
99
use rustyms::{
1010
align::*,
1111
find_isobaric_sets, imgt,
@@ -18,6 +18,8 @@ use rustyms::{
1818
AminoAcid, AtMax, Chemical, MassMode, MolecularFormula, Multi, Peptidoform, SimpleLinear,
1919
Tolerance, UnAmbiguous,
2020
};
21+
use rustyms::{find_formulas, Element};
22+
use std::num::NonZeroU16;
2123
use std::{
2224
collections::HashSet,
2325
io::{BufWriter, Write},
@@ -414,6 +416,32 @@ fn main() {
414416
}
415417
display_germline(allele, &args);
416418
}
419+
} else if let Some(target) = args.formula_target {
420+
const DEFAULT_ELEMENTS: &[(Element, Option<NonZeroU16>)] = &[
421+
(Element::H, None),
422+
(Element::C, None),
423+
(Element::O, None),
424+
(Element::N, None),
425+
(Element::S, None),
426+
];
427+
let mut data = vec![["Formula".to_string(), "Mass".to_string()]];
428+
for formula in find_formulas(
429+
target.0,
430+
Tolerance::Absolute(Mass::new::<dalton>(10.0_f64.powf(-(target.1 as f64)) / 2.0)),
431+
DEFAULT_ELEMENTS,
432+
)
433+
.iter()
434+
{
435+
data.push([
436+
formula.hill_notation_fancy(),
437+
display_mass(
438+
formula.monoisotopic_mass(),
439+
false,
440+
Some((target.1 + 2).max(3)),
441+
),
442+
]);
443+
}
444+
table(&data, true, &[Styling::none(), Styling::none()]);
417445
} else {
418446
println!("Please provide an argument to work with, use --help to see all options.")
419447
}

0 commit comments

Comments
 (0)