Skip to content

Commit 49c10b3

Browse files
committed
Detect some more improper behaviour in ProForma
1 parent 52332cf commit 49c10b3

7 files changed

Lines changed: 138 additions & 19 deletions

File tree

mzcore/src/quantities/tolerance.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,27 @@ impl WithinTolerance<Mass, Mass> for Tolerance<OrderedMass> {
277277
}
278278
}
279279

280+
/// Lets a `Tolerance<OrderedMass>` compare an `OrderedMass` (left) with a `Mass` (right).
/// The left value is unwrapped with `into_inner` and the call is forwarded to another
/// `within` overload of this tolerance.
// NOTE(review): presumably this resolves to the `WithinTolerance<Mass, Mass>` impl — confirm.
impl WithinTolerance<OrderedMass, Mass> for Tolerance<OrderedMass> {
281+
#[inline]
282+
fn within(&self, a: &OrderedMass, b: &Mass) -> bool {
283+
self.within(&a.into_inner(), b)
284+
}
285+
}
286+
287+
/// Lets a `Tolerance<OrderedMass>` compare a `Mass` (left) with an `OrderedMass` (right).
/// The right value is unwrapped with `into_inner` and the call is forwarded to another
/// `within` overload of this tolerance.
// NOTE(review): presumably this resolves to the `WithinTolerance<Mass, Mass>` impl — confirm.
impl WithinTolerance<Mass, OrderedMass> for Tolerance<OrderedMass> {
288+
#[inline]
289+
fn within(&self, a: &Mass, b: &OrderedMass) -> bool {
290+
self.within(a, &b.into_inner())
291+
}
292+
}
293+
294+
/// Lets a `Tolerance<OrderedMass>` compare two `OrderedMass` values.
/// Both values are unwrapped with `into_inner` and the call is forwarded to another
/// `within` overload of this tolerance.
// NOTE(review): presumably this resolves to the `WithinTolerance<Mass, Mass>` impl — confirm.
impl WithinTolerance<OrderedMass, OrderedMass> for Tolerance<OrderedMass> {
295+
#[inline]
296+
fn within(&self, a: &OrderedMass, b: &OrderedMass) -> bool {
297+
self.within(&a.into_inner(), &b.into_inner())
298+
}
299+
}
300+
280301
impl WithinTolerance<Multi<Mass>, Multi<Mass>> for Tolerance<OrderedMass> {
281302
#[inline]
282303
fn within(&self, a: &Multi<Mass>, b: &Multi<Mass>) -> bool {

mzcore/src/sequence/modification.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,18 @@ impl Modification {
685685
}
686686
}
687687

688+
/// Get the underlying simple modification, whichever variant this is: the modification
/// itself for `Simple`, the `linker` for `CrossLink`, and the `modification` for `Ambiguous`.
/// The stored value is cloned, so the caller receives its own `SimpleModification`.
689+
pub fn get_simple(&self) -> SimpleModification {
690+
match self {
691+
Self::Simple(modification)
692+
| Self::CrossLink {
693+
linker: modification,
694+
..
695+
}
696+
| Self::Ambiguous { modification, .. } => modification.clone(),
697+
}
698+
}
699+
688700
/// Get a url for more information on this modification. Only defined for modifications from ontologies.
689701
pub fn ontology_url(&self) -> Option<String> {
690702
match self {

mzcore/src/sequence/peptidoform/compound_peptidoform_ion.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl CompoundPeptidoformIon {
146146
}
147147

148148
/// Get a mutable reference to the name, so it can be changed in place
149-
pub fn name_mut(&mut self) -> &mut String {
149+
pub const fn name_mut(&mut self) -> &mut String {
150150
&mut self.name
151151
}
152152
}

mzcore/src/sequence/peptidoform/parse.rs

Lines changed: 91 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@ use ordered_float::OrderedFloat;
66

77
use crate::{
88
ParserResult,
9-
chemistry::{Element, MolecularCharge, MolecularFormula},
10-
helper_functions::{self, *},
9+
chemistry::{Chemical, Element, MolecularCharge, MolecularFormula},
10+
helper_functions::*,
1111
ontology::Ontologies,
12+
quantities::{Tolerance, WithinTolerance},
1213
sequence::{
1314
AmbiguousLookup, AmbiguousLookupEntry, AminoAcid, CheckedAminoAcid, CompoundPeptidoformIon,
14-
CrossLinkLookup, Linked, MUPSettings, Peptidoform, PeptidoformIon, PlacementRule, Position,
15-
SequenceElement, SequencePosition, SimpleModification, SimpleModificationInner,
15+
CrossLinkLookup, Linked, MUPSettings, MassTag, Peptidoform, PeptidoformIon, PlacementRule,
16+
Position, SequenceElement, SequencePosition, SimpleModification, SimpleModificationInner,
1617
},
18+
system::OrderedMass,
1719
};
1820

1921
use super::{GlobalModification, Linear, ReturnModification, SemiAmbiguous};
@@ -274,6 +276,79 @@ impl CompoundPeptidoformIon {
274276
start = tail;
275277
}
276278

279+
// Extra checks that only run when `STRICT` is set. Every finding below is recorded through
// `combine_error` as a `BasicKind::Warning`, using a clone of `base_context`.
if STRICT {
280+
for p in peptidoforms.iter().flat_map(|pi| &pi.peptidoforms) {
281+
// The ambiguous group currently being read: its id and the amino acids collected so far
let mut ambiguous: Option<(std::num::NonZero<u32>, Vec<AminoAcid>)> = None;
282+
for s in p.sequence() {
283+
// Check if (iso)leucine ambiguity is written in the correct way
284+
if let Some(id) = s.ambiguous {
285+
ambiguous = match ambiguous {
286+
Some((i, mut seq)) if i == id => {
287+
seq.push(s.aminoacid.aminoacid());
288+
Some((i, seq))
289+
}
290+
// Either the first group or a group with a different id starts here: judge the
// group that just ended (if any), then start collecting the new one.
// NOTE(review): the check only fires when a later group starts, so the last
// ambiguous group of a peptidoform is not inspected by the code visible here —
// confirm whether a check after the loop over `p.sequence()` is needed.
prev => {
291+
if let Some((_, seq)) = prev
292+
&& (seq == vec![AminoAcid::Isoleucine]
293+
|| seq == vec![AminoAcid::Leucine])
294+
{
295+
combine_error(
296+
&mut errors,
297+
BoxedError::new(
298+
BasicKind::Warning,
299+
"Improper ambiguous leucine",
300+
"The amino acid 'J' should be used to represent leucine/isoleucine ambiguity",
301+
base_context.clone(),
302+
),
303+
);
304+
}
305+
Some((id, vec![s.aminoacid.aminoacid()]))
306+
}
307+
}
308+
}
309+
// Check if X is defined with a mod
310+
if s.aminoacid.aminoacid() == AminoAcid::Unknown && s.modifications.is_empty() {
311+
combine_error(
312+
&mut errors,
313+
BoxedError::new(
314+
BasicKind::Warning,
315+
"Improper unknown aminoacid",
316+
"The amino acid 'X' should always be followed by a modification to indicate the composition of this gap",
317+
base_context.clone(),
318+
),
319+
);
320+
}
321+
// Check that mass modifications tagged with an ontology match an entry of that ontology
for m in &s.modifications {
322+
if let &SimpleModificationInner::Mass(tag, mass, precision) =
323+
m.get_simple().as_ref()
324+
{
325+
// Absolute tolerance of 10^-precision / 2 dalton, with a precision of 6 when none is stored
// (presumably `precision` is the number of decimals that were written — confirm)
let tolerance: Tolerance<OrderedMass> = Tolerance::Absolute(
326+
crate::system::Mass::new::<crate::system::dalton>(
327+
10.0_f64.powf(-f64::from(precision.unwrap_or(6))) / 2.0,
328+
)
329+
.into(),
330+
);
331+
// Warn when no entry returned for the tagged ontology has a monoisotopic mass within tolerance.
// NOTE(review): `all` is also true for an empty iterator, so an ontology that yields
// no entries raises this warning as well — confirm this is intended.
if let MassTag::Ontology(ontology) = tag
332+
&& ontologies.data(&[ontology]).all(|m| {
333+
!tolerance.within(&mass, &m.formula().monoisotopic_mass())
334+
})
335+
{
336+
combine_error(
337+
&mut errors,
338+
BoxedError::new(
339+
BasicKind::Warning,
340+
"Improper prefixed mass modification",
341+
"A prefixed mass modification must use a modification that is defined in the referenced ontology",
342+
base_context.clone(),
343+
),
344+
);
345+
}
346+
}
347+
}
348+
}
349+
}
350+
}
351+
277352
if peptidoforms.is_empty() {
278353
combine_error(
279354
&mut errors,
@@ -390,20 +465,8 @@ impl CompoundPeptidoformIon {
390465
}
391466
}
392467

393-
if peptides.is_empty() {
394-
combine_error(
395-
&mut errors,
396-
BoxedError::new(
397-
BasicKind::Error,
398-
"No peptide found",
399-
"The peptidoform definition is empty",
400-
base_context.clone().add_highlight((0, range)),
401-
),
402-
);
403-
Err(errors)
404-
} else {
468+
if let Some(last) = peptides.last() {
405469
// Ensure that only one charge is set
406-
let last = peptides.last().unwrap();
407470
let c = last.get_charge_carriers().cloned();
408471
let len = peptides.len() - 1;
409472
for p in &mut peptides[..len] {
@@ -436,6 +499,17 @@ impl CompoundPeptidoformIon {
436499
} else {
437500
Ok(((peptidoform, index), errors))
438501
}
502+
} else {
503+
combine_error(
504+
&mut errors,
505+
BoxedError::new(
506+
BasicKind::Error,
507+
"No peptide found",
508+
"The peptidoform definition is empty",
509+
base_context.clone().add_highlight((0, range)),
510+
),
511+
);
512+
Err(errors)
439513
}
440514
}
441515

mzcore/src/sequence/peptidoform/peptidoform.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ impl<Complexity> Peptidoform<Complexity> {
379379
}
380380

381381
/// Get a mutable reference to the name, so it can be changed in place
382-
pub fn name_mut(&mut self) -> &mut String {
382+
pub const fn name_mut(&mut self) -> &mut String {
383383
&mut self.name
384384
}
385385
}

mzcore/src/sequence/peptidoform/peptidoform_ion.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,16 @@ impl PeptidoformIon {
128128
.and_then(|p| p.get_charge_carriers())
129129
}
130130

131+
/// Get the name of this peptidoform ion as a borrowed string slice
132+
pub fn name(&self) -> &str {
133+
&self.name
134+
}
135+
136+
/// Get a mutable reference to the name, so it can be changed in place
137+
pub const fn name_mut(&mut self) -> &mut String {
138+
&mut self.name
139+
}
140+
131141
/// Add a cross-link to this peptidoform and check if it is placed according to its placement rules.
132142
/// The positions are first the peptide index and second the sequence index.
133143
pub fn add_cross_link(

mzcore/src/sequence/peptidoform/tests/positive.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ Id,Example,Source,Key,Notes
233233
299,"EM[U:Oxidation]EVEES[U:Phospho]PEK",spec2_1,"",""
234234
300,"EM[M:L-methionine sulfoxide]EVEES[M:O-phospho-L-serine]PEK",spec2_1,"",""
235235
301,"EM[RESID:AA0581]EVEES[RESID:AA0037]PEK",spec2_1,"",""
236+
3010,"(>My (very) nice peptide)PEPTIDE", spec2_1, "casing_specific", ""
237+
3011,"(>Interesting ]][ yes | [valid] #description_not_tag)PEPTIDE", spec2_1, "casing_specific", ""
236238
302,"(>Trypsin)AANSIPYQVSLNS+(>Keratin)AKEQFERQTA",spec2_1,"casing_specific",""
237239
303,"(>P07225 Vitamin K-dependent protein S OS=Homo sapiens OX=9606 GN=PROS1 PE=1 (SV=1) RANGE=12..42)GGK[xlink:dss[138]#XLDSS]IEVQLK//(>P07225 Vitamin K-dependent protein S OS=Homo sapiens OX=9606 GN=PROS1 PE=1 SV=1)KVESELIK[#XLDSS]PINPR",spec2_1,"casing_specific",""
238240
304,"(>>>Trastuzumab Fab and coeluting Fc)(>>Fab)(>Heavy chain)EVQLVESGGGLVQPGGSLRLSC[M:l-cystine (cross-link)#XL1]AASGFNIKDTYIHWVRQAPGKGLEWVARIYPTNGYTRYADSVKGRFTISADTSKNTAYLQMNSLRAEDTAVYYC[#XL1]SRWGGDGFYAMDYWGQGTLVTVSSASTKGPSVFPLAPSSKSTSGGTAALGC[M:l-cystine (cross-link)#XL2]LVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSSLGTQTYIC[#XL2]NVNHKPSNTKVDKKVEPKSC[M:l-cystine (cross-link)#XL3]DKT//(>Light chain)DIQMTQSPSSLSASVGDRVTITC[M:l-cystine (cross-link)#XL4]RASQDVNTAVAWYQQKPGKAPKLLIYSASFLYSGVPSRFSGSRSGTDFTLTISSLQPEDFATYYC[#XL4]QQHYTTPPTFGQGTKVEIKRTVAAPSVFIFPPSDEQLKSGTASVVC[M:l-cystine (cross-link)#XL5]LLNNFYPREAKVQWKVDNALQSGNSQESVTEQDSKDSTYSLSSTLTLSKADYEKHKVYAC[#XL5]EVTHQGLSSPVTKSFNRGEC[#XL3]+(>Fc)HTCPPCPAPELLGGPSVFLFPPKPKDTLMISRTPEVTCVVVDVSHEDPEVKFNWYVDGVEVHNAKTKPREEQYNSTYRVVSVLTVLHQDWLNGKEYKCKVSNKALPAPIEKTISKAKGQPREPQVYTLPPSREEMTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPVLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPGK",spec2_1,"casing_specific",""

0 commit comments

Comments
 (0)