diff --git a/src/Common/CommonStandard/Algorithm/Function/SpectrumHandler.cs b/src/Common/CommonStandard/Algorithm/Function/SpectrumHandler.cs index 4b05eaece..9f2a44055 100644 --- a/src/Common/CommonStandard/Algorithm/Function/SpectrumHandler.cs +++ b/src/Common/CommonStandard/Algorithm/Function/SpectrumHandler.cs @@ -64,6 +64,11 @@ public static List GetNormalizedPeaks(List spectrum, return spectrum.Select(n => new SpectrumPeak { Mass = n.Mass, Intensity = Math.Pow(n.Intensity, powFactor) / maxIntensity * maxValue }).ToList(); } + public static List GetNormalizedByTotalIntensityPeaks(List spectrum) { + var sumIntensity = spectrum.Sum(n => n.Intensity); + return spectrum.Select(n => new SpectrumPeak { Mass = n.Mass, Intensity = n.Intensity / sumIntensity }).ToList(); + } + public static List GetBinnedSpectrum(List spectrum, double delta = 100, int maxPeaks = 12) { var peaks = new List(); diff --git a/src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs b/src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs index bba198baa..87e76bf0d 100644 --- a/src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs +++ b/src/Common/CommonStandard/Algorithm/Scoring/MsScanMatching.cs @@ -792,11 +792,13 @@ public static double[] GetMatchedPeaksScores(List peaks1, List peaks1, List peaks2, double bin) { - var combinedSpectrum = SpectrumHandler.GetCombinedSpectrum(peaks1, peaks2, bin); + public static double GetSpectralEntropySimilarity(List peaks1, List peaks2, double bin) { + if (!IsComparedAvailable(peaks1, peaks2)) return -1d; + + var combinedSpectrum = SpectrumHandler.GetCombinedSpectrum(SpectrumHandler.GetNormalizedByTotalIntensityPeaks(peaks1), SpectrumHandler.GetNormalizedByTotalIntensityPeaks(peaks2), bin); var entropy12 = GetSpectralEntropy(combinedSpectrum); - var entropy1 = GetSpectralEntropy(peaks1); - var entropy2 = GetSpectralEntropy(peaks2); + var entropy1 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks1, bin)); + var entropy2 = GetSpectralEntropy(SpectrumHandler.GetBinnedSpectrum(peaks2, bin)); return 1 - (2 * entropy12 - entropy1 - entropy2) * 0.5; } @@ -4070,6 +4072,155 @@ public static double GetReverseDotProduct(IMSScanProperty prop1, IMSScanProperty else { return Math.Pow(covariance, 2) / scalarM / scalarR * peakCountPenalty; } } + /// + /// Calculates an enhanced dot product–based spectral similarity score between a query scan + /// and a reference scan over a specified m/z range. + /// This method is intended to evaluate the similarity of an experimental MS/MS spectrum + /// with respect to a reference (library or theoretical) spectrum. + /// + /// The query scan to compare (experimental MS/MS spectrum). Must provide a non-null spectrum. + /// The reference scan to compare against (library or theoretical MS/MS spectrum). Must provide a non-null spectrum. + /// The bin width, in m/z units, used to merge and compare spectral peaks. Must be a positive value. + /// The lower bound of the m/z range to include in the comparison. + /// The upper bound of the m/z range to include in the comparison. + /// + /// A weighting factor controlling how unmatched peaks in the query scan contribute to the similarity score. + /// Values should be between 0 and 1: + /// a value of 1 yields behavior equivalent to the reverse dot product, + /// while a value of 0 yields behavior equivalent to the standard dot product. + /// + /// + /// A double value representing the enhanced dot product spectral similarity score. + /// Returns -1 if the scans cannot be compared, or 0 if no valid peaks are found + /// in the specified m/z range. + /// + /// + /// This method is based on the enhanced dot product similarity approach described in: + /// Xing, S., Charron-Lamoureux, V., Zhao, H. N., El Abiead, Y., Wang, M., & Dorrestein, P. C. + /// "Reverse Spectral Search Reimagined: A Simple but Overlooked Solution for Chimeric Spectral Annotation." + /// Analytical Chemistry, 2025, 97(33). + /// https://pubs.acs.org/doi/10.1021/acs.analchem.5c02047 + /// + public static double GetEnhancedDotProduct(IMSScanProperty prop1, IMSScanProperty prop2, double bin, + double massBegin, double massEnd, double penalty) { + double scalarM = 0, scalarR = 0, covariance = 0; + double sumM = 0, sumL = 0; + if (!IsComparedAvailable(prop1, prop2)) + { + return -1; + } + + var peaks1 = prop1.Spectrum; + var peaks2 = prop2.Spectrum; + + double minMz = Math.Max(massBegin, Math.Min(peaks1[0].Mass, peaks2[0].Mass)); + double maxMz = Math.Min(massEnd, Math.Max(peaks1[peaks1.Count - 1].Mass, peaks2[peaks2.Count - 1].Mass)); + + double focusedMz = minMz; + int remaindIndexM = 0, remaindIndexL = 0; + int counter = 0; + + SummedPeak[] measuredMassBuffer = ArrayPool.Shared.Rent(peaks1.Count + peaks2.Count); + SummedPeak[] referenceMassBuffer = ArrayPool.Shared.Rent(peaks1.Count + peaks2.Count); + int size = 0; + + double sumMeasure = 0, sumReference = 0, baseM = double.MinValue, baseR = double.MinValue; + + while (focusedMz <= maxMz) { + sumL = 0; + for (int i = remaindIndexL; i < peaks2.Count; i++) { + if (peaks2[i].Mass < focusedMz - bin) continue; + else if (focusedMz - bin <= peaks2[i].Mass && peaks2[i].Mass < focusedMz + bin) + sumL += peaks2[i].Intensity; + else { remaindIndexL = i; break; } + } + + sumM = 0; + for (int i = remaindIndexM; i < peaks1.Count; i++) { + if (peaks1[i].Mass < focusedMz - bin) continue; + else if (focusedMz - bin <= peaks1[i].Mass && peaks1[i].Mass < focusedMz + bin) + sumM += peaks1[i].Intensity; + else { remaindIndexM = i; break; } + } + + if (sumM <= 0) { + measuredMassBuffer[size] = new SummedPeak(focusedMz: focusedMz, intensity: sumM); + if (sumM > baseM) baseM = sumM; + + referenceMassBuffer[size] = new SummedPeak(focusedMz: focusedMz, intensity: sumL); + if (sumL > baseR) baseR = sumL; + } + else { + measuredMassBuffer[size] = new SummedPeak(focusedMz: focusedMz, intensity: sumM); + if (sumM > baseM) baseM = sumM; + + referenceMassBuffer[size] = new SummedPeak(focusedMz: focusedMz, intensity: sumL); + if (sumL > baseR) baseR = sumL; + + counter++; + } + size++; + + if (focusedMz + bin > Math.Max(peaks1[peaks1.Count - 1].Mass, peaks2[peaks2.Count - 1].Mass)) break; + if (focusedMz + bin > peaks2[remaindIndexL].Mass && focusedMz + bin <= peaks1[remaindIndexM].Mass) + focusedMz = peaks1[remaindIndexM].Mass; + else if (focusedMz + bin <= peaks2[remaindIndexL].Mass && focusedMz + bin > peaks1[remaindIndexM].Mass) + focusedMz = peaks2[remaindIndexL].Mass; + else + focusedMz = Math.Min(peaks1[remaindIndexM].Mass, peaks2[remaindIndexL].Mass); + } + + if (baseM == 0 || baseR == 0) { + ArrayPool.Shared.Return(measuredMassBuffer); + ArrayPool.Shared.Return(referenceMassBuffer); + return 0; + } + + var eSpectrumCounter = 0; + var lSpectrumCounter = 0; + for (int i = 0; i < size; i++) { + measuredMassBuffer[i] = new SummedPeak(focusedMz: measuredMassBuffer[i].FocusedMz, intensity: measuredMassBuffer[i].Intensity / baseM); + referenceMassBuffer[i] = new SummedPeak(focusedMz: referenceMassBuffer[i].FocusedMz, intensity: referenceMassBuffer[i].Intensity / baseR); + sumMeasure += measuredMassBuffer[i].Intensity; + sumReference += referenceMassBuffer[i].Intensity; + + if (measuredMassBuffer[i].Intensity > 0.1) eSpectrumCounter++; + if (referenceMassBuffer[i].Intensity > 0.1) lSpectrumCounter++; + } + + var cutoff = 0.01; + for (int i = 0; i < size; i++) { + if (referenceMassBuffer[i].Intensity < cutoff) { + continue; + } + + if (measuredMassBuffer[i].Intensity == 0d) { + scalarM += measuredMassBuffer[i].Intensity * (1 - penalty) * measuredMassBuffer[i].FocusedMz; + } + else { + scalarM += measuredMassBuffer[i].Intensity * measuredMassBuffer[i].FocusedMz; + } + scalarR += referenceMassBuffer[i].Intensity * referenceMassBuffer[i].FocusedMz; + covariance += Math.Sqrt(measuredMassBuffer[i].Intensity * referenceMassBuffer[i].Intensity) * measuredMassBuffer[i].FocusedMz; + } + + ArrayPool.Shared.Return(measuredMassBuffer); + ArrayPool.Shared.Return(referenceMassBuffer); + + var peakCountPenalty = 1.0; + if (lSpectrumCounter == 1) peakCountPenalty = 0.75; + else if (lSpectrumCounter == 2) peakCountPenalty = 0.88; + else if (lSpectrumCounter == 3) peakCountPenalty = 0.94; + else if (lSpectrumCounter == 4) peakCountPenalty = 0.97; + + if (scalarM == 0 || scalarR == 0) { + return 0; + } + else { + return Math.Pow(covariance, 2) / scalarM / scalarR * peakCountPenalty; + } + } + /// /// This program will return so called dot product similarity as described in the previous resport. /// Stein, S. E. An Integrated Method for Spectrum Extraction. J.Am.Soc.Mass.Spectrom, 10, 770-781, 1999. diff --git a/src/Common/CommonStandard/DataObj/Result/MsScanMatchResult.cs b/src/Common/CommonStandard/DataObj/Result/MsScanMatchResult.cs index a36eb20e4..717fd9f19 100644 --- a/src/Common/CommonStandard/DataObj/Result/MsScanMatchResult.cs +++ b/src/Common/CommonStandard/DataObj/Result/MsScanMatchResult.cs @@ -126,6 +126,10 @@ public float ReverseDotProduct { public bool IsAnnotationSuggested { get; set; } = false; [Key(36)] public double CollisionEnergy { get; set; } + [Key(37)] + public float EnhancedDotProduct { get; set; } + [Key(38)] + public float SpectralEntropy { get; set; } public MsScanMatchResult Clone() { return (MsScanMatchResult)MemberwiseClone(); diff --git a/src/MSDIAL5/MsdialCore/Algorithm/Annotation/MsReferenceScorer.cs b/src/MSDIAL5/MsdialCore/Algorithm/Annotation/MsReferenceScorer.cs index 35181bf39..70f6bf689 100644 --- a/src/MSDIAL5/MsdialCore/Algorithm/Annotation/MsReferenceScorer.cs +++ b/src/MSDIAL5/MsdialCore/Algorithm/Annotation/MsReferenceScorer.cs @@ -40,6 +40,8 @@ public MsScanMatchResult CalculateScore(IMSIonProperty property, IMSScanProperty var sqweightedDotProduct = MsScanMatching.GetWeightedDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd); var sqsimpleDotProduct = MsScanMatching.GetSimpleDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd); var sqreverseDotProduct = MsScanMatching.GetReverseDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd); + var sqenhancedDotProduct = MsScanMatching.GetEnhancedDotProduct(scan, reference, parameter.Ms2Tolerance, parameter.MassRangeBegin, parameter.MassRangeEnd, .6d); + var spectrumEntropy = MsScanMatching.GetSpectralEntropySimilarity(scan.Spectrum, reference.Spectrum, parameter.Ms2Tolerance); var spectrumPenalty = reference.Spectrum != null && reference.Spectrum.Count == 1 ? true : false; double[] matchedPeaksScores = null; if (omics == TargetOmics.Lipidomics) { @@ -93,6 +95,8 @@ public MsScanMatchResult CalculateScore(IMSIonProperty property, IMSScanProperty MatchedPeaksCount = (float)matchedPeaksScores[1], AcurateMassSimilarity = (float)ms1Similarity, IsotopeSimilarity = (float)isotopeSimilarity, + EnhancedDotProduct = (float)Math.Sqrt(sqenhancedDotProduct), + SpectralEntropy = (float)spectrumEntropy, Source = source, AnnotatorID = id, Priority = priority, diff --git a/src/MSDIAL5/MsdialCore/Export/IAnalysisMetadataAccessor.cs b/src/MSDIAL5/MsdialCore/Export/IAnalysisMetadataAccessor.cs index d46d5e713..9b8502e46 100644 --- a/src/MSDIAL5/MsdialCore/Export/IAnalysisMetadataAccessor.cs +++ b/src/MSDIAL5/MsdialCore/Export/IAnalysisMetadataAccessor.cs @@ -70,6 +70,8 @@ protected virtual string[] GetHeadersCore() { "Simple dot product", "Weighted dot product", "Reverse dot product", + "Enhanced dot product", + "Spectrum entropy", "Matched peaks count", "Matched peaks percentage", "Total score", @@ -121,6 +123,8 @@ protected virtual Dictionary GetContentCore( { "Simple dot product", ValueOrNull(matchResult?.SimpleDotProduct, "F3") }, { "Weighted dot product", ValueOrNull(matchResult?.WeightedDotProduct, "F3") }, { "Reverse dot product", ValueOrNull(matchResult?.ReverseDotProduct, "F3") }, + { "Enhanced dot product", ValueOrNull(matchResult?.EnhancedDotProduct, "F3") }, + { "Spectrum entropy", ValueOrNull(matchResult?.SpectralEntropy, "F3") }, { "Matched peaks count", ValueOrNull(matchResult?.MatchedPeaksCount, "F2") }, { "Matched peaks percentage", ValueOrNull(matchResult?.MatchedPeaksPercentage, "F2") }, { "Total score", ValueOrNull(matchResult?.TotalScore, "F3") }, diff --git a/tests/MSDIAL5/MsdialCoreTests/Export/BaseAnalysisMetadataAccessorTests.cs b/tests/MSDIAL5/MsdialCoreTests/Export/BaseAnalysisMetadataAccessorTests.cs index e1bb6e8d2..d1a0087ba 100644 --- a/tests/MSDIAL5/MsdialCoreTests/Export/BaseAnalysisMetadataAccessorTests.cs +++ b/tests/MSDIAL5/MsdialCoreTests/Export/BaseAnalysisMetadataAccessorTests.cs @@ -39,6 +39,8 @@ public class BaseAnalysisMetadataAccessorTests "Simple dot product", "Weighted dot product", "Reverse dot product", + "Enhanced dot product", + "Spectrum entropy", "Matched peaks count", "Matched peaks percentage", "Total score",