From ad6735c951dd3587970eb70d00895a0fa93fc972 Mon Sep 17 00:00:00 2001 From: venkatesh6114 Date: Sat, 2 May 2026 01:17:31 +0530 Subject: [PATCH 1/2] handle canonicalize failure in containerized environments --- src/prefs.rs | 16 +++++++++++++++- src/shim_filesystem.rs | 21 +++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/prefs.rs b/src/prefs.rs index 67108ee2..5e671f89 100644 --- a/src/prefs.rs +++ b/src/prefs.rs @@ -259,9 +259,23 @@ impl PreferenceManager { /// /// If rules_dir is an empty PathBuf, the existing rules_dir is used (an error if it doesn't exist) pub fn initialize(&mut self, rules_dir: PathBuf) -> Result<()> { + // Resolve the rules directory to an absolute, canonical path. + // If canonicalize() fails (e.g., ACCESS_DENIED in containers), fall back to: + // - returning the path as-is if it is already absolute, + // - prepending the current working directory if it is relative. + // Note: if current_dir() also fails, unwrap_or_default yields an empty PathBuf, + // and the result may remain relative. #[cfg(not(feature = "include-zip"))] let rules_dir = match rules_dir.canonicalize() { - Err(e) => bail!("set_rules_dir: could not canonicalize path {}: {}", rules_dir.display(), e), + Err(_e) => { + if rules_dir.is_absolute() { + rules_dir + } else { + std::env::current_dir() + .unwrap_or_default() + .join(&rules_dir) + } + }, Ok(rules_dir) => rules_dir, }; diff --git a/src/shim_filesystem.rs b/src/shim_filesystem.rs index 085d9024..b758a421 100644 --- a/src/shim_filesystem.rs +++ b/src/shim_filesystem.rs @@ -340,14 +340,31 @@ cfg_if! { } } + /// Resolves the path to an absolute, canonical form using the OS. + /// If `canonicalize()` fails (e.g., ACCESS_DENIED in containers), falls back to: + /// - returning the path as-is if it is already absolute, + /// - prepending the current working directory if it is relative. + /// Note: the fallback does not resolve symlinks or normalize `..`/`.` segments. 
pub fn canonicalize_shim(path: &Path) -> std::io::Result { - return path.canonicalize(); + match path.canonicalize() { + Ok(p) => Ok(p), + Err(_) => { + if path.is_absolute() { + Ok(path.to_path_buf()) + } else { + // Prepend cwd to make the relative path absolute. + // unwrap_or_default yields an empty PathBuf if cwd is unavailable, + // in which case the returned path will still be relative. + Ok(std::env::current_dir().unwrap_or_default().join(path)) + } + } + } } pub fn read_to_string_shim(path: &Path) -> Result { let path = match path.canonicalize() { Ok(path) => path, - Err(e) => bail!("Read error while trying to canonicalize in read_to_string_shim {}: {}", path.display(), e), + Err(_) => path.to_path_buf(), }; debug!("Reading file '{}'", &path.display()); match std::fs::read_to_string(&path) { From 45f114834fee73a6309e6aaed46a9f5fd72372e5 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 4 May 2026 09:37:45 +0000 Subject: [PATCH 2/2] chore: refresh llvm-cov HTML report [skip ci] ad6735c951dd3587970eb70d00895a0fa93fc972 --- .../runner/work/MathCAT/MathCAT/src/bin/mathml2text.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/braille.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/canonicalize.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/chemistry.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/definitions.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/infer_intent.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/interface.rs.html | 2 +- .../coverage/home/runner/work/MathCAT/MathCAT/src/lib.rs.html | 2 +- .../coverage/home/runner/work/MathCAT/MathCAT/src/main.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/navigate.rs.html | 2 +- .../coverage/home/runner/work/MathCAT/MathCAT/src/prefs.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/pretty_print.rs.html | 2 +- .../runner/work/MathCAT/MathCAT/src/shim_filesystem.rs.html | 2 +- .../home/runner/work/MathCAT/MathCAT/src/speech.rs.html | 2 +- 
.../coverage/home/runner/work/MathCAT/MathCAT/src/tts.rs.html | 2 +- .../runner/work/MathCAT/MathCAT/src/xpath_functions.rs.html | 2 +- docs/llvm-cov/html/index.html | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/bin/mathml2text.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/bin/mathml2text.rs.html index 88a1a196..c5f32ffd 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/bin/mathml2text.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/bin/mathml2text.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/bin/mathml2text.rs
Line
Count
Source
1
// *** MathCAT doesn't normally want to build a binary ***
2
// *** This file is here because it is useful for trying out things ***
3
#![allow(clippy::needless_return)]
4
5
use libmathcat::{errors::*, interface::*};
6
use log::*;
7
use std::path::PathBuf;
8
use clap::{Parser, ValueEnum};
9
10
// Maybe also have this speak to test the TTS generation.
11
// There is a rust winapi crate that mirrors the WinPAI and has "Speak(...)" in it
12
13
// env RUST_LOG=DEBUG cargo run --features "include-zip"
14
0
fn get_rules_dir() -> String {
15
    // for testing with zipped rules dir
16
    // let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../../MathCATForPython/addon/globalPlugins/MathCAT/Rules");
17
0
    let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../Rules");
18
0
    return rules_path.as_os_str().to_str().unwrap().to_string();
19
0
}
20
21
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
22
enum OutputType {
23
    Text,
24
    Braille,
25
    #[cfg(feature="tts")]
26
    Speech,
27
}
28
29
#[derive(Parser)]
30
#[command(version, about)]
31
struct Options {
32
    #[arg(short, long)]
33
    rules_dir: Option<PathBuf>,
34
35
    input_file: Option<PathBuf>,
36
37
    #[arg(short, long, default_value="en")]
38
    language: String,
39
40
    #[arg(value_enum, long, default_value="text")]
41
    output: OutputType,
42
}
43
44
45
0
fn main() -> Result<()> {
46
0
    env_logger::builder()
47
0
      .format_timestamp(None)
48
0
      .format_module_path(false)
49
0
      .format_indent(Some(2))
50
0
      .format_level(false)
51
0
      .init();
52
53
0
    let cli = Options::parse();
54
55
0
    let expr = if let Some(f) = cli.input_file {
56
0
  std::fs::read_to_string(&f).with_context(|| format!("unable to open {}", f.to_str().unwrap_or_default()))?
57
    } else {
58
0
        r#"
59
0
            <math xmlns="http://www.w3.org/1998/Math/MathML"><mo>(</mo><mn>1</mn><mo>)</mo></math>
60
0
    "#.to_string()
61
    };
62
63
0
    if let Err(e) = set_rules_dir(get_rules_dir()) {
64
0
  panic!("Error: exiting -- {}", errors_to_string(&e));
65
0
    }
66
0
    debug!("Languages: {}", libmathcat::interface::get_supported_languages()?.join(", "));
67
68
    #[cfg(feature = "include-zip")]
69
    info!("***********include-zip is present**********");
70
0
    info!("Version = '{}' using Rules dir {}", get_version(), get_rules_dir());
71
0
    set_preference("Language", cli.language)?;
72
73
0
    set_preference("DecimalSeparator", "Auto").unwrap();
74
0
    set_preference("BrailleCode", "Nemeth").unwrap();
75
0
    set_preference("TTS", "None").unwrap();
76
0
    set_preference("Verbosity", "Verbose").unwrap();
77
0
    set_preference("NavVerbosity", "Verbose").unwrap();
78
0
    set_preference("NavMode", "Enhanced").unwrap();
79
0
    set_preference("Impairment", "Blindness").unwrap();
80
0
    set_preference("SpeechOverrides_CapitalLetters", "").unwrap();
81
0
    set_preference("MathRate", "80").unwrap();
82
0
    set_preference("CapitalLetters_Beep", "true").unwrap();
83
0
    set_preference("IntentErrorRecovery", "Error").unwrap();
84
85
0
    set_preference("Bookmark", "false").unwrap();
86
0
    set_preference("SpeechStyle", "ClearSpeak").unwrap();
87
0
    info!("Languages: {}", libmathcat::interface::get_supported_languages()?.join(", "));
88
0
    info!("Speech styles: {}", libmathcat::interface::get_supported_speech_styles("ClearSpeak")?.join(", "));
89
0
    info!("BrailleCodes: {}", libmathcat::interface::get_supported_braille_codes()?.join(", "));
90
91
0
    debug!("Speech language is {}", get_preference("Language").unwrap());
92
0
    debug!("DecimalSeparator: {:?}", get_preference("DecimalSeparator").unwrap());
93
0
    debug!("DecimalSeparators: {:?}, BlockSeparators: {:?}", get_preference("DecimalSeparators").unwrap(), get_preference("BlockSeparators").unwrap());
94
0
    debug!("SpeechStyle: {:?}", get_preference("SpeechStyle").unwrap());
95
0
    debug!("Verbosity: {:?}", get_preference("Verbosity").unwrap());
96
97
0
    match set_mathml(&expr) {
98
0
  Err(e) => {
99
0
      panic!("Error: exiting -- {}", errors_to_string(&e));
100
  },
101
0
  Ok(fmt) => {
102
0
      info!("formatted input mathml into {fmt}");
103
  }
104
    }
105
106
0
    match cli.output {
107
  OutputType::Text => {
108
0
      match get_spoken_text() {
109
0
    Ok(speech) => println!("{speech}"),
110
0
    Err(e) => panic!("{}", errors_to_string(&e)),
111
      }
112
  },
113
  OutputType::Braille => {
114
0
      debug!("...using BrailleCode: {:?}", get_preference("BrailleCode").unwrap());
115
0
      match get_braille("") {
116
0
    Ok(braille) => println!("{braille}"),
117
0
    Err(e) => panic!("{}", errors_to_string(&e)),
118
      }
119
  },
120
  #[cfg(feature="tts")]
121
  OutputType::Speech => {
122
      // Create the NaturalTts struct using the builder pattern.
123
      let mut natural = natural_tts::NaturalTtsBuilder::default()
124
    .gtts_model(natural_tts::models::gtts::GttsModel::default())
125
    .default_model(natural_tts::Model::Gtts)
126
    .build().expect("failed to generate natural tts gtts model");
127
128
129
      // Start producing an output using the default_model.
130
      let _ = natural.start(get_spoken_text().unwrap(), &PathBuf::from("output.wav"));
131
132
      // Play the audio until it finishes
133
      natural.sleep_until_end();
134
  }
135
    }
136
137
0
    Ok(())
138
0
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/bin/mathml2text.rs
Line
Count
Source
1
// *** MathCAT doesn't normally want to build a binary ***
2
// *** This file is here because it is useful for trying out things ***
3
#![allow(clippy::needless_return)]
4
5
use libmathcat::{errors::*, interface::*};
6
use log::*;
7
use std::path::PathBuf;
8
use clap::{Parser, ValueEnum};
9
10
// Maybe also have this speak to test the TTS generation.
11
// There is a rust winapi crate that mirrors the WinPAI and has "Speak(...)" in it
12
13
// env RUST_LOG=DEBUG cargo run --features "include-zip"
14
0
fn get_rules_dir() -> String {
15
    // for testing with zipped rules dir
16
    // let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../../MathCATForPython/addon/globalPlugins/MathCAT/Rules");
17
0
    let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../Rules");
18
0
    return rules_path.as_os_str().to_str().unwrap().to_string();
19
0
}
20
21
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
22
enum OutputType {
23
    Text,
24
    Braille,
25
    #[cfg(feature="tts")]
26
    Speech,
27
}
28
29
#[derive(Parser)]
30
#[command(version, about)]
31
struct Options {
32
    #[arg(short, long)]
33
    rules_dir: Option<PathBuf>,
34
35
    input_file: Option<PathBuf>,
36
37
    #[arg(short, long, default_value="en")]
38
    language: String,
39
40
    #[arg(value_enum, long, default_value="text")]
41
    output: OutputType,
42
}
43
44
45
0
fn main() -> Result<()> {
46
0
    env_logger::builder()
47
0
      .format_timestamp(None)
48
0
      .format_module_path(false)
49
0
      .format_indent(Some(2))
50
0
      .format_level(false)
51
0
      .init();
52
53
0
    let cli = Options::parse();
54
55
0
    let expr = if let Some(f) = cli.input_file {
56
0
  std::fs::read_to_string(&f).with_context(|| format!("unable to open {}", f.to_str().unwrap_or_default()))?
57
    } else {
58
0
        r#"
59
0
            <math xmlns="http://www.w3.org/1998/Math/MathML"><mo>(</mo><mn>1</mn><mo>)</mo></math>
60
0
    "#.to_string()
61
    };
62
63
0
    if let Err(e) = set_rules_dir(get_rules_dir()) {
64
0
  panic!("Error: exiting -- {}", errors_to_string(&e));
65
0
    }
66
0
    debug!("Languages: {}", libmathcat::interface::get_supported_languages()?.join(", "));
67
68
    #[cfg(feature = "include-zip")]
69
    info!("***********include-zip is present**********");
70
0
    info!("Version = '{}' using Rules dir {}", get_version(), get_rules_dir());
71
0
    set_preference("Language", cli.language)?;
72
73
0
    set_preference("DecimalSeparator", "Auto").unwrap();
74
0
    set_preference("BrailleCode", "Nemeth").unwrap();
75
0
    set_preference("TTS", "None").unwrap();
76
0
    set_preference("Verbosity", "Verbose").unwrap();
77
0
    set_preference("NavVerbosity", "Verbose").unwrap();
78
0
    set_preference("NavMode", "Enhanced").unwrap();
79
0
    set_preference("Impairment", "Blindness").unwrap();
80
0
    set_preference("SpeechOverrides_CapitalLetters", "").unwrap();
81
0
    set_preference("MathRate", "80").unwrap();
82
0
    set_preference("CapitalLetters_Beep", "true").unwrap();
83
0
    set_preference("IntentErrorRecovery", "Error").unwrap();
84
85
0
    set_preference("Bookmark", "false").unwrap();
86
0
    set_preference("SpeechStyle", "ClearSpeak").unwrap();
87
0
    info!("Languages: {}", libmathcat::interface::get_supported_languages()?.join(", "));
88
0
    info!("Speech styles: {}", libmathcat::interface::get_supported_speech_styles("ClearSpeak")?.join(", "));
89
0
    info!("BrailleCodes: {}", libmathcat::interface::get_supported_braille_codes()?.join(", "));
90
91
0
    debug!("Speech language is {}", get_preference("Language").unwrap());
92
0
    debug!("DecimalSeparator: {:?}", get_preference("DecimalSeparator").unwrap());
93
0
    debug!("DecimalSeparators: {:?}, BlockSeparators: {:?}", get_preference("DecimalSeparators").unwrap(), get_preference("BlockSeparators").unwrap());
94
0
    debug!("SpeechStyle: {:?}", get_preference("SpeechStyle").unwrap());
95
0
    debug!("Verbosity: {:?}", get_preference("Verbosity").unwrap());
96
97
0
    match set_mathml(&expr) {
98
0
  Err(e) => {
99
0
      panic!("Error: exiting -- {}", errors_to_string(&e));
100
  },
101
0
  Ok(fmt) => {
102
0
      info!("formatted input mathml into {fmt}");
103
  }
104
    }
105
106
0
    match cli.output {
107
  OutputType::Text => {
108
0
      match get_spoken_text() {
109
0
    Ok(speech) => println!("{speech}"),
110
0
    Err(e) => panic!("{}", errors_to_string(&e)),
111
      }
112
  },
113
  OutputType::Braille => {
114
0
      debug!("...using BrailleCode: {:?}", get_preference("BrailleCode").unwrap());
115
0
      match get_braille("") {
116
0
    Ok(braille) => println!("{braille}"),
117
0
    Err(e) => panic!("{}", errors_to_string(&e)),
118
      }
119
  },
120
  #[cfg(feature="tts")]
121
  OutputType::Speech => {
122
      // Create the NaturalTts struct using the builder pattern.
123
      let mut natural = natural_tts::NaturalTtsBuilder::default()
124
    .gtts_model(natural_tts::models::gtts::GttsModel::default())
125
    .default_model(natural_tts::Model::Gtts)
126
    .build().expect("failed to generate natural tts gtts model");
127
128
129
      // Start producing an output using the default_model.
130
      let _ = natural.start(get_spoken_text().unwrap(), &PathBuf::from("output.wav"));
131
132
      // Play the audio until it finishes
133
      natural.sleep_until_end();
134
  }
135
    }
136
137
0
    Ok(())
138
0
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/braille.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/braille.rs.html index a916a993..6cc40ec8 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/braille.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/braille.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/braille.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
use strum_macros::Display;
3
use sxd_document::dom::{Element, ChildOfElement};
4
use sxd_document::Package;
5
use crate::definitions::SPEECH_DEFINITIONS;
6
use crate::errors::*;
7
use crate::pretty_print::mml_to_string;
8
use crate::prefs::PreferenceManager;
9
use std::cell::Ref;
10
use regex::{Captures, Regex, RegexSet};
11
use phf::{phf_map, phf_set};
12
use crate::speech::{BRAILLE_RULES, SpeechRulesWithContext, braille_replace_chars, make_quoted_string};
13
use crate::canonicalize::get_parent;
14
use std::borrow::Cow;
15
use std::ops::Range;
16
use std::sync::LazyLock;
17
use log::{debug, error};
18
19
320
fn is_ueb_prefix(ch: char) -> bool {
20
320
    
matches!262
(ch, '⠼' | '⠈' | '⠘' | '⠸' | '⠐' | '⠨' | '⠰' | '⠠')
21
320
}
22
23
/// Returns the braille *char* at the given position in the braille string.
24
971
fn braille_at(braille: &str, index: usize) -> char {
25
    // braille is always 3 bytes per char
26
971
    return braille[index..index+3].chars().next().unwrap();
27
28
971
}
29
30
/// braille the MathML
31
/// If 'nav_node_id' is not an empty string, then the element with that id will have dots 7 & 8 turned on as per the pref
32
/// Returns the braille string (highlighted) along with the *character* start/end of the highlight (whole string if no highlight)
33
1.82k
pub fn braille_mathml(mathml: Element, nav_node_id: &str) -> Result<(String, usize, usize)> {
34
1.82k
    return BRAILLE_RULES.with(|rules| {
35
1.82k
        rules.borrow_mut().read_files()
?0
;
36
1.82k
        let rules = rules.borrow();
37
1.82k
        let new_package = Package::new();
38
1.82k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), nav_node_id, 0);
39
1.82k
        let braille_string = rules_with_context.match_pattern::<String>(mathml)
40
1.82k
                        .context("Pattern match/replacement failure!")
?0
;
41
        // debug!("braille_mathml: braille string: {}", &braille_string);
42
1.82k
        let braille_string = braille_string.replace(' ', "");
43
1.82k
        let pref_manager = rules_with_context.get_rules().pref_manager.borrow();
44
1.82k
        let highlight_style = pref_manager.pref_to_string("BrailleNavHighlight");
45
1.82k
        let braille_code = pref_manager.pref_to_string("BrailleCode");
46
1.82k
        let braille = match braille_code.as_str() {
47
1.82k
            "Nemeth" => 
nemeth_cleanup888
(
pref_manager888
,
braille_string888
),
48
941
            "UEB" => 
ueb_cleanup366
(
pref_manager366
,
braille_string366
),
49
575
            "Vietnam" => 
vietnam_cleanup112
(
pref_manager112
,
braille_string112
),
50
463
            "CMU" => 
cmu_cleanup372
(
pref_manager372
,
braille_string372
),
51
91
            "Finnish" => 
finnish_cleanup0
(
pref_manager0
,
braille_string0
),
52
91
            "Swedish" => 
swedish_cleanup0
(
pref_manager0
,
braille_string0
),
53
91
            "LaTeX" => 
LaTeX_cleanup50
(
pref_manager50
,
braille_string50
),
54
41
            "ASCIIMath" => ASCIIMath_cleanup(pref_manager, braille_string),
55
0
            "ASCIIMath-fi" => ASCIIMath_cleanup(pref_manager, braille_string),
56
0
            _ => braille_string.trim_matches('⠀').to_string(),    // probably needs cleanup if someone has another code, but this will have to get added by hand
57
        };
58
59
        return Ok(
60
1.82k
            if highlight_style != "Off" {
61
520
                highlight_braille_chars(braille, &braille_code, highlight_style == "All")
62
            } else {
63
1.30k
                let end = braille.len()/3;
64
1.30k
                (braille, 0, end)
65
            }
66
        );
67
1.82k
    });
68
69
    /// highlight with dots 7 & 8 based on the highlight style
70
    /// both the start and stop points will be extended to deal with indicators such as capitalization
71
    /// if 'fill_range' is true, the interior will be highlighted
72
    /// Returns the braille string (highlighted) along with the [start, end) *character* of the highlight (whole string if no highlight)
73
520
    fn highlight_braille_chars(braille: String, braille_code: &str, fill_range: bool) -> (String, usize, usize) {
74
520
        let mut braille = braille;
75
        // some special (non-braille) chars weren't converted to having dots 7 & 8 to indicate navigation position
76
        // they need to be added to the start
77
78
        // find start and end (byte) indexes of the highlighted region (braille chars have length=3 bytes)
79
520
        let start = braille.find(is_highlighted);
80
520
        let end = braille.rfind(is_highlighted);
81
520
        if start.is_none() {
82
57
            assert!(end.is_none());
83
57
            let end = braille.len();
84
57
            return (braille, 0, end/3);
85
463
        };
86
87
463
        let start = start.unwrap();
88
463
        let mut end = end.unwrap() + 3;         // always exists if start exists ('end' is exclusive)
89
        // debug!("braille highlight: start/end={}/{}; braille={}", start/3, end/3, braille);
90
463
        let mut start = highlight_first_indicator(&mut braille, braille_code, start, end);
91
463
        if let Some(
new_range45
) = expand_highlight(&mut braille, braille_code, start, end) {
92
45
            (start, end) = new_range
93
418
        }
94
95
463
        if start == end {
96
0
            return (braille, start/3, end/3);
97
463
        }
98
99
463
        if !fill_range {
100
459
            return (braille, start/3, end/3);
101
4
        }
102
103
4
        let mut result = String::with_capacity(braille.len());
104
4
        result.push_str(&braille[..start]);
105
4
        let highlight_region =&mut braille[start..end];
106
8
        for ch in 
highlight_region4
.
chars4
() {
107
8
            result.push( highlight(ch) );
108
8
        };
109
4
        result.push_str(&braille[end..]);
110
4
        return (result, start/3, end/3);
111
112
        /// Return the byte index of the first place to highlight
113
463
        fn highlight_first_indicator(braille: &mut String, braille_code: &str, start_index: usize, end_index: usize) -> usize {
114
            // chars in the braille block range use 3 bytes -- we can use that to optimize the code some
115
463
            let first_ch = unhighlight(braille_at(braille, start_index));
116
117
            // need to highlight (optional) capital/number, language, and style (max 2 chars) also in that (rev) order
118
463
            let mut prefix_ch_index = std::cmp::max(0, start_index as isize - 5*3) as usize;
119
463
            if prefix_ch_index == 0 && 
braille_code == "UEB"194
{
120
                // don't count the word or passage mode as part of a indicator
121
46
                if braille.starts_with("⠰⠰⠰") {
122
42
                    prefix_ch_index = 9;
123
42
                } else if 
braille.starts_with("⠰⠰")4
{
124
0
                    prefix_ch_index = 6;
125
4
                }
126
417
            }
127
463
            let indicators = &braille[prefix_ch_index..start_index];   // chars to be examined
128
463
            let i_byte_start = start_index - 3 * match braille_code {
129
463
                "Nemeth" => 
i_start_nemeth129
(
indicators129
,
first_ch129
),
130
334
                _ => i_start_ueb(indicators),               // treat all the other like UEB because they probably have similar number and letter prefixes
131
            };
132
463
            if i_byte_start < start_index {
133
                // remove old highlight as long as we don't wipe out the end highlight
134
59
                if start_index < end_index {
135
59
                    let old_first_char_bytes = start_index..start_index+3;
136
59
                    let replacement_str = unhighlight(braille_at(braille, start_index)).to_string();
137
59
                    braille.replace_range(old_first_char_bytes, &replacement_str);
138
59
                
}0
139
140
                // add new highlight
141
59
                let new_first_char_bytes = i_byte_start..i_byte_start+3;
142
59
                let replacement_str = highlight(braille_at(braille, i_byte_start)).to_string();
143
59
                braille.replace_range(new_first_char_bytes, &replacement_str);
144
404
            }
145
146
463
            return i_byte_start;
147
463
        }
148
149
        /// Return the byte indexes of the first and last place to highlight
150
        /// Currently, this only does something for CMU braille
151
463
        fn expand_highlight(braille: &mut String, braille_code: &str, start_index: usize, end_index: usize) -> Option<(usize, usize)> {
152
            // For CMU, we want to expand mrows to include the opening and closing grouping indicators if they exist
153
463
            if start_index == 0 || 
end_index402
== braille.len() ||
braille_code != "CMU"352
{
154
358
                return None;
155
105
            }
156
157
105
            let first_ch = unhighlight(braille_at(braille, start_index));
158
105
            let last_ch = unhighlight(braille_at(braille, end_index-3));
159
            // We need to be careful not to expand the selection if we are already on a grouping indicator
160
105
            if first_ch == '⠢' && 
last_ch == '⠔'0
{
161
0
                return None;
162
105
            }
163
105
            let preceding_ch = braille_at(braille, start_index-3);
164
105
            if preceding_ch != '⠢' {
165
43
                return None;
166
62
            }
167
168
62
            let following_ch = braille_at(braille, end_index);
169
62
            if following_ch != '⠔' {
170
17
                return None;
171
45
            }
172
173
45
            let preceding_ch = highlight(preceding_ch);
174
45
            braille.replace_range(start_index-3..start_index+3, format!("{preceding_ch}{first_ch}").as_str());
175
45
            let following_ch = highlight(following_ch);
176
45
            braille.replace_range(end_index-3..end_index+3, format!("{last_ch}{following_ch}").as_str());
177
45
            return Some( (start_index-3, end_index + 3) );
178
463
        }
179
520
    }
180
181
    /// Given a position in a Nemeth string, what is the position character that starts it (e.g, the prev char for capital letter)
182
129
    fn i_start_nemeth(braille_prefix: &str, first_ch: char) -> usize {
183
0
        fn is_nemeth_number(ch: char) -> bool {
184
0
            matches!(ch, '⠂' | '⠆' | '⠒' | '⠲' | '⠢' | '⠖' | '⠶' | '⠦' | '⠔' | '⠴' | '⠨')
185
0
        }
186
129
        let mut n_chars = 0;
187
129
        let prefix = &mut braille_prefix.chars().rev().peekable();
188
129
        if prefix.peek() == Some(&'⠠') ||  // cap indicator
189
129
           (prefix.peek() == Some(&'⠼') && 
is_nemeth_number0
(
first_ch0
)) || // number indicator
190
129
           [Some(&'⠸'), Some(&'⠈'), Some(&'⠨')].contains(&prefix.peek()) {         // bold, script/blackboard, italic indicator
191
1
            n_chars += 1;
192
1
            prefix.next();
193
128
        } 
194
195
129
        if [Some(&'⠰'), Some(&'⠸'), Some(&'⠨')].contains(&prefix.peek()) {   // English, German, Greek
196
0
            n_chars += 1;
197
129
        } else if prefix.peek() == Some(&'⠈') {  
198
0
            let ch = prefix.next();                              // Russian/Greek Variant
199
0
            if ch == Some('⠈') || ch == Some('⠨') {
200
0
                n_chars += 2;
201
0
            }
202
129
        } else if prefix.peek() == Some(&'⠠')  { // Hebrew 
203
0
            let ch = prefix.next();                              // Russian/Greek Variant
204
0
            if ch == Some('⠠') {
205
0
                n_chars += 2;
206
0
            }
207
129
        };
208
129
        return n_chars;
209
129
    }
210
211
    /// Given a position in a UEB string, what is the position character that starts it (e.g, the prev char for capital letter)
212
334
    fn i_start_ueb(braille_prefix: &str) -> usize {
213
334
        let prefix = &mut braille_prefix.chars().rev().peekable();
214
334
        let mut n_chars = 0;
215
392
        while let Some(
ch320
) = prefix.next() {
216
320
            if is_ueb_prefix(ch) {
217
58
                n_chars += 1;
218
262
            } else if ch == '⠆' {
219
0
                let n_typeform_chars = check_for_typeform(prefix);
220
0
                if n_typeform_chars > 0 {
221
0
                    n_chars += n_typeform_chars;
222
0
                } else {
223
0
                    break;
224
                }
225
            } else {
226
262
                break;
227
            }
228
        }
229
334
        return n_chars;
230
334
    }
231
232
    
233
0
    fn check_for_typeform(prefix: &mut dyn std::iter::Iterator<Item=char>) -> usize {
234
0
        fn is_ueb_typeform_prefix(ch: char) -> bool {
235
0
            matches!(ch, '⠈' | '⠘' | '⠸' | '⠨')
236
0
        }
237
238
0
        if let Some(typeform_indicator) = prefix.next() {
239
0
            if is_ueb_typeform_prefix(typeform_indicator) {
240
0
                return 2;
241
0
            } else if typeform_indicator == '⠼' &&
242
0
                      let Some(user_defined_typeform_indicator) = prefix.next() &&
243
0
                      (is_ueb_typeform_prefix(user_defined_typeform_indicator) || user_defined_typeform_indicator == '⠐') {
244
0
                        return 3;
245
0
                    }
246
0
        }
247
0
        return 0;
248
0
    }
249
1.82k
}
250
251
// FIX: if 8-dot braille is needed, perhaps the highlights can be shifted to a "highlighted" 256 char block in private space 
252
//   they would need to be unshifted for the external world
253
11.0k
fn is_highlighted(ch: char) -> bool {
254
11.0k
    let ch_as_u32 = ch as u32;
255
11.0k
    return (0x28C0..0x28FF).contains(&ch_as_u32) || 
ch == '𝑏'9.99k
; // 0x28C0..0x28FF all have dots 7 & 8 on
256
11.0k
}
257
258
159
fn highlight(ch: char) -> char {
259
    // safe because we have checked the range
260
159
    return unsafe{char::from_u32_unchecked(ch as u32 | 0xC0)};    // 0x28C0..0x28FF all have dots 7 & 8 on
261
159
}
262
263
3.12k
fn unhighlight(ch: char) -> char {
264
3.12k
    let ch_as_u32 = ch as u32;
265
3.12k
    if (0x28C0..0x28FF).contains(&ch_as_u32) {              // 0x28C0..0x28FF all have dots 7 & 8 on
266
903
        return unsafe{char::from_u32_unchecked(ch_as_u32 & 0x283F)};  // safe because we have checked the range
267
    } else {
268
2.22k
        return ch;
269
    }
270
3.12k
}
271
272
use std::cell::RefCell;
273
thread_local!{
274
    /// Count number of probes -- get a sense of how well algorithm is working (for debugging)
275
    static N_PROBES: RefCell<usize> = const { RefCell::new(0) };
276
}
277
278
279
/// Given a 0-based braille position, return the id of the smallest MathML node enclosing it.
280
/// This node might be a leaf with an offset.
281
91
pub fn get_navigation_node_from_braille_position(mathml: Element, position: usize) -> Result<(String, usize)> {
282
    // This works via a "smart" binary search (the trees aren't binary or balanced, we estimate the child to look in):
283
    //   braille the mathml with a nav node and see where 'position' is in relation to the start/end of the nav node
284
    // Each call to find_navigation_node() returns a search state that tell us where to look next if not found
285
    #[derive(Debug, Display)]
286
    enum SearchStatus {
287
        LookInParent,       // look up a level for exact match
288
        LookLeft,           // went too far, backup
289
        LookRight,          // continue searching right
290
        Found,
291
    }
292
293
    struct SearchState<'e> {
294
        status: SearchStatus,
295
        node: Element<'e>,
296
        highlight_start: usize,     // if status is Found, then this is the offset within a leaf node
297
        highlight_end: usize,       // if status is Found, this is ignored
298
    }
299
300
    // save the current highlight state, set the state to be the end points so we can find the braille, then restore the state
301
    // FIX: this can fail if there is 8-dot braille
302
    use crate::interface::{get_preference, set_preference};
303
91
    let saved_highlight_style = get_preference("BrailleNavHighlight").unwrap();
304
91
    set_preference("BrailleNavHighlight", "EndPoints").unwrap();
305
306
91
    N_PROBES.with(|n| {*n.borrow_mut() = 0});
307
    // dive into the child of the <math> element (should only be one)
308
91
    let search_state = find_navigation_node(mathml, as_element(mathml.children()[0]), position)
?0
;
309
91
    set_preference("BrailleNavHighlight", saved_highlight_style.as_str()).unwrap();
310
311
    // we know the attr value exists because it was found internally
312
    // FIX: what should be done if we never did the search?
313
91
    match search_state.status {
314
        SearchStatus::Found | SearchStatus::LookInParent => {
315
86
            return Ok( (search_state.node.attribute_value("id").unwrap().to_string(), search_state.highlight_start) )
316
        },
317
        _ => {
318
            // weird state -- return the entire expr
319
5
            match mathml.attribute_value("id") {
320
0
                None => bail!("'id' is not present on mathml: {}", mml_to_string(mathml)),
321
5
                Some(id) => return Ok( (id.to_string(), 0) ),
322
            }
323
        }
324
    } 
325
326
    /// find the navigation node that most tightly encapsulates the target position (0-based)
327
    /// 'node' is the current node we are on inside of 'mathml'
328
465
    fn find_navigation_node<'e>(mathml: Element<'e>, node: Element<'e>, target_position: usize) -> Result<SearchState<'e>> {
329
465
        let node_id = match node.attribute_value("id") {
330
465
            Some(id) => id,
331
0
            None => bail!("'id' is not present on mathml: {}", mml_to_string(node)),
332
        };
333
465
        N_PROBES.with(|n| {*n.borrow_mut() += 1});
334
465
        let (braille, char_start, char_end) = braille_mathml(mathml, node_id)
?0
;
335
465
        let mut status = None;
336
        // debug!("find_navigation_node ({}, id={}): highlight=[{}, {});  target={}", name(node), node_id, char_start, char_end, target_position);
337
465
        if is_leaf(node) {
338
100
            if char_start == 0 && 
char_end10
== braille.len()/3 {
339
6
                // nothing highlighted -- probably invisible char not represented in braille -- continue looking to the right
340
6
                // debug!("  return due invisible char (?)' ");
341
6
                status = Some(SearchStatus::LookRight);
342
94
            } else if char_start <= target_position && 
target_position < char_end88
{
343
                // FIX: need to handle multi-char leaves and set the offset (char_start) appropriately
344
                // debug!("  return due to target_position inside leaf: {} <= {} < {}", char_start, target_position, char_end);
345
58
                return Ok( SearchState {
346
58
                    status: SearchStatus::Found,
347
58
                    node,
348
58
                    highlight_start: target_position - char_start,
349
58
                    highlight_end: 0,
350
58
                });
351
36
            } else if name(node) == "mo" {
352
                // if there is whitespace before or after the operator, consider the operator to be a match
353
18
                if (char_start > 0 && target_position == char_start - 1 && 
354
2
                    braille_at(&braille, 3*(char_start - 1)) == '⠀' && is_operator_that_adds_whitespace(node)) ||
355
16
                   (3*char_end < braille.len() && target_position == char_end &&
356
11
                    braille_at(&braille, 3*char_end) == '⠀' && 
is_operator_that_adds_whitespace2
(
node2
)) {
357
4
                    return Ok( SearchState {
358
4
                        status: SearchStatus::Found,
359
4
                        node,
360
4
                        highlight_start: 0,
361
4
                        highlight_end: 0,
362
4
                    } );
363
14
                }
364
18
            }
365
365
        }
366
403
        if status.is_none() {
367
397
            if target_position < char_start {
368
23
                // debug!("  return due to target_position {} < start {}", target_position, char_start);
369
23
                status = Some(SearchStatus::LookLeft);
370
374
            } else if target_position >= char_end {
371
49
                // debug!("  return due to target_position {} >= end {}", target_position, char_end);
372
49
                status = Some(SearchStatus::LookRight);
373
325
            }
374
6
        }
375
403
        if let Some(
status78
) = status {
376
78
            return Ok( SearchState {
377
78
                status,
378
78
                node,
379
78
                highlight_start: char_start,
380
78
                highlight_end: char_end,
381
78
            } );
382
325
        }
383
384
325
        let children = node.children();
385
325
        let mut i_left_child = 0;                         // inclusive
386
325
        let mut i_right_child = children.len();           // exclusive
387
325
        let mut call_start = char_start;
388
325
        let mut guess_fn: Box<dyn Fn(usize, usize, usize, usize) -> usize> = Box::new(|i_left, i_right, start, target: usize| guess_child_node_ltr(&children, i_left, i_right, start, target));
389
398
        while i_left_child < i_right_child {
390
374
            let i_guess_child = guess_fn(i_left_child, i_right_child, call_start, target_position);
391
374
            let status = find_navigation_node(mathml, as_element(children[i_guess_child]), target_position)
?0
;
392
            // debug!("  in {} loop: status: {}, child: left/guess/right {}/({},{})/{}; highlight=[{}, {})", 
393
            //         name(node), status.status,
394
            //         i_left_child, i_guess_child, name(as_element(children[i_guess_child])),i_right_child,
395
            //         status.highlight_start, status.highlight_end);
396
374
            match status.status {
397
                SearchStatus::Found => {
398
301
                    return Ok(status);
399
                },
400
                SearchStatus::LookInParent => {
401
0
                    let (_, start, end) = braille_mathml(mathml, node_id)?;
402
                    // debug!("  parent ({}) braille: start/end={}/{};  target_position={}", name(node), start, end, target_position);
403
0
                    if start <= target_position && target_position < end {
404
                        // debug!("  ..found: id={}", node_id);
405
0
                        return Ok( SearchState{
406
0
                            status: SearchStatus::Found,
407
0
                            node,
408
0
                            highlight_start: 0,
409
0
                            highlight_end: 0,
410
0
                        } );      // done or look up another level
411
0
                    }
412
0
                    return Ok(status);  // look up a level
413
                },
414
                SearchStatus::LookLeft => {
415
20
                    i_right_child = if i_guess_child == 0 {
09
} else {
i_guess_child11
}; // exclusive
416
20
                    call_start = status.highlight_start-1;
417
20
                    guess_fn = Box::new(|i_left, i_right, start, target| 
guess_child_node_rtl7
(
&children7
,
i_left7
,
i_right7
,
start7
,
target7
));
418
                },
419
                SearchStatus::LookRight => {
420
53
                    i_left_child = i_guess_child+1;
421
53
                    call_start = status.highlight_end+1;
422
53
                    guess_fn = Box::new(|i_left, i_right, start, target| 
guess_child_node_ltr42
(
&children42
,
i_left42
,
i_right42
,
start42
,
target42
));
423
                },
424
            }
425
        }
426
        // debug!("Didn't child in node {}: left/right={}/{};  target_position={}", name(node), i_left_child, i_right_child, target_position);
427
428
        // if we get here, we didn't find it in the children
429
        // debug!("..end of loop: look in parent of {} has start/end={}/{}", name(node), char_start, char_end);
430
        return Ok( SearchState{
431
24
            status: if char_start <= target_position && target_position <= char_end {SearchStatus::Found} else {
SearchStatus::LookInParent0
},
432
24
            node,
433
            highlight_start: 0,
434
            highlight_end: 0,
435
        } );
436
465
    }
437
438
4
    fn is_operator_that_adds_whitespace(node: Element) -> bool {
439
        use crate::definitions::BRAILLE_DEFINITIONS;
440
4
        if PreferenceManager::get().borrow().pref_to_string("UseSpacesAroundAllOperators") == "true" {
441
0
            return true;
442
4
        } 
443
444
4
        return BRAILLE_DEFINITIONS.with(|definitions| {
445
4
            let definitions = definitions.borrow();
446
4
            let comparison_operators = definitions.get_hashset("ComparisonOperators").unwrap();
447
4
            return comparison_operators.contains(as_text(node));
448
4
        });        
449
4
    }
450
451
    /// look in children[i_left..i_right] for a count that exceeds target
452
367
    fn guess_child_node_ltr(children: &[ChildOfElement], i_left: usize, i_right: usize, start: usize, target: usize) -> usize {
453
367
        let mut estimated_position = start;
454
        // number of chars to add for number indicators
455
367
        let n_number_indicator = if PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth" {
0106
} else {
1261
}; // Nemeth doesn't typically need number or letter indicators
456
        #[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
457
666
        for i in 
i_left..i_right367
{
458
666
            estimated_position += estimate_braille_chars(children[i], n_number_indicator);
459
666
            if estimated_position >= target {
460
344
                return i;
461
322
            }
462
        }
463
23
        return i_right-1;       // estimate was too large, return the last child as a guess
464
367
    }
465
466
    /// look in children[i_left..i_right].rev for a count that is less than target
467
7
    fn guess_child_node_rtl(children: &[ChildOfElement], i_left: usize, i_right: usize, start: usize, target: usize) -> usize {
468
7
        let mut estimated_position = start;
469
7
        let n_number_indicator = if PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth" {
01
} else {
16
}; // Nemeth doesn't typically need number or letter indicators
470
7
        for i in (i_left..i_right).rev() {
471
7
            estimated_position -= estimate_braille_chars(children[i], n_number_indicator);
472
7
            if estimated_position <= target {
473
7
                return i;
474
0
            }
475
        }
476
0
        return i_left;       // estimate was too small, return the first child as a guess
477
7
    }
478
479
4.58k
    fn estimate_braille_chars(child: ChildOfElement, n_number_indicator: usize) -> usize {
480
4.58k
        let node = as_element(child);
481
4.58k
        let leaf_name = name(node);
482
4.58k
        if is_leaf(node) {
483
3.13k
            let text = as_text(node);
484
            // len() is close since mn's probably have ASCII digits and lower case vars are common (count as) and other chars need extra braille chars
485
            // don't want to count invisible chars since they don't display and would give a length = 3
486
3.13k
            if text == "\u{2061}" || text == "\u{2062}"  {       // invisible function apply/times (most common by far)
487
597
                return 0;
488
2.53k
            }
489
            // FIX: this assumption is bad for 8-dot braille
490
2.53k
            return match leaf_name {
491
2.53k
                "mn" => 
n_number_indicator632
+ text.len(),
492
1.90k
                "mo" => 
2741
, // could do better by actually brailling char, but that is more expensive
493
1.16k
                _ => text.len(),
494
            }
495
1.45k
        }
496
1.45k
        let mut estimate = if leaf_name == "mrow" {
0924
} else {
node.children().len() + 1526
}; // guess extra chars need for mfrac, msub, etc (start+intermediate+end).
497
1.45k
        if leaf_name == "msup" || 
leaf_name == "msub"1.19k
||
leaf_name == "msubsup"1.19k
{
498
260
            estimate -= 1;   // opening superscript/subscript indicator not needed
499
1.19k
        }
500
3.91k
        for child in 
node1.45k
.
children1.45k
() {
501
3.91k
            estimate += estimate_braille_chars(child, n_number_indicator);
502
3.91k
        }
503
        // debug!("estimate_braille_chars for {}: {}", crate::canonicalize::element_summary(as_element(child)), estimate);
504
1.45k
        return estimate;
505
4.58k
    }
506
91
}
507
508
888
fn nemeth_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
509
    // Typeface: S: sans-serif, B: bold, T: script/blackboard, I: italic, R: Roman
510
    // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian
511
    // Indicators: C: capital, N: number, P: punctuation, M: multipurpose
512
    // Others:
513
    //      W -- whitespace that should be kept (e.g, in a numeral)
514
    //      𝑁 -- hack for special case of a lone decimal pt -- not considered a number but follows rules mostly 
515
    // SRE doesn't have H: Hebrew or U: Russian, so not encoded (yet)
516
    // Note: some "positive" patterns find cases to keep the char and transform them to the lower case version
517
    static NEMETH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
518
        "S" => "⠠⠨",    // sans-serif
519
        "B" => "⠸",     // bold
520
        "𝔹" => "⠨",     // blackboard
521
        "T" => "⠈",     // script
522
        "I" => "⠨",     // italic (mapped to be the same a blackboard)
523
        "R" => "",      // roman
524
        "E" => "⠰",     // English
525
        "D" => "⠸",     // German (Deutsche)
526
        "G" => "⠨",     // Greek
527
        "V" => "⠨⠈",    // Greek Variants
528
        "H" => "⠠⠠",    // Hebrew
529
        "U" => "⠈⠈",    // Russian
530
        "C" => "⠠",     // capital
531
        "P" => "⠸",     // punctuation
532
        "𝐏" => "⠸",     // hack for punctuation after a roman numeral -- never removed
533
        "L" => "",      // letter
534
        "l" => "",      // letter inside enclosed list
535
        "M" => "",      // multipurpose indicator
536
        "m" => "⠐",     // required multipurpose indicator
537
        "N" => "",      // potential number indicator before digit
538
        "n" => "⠼",     // required number indicator before digit
539
        "𝑁" => "",      // hack for special case of a lone decimal pt -- not considered a number but follows rules mostly
540
        "W" => "⠀",     // whitespace
541
        "w" => "⠀",     // whitespace from comparison operator
542
        "," => "⠠⠀",    // comma
543
        "b" => "⠐",     // baseline
544
        "𝑏" => "⣐",     // highlight baseline (it's a hack)
545
        "↑" => "⠘",     // superscript
546
        "↓" => "⠰",     // subscript
547
    };
548
549
    // Add an English Letter indicator. This involves finding "single letters".
550
    // The green book has a complicated set of cases, but the Nemeth UEB Rule book (May 2020), 4.10 has a much shorter explanation:
551
    //   punctuation or whitespace on the left and right ignoring open/close chars
552
    //   https://nfb.org/sites/www.nfb.org/files/files-pdf/braille-certification/lesson-4--provisional-5-9-20.pdf
553
2
    static ADD_ENGLISH_LETTER_INDICATOR: LazyLock<Regex> = LazyLock::new(|| {
554
2
        Regex::new(r"(?P<start>^|W|P.[\u2800-\u28FF]?|,)(?P<open>[\u2800-\u28FF]?⠷)?(?P<letter>C?L.)(?P<close>[\u2800-\u28FF]?⠾)?(?P<end>W|P|,|$)").unwrap()
555
2
    });
556
        
557
    // Trim braille spaces before and after braille indicators
558
    // In order: fraction, /, cancellation, letter, baseline
559
    // Note: fraction over is not listed due to example 42(4) which shows a space before the "/"
560
    static REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: LazyLock<Regex> = 
561
2
        LazyLock::new(|| Regex::new(r"(⠄⠄⠄|⠤⠤⠤⠤)[Ww]+([⠼⠸⠪])").unwrap());
562
    static REMOVE_SPACE_AFTER_BRAILLE_INDICATORS: LazyLock<Regex> =
563
2
        LazyLock::new(|| Regex::new(r"([⠹⠻Llb])[Ww]+(⠄⠄⠄|⠤⠤⠤⠤)").unwrap());
564
565
    // Hack to convert non-numeric '.' to numeric '.'
566
    // The problem is that the numbers are hidden inside of mover -- this might be more general than rule 99_2.
567
2
    static DOTS_99_A_2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"𝑁⠨mN").unwrap());
568
569
    // Punctuation is one or two chars. There are (currently) only 3 2-char punct chars (—‘’) -- we explicitly list them below
570
    static REMOVE_SPACE_BEFORE_PUNCTUATION_151: LazyLock<Regex> =
571
2
        LazyLock::new(|| Regex::new(r"w(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠾)").unwrap());
572
    static REMOVE_SPACE_AFTER_PUNCTUATION_151: LazyLock<Regex> =
573
2
        LazyLock::new(|| Regex::new(r"(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠷)w").unwrap());
574
575
    // Multipurpose indicator insertion
576
    // 149 -- consecutive comparison operators have no space -- instead a multipurpose indicator is used (doesn't require a regex)
577
578
    // 177.2 -- add after a letter and before a digit (or decimal pt) -- digits will start with N
579
2
    static MULTI_177_2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([Ll].)[N𝑁]").unwrap());
580
581
    // keep between numeric subscript and digit ('M' added by subscript rule)
582
2
    static MULTI_177_3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([N𝑁].)M([N𝑁].)").unwrap());
583
584
    // Add after decimal pt for non-digits except for comma and punctuation
585
    // Note: since "." can be in the middle of a number, there is not necessarily a "N"
586
    // Although not mentioned in 177_5, don't add an 'M' before an 'm'
587
    static MULTI_177_5: LazyLock<Regex> =
588
2
        LazyLock::new(|| Regex::new(r"([N𝑁]⠨)([^⠂⠆⠒⠲⠢⠖⠶⠦⠔N𝑁,Pm])").unwrap());
589
590
    // Pattern for rule II.9a (add numeric indicator at start of line or after a space)
591
    // 1. start of line
592
    // 2. optional minus sign (⠤)
593
    // 3. optional typeface indicator
594
    // 4. number (N)
595
2
    static NUM_IND_9A: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<start>^|[,Ww])(?P<minus>⠤?)N").unwrap());
596
597
    // Needed after section mark(§), paragraph mark(¶), #, or *
598
2
    static NUM_IND_9C: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(⠤?)(⠠⠷|⠠⠳|⠠⠈⠷)N").unwrap());
599
600
    // Needed after section mark(§), paragraph mark(¶), #, or *
601
2
    static NUM_IND_9D: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(⠈⠠⠎|⠈⠠⠏|⠨⠼|⠈⠼)N").unwrap());
602
603
    // Needed after a typeface change or interior shape modifier indicator
604
2
    static NUM_IND_9E: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<face>[SB𝔹TIR]+?)N").unwrap());
605
2
    static NUM_IND_9E_SHAPE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<mod>⠸⠫)N").unwrap());
606
607
    // Needed after hyphen that follows a word, abbreviation, or punctuation (caution about rule 11d)
608
    // Note -- hyphen might encode as either "P⠤" or "⠤" depending on the tag used
609
2
    static NUM_IND_9F: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([Ll].[Ll].|P.)(P?⠤)N").unwrap());
610
611
    // Enclosed list exception
612
    // Normally we don't add numeric indicators in enclosed lists (done in get_braille_nemeth_chars).
613
    // The green book says "at the start" of an item, don't add the numeric indicator.
614
    // The NFB list exceptions after function abbreviations and angles, but what this really means is "after a space"
615
2
    static NUM_IND_ENCLOSED_LIST: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"w([⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴])").unwrap());
616
617
    // Punctuation chars (Rule 38.6 says don't use before ",", "hyphen", "-", "…")
618
    // Never use punctuation indicator before these (38-6)
619
    //      "…": "⠀⠄⠄⠄"
620
    //      "-": "⠸⠤" (hyphen and dash)
621
    //      ",": "⠠⠀"     -- spacing already added
622
    // Rule II.9b (add numeric indicator after punctuation [optional minus[optional .][digit]
623
    //  because this is run after the above rule, some cases are already caught, so don't
624
    //  match if there is already a numeric indicator
625
2
    static NUM_IND_9B: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<punct>P..?)(?P<minus>⠤?)N").unwrap());
626
627
    // Before 79b (punctuation)
628
2
    static REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?:[↑↓]+[b𝑏]?|[b𝑏])([Ww,P]|$)").unwrap());
629
630
    // Most commas have a space after them, but not when followed by a close quote (others?)
631
2
    static NO_SPACE_AFTER_COMMA: LazyLock<Regex> = LazyLock::new(|| Regex::new(r",P⠴").unwrap()); // captures both single and double close quote
632
2
    static REMOVE_LEVEL_IND_BEFORE_BASELINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?:[↑↓mb𝑏]+)([b𝑏])").unwrap());
633
634
    // Except for the four chars above, the unicode rules always include a punctuation indicator.
635
    // The cases to remove them (that seem relevant to MathML) are:
636
    //   Beginning of line or after a space (V 38.1)
637
    //   After a word (38.4)
638
    //   2nd or subsequent punctuation (includes, "-", etc) (38.7)
639
2
    static REMOVE_AFTER_PUNCT_IND: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(^|[Ww]|[Ll].[Ll].)P(.)").unwrap());
640
2
    static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏CLlMmb𝑏↑↓Nn𝑁Ww,])").unwrap());
641
2
    static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap());
642
643
//   debug!("Before:  \"{}\"", raw_braille);
644
    // replacements might overlap at boundaries (e.g., whitespace) -- need to repeat
645
888
    let mut start = 0;
646
888
    let mut result = String::with_capacity(raw_braille.len()+ raw_braille.len()/4);  // likely upper bound
647
923
    while let Some(
matched35
) = ADD_ENGLISH_LETTER_INDICATOR.find_at(&raw_braille, start) {
648
35
        result.push_str(&raw_braille[start..matched.start()]);
649
35
        let replacement = ADD_ENGLISH_LETTER_INDICATOR.replace(
650
35
                &raw_braille[matched.start()..matched.end()], "${start}${open}E${letter}${close}");
651
35
        // debug!("matched='{}', start/end={}/{}; replacement: {}", &raw_braille[matched.start()..matched.end()], matched.start(), matched.end(), replacement);
652
35
        result.push_str(&replacement);
653
35
        // put $end back on because needed for next match (e.g., whitespace at end and then start of next match)
654
35
        // but it could also match because it was at the end, in which case "-1" is wrong -- tested after loop for that
655
35
        start = matched.end() - 1;
656
35
    }
657
888
    if !raw_braille.is_empty() && ( start < raw_braille.len()-1 || 
"WP,"8
.
contains8
(
raw_braille.chars()8
.
nth_back8
(0).
unwrap8
()) ) { // see comment about $end above
658
882
        result.push_str(&raw_braille[start..]);
659
882
    
}6
660
//   debug!("ELIs:    \"{}\"", result);
661
662
888
    let result = NUM_IND_ENCLOSED_LIST.replace_all(&result, "wn${1}");
663
664
    // Remove blanks before and after braille indicators
665
888
    let result = REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS.replace_all(&result, "$1$2");
666
888
    let result = REMOVE_SPACE_AFTER_BRAILLE_INDICATORS.replace_all(&result, "$1$2");
667
668
888
    let result = REMOVE_SPACE_BEFORE_PUNCTUATION_151.replace_all(&result, "$1");
669
888
    let result = REMOVE_SPACE_AFTER_PUNCTUATION_151.replace_all(&result, "$1");
670
//   debug!("spaces:  \"{}\"", result);
671
672
888
    let result = DOTS_99_A_2.replace_all(&result, "N⠨mN");
673
674
    // Multipurpose indicator
675
888
    let result = result.replace("ww", "m"); // 149
676
888
    let result = MULTI_177_2.replace_all(&result, "${1}m${2}");
677
888
    let result = MULTI_177_3.replace_all(&result, "${1}m$2");
678
888
    let result = MULTI_177_5.replace_all(&result, "${1}m$2");
679
//   debug!("MULTI:   \"{}\"", result);
680
681
888
    let result = NUM_IND_9A.replace_all(&result, "${start}${minus}n");
682
    // debug!("IND_9A:  \"{}\"", result);
683
888
    let result = NUM_IND_9C.replace_all(&result, "${1}${2}n");
684
888
    let result = NUM_IND_9D.replace_all(&result, "${1}n");
685
888
    let result = NUM_IND_9E.replace_all(&result, "${face}n");
686
888
    let result = NUM_IND_9E_SHAPE.replace_all(&result, "${mod}n");
687
888
    let result = NUM_IND_9F.replace_all(&result, "${1}${2}n");
688
689
//   debug!("IND_9F:  \"{}\"", result);
690
691
    // 9b: insert after punctuation (optional minus sign)
692
    // common punctuation adds a space, so 9a handled it. Here we deal with other "punctuation" 
693
    // FIX other punctuation and reference symbols (9d)
694
888
    let result = NUM_IND_9B.replace_all(&result, "$punct${minus}n");
695
//   debug!("A PUNCT: \"{}\"", &result);
696
697
    // strip level indicators
698
    // check first to remove level indicators before baseline, then potentially remove the baseline
699
888
    let mut result = REMOVE_LEVEL_IND_BEFORE_BASELINE.replace_all(&result, "$1");
700
//   debug!("Punct  : \"{}\"", &result);
701
    // checks for punctuation char, so needs to before punctuation is stripped.
702
    // if '𝑏' is removed, then the highlight needs to be shifted to the left in some cases
703
888
    let result = remove_baseline_before_space_or_punctuation(&mut result);
704
//   debug!("Removed: \"{}\"", &result);
705
706
888
    let result = NO_SPACE_AFTER_COMMA.replace_all(&result, "⠠P⠴");
707
708
888
    let result = REMOVE_AFTER_PUNCT_IND.replace_all(&result, "$1$2");
709
//   debug!("Punct38: \"{}\"", &result);
710
711
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
712
888
    let sans_serif = pref_manager.pref_to_string("Nemeth_SansSerif");
713
888
    let bold = pref_manager.pref_to_string("Nemeth_Bold");
714
888
    let double_struck = pref_manager.pref_to_string("Nemeth_DoubleStruck");
715
888
    let script = pref_manager.pref_to_string("Nemeth_Script");
716
888
    let italic = pref_manager.pref_to_string("Nemeth_Italic");
717
718
7.57k
    let 
result888
=
REPLACE_INDICATORS888
.
replace_all888
(
&result888
, |cap: &Captures| {
719
7.57k
        let matched_char = &cap[0];
720
7.57k
        match matched_char {
721
7.57k
            "S" => 
&sans_serif2
,
722
7.57k
            "B" => 
&bold47
,
723
7.52k
            "𝔹" => 
&double_struck28
,
724
7.49k
            "T" => 
&script6
,
725
7.49k
            "I" => 
&italic2
,
726
7.48k
            _ => match NEMETH_INDICATOR_REPLACEMENTS.get(&cap[0]) {
727
0
                None => {error!("REPLACE_INDICATORS and NEMETH_INDICATOR_REPLACEMENTS are not in sync"); ""},
728
7.48k
                Some(&ch) => ch,
729
            }
730
        }
731
7.57k
    });
732
733
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
734
888
    let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
735
888
    let result = COLLAPSE_SPACES.replace_all(result, "⠀");
736
   
737
888
    return result.to_string();
738
739
888
    fn remove_baseline_before_space_or_punctuation<'a>(braille: &'a mut Cow<'a, str>) -> Cow<'a, str> {
740
        // If the baseline highlight is at the end of the string and it is going to be deleted by the regex,
741
        //   then we need to shift the highlight to the left if what is to it's left is not whitespace (which should never be a highlight end)
742
        // This only happens when BrailleNavHighlight == "EndPoints".
743
888
        let highlight_style = PreferenceManager::get().borrow().pref_to_string("BrailleNavHighlight");
744
888
        if highlight_style == "EndPoints" &&
745
132
            let Some(
last_highlighted129
) = braille.rfind(is_highlighted) &&
746
129
            braille[last_highlighted..].starts_with('𝑏') {
747
7
                    let i_after_baseline = last_highlighted + '𝑏'.len_utf8();
748
7
                    if i_after_baseline == braille.len() || 
braille[i_after_baseline..]5
.
starts_with5
(
['W', 'w', ',', 'P']5
) {
749
                        // shift the highlight to the left after doing just the replacement (if any) that the regex below does
750
                        // the shift runs until a non blank braille char is found
751
2
                        let mut bytes_deleted = 0;
752
2
                        let mut char_to_highlight = "".to_string();   // illegal value
753
2
                        for ch in braille[..last_highlighted].chars().rev() {
754
2
                            bytes_deleted += ch.len_utf8();
755
2
                            if (0x2801..0x28FF).contains(&(ch as u32)) {
756
2
                                char_to_highlight = highlight(ch).to_string();
757
2
                                break;
758
0
                            }
759
                        }
760
2
                        braille.to_mut().replace_range(last_highlighted-bytes_deleted..last_highlighted+'𝑏'.len_utf8(),
761
2
                                                        &char_to_highlight);
762
5
                    }
763
881
                }
764
888
        return REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT.replace_all(braille, "$1");
765
766
888
    }
767
888
}
768
769
// Single-character codes used in the intermediate braille string (before the final substitution pass):
// Typeface: S: sans-serif, B: bold, T: script/blackboard, I: italic, R: Roman
770
// Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian
771
// Indicators: C: capital, N: number, P: punctuation, M: multipurpose
772
// Others:
773
//      W -- whitespace that should be kept (e.g., in a numeral)
774
//      𝑁 -- hack for the special case of a lone decimal point -- not considered a number but mostly follows the number rules
775
// Note: some "positive" patterns find cases to keep the char and transform them to the lower case version
776
//
// Table consulted by `ueb_cleanup` for every char matched by `REPLACE_INDICATORS`: key = indicator code, value = braille cells to emit.
// An empty value means the code is only a marker and is dropped from the output.
// The "XXX" values are placeholders: `ueb_cleanup` substitutes user-preference strings for "S", "𝔹", and "D"
// (and also for "V") *before* looking a code up here, so those entries are never used for output.
static UEB_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
777
    "S" => "XXX",    // sans-serif -- from prefs
778
    "B" => "⠘",     // bold
779
    "𝔹" => "XXX",     // blackboard -- from prefs
780
    "T" => "⠈",     // script
781
    "I" => "⠨",     // italic
782
    "R" => "",      // roman
783
    // "E" => "⠰",     // English
784
    "1" => "⠰",      // Grade 1 symbol
785
    "𝟙" => "⠰⠰",     // Grade 1 word
786
    "L" => "",       // Letter marker: left in to assist in locating letters
787
    "D" => "XXX",    // German (Deutsche) -- from prefs
788
    "G" => "⠨",      // Greek
789
    "V" => "⠨⠈",     // Greek Variants
790
    // "H" => "⠠⠠",  // Hebrew
791
    // "U" => "⠈⠈",  // Russian
792
    "C" => "⠠",      // capital
793
    "𝐶" => "⠠",      // capital that never should get word indicator (from chemical element)
794
    "N" => "⠼",     // number indicator
795
    "t" => "⠱",     // shape terminator
796
    "W" => "⠀",     // whitespace
797
    "𝐖"=> "⠀",     // whitespace (hard break -- basically, it separates exprs)
798
    "s" => "⠆",     // typeface single char indicator
799
    "w" => "⠂",     // typeface word indicator
800
    "e" => "⠄",     // typeface & capital terminator
801
    "o" => "",       // flag that what follows is an open indicator (used for standing alone rule)
802
    "c" => "",       // flag that what follows is a close indicator (used for standing alone rule)
803
    "b" => "",       // flag that what follows is an open or close indicator (used for standing alone rule)
804
    "," => "⠂",     // comma
805
    "." => "⠲",     // period
806
    "-" => "-",     // hyphen
807
    "—" => "⠠⠤",   // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
808
    "―" => "⠐⠠⠤",  // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
809
    "#" => "",      // signals end of script
810
    // '(', '{', '[', '"', '\'', '“', '‘', '«',    // opening chars
811
    // ')', '}', ']', '\"', '\'', '”', '’', '»',           // closing chars
812
    // ',', ';', ':', '.', '…', '!', '?'                    // punctuation
813
814
};
815
816
// static LETTERS: phf::Set<char> = phf_set! {
817
//     '⠁', '⠃', '⠉', '⠙', '⠑', '⠋', '⠛', '⠓', '⠊', '⠚', '⠅', '⠇', '⠍', 
818
//     '⠝', '⠕', '⠏', '⠟', '⠗', '⠎', '⠞', '⠥', '⠧', '⠺', '⠭', '⠽', '⠵',
819
// };
820
821
2.39k
/// Returns `true` when `ch` is one of the ten braille cells for the letters a-j,
/// i.e. the cells that can also be read as digits after a number indicator.
fn is_letter_number(ch: char) -> bool {
    const LETTER_NUMBERS: [char; 10] = ['⠁', '⠃', '⠉', '⠙', '⠑', '⠋', '⠛', '⠓', '⠊', '⠚'];
    LETTER_NUMBERS.contains(&ch)
}
824
825
// Set of letter sequences looked up by `is_short_form`.
// Each entry is written in the intermediate encoding used in this file: every letter is the marker 'L' followed by its braille cell.
// NOTE(review): presumably these are the UEB shortform words (the first entry reads "ab") -- confirm against the UEB shortforms list.
static SHORT_FORMS: phf::Set<&str> = phf_set! {
826
    "L⠁L⠃", "L⠁L⠃L⠧", "L⠁L⠉", "L⠁L⠉L⠗", "L⠁L⠋",
827
    "L⠁L⠋L⠝", "L⠁L⠋L⠺", "L⠁L⠛", "L⠁L⠛L⠌", "L⠁L⠇",
828
     "L⠁L⠇L⠍", "L⠁L⠇L⠗", "L⠁L⠇L⠞", "L⠁L⠇L⠹", "L⠁L⠇L⠺",
829
     "L⠃L⠇", "L⠃L⠗L⠇", "L⠉L⠙", "L⠙L⠉L⠇", "L⠙L⠉L⠇L⠛",
830
     "L⠙L⠉L⠧", "L⠙L⠉L⠧L⠛", "L⠑L⠊", "L⠋L⠗", "L⠋L⠌", "L⠛L⠙",
831
     "L⠛L⠗L⠞", "L⠓L⠍", "L⠓L⠍L⠋", "L⠓L⠻L⠋", "L⠊L⠍L⠍", "L⠇L⠇", "L⠇L⠗",
832
     "L⠍L⠽L⠋", "L⠍L⠡", "L⠍L⠌", "L⠝L⠑L⠉", "L⠝L⠑L⠊", "L⠏L⠙",
833
     "L⠏L⠻L⠉L⠧", "L⠏L⠻L⠉L⠧L⠛", "L⠏L⠻L⠓", "L⠟L⠅", "L⠗L⠉L⠧",
834
     "L⠗L⠉L⠧L⠛", "L⠗L⠚L⠉", "L⠗L⠚L⠉L⠛", "L⠎L⠙", "L⠎L⠡", "L⠞L⠙",
835
     "L⠞L⠛L⠗", "L⠞L⠍", "L⠞L⠝", "L⠭L⠋", "L⠭L⠎", "L⠽L⠗", "L⠽L⠗L⠋",
836
     "L⠽L⠗L⠧L⠎", "L⠮L⠍L⠧L⠎", "L⠡L⠝", "L⠩L⠙", "L⠹L⠽L⠋", "L⠳L⠗L⠧L⠎",
837
     "L⠺L⠙", "L⠆L⠉", "L⠆L⠋", "L⠆L⠓", "L⠆L⠇", "L⠆L⠝", "L⠆L⠎", "L⠆L⠞",
838
     "L⠆L⠽", "L⠒L⠉L⠧", "L⠒L⠉L⠧L⠛", "L⠐L⠕L⠋"
839
};
840
841
1.75k
/// Returns `true` when `ch` is one of the typeface/capital codes that may sit between
/// an 'L'/'N' item and the next one (bold, italic, blackboard, sans-serif, script, German, capitals).
fn is_letter_prefix(ch: char) -> bool {
    const LETTER_PREFIXES: [char; 9] = ['B', 'I', '𝔹', 'S', 'T', 'D', 'C', '𝐶', '𝑐'];
    LETTER_PREFIXES.contains(&ch)
}
844
845
// Trim braille spaces before and after braille indicators
846
// In order: fraction, /, cancellation, letter, baseline
847
// Note: fraction over is not listed due to example 42(4) which shows a space before the "/"
848
// static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex =
849
//     Regex::new(r"(⠄⠄⠄|⠤⠤⠤)W+([⠼⠸⠪])").unwrap();
850
2
static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb])").unwrap());
851
2
static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap());
852
853
35
fn is_short_form(chars: &[char]) -> bool {
854
204
    let 
chars_as_string35
=
chars35
.
iter35
().
map35
(|ch| ch.to_string()).
collect35
::<String>();
855
35
    return SHORT_FORMS.contains(&chars_as_string);
856
35
}
857
858
366
fn ueb_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
859
    // debug!("ueb_cleanup: start={}", raw_braille);
860
366
    let result = typeface_to_word_mode(&raw_braille);
861
366
    let result = capitals_to_word_mode(&result);
862
863
366
    let use_only_grade1 = pref_manager.pref_to_string("UEB_START_MODE").as_str() == "Grade1";
864
    
865
    // '𝐖' is a hard break -- basically, it separates exprs
866
366
    let mut result = result.split('𝐖')
867
370
                        .
map366
(|str| pick_start_mode(str, use_only_grade1) + "W")
868
366
                        .collect::<String>();
869
366
    result.pop();   // we added a 'W' at the end that needs to be removed.
870
871
366
    let result = result.replace("tW", "W");
872
873
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
874
366
    let double_struck = pref_manager.pref_to_string("UEB_DoubleStruck");
875
366
    let sans_serif = pref_manager.pref_to_string("UEB_SansSerif");
876
366
    let fraktur = pref_manager.pref_to_string("UEB_Fraktur");
877
366
    let greek_variant = pref_manager.pref_to_string("UEB_GreekVariant");
878
879
3.77k
    let 
result366
=
REPLACE_INDICATORS366
.
replace_all366
(
&result366
, |cap: &Captures| {
880
3.77k
        let matched_char = &cap[0];
881
3.77k
        match matched_char {
882
3.77k
            "𝔹" => 
&double_struck0
,
883
3.77k
            "S" => 
&sans_serif0
,
884
3.77k
            "D" => 
&fraktur2
,
885
3.77k
            "V" => 
&greek_variant0
,
886
3.77k
            _ => match UEB_INDICATOR_REPLACEMENTS.get(matched_char) {
887
0
                None => {error!("REPLACE_INDICATORS and UEB_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
888
3.77k
                Some(&ch) => ch,
889
            },
890
        }
891
3.77k
    });
892
893
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
894
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
895
366
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
896
   
897
366
    return result.to_string();
898
899
370
    fn pick_start_mode(raw_braille: &str, use_only_grade1: bool) -> String {
900
        // Need to decide what the start mode should be
901
        // From http://www.brailleauthority.org/ueb/ueb_math_guidance/final_for_posting_ueb_math_guidance_may_2019_102419.pdf
902
        //   Unless a math expression can be correctly represented with only a grade 1 symbol indicator in the first three cells
903
        //   or before a single letter standing alone anywhere in the expression,
904
        //   begin the expression with a grade 1 word indicator (or a passage indicator if the expression includes spaces)
905
        // Apparently "only a grade 1 symbol..." means at most one grade 1 symbol based on some examples (GTM 6.4, example 4)
906
        // debug!("before determining mode:  '{}'", raw_braille);
907
908
        // a bit ugly because we need to store the string if we have cap passage mode
909
370
        let raw_braille_string = if is_cap_passage_mode_good(raw_braille) {
convert_to_cap_passage_mode3
(
raw_braille3
)} else {
String::default367
()};
910
370
        let raw_braille = if raw_braille_string.is_empty() {
raw_braille367
} else {
&raw_braille_string3
};
911
370
        if use_only_grade1 {
912
1
            return remove_unneeded_mode_changes(raw_braille, UEB_Mode::Grade1, UEB_Duration::Passage);
913
369
        }
914
369
        let grade2 = remove_unneeded_mode_changes(raw_braille, UEB_Mode::Grade2, UEB_Duration::Symbol);
915
369
        debug!("Symbol mode:  '{}'", grade2);
916
917
369
        if is_grade2_string_ok(&grade2) {
918
143
            return grade2;
919
        } else {
920
            // BANA says use g1 word mode if spaces are present, but that's not what their examples do
921
            // A conversation with Ms. DeAndrea from BANA said that they mean use passage mode if ≥3 "segments" (≥2 blanks)
922
            // The G1 Word mode might not be at the start (iceb.rs:omission_3_6_7)
923
226
            let grade1_word = try_grade1_word_mode(raw_braille);
924
226
            debug!("Word mode:    '{}'", grade1_word);
925
226
            if !grade1_word.is_empty() {
926
36
                return grade1_word;
927
            } else {
928
190
                let grade1_passage = remove_unneeded_mode_changes(raw_braille, UEB_Mode::Grade1, UEB_Duration::Passage);
929
190
                return "⠰⠰⠰".to_string() + &grade1_passage + "⠰⠄";
930
            }
931
        }
932
933
        /// Return true if at least five (= # of cap passage indicators) cap indicators and no lower case letters
934
370
        fn is_cap_passage_mode_good(braille: &str) -> bool {
935
370
            let mut n_caps = 0;
936
370
            let mut is_cap_mode = false;
937
370
            let mut cap_mode = UEB_Duration::Symbol;    // real value set when is_cap_mode is set to true
938
370
            let mut chars = braille.chars();
939
940
            // look CL or CCL for caps (CC runs until we get whitespace)
941
            // if we find an L not in caps mode, we return false
942
            // Note: caps can be C𝐶, whitespace can be W𝐖
943
2.03k
            while let Some(
ch1.96k
) = chars.next() {
944
1.96k
                if ch == 'L' {
945
401
                    if !is_cap_mode {
946
288
                        return false;
947
113
                    }
948
113
                    chars.next();       // skip letter
949
113
                    if cap_mode == UEB_Duration::Symbol {
950
79
                        is_cap_mode = false;
951
79
                    
}34
952
1.55k
                } else if ch == 'C' || 
ch == '𝐶'1.49k
{
953
107
                    if is_cap_mode {
954
16
                        if cap_mode == UEB_Duration::Symbol {
955
12
                            cap_mode = UEB_Duration::Word;
956
12
                        
}4
957
91
                    } else {
958
91
                        is_cap_mode = true;
959
91
                        cap_mode = UEB_Duration::Symbol;
960
91
                    }
961
107
                    n_caps += 1;
962
1.45k
                } else if ch == 'W' || 
ch == '𝐖'1.33k
{
963
119
                    if is_cap_mode {
964
2
                        assert!(cap_mode == UEB_Duration::Word);
965
117
                    }
966
119
                    is_cap_mode = false;
967
1.33k
                } else if ch == '1' && 
is_cap_mode117
{
968
3
                    break;
969
1.33k
                }
970
            }
971
82
            return n_caps > 4;
972
370
        }
973
974
3
        /// Drops every capital indicator code ('C' and '𝐶') from `braille` and wraps what is left
        /// between "⠠⠠⠠" and "⠠⠄".
        fn convert_to_cap_passage_mode(braille: &str) -> String {
            let without_caps: String = braille.chars()
                        .filter(|&ch| ch != 'C' && ch != '𝐶')
                        .collect();
            format!("⠠⠠⠠{without_caps}⠠⠄")
        }
977
978
        /// Return true if the BANA or ICEB guidelines say it is ok to start with grade 2
979
369
        fn is_grade2_string_ok(grade2_braille: &str) -> bool {
980
            // BANA says use grade 2 if there is not more than one grade one symbol or single letter standing alone.
981
            // The exact quote from their guidance:
982
            //    Unless a math expression can be correctly represented with only a grade 1 symbol indicator in the first three cells
983
            //    or before a single letter standing alone anywhere in the expression,
984
            //    begin the expression with a grade 1 word indicator
985
            // Note: I modified this slightly to exclude the cap indicator in the count. That allows three more ICEB rule to pass and seems
986
            //    like it is a reasonable thing to do.
987
            // Another modification is allow a single G1 indicator to occur after whitespace later on
988
            //    because ICEB examples show it and it seems better than going to passage mode if it is the only G1 indicator
989
990
            // Because of the 'L's which go away, we have to put a little more work into finding the first three chars
991
369
            let chars = grade2_braille.chars().collect::<Vec<char>>();
992
369
            let mut n_real_chars = 0;  // actually number of chars
993
369
            let mut found_g1 = false;
994
369
            let mut i = 0;
995
1.75k
            while i < chars.len() {
996
1.75k
                let ch = chars[i];
997
1.75k
                if ch == '1' && 
!275
is_forced_grade1275
(&chars, i) {
998
269
                    if found_g1 {
999
19
                        return false;
1000
250
                    }
1001
250
                    found_g1 = true;
1002
1.48k
                } else if !"𝐶CLobc".contains(ch) {
1003
1.07k
                    if n_real_chars == 2 {
1004
347
                        i += 1;
1005
347
                        break;              // this is the third real char
1006
730
                    };
1007
730
                    n_real_chars += 1;
1008
407
                }
1009
1.38k
                i += 1
1010
            }
1011
1012
            // if we find *another* g1 that isn't forced and isn't standing alone, we are done
1013
            // I've added a 'follows whitespace' clause for test iceb.rs:omission_3_6_2 to the standing alone rule
1014
            // we only allow one standing alone example -- not sure if BANA guidance has this limit, but GTM 11_5_5_3 seems better with it
1015
            // Same for GTM 1_7_3_1 (passage mode is mentioned also)
1016
350
            let mut is_standing_alone_already_encountered = false;
1017
350
            let mut is_after_whitespace = false;
1018
2.43k
            while i < chars.len() {
1019
2.29k
                let ch = chars[i];
1020
2.29k
                if ch == 'W' {
1021
355
                    is_after_whitespace = true;
1022
1.93k
                } else if ch == '1' && 
!239
is_forced_grade1239
(&chars, i) {
1023
235
                    if is_standing_alone_already_encountered ||
1024
226
                       ((found_g1 || 
!is_after_whitespace33
) &&
!203
is_single_letter_on_right203
(&chars, i)) {
1025
207
                        return false;
1026
28
                    }
1027
28
                    found_g1 = true;
1028
28
                    is_standing_alone_already_encountered = true;
1029
1.70k
                }
1030
2.08k
                i += 1;
1031
            }
1032
143
            return true;
1033
369
        }
1034
1035
        /// Return true if the sequence of chars forces a '1' at the `i`th position
1036
        /// Note: `chars[i]` should be '1'
1037
930
        fn is_forced_grade1(chars: &[char], i: usize) -> bool {
1038
            // A '1' is forced if 'a-j' follows a digit
1039
930
            assert_eq!(chars[i], '1', "'is_forced_grade1' didn't start with '1'");
1040
            // check that a-j follows the '1' -- we have '1Lx' where 'x' is the letter to check
1041
930
            if i+2 < chars.len() && 
is_letter_number927
(
unhighlight927
(
chars[i+2]927
)) {
1042
                // check for a number before the '1'
1043
                // this will be 'N' followed by LETTER_NUMBERS or the number ".", ",", or " "
1044
25
                for j in (
0..i12
).
rev12
() {
1045
25
                    let ch = chars[j];
1046
25
                    if !(is_letter_number(unhighlight(ch)) || 
".,W𝐖"14
.
contains14
(
ch14
)) {
1047
12
                        return ch == 'N'
1048
13
                    }
1049
                }
1050
918
            }
1051
918
            return false;
1052
930
        }
1053
1054
203
        /// Looks at what follows position `i`: typeface/capital codes are skipped and 'L' + cell pairs are counted.
        /// Returns false if a second letter follows the first one; when some other char is reached, returns whether
        /// exactly one letter was seen. If the end is reached first, returns true.
        fn is_single_letter_on_right(chars: &[char], i: usize) -> bool {
            let mut n_letters = 0;
            let mut pos = i + 1;
            while let Some(&ch) = chars.get(pos) {
                match ch {
                    // typeface and capital codes don't affect the count
                    'B' | 'I' | '𝔹' | 'S' | 'T' | 'D' | 'C' | '𝐶' | 's' | 'w' => pos += 1,
                    'L' if n_letters == 1 => return false,      // found a second letter in the sequence
                    'L' => {
                        n_letters += 1;
                        pos += 2;       // eat 'L' and actual letter
                    },
                    _ => return n_letters == 1,
                }
            }
            true
        }
1080
1081
226
        fn try_grade1_word_mode(raw_braille: &str) -> String {
1082
            // this isn't quite right, but pretty close -- try splitting at 'W' (words)
1083
            // only one of the parts can be in word mode and none of the others can have '1' unless forced
1084
226
            let mut g1_words = Vec::default();
1085
226
            let mut found_word_mode = false;
1086
622
            for raw_word in 
raw_braille226
.
split226
('W') {
1087
622
                let word = remove_unneeded_mode_changes(raw_word, UEB_Mode::Grade2, UEB_Duration::Symbol);
1088
                // debug!("try_grade1_word_mode: word='{}'", word);
1089
622
                let word_chars = word.chars().collect::<Vec<char>>();
1090
622
                let needs_word_mode = word_chars.iter().enumerate()
1091
1.12k
                    .
any622
(|(i, &ch) | ch == '1' &&
!416
is_forced_grade1416
(&word_chars, i));
1092
622
                if needs_word_mode {
1093
416
                    if found_word_mode {
1094
190
                        return "".to_string();
1095
226
                    }
1096
226
                    found_word_mode = true;
1097
226
                    g1_words.push("⠰⠰".to_string() + &remove_unneeded_mode_changes(raw_word, UEB_Mode::Grade1, UEB_Duration::Word)
1098
                    );
1099
206
                } else {
1100
206
                    g1_words.push(word);
1101
206
                }
1102
            }
1103
36
            return if found_word_mode {g1_words.join("W")} else {
""0
.
to_string0
()};
1104
226
        }
1105
370
    }
1106
366
}
1107
1108
478
fn typeface_to_word_mode(braille: &str) -> String {
1109
2
    static HAS_TYPEFACE: LazyLock<Regex> = LazyLock::new(|| Regex::new("[BI𝔹STD]").unwrap());
1110
    // debug!("before typeface fix:  '{}'", braille);
1111
1112
478
    let mut result = "".to_string();
1113
478
    let chars = braille.chars().collect::<Vec<char>>();
1114
478
    let mut word_mode = Vec::with_capacity(5);
1115
478
    let mut word_mode_end = Vec::with_capacity(5);
1116
478
    let mut i = 0;
1117
11.5k
    while i < chars.len() {
1118
11.0k
        let ch = chars[i];
1119
11.0k
        if HAS_TYPEFACE.is_match(ch.to_string().as_str()) {
1120
8
            let i_next_char_target = find_next_char(&chars[i+1..], ch);
1121
8
            if word_mode.contains(&ch) {
1122
3
                if i_next_char_target.is_none() {
1123
2
                    word_mode.retain(|&item| item!=ch);  // drop the char since word mode is done
1124
2
                    word_mode_end.push(ch);   // add the char to signal to add end sequence
1125
1
                }
1126
            } else {
1127
5
                result.push(ch);
1128
5
                if i_next_char_target.is_some() {
1129
2
                    result.push('w');     // typeface word indicator
1130
2
                    word_mode.push(ch);      // starting word mode for this char
1131
3
                } else {
1132
3
                    result.push('s');     // typeface single char indicator
1133
3
                }
1134
            }
1135
8
            i += 1; // eat "B", etc
1136
11.0k
        } else if ch == 'L' || 
ch == 'N'8.72k
{
1137
3.70k
            result.push(chars[i]);
1138
3.70k
            result.push(chars[i+1]);
1139
3.70k
            if !word_mode_end.is_empty() && 
i+22
< chars.len() && !(
chars[i+2] == 'W'1
||
chars[i+2] == '𝐖'1
) {
1140
                // add terminator unless word sequence is terminated by end of string or whitespace
1141
1
                for &ch in &word_mode_end {
1142
1
                    result.push(ch);
1143
1
                    result.push('e');
1144
1
                };
1145
1
                word_mode_end.clear();
1146
3.70k
            }
1147
3.70k
            i += 2; // eat Ll/Nd
1148
7.30k
        } else {
1149
7.30k
            result.push(ch);
1150
7.30k
            i += 1;
1151
7.30k
        }
1152
    }
1153
478
    return result;
1154
1155
478
}
1156
1157
478
fn capitals_to_word_mode(braille: &str) -> String {
1158
    use std::iter::FromIterator;
1159
    // debug!("before capitals fix:  '{}'", braille);
1160
1161
478
    let mut result = "".to_string();
1162
478
    let chars = braille.chars().collect::<Vec<char>>();
1163
478
    let mut is_word_mode = false;
1164
478
    let mut i = 0;
1165
    // look for a sequence of CLxCLy... and create CCLxLy...
1166
12.6k
    while i < chars.len() {
1167
12.1k
        let ch = chars[i];
1168
12.1k
        if ch == 'C' {
1169
            // '𝑐' should only occur after a 'C', so we don't have top-level check for it
1170
256
            let mut next_non_cap = i+1;
1171
257
            while let Some(
i_next1
) = find_next_char(&chars[next_non_cap..], '𝑐') {
1172
1
                next_non_cap += i_next + 1; // C/𝑐, L, letter
1173
1
            }
1174
256
            if find_next_char(&chars[next_non_cap..], 'C').is_some() { // next letter sequence "C..."
1175
63
                if is_next_char_start_of_section_12_modifier(&chars[next_non_cap+1..]) {
1176
                    // to me this is tricky -- section 12 modifiers apply to the previous item
1177
                    // the last clause of the "item" def is the previous indivisible symbol" which ICEB 2.1 say is:
1178
                    //   braille sign: one or more consecutive braille characters comprising a unit,
1179
                    //     consisting of a root on its own or a root preceded by one or more
1180
                    //     prefixes (also referred to as braille symbol)
1181
                    // this means the capital indicator needs to be stated and can't be part of a word or passage
1182
1
                    is_word_mode = false;
1183
1
                    result.push_str(String::from_iter(&chars[i..next_non_cap]).as_str());
1184
1
                    i = next_non_cap;
1185
1
                    continue;
1186
62
                }
1187
62
                if is_word_mode {
1188
12
                    i += 1;     // skip the 'C'
1189
50
                } else {
1190
50
                    // start word mode -- need an extra 'C'
1191
50
                    result.push('C');
1192
50
                    is_word_mode = true;
1193
50
                }
1194
193
            } else if is_word_mode {
1195
50
                i += 1;         // skip the 'C'
1196
143
            }
1197
255
            if chars[next_non_cap] == 'G' {
1198
8
                // Greek letters are a bit exceptional in that the pattern is "CGLx" -- bump 'i'
1199
8
                next_non_cap += 1;
1200
247
            }
1201
255
            if chars[next_non_cap] != 'L' {
1202
0
                error!("capitals_to_word_mode: internal error: didn't find L after C in '{}'.",
1203
0
                       chars[i..next_non_cap+2].iter().collect::<String>().as_str());
1204
255
            }
1205
255
            let i_braille_char = next_non_cap + 2;
1206
255
            result.push_str(String::from_iter(&chars[i..i_braille_char]).as_str());
1207
255
            i = i_braille_char;
1208
11.9k
        } else if ch == 'L' {       // must be lowercase -- uppercase consumed above
1209
            // assert!(LETTERS.contains(&unhighlight(chars[i+1]))); not true for other alphabets
1210
2.03k
            if is_word_mode {
1211
2
                result.push('e');       // terminate Word mode (letter after caps)
1212
2
                is_word_mode = false;
1213
2.03k
            }
1214
2.03k
            result.push('L');
1215
2.03k
            result.push(chars[i+1]);
1216
2.03k
            i += 2; // eat L, letter
1217
9.88k
        } else {
1218
9.88k
            is_word_mode = false;   // non-letters terminate cap word mode
1219
9.88k
            result.push(ch);
1220
9.88k
            i += 1;
1221
9.88k
        }
1222
    }
1223
478
    return result;
1224
1225
63
    fn is_next_char_start_of_section_12_modifier(chars: &[char]) -> bool {
1226
        // first find the L and eat the char so that we are at the potential start of where the target lies
1227
63
        let chars_len = chars.len();
1228
63
        let mut i_cap = 0;
1229
126
        while chars[i_cap] != 'C' {     // we know 'C' is in the string, so no need to check for exceeding chars_len
1230
63
            i_cap += 1;
1231
63
        }
1232
73
        for i_end in 
i_cap+1..chars_len63
{
1233
73
            if chars[i_end] == 'L' {
1234
                // skip the next char to get to the real start, and then look for the modifier string or next L/N
1235
                // debug!("   after L '{}'", chars[i_end+2..].iter().collect::<String>());
1236
65
                for i in 
i_end+2..chars_len63
{
1237
65
                    let ch = chars[i];
1238
65
                    if ch == '1' {
1239
                        // Fix: there's probably a much better way to check if we have a match against one of "⠱", "⠘⠱", "⠘⠲", "⠸⠱", "⠐⠱ ", "⠨⠸⠱"
1240
5
                        if chars[i+1] == '⠱' {
1241
0
                            return true;
1242
5
                        } else if i+2 < chars_len {
1243
5
                            let mut str = chars[i+1].to_string();
1244
5
                            str.push(chars[i+2]);
1245
5
                            if str == "⠘⠱" || str == "⠘⠲" || str == "⠸⠱" || str == "⠐⠱" {
1246
1
                                return true;
1247
4
                            } else if i+3 < chars_len {
1248
4
                                str.push(chars[i+3]);
1249
4
                                return str == "⠨⠸⠱";
1250
0
                            }
1251
0
                            return false;
1252
0
                        }
1253
60
                    }
1254
60
                    if ch == 'L' || 
ch == 'N'46
||
!is_letter_prefix(ch)46
{
1255
48
                        return false;
1256
12
                    }
1257
                }
1258
10
            }
1259
        }
1260
10
        return false;
1261
63
    }    
1262
478
}
1263
1264
521
fn find_next_char(chars: &[char], target: char) -> Option<usize> {        
1265
    // first find the L or N and eat the char so that we are at the potential start of where the target lies
1266
    // debug!("Looking for '{}' in '{}'", target, chars.iter().collect::<String>());
1267
610
    for i_end in 
0..chars.len()521
{
1268
610
        if chars[i_end] == 'L' || 
chars[i_end] == 'N'95
{
1269
            // skip the next char to get to the real start, and then look for the target
1270
            // stop when L/N signals past potential target or we hit some non L/N char (actual braille)
1271
            // debug!("   after L/N '{}'", chars[i_end+2..].iter().collect::<String>());
1272
521
            for (
i515
, &
ch515
) in chars.iter().enumerate().skip(i_end+2) {
1273
515
                if ch == 'L' || 
ch == 'N'368
||
!is_letter_prefix(ch)366
{
1274
383
                    return None;
1275
132
                } else if ch == target {
1276
                    // debug!("   found target");
1277
67
                    return Some(i);
1278
65
                }
1279
            }
1280
89
        }
1281
    }
1282
71
    return None;
1283
521
}
1284
1285
/// The braille mode in effect at a given point of the string being processed.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq)]
enum UEB_Mode {
    /// Numeric mode (also includes Grade1)
    Numeric,
    Grade1,
    Grade2,
}
1292
1293
/// How long a mode (or capital indicator) stays in effect.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq)]
enum UEB_Duration {
    /// Applies to the next symbol/item only.
    ///
    /// Standing alone: A braille symbol that is standing alone may have a contracted (grade 2) meaning.
    /// A letter or unbroken sequence of letters is “standing alone” if the symbols before and after the letter or
    ///   sequence are spaces, hyphens, dashes or any combination thereof, including some common punctuation.
    /// Item: An “item” is defined as the next symbol or one of seven groupings listed in Rules of Unified English Braille, §11.4.1.
    Symbol,

    /// Applies to the next word or symbol sequence (this is what the grade 1 word indicator sets).
    ///
    /// A symbol sequence in UEB is defined as an unbroken string of braille signs,
    ///   whether alphabetic or non-alphabetic, preceded and followed by a space.
    Word,

    Passage,
}
1308
1309
/// Returns `true` for the typeface, capital, and typeface-indicator codes.
/// Used to determine standing alone (on the left side).
fn is_left_intervening_char(ch: char) -> bool {
    const LEFT_INTERVENING: [char; 10] = ['B', 'I', '𝔹', 'S', 'T', 'D', 'C', '𝐶', 's', 'w'];
    LEFT_INTERVENING.contains(&ch)
}
1313
1314
/// Return value for use_g1_word_mode()
#[derive(PartialEq, Debug)]
enum Grade1WordIndicator {
    /// no '𝟙' in the current/next word
    NotInWord,
    /// '𝟙' in the current/next word
    InWord,
    /// no '𝟙' in the entire string (optimization for common case)
    NotInChars,
}
1321
1322
1.89k
fn remove_unneeded_mode_changes(raw_braille: &str, start_mode: UEB_Mode, start_duration: UEB_Duration) -> String {
1323
    // FIX: need to be smarter about moving on wrt to typeforms/typefaces, caps, bold/italic. [maybe just let them loop through the default?]
1324
1.89k
    let mut mode = start_mode;
1325
1.89k
    let mut duration = start_duration;
1326
1.89k
    let mut start_g2_letter = None;    // used for start of contraction checks
1327
1.89k
    let mut i_g2_start = None;  // set to 'i' when entering G2 mode; None in other modes. '1' indicator goes here if standing alone
1328
1.89k
    let mut cap_word_mode = false;     // only set to true in G2 to prevent contractions
1329
1.89k
    let mut result = String::default();
1330
1.89k
    let chars = raw_braille.chars().collect::<Vec<char>>();
1331
1.89k
    let mut g1_word_indicator = Grade1WordIndicator::NotInChars;        // almost always true (and often irrelevant)
1332
1.89k
    if mode == UEB_Mode::Grade2 || 
duration == UEB_Duration::Symbol901
{
1333
991
        g1_word_indicator = use_g1_word_mode(&chars);
1334
991
        if g1_word_indicator == Grade1WordIndicator::InWord {
1335
1
            mode = UEB_Mode::Grade1;
1336
1
            if duration == UEB_Duration::Symbol {
1337
1
                duration = UEB_Duration::Word;     // if Passage mode, leave as is
1338
1
                result.push('𝟙')
1339
0
            }
1340
990
        }
1341
901
    }
1342
1.89k
    let mut i = 0;
1343
37.0k
    while i < chars.len() {
1344
35.1k
        let ch = chars[i];
1345
35.1k
        match mode {
1346
            UEB_Mode::Numeric => {
1347
                // Numeric Mode: (from https://uebmath.aphtech.org/lesson1.0 and lesson4.0)
1348
                // Symbols that can appear within numeric mode include the ten digits, comma, period, simple fraction line,
1349
                // line continuation indicator, and numeric space digit symbols.
1350
                // A space or any other symbol not listed here terminates numeric mode.
1351
                // Numeric mode is also terminated by the "!" -- used after a script
1352
                //
1353
                // The numeric indicator also turns on grade 1 mode.
1354
                // When grade 1 mode is set by the numeric indicator,
1355
                //   grade 1 indicators are not used unless a single lower-case letter a-j immediately follows a digit.
1356
                // Grade 1 mode when set by the numeric indicator is terminated by a space, hyphen, dash, or a grade 1 indicator.
1357
3.31k
                i_g2_start = None;
1358
                // debug!("Numeric: ch={}, duration: {:?}", ch, duration);
1359
3.31k
                match ch {
1360
                    'L' => {
1361
                        // terminate numeric mode -- duration doesn't change
1362
                        // let the default case handle pushing on the chars for the letter
1363
1.42k
                        if is_letter_number(unhighlight(chars[i+1])) {
1364
1.37k
                            result.push('1');   // need to distinguish a-j from a digit
1365
1.37k
                        
}44
1366
1.42k
                        result.push(ch);
1367
1.42k
                        i += 1;
1368
1.42k
                        mode = UEB_Mode::Grade1;
1369
                        // duration remains Word
1370
                    },
1371
                    '1' | '𝟙' => {
1372
                        // numeric mode implies grade 1, so don't output indicator;
1373
107
                        i += 1;
1374
107
                        mode = UEB_Mode::Grade1;
1375
107
                        if start_duration == UEB_Duration::Passage {
1376
15
                            duration = UEB_Duration::Passage;      // otherwise it remains at Word
1377
92
                        }
1378
                    },
1379
                    '#' => {
1380
                        // terminate numeric mode -- duration doesn't change
1381
738
                        i += 1;
1382
738
                        if i+1 < chars.len() && 
chars[i] == 'L'691
&&
is_letter_number22
(
unhighlight22
(
chars[i+1]22
)) {
1383
9
                            // special case where the script was numeric and a letter follows, so need to put out G1 indicator
1384
9
                            result.push('1');
1385
9
                            // the G1 case should work with 'L' now
1386
729
                        }
1387
738
                        mode = UEB_Mode::Grade1;
1388
                    },
1389
521
                    'N' => {
1390
521
                        // stay in the same mode (includes numeric "," and "." space) -- don't let default get these chars
1391
521
                        result.push(chars[i+1]);
1392
521
                        i += 2;
1393
521
                    },
1394
                    _ => {
1395
                        // moving out of numeric mode
1396
524
                        result.push(ch);
1397
524
                        i += 1;
1398
524
                        if "W𝐖-—―".contains(ch) {
1399
94
                            mode = start_mode;
1400
94
                            if mode == UEB_Mode::Grade2 {
1401
47
                                start_g2_letter = None;        // will be set to real letter
1402
47
                            }
1403
94
                            if start_duration != UEB_Duration::Passage {
1404
47
                                duration = UEB_Duration::Symbol;
1405
47
                            }
1406
                        } else {
1407
430
                            mode = UEB_Mode::Grade1
1408
                        }
1409
                    },
1410
                }
1411
            },
1412
            UEB_Mode::Grade1 => {
1413
                // Grade 1 Mode:
1414
                // The numeric indicator also sets grade 1 mode.
1415
                // Grade 1 mode, when initiated by the numeric indicator, is terminated by a space, hyphen, dash or grade 1 terminator.
1416
                // Grade 1 mode is also set by grade 1 indicators.
1417
25.0k
                i_g2_start = None;
1418
                // debug!("Grade 1: ch={}, duration: {:?}", ch, duration);
1419
25.0k
                match ch {
1420
3.34k
                    'L' => {
1421
3.34k
                        // note: be aware of '#' case for Numeric because '1' might already be generated
1422
3.34k
                        // let prev_ch = if i > 1 {chars[i-1]} else {'1'};   // '1' -- anything beside ',' or '.'
1423
3.34k
                        // if duration == UEB_Duration::Symbol || 
1424
3.34k
                        //     ( ",. ".contains(prev_ch) && LETTER_NUMBERS.contains(&unhighlight(chars[i+1])) ) {
1425
3.34k
                        //     result.push('1');        // need to retain grade 1 indicator (RUEB 6.5.2)
1426
3.34k
                        // }
1427
3.34k
                        // let the default case handle pushing on the chars for the letter
1428
3.34k
                        result.push(ch);
1429
3.34k
                        i += 1;
1430
3.34k
                    },
1431
                    '1' | '𝟙' => {
1432
2.35k
                        assert!(ch == '1' || 
duration != UEB_Duration::Symbol2
); // if '𝟙', should be Word or Passage duration
1433
                        // nothing to do -- let the default case handle the following chars
1434
2.35k
                        i += 1;
1435
                    },
1436
2.36k
                    'N' => {
1437
2.36k
                        result.push(ch);
1438
2.36k
                        result.push(chars[i+1]);
1439
2.36k
                        i += 2;
1440
2.36k
                        mode = UEB_Mode::Numeric;
1441
2.36k
                        duration = UEB_Duration::Word;
1442
2.36k
                    },
1443
                    'W' | '𝐖' => {
1444
                        // this terminates a word mode if there was one
1445
711
                        result.push(ch);
1446
711
                        i += 1;
1447
711
                        if start_duration != UEB_Duration::Passage {
1448
224
                            duration = UEB_Duration::Symbol;
1449
224
                            mode = UEB_Mode::Grade2;
1450
487
                        }
1451
                    },
1452
                    _ => {
1453
16.3k
                        result.push(ch);
1454
16.3k
                        i += 1;
1455
16.3k
                        if duration == UEB_Duration::Symbol && 
!is_letter_prefix(ch)1.34k
{
1456
1.34k
                            mode = start_mode;
1457
14.9k
                        }
1458
                    }
1459
                }
1460
25.0k
                if mode == UEB_Mode::Grade2 {
1461
1.56k
                    start_g2_letter = None;        // will be set to real letter
1462
23.5k
                }
1463
1464
            },
1465
            UEB_Mode::Grade2 => {
1466
                // note: if we ended up using a '1', it only extends to the next char, which is also dealt with, so mode doesn't change
1467
6.79k
               if i_g2_start.is_none() {
1468
2.58k
                   i_g2_start = Some(i);
1469
2.58k
                   cap_word_mode = false;
1470
4.21k
               }
1471
                // debug!("Grade 2: ch={}, duration: {:?}", ch, duration);
1472
6.79k
                match ch {
1473
                    'L' => {
1474
1.44k
                        if start_g2_letter.is_none() {
1475
1.34k
                            start_g2_letter = Some(i);
1476
1.34k
                        
}97
1477
1.44k
                        let (is_alone, right_matched_chars, n_letters) = stands_alone(&chars, i);
1478
                        // GTM 1.2.1 says we only need to use G1 for single letters or sequences that are a shortform (e.g, "ab")
1479
1.44k
                        if is_alone && (
n_letters == 1400
||
is_short_form28
(
&right_matched_chars[..2*n_letters]28
)) {
1480
373
                            // debug!("  is_alone -- pushing '1'");
1481
373
                            result.push('1');
1482
373
                            mode = UEB_Mode::Grade1;
1483
1.07k
                        }
1484
                        // debug!("  pushing {:?}", right_matched_chars);
1485
3.13k
                        
right_matched_chars1.44k
.
iter1.44k
().
for_each1.44k
(|&ch| result.push(ch));
1486
1.44k
                        i += right_matched_chars.len();
1487
                    },
1488
                    'C' => {
1489
                        // Want 'C' before 'L'; Could be CC for word cap -- if so, eat it and move on
1490
                        // Note: guaranteed that there is a char after the 'C', so chars[i+1] is safe
1491
99
                        if chars[i+1] == 'C' {
1492
14
                            cap_word_mode = true;
1493
14
                            i += 1;
1494
14
                        } else {
1495
85
                            let is_greek = chars[i+1] == 'G';
1496
85
                            let (is_alone, right_matched_chars, n_letters) = stands_alone(&chars, if is_greek {
i+22
} else {
i+183
});
1497
                            // GTM 1.2.1 says we only need to use G1 for single letters or sequences that are a shortform (e.g, "ab")
1498
85
                            if is_alone && (
n_letters == 122
||
is_short_form7
(
&right_matched_chars[..2*n_letters]7
)) {
1499
16
                                // debug!("  is_alone -- pushing '1'");
1500
16
                                result.push('1');
1501
16
                                mode = UEB_Mode::Grade1;
1502
69
                            }
1503
85
                            if cap_word_mode {
1504
14
                                result.push('C');   // first 'C' if cap word
1505
71
                            }
1506
85
                            result.push('C');
1507
85
                            if is_greek {
1508
2
                                result.push('G');
1509
2
                                i += 1;
1510
83
                            }
1511
85
                            start_g2_letter = Some(i);
1512
                            // debug!("  pushing 'C' + {:?}", right_matched_chars);
1513
256
                            
right_matched_chars85
.
iter85
().
for_each85
(|&ch| result.push(ch));
1514
85
                            i += 1 + right_matched_chars.len();
1515
                        }
1516
                    },
1517
1.34k
                    '1' => {
1518
1.34k
                        result.push(ch);
1519
1.34k
                        i += 1;
1520
1.34k
                        mode = UEB_Mode::Grade1;
1521
1.34k
                        duration = UEB_Duration::Symbol;
1522
1.34k
                    },
1523
                    '𝟙' => {
1524
                        // '𝟙' should have forced G1 Word mode
1525
0
                        error!("Internal error: '𝟙' found in G2 mode: index={i} in '{raw_braille}'");
1526
0
                        i += 1;
1527
                    }
1528
582
                    'N' => {
1529
582
                        result.push(ch);
1530
582
                        result.push(chars[i+1]);
1531
582
                        i += 2;
1532
582
                        mode = UEB_Mode::Numeric;
1533
582
                        duration = UEB_Duration::Word;
1534
582
                    },
1535
                    _ => {
1536
3.32k
                        if let Some(
start505
) = start_g2_letter {
1537
505
                            if !cap_word_mode {
1538
504
                                result = handle_contractions(&chars[start..i], result);
1539
504
                            
}1
1540
505
                            cap_word_mode = false;
1541
505
                            start_g2_letter = None;     // not start of char sequence
1542
2.81k
                        }
1543
3.32k
                        result.push(ch);
1544
3.32k
                        i += 1;
1545
3.32k
                        if !is_left_intervening_char(ch) {
1546
3.29k
                            cap_word_mode = false;
1547
3.29k
                            i_g2_start = Some(i);
1548
3.29k
                        
}29
1549
1550
                    }
1551
                }
1552
6.79k
                if mode != UEB_Mode::Grade2 && 
!cap_word_mode2.31k
&&
1553
2.30k
                   let Some(
start883
) = start_g2_letter {
1554
883
                        result = handle_contractions(&chars[start..i], result);
1555
883
                        start_g2_letter = None;     // not start of char sequence
1556
5.91k
                    }
1557
            },
1558
        }
1559
1560
35.1k
        if (ch == 'W' || 
ch == '𝐖'34.0k
) &&
g1_word_indicator != Grade1WordIndicator::NotInChars1.13k
&&
1561
602
           (mode == UEB_Mode::Grade2 || 
duration == UEB_Duration::Symbol0
) {
1562
602
            g1_word_indicator = use_g1_word_mode(&chars[i..]);
1563
602
            if g1_word_indicator == Grade1WordIndicator::InWord {
1564
1
                mode = UEB_Mode::Grade1;
1565
1
                if duration == UEB_Duration::Symbol {
1566
1
                    duration = UEB_Duration::Word;     // if Passage mode, leave as is
1567
1
                    result.push('𝟙')
1568
0
                }
1569
601
            }
1570
34.5k
        }
1571
    }
1572
1.89k
    if mode == UEB_Mode::Grade2 &&
1573
289
       let Some(
start31
) = start_g2_letter {
1574
31
            result = handle_contractions(&chars[start..i], result);
1575
1.86k
        }
1576
1577
1.89k
    return result;
1578
1579
1580
1.59k
    fn use_g1_word_mode(chars: &[char]) -> Grade1WordIndicator {
1581
        // debug!("use_g1_word_mode: chars='{:?}'", chars);
1582
19.5k
        for &ch in 
chars1.59k
{
1583
19.5k
            if ch == 'W' || 
ch == '𝐖'18.9k
{
1584
601
                return Grade1WordIndicator::NotInWord;       // reached a word boundary
1585
18.9k
            }
1586
18.9k
            if ch == '𝟙' {
1587
2
                return Grade1WordIndicator::InWord;        // need word mode in this "word"
1588
18.9k
            }
1589
        }
1590
990
        return Grade1WordIndicator::NotInChars;               // 
1591
1.59k
    }
1592
1.89k
}
1593
1594
/// Returns a tuple:
1595
///   true if the ith char "stands alone" (UEB 2.6)
1596
///   the chars on the right that are part of the standing alone sequence
1597
///   the number of letters in that sequence
1598
/// This basically means a letter sequence surrounded by white space with some potentially intervening chars
1599
/// The intervening chars can be typeform/cap indicators, along with various forms of punctuation
1600
/// The ith char should be an "L"
1601
/// This assumes that there is whitespace before and after the character string
1602
1.52k
fn stands_alone(chars: &[char], i: usize) -> (bool, &[char], usize) {
1603
    // scan backward and check the conditions for "standing-alone"
1604
    // we scan forward and check the conditions for "standing-alone"
1605
1.52k
    assert_eq!(chars[i], 'L', "'stands_alone' starts with non 'L'");
1606
    // debug!("stands_alone: i={}, chars: {:?}", i, chars);
1607
1.52k
    if !left_side_stands_alone(&chars[0..i]) {
1608
977
        return (false, &chars[i..i+2], 0);
1609
552
    }
1610
1611
552
    let (mut is_alone, n_letters, n_right_matched) = right_side_stands_alone(&chars[i+2..]);
1612
    // debug!("left is alone, right is alone: {}, : n_letters={}, n_right_matched={}", is_alone, n_letters, n_right_matched);
1613
1614
552
    if is_alone && 
n_letters == 1425
{
1615
390
        let ch = chars[i+1];
1616
390
        if ch=='⠁' || 
ch=='⠊'389
||
ch=='⠕'387
{ // a, i, o
1617
3
            is_alone = false;
1618
387
        }
1619
162
    }
1620
552
    return (is_alone, &chars[i..i+2+n_right_matched], n_letters);
1621
1622
    /// chars before 'L'
1623
1.52k
    fn left_side_stands_alone(chars: &[char]) -> bool {
1624
        // scan backwards to skip letters and intervening chars
1625
        // once we hit an intervening char, only intervening chars are allowed if standing alone
1626
1.52k
        let mut intervening_chars_mode = false; // true when we are on the final stretch
1627
1.52k
        let mut i = chars.len();
1628
1.86k
        while i > 0 {
1629
1.38k
            i -= 1;
1630
1.38k
            let ch = chars[i];
1631
1.38k
            let prev_ch = if i > 0 {
chars[i-1]1.34k
} else {
' '45
}; // ' ' is a char not in input
1632
            // debug!("  left alone: prev/ch {}/{}", prev_ch, ch);
1633
1.38k
            if (!intervening_chars_mode && 
prev_ch == 'L'1.10k
) ||
1634
1.30k
               (prev_ch == 'o' || 
prev_ch == 'b'1.21k
) {
1635
174
                intervening_chars_mode = true;
1636
174
                i -= 1;       // ignore 'Lx' and also ignore 'ox'
1637
1.21k
            } else if is_left_intervening_char(ch) {
1638
161
                intervening_chars_mode = true;
1639
161
            } else {
1640
1.05k
                return "W𝐖-—―".contains(ch);
1641
            }
1642
        }
1643
1644
475
        return true;
1645
1.52k
    }
1646
1647
    // chars after character we are testing
1648
552
    fn right_side_stands_alone(chars: &[char]) -> (bool, usize, usize) {
1649
        // see RUEB 2.6.3
1650
355
        fn is_right_intervening_char(ch: char) -> bool {
1651
355
            
matches!342
(ch, 'B' | 'I' | '𝔹' | 'S' | 'T' | 'D' | 'C' | '𝐶' | 's' | 'w' | 'e')
1652
355
        }
1653
        // scan forward to skip letters and intervening chars
1654
        // once we hit an intervening char, only intervening chars are allowed if standing alone ('c' and 'b' are part of them)
1655
552
        let mut intervening_chars_mode = false; // true when we are on the final stretch
1656
552
        let mut i = 0;
1657
552
        let mut n_letters = 1;      // we have skipped the first letter
1658
725
        while i < chars.len() {
1659
515
            let ch = chars[i];
1660
            // debug!("  right alone: ch/next {}/{}", ch, if i+1<chars.len() {chars[i+1]} else {' '});
1661
515
            if !intervening_chars_mode && 
ch == 'L'502
{
1662
140
                n_letters += 1;
1663
140
                i += 1;       // ignore 'Lx' and also ignore 'ox'
1664
375
            } else if ch == 'c' || 
ch == 'b'355
{
1665
20
                i += 1;       // ignore 'Lx' and also ignore 'ox'
1666
355
            } else if is_right_intervening_char(ch) {  
1667
13
                intervening_chars_mode = true;
1668
13
            } else {
1669
342
                return if "W𝐖-—―".contains(ch) {
(true, n_letters, i)215
} else {
(false, n_letters, i)127
};
1670
            }
1671
173
            i += 1;
1672
        }
1673
1674
210
        return (true, n_letters, chars.len());
1675
552
    }
1676
1.52k
}
1677
1678
1679
/// Return a modified result if chars can be contracted.
1680
/// Otherwise, the original string is returned
1681
1.41k
fn handle_contractions(chars: &[char], mut result: String) -> String {
1682
    struct Replacement {
1683
        pattern: String,
1684
        replacement: &'static str
1685
    }
1686
1687
    const ASCII_TO_UNICODE: &[char] = &[
1688
        '⠀', '⠮', '⠐', '⠼', '⠫', '⠩', '⠯', '⠄', '⠷', '⠾', '⠡', '⠬', '⠠', '⠤', '⠨', '⠌',
1689
        '⠴', '⠂', '⠆', '⠒', '⠲', '⠢', '⠖', '⠶', '⠦', '⠔', '⠱', '⠰', '⠣', '⠿', '⠜', '⠹',
1690
        '⠈', '⠁', '⠃', '⠉', '⠙', '⠑', '⠋', '⠛', '⠓', '⠊', '⠚', '⠅', '⠇', '⠍', '⠝', '⠕',
1691
        '⠏', '⠟', '⠗', '⠎', '⠞', '⠥', '⠧', '⠺', '⠭', '⠽', '⠵', '⠪', '⠳', '⠻', '⠘', '⠸',
1692
    ];
1693
1694
36
    fn to_unicode_braille(ascii: &str) -> String {
1695
36
        let mut unicode = String::with_capacity(4*ascii.len());   // 'L' + 3 bytes for braille char
1696
82
        for ch in 
ascii36
.
as_bytes36
() {
1697
82
            unicode.push('L');
1698
82
            unicode.push(ASCII_TO_UNICODE[(ch.to_ascii_uppercase() - 32) as usize])
1699
        }
1700
36
        return unicode;
1701
36
    }
1702
1703
    // It would be much better from an extensibility point of view to read the table in from a file
1704
2
    static CONTRACTIONS: LazyLock<Vec<Replacement>> = LazyLock::new(|| { vec![
1705
            // 10.3: Strong contractions
1706
2
            Replacement{ pattern: to_unicode_braille("and"), replacement: "L⠯"},
1707
2
            Replacement{ pattern: to_unicode_braille("for"), replacement: "L⠿"},
1708
2
            Replacement{ pattern: to_unicode_braille("of"), replacement: "L⠷"},
1709
2
            Replacement{ pattern: to_unicode_braille("the"), replacement: "L⠮"},
1710
2
            Replacement{ pattern: to_unicode_braille("with"), replacement: "L⠾"},
1711
            
1712
            // 10.8: final-letter group signs (this need to precede 'en' and any other shorter contraction)
1713
2
            Replacement{ pattern: "(?P<s>L.)L⠍L⠑L⠝L⠞".to_string(), replacement: "${s}L⠰L⠞" }, // ment
1714
2
            Replacement{ pattern: "(?P<s>L.)L⠞L⠊L⠕L⠝".to_string(), replacement: "${s}L⠰L⠝" } ,// tion
1715
1716
            // 10.4: Strong group signs
1717
2
            Replacement{ pattern: to_unicode_braille("ch"), replacement: "L⠡"},
1718
2
            Replacement{ pattern: to_unicode_braille("gh"), replacement: "L⠣"},
1719
2
            Replacement{ pattern: to_unicode_braille("sh"), replacement: "L⠩"},
1720
2
            Replacement{ pattern: to_unicode_braille("th"), replacement: "L⠹"},
1721
2
            Replacement{ pattern: to_unicode_braille("wh"), replacement: "L⠱"},
1722
2
            Replacement{ pattern: to_unicode_braille("ed"), replacement: "L⠫"},
1723
2
            Replacement{ pattern: to_unicode_braille("er"), replacement: "L⠻"},
1724
2
            Replacement{ pattern: to_unicode_braille("ou"), replacement: "L⠳"},
1725
2
            Replacement{ pattern: to_unicode_braille("ow"), replacement: "L⠪"},
1726
2
            Replacement{ pattern: to_unicode_braille("st"), replacement: "L⠌"},
1727
2
            Replacement{ pattern: "(?P<s>L.)L⠊L⠝L⠛".to_string(), replacement: "${s}L⠬" },  // 'ing', not at start
1728
2
            Replacement{ pattern: to_unicode_braille("ar"), replacement: "L⠜"},
1729
1730
            // 10.6.5: Lower group signs preceded and followed by letters
1731
            // FIX: don't match if after/before a cap letter -- can't use negative pattern (?!...) in regex package
1732
            // Note: removed cc because "arccos" shouldn't be contracted (10.11.1), but there is no way to know about compound words
1733
            // Add it back after implementing a lookup dictionary of exceptions
1734
2
            Replacement{ pattern: "(?P<s>L.)L⠑L⠁(?P<e>L.)".to_string(), replacement: "${s}L⠂${e}" },  // ea
1735
2
            Replacement{ pattern: "(?P<s>L.)L⠃L⠃(?P<e>L.)".to_string(), replacement: "${s}L⠆${e}" },  // bb
1736
            // Replacement{ pattern: "(?P<s>L.)L⠉L⠉(?P<e>L.)".to_string(), replacement: "${s}L⠒${e}" },  // cc
1737
2
            Replacement{ pattern: "(?P<s>L.)L⠋L⠋(?P<e>L.)".to_string(), replacement: "${s}L⠖${e}" },  // ff
1738
2
            Replacement{ pattern: "(?P<s>L.)L⠛L⠛(?P<e>L.)".to_string(), replacement: "${s}L⠶${e}" },  // gg
1739
1740
            // 10.6.8: Lower group signs ("in" also 10.5.4 lower word signs)
1741
            // FIX: these need restrictions about only applying when upper dots are present
1742
2
            Replacement{ pattern: to_unicode_braille("en"), replacement: "⠢"},
1743
2
            Replacement{ pattern: to_unicode_braille("in"), replacement: "⠔"},
1744
           
1745
        ]
1746
2
    });
1747
1748
2
    static CONTRACTION_PATTERNS: LazyLock<RegexSet> = LazyLock::new(|| init_patterns(&CONTRACTIONS));
1749
1
    static CONTRACTION_REGEX: LazyLock<Vec<Regex>> = LazyLock::new(|| init_regex(&CONTRACTIONS));
1750
1751
1.41k
    let mut chars_as_str = chars.iter().collect::<String>();
1752
    // debug!("  handle_contractions: examine '{}'", &chars_as_str);
1753
1.41k
    let matches = CONTRACTION_PATTERNS.matches(&chars_as_str);
1754
1.41k
    for 
i35
in matches.iter() {
1755
35
        let element = &CONTRACTIONS[i];
1756
35
        // debug!("  replacing '{}' with '{}' in '{}'", element.pattern, element.replacement, &chars_as_str);
1757
35
        result.truncate(result.len() - chars_as_str.len());
1758
35
        chars_as_str = CONTRACTION_REGEX[i].replace_all(&chars_as_str, element.replacement).to_string();
1759
35
        result.push_str(&chars_as_str);
1760
35
        // debug!("  result after replace '{}'", result);
1761
35
    }
1762
1.41k
    return result;
1763
1764
1765
1766
2
    fn init_patterns(contractions: &[Replacement]) -> RegexSet {
1767
2
        let mut vec: Vec<&str> = Vec::with_capacity(contractions.len());
1768
50
        for contraction in 
contractions2
{
1769
50
            vec.push(&contraction.pattern);
1770
50
        }
1771
2
        return RegexSet::new(&vec).unwrap();
1772
2
    }
1773
1774
1
    fn init_regex(contractions: &[Replacement]) -> Vec<Regex> {
1775
1
        let mut vec = Vec::with_capacity(contractions.len());
1776
25
        for contraction in 
contractions1
{
1777
25
            vec.push(Regex::new(&contraction.pattern).unwrap());
1778
25
        }
1779
1
        return vec;
1780
1
    }
1781
1.41k
}
1782
1783
1784
1785
1786
static VIETNAM_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
1787
    "S" => "XXX",    // sans-serif -- from prefs
1788
    "B" => "⠘",     // bold
1789
    "𝔹" => "XXX",     // blackboard -- from prefs
1790
    "T" => "⠈",     // script
1791
    "I" => "⠨",     // italic
1792
    "R" => "",      // roman
1793
    // "E" => "⠰",     // English
1794
    "1" => "⠠",     // Grade 1 symbol
1795
    "L" => "",     // Letter left in to assist in locating letters
1796
    "D" => "XXX",     // German (Deutsche) -- from prefs
1797
    "G" => "⠰",     // Greek
1798
    "V" => "XXX",    // Greek Variants
1799
    // "H" => "⠠⠠",    // Hebrew
1800
    // "U" => "⠈⠈",    // Russian
1801
    "C" => "⠨",      // capital
1802
    "𝑐" => "",       // second or latter braille cell of a capital letter
1803
    "𝐶" => "⠨",      // capital that never should get word indicator (from chemical element)
1804
    "N" => "⠼",     // number indicator
1805
    "t" => "⠱",     // shape terminator
1806
    "W" => "⠀",     // whitespace"
1807
    "𝐖"=> "⠀",     // whitespace
1808
    "s" => "⠆",     // typeface single char indicator
1809
    "w" => "",     // typeface word indicator
1810
    "e" => "",     // typeface & capital terminator 
1811
    "o" => "",       // flag that what follows is an open indicator (used for standing alone rule)
1812
    "c" => "",     // flag that what follows is an close indicator (used for standing alone rule)
1813
    "b" => "",       // flag that what follows is an open or close indicator (used for standing alone rule)
1814
    "," => "⠂",     // comma
1815
    "." => "⠲",     // period
1816
    "-" => "-",     // hyphen
1817
    "—" => "⠠⠤",   // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
1818
    "―" => "⠐⠠⠤",  // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
1819
    "#" => "",      // signals end of script
1820
    "!" => "",      // Hack used to prevent some regular expression matches
1821
};
1822
1823
112
fn vietnam_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
1824
    // Deal with Vietnamese "rhymes" -- moving accents around
1825
    // See "Vietnamese Uncontracted Braille Update in MathCAT" or maybe https://icanreadvietnamese.com/blog/14-rule-of-tone-mark-placement
1826
    // Note: I don't know how to write (for example) I_E_RULE so that it excludes "qu" and "gi", so I use two rules
1827
    // The first rule rewrites the patterns with "qu" and "gi" to add "!" to prevent a match of the second rule -- "!" is dropped later
1828
1
    static QU_GI_RULE_EXCEPTION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(L⠟L⠥|L⠛L⠊)").unwrap());
1829
1
    static IUOY_E_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L(⠊|⠥|⠕|⠽)(L[⠔⠰⠢⠤⠠])L(⠑|⠣)").unwrap()); // ie, ue, oe, and ye rule
1830
1
    static UO_A_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L(⠥|⠕)(L[⠔⠰⠢⠤⠠])L(⠁|⠡|⠜)").unwrap()); // ua, oa rule
1831
1
    static UU_O_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L(⠥|⠳)(L[⠔⠰⠢⠤⠠])L(⠪|⠹)").unwrap()); // uo, ưo rule
1832
1
    static UYE_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽L⠣").unwrap()); // uo, ưo rule
1833
1
    static UY_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽").unwrap()); // uo, ưo rule
1834
1
    static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb!])").unwrap());
1835
    // debug!("vietnam_cleanup: start={}", raw_braille);
1836
112
    let result = typeface_to_word_mode(&raw_braille);
1837
112
    let result = capitals_to_word_mode(&result);
1838
1839
112
    let result = result.replace("tW", "W");
1840
112
    let result = result.replace("CG", "⠸");    // capital Greek letters are problematic in Vietnam braille
1841
112
    let result = result.replace("CC", "⠸");    // capital word more is the same as capital Greek letters
1842
    // debug!("   after typeface/caps={}", &result);
1843
1844
    // deal with "rhymes"
1845
112
    let result = QU_GI_RULE_EXCEPTION.replace_all(&result, "${1}!");
1846
    // debug!("          after except={}", &result);
1847
112
    let result = IUOY_E_RULE.replace_all(&result, "${2}L${1}L${3}");
1848
    // debug!("          after IUOY_E={}", &result);
1849
112
    let result = UO_A_RULE.replace_all(&result, "${2}L${1}L${3}");
1850
    // debug!("          after   UO_A={}", &result);
1851
112
    let result = UU_O_RULE.replace_all(&result, "${2}L${1}L${3}");
1852
    // debug!("          after   UO_O={}", &result);
1853
112
    let result = UYE_RULE.replace_all(&result, "${1}L⠥L⠽L⠣");  // longer match first
1854
    // debug!("          after    UYE={}", &result);
1855
112
    let result = UY_RULE.replace_all(&result, "${1}L⠥L⠽");
1856
    // debug!("          after     UY={}", &result);
1857
1858
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
1859
112
    let double_struck = pref_manager.pref_to_string("Vietnam_DoubleStruck");
1860
112
    let sans_serif = pref_manager.pref_to_string("Vietnam_SansSerif");
1861
112
    let fraktur = pref_manager.pref_to_string("Vietnam_Fraktur");
1862
112
    let greek_variant = pref_manager.pref_to_string("Vietnam_GreekVariant");
1863
1864
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
1865
112
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
1866
1867
1868
1.23k
    let 
result112
=
REPLACE_INDICATORS112
.
replace_all112
(
&result112
, |cap: &Captures| {
1869
1.23k
        let matched_char = &cap[0];
1870
1.23k
        match matched_char {
1871
1.23k
            "𝔹" => 
&double_struck0
,
1872
1.23k
            "S" => 
&sans_serif0
,
1873
1.23k
            "D" => 
&fraktur0
,
1874
1.23k
            "V" => 
&greek_variant0
,
1875
1.23k
            _ => match VIETNAM_INDICATOR_REPLACEMENTS.get(matched_char) {
1876
0
                None => {error!("REPLACE_INDICATORS and VIETNAM_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
1877
1.23k
                Some(&ch) => ch,
1878
            },
1879
        }
1880
1.23k
    });
1881
1882
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
1883
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
1884
112
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
1885
   
1886
112
    return result.to_string();
1887
112
}
1888
1889
1890
/// Braille cells that replace each indicator character in the intermediate CMU braille string.
///
/// `cmu_cleanup` looks up every character matched by `REPLACE_INDICATORS` in this table.
/// Entries that are commented out are indicators that are not part of this table.
static CMU_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
    // ---- typeface indicators ----
    // "S" => "XXX",    // sans-serif -- from prefs
    "B" => "⠔",     // bold
    "𝔹" => "⠬",     // blackboard
    // "T" => "⠈",     // script
    "I" => "⠔",     // italic -- same as bold
    // "R" => "",      // roman
    "s" => "",      // typeface single char indicator
    // "w" => "⠂",     // typeface word indicator
    // "e" => "⠄",     // typeface & capital terminator

    // ---- language indicators ----
    // "E" => "⠰",     // English
    "D" => "⠠",     // German (Gothic)
    "G" => "⠈",     // Greek
    "V" => "⠈⠬",    // Greek Variants
    // "H" => "⠠⠠",    // Hebrew
    // "U" => "⠈⠈",    // Russian

    // ---- capitals, letters, numbers, and modes ----
    "C" => "⠨",     // capital
    "𝐶" => "⠨",     // capital that never should get word indicator (from chemical element)
    "L" => "",      // Letter left in to assist in locating letters
    "N" => "⠼",     // number indicator
    "𝑁" => "",      // continue number
    "1" => "⠐",     // Grade 1 symbol -- used here for a-j after number
    "#" => "⠼",     // signals to end/restart of numeric mode (mixed fractions)

    // ---- whitespace and terminators ----
    // "t" => "⠱",     // shape terminator
    "W" => "⠀",     // whitespace
    "𝐖" => "⠀",     // whitespace
    // "𝘄" => "⠀",    // add whitespace if char to the left has dots 1, 2, or 3 -- special rule handled separately, so commented out

    // ---- flags used by the standing alone rule ----
    // "o" => "",       // flag that what follows is an open indicator
    // "c" => "",       // flag that what follows is a close indicator
    // "b" => "",       // flag that what follows is an open or close indicator

    // ---- punctuation ----
    "," => "⠂",     // comma
    "." => "⠄",     // period
    "-" => "⠤",     // hyphen
    "—" => "⠤⠤",    // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
    // "―" => "⠐⠤⠤",  // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
};
1926
1927
1928
372
fn cmu_cleanup(_pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
1929
2
    static ADD_WHITE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"𝘄(.)|𝘄$").unwrap());
1930
1931
    // debug!("cmu_cleanup: start={}", raw_braille);
1932
    // let result = typeface_to_word_mode(&raw_braille);
1933
1934
    // let result = result.replace("tW", "W");
1935
372
    let result = raw_braille.replace("CG", "⠘")
1936
372
                                .replace("𝔹C", "⠩")
1937
372
                                .replace("DC", "⠰");
1938
    // let result = result.replace("CC", "⠸");
1939
1940
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
1941
    // let double_struck = pref_manager.pref_to_string("CMU_DoubleStruck");
1942
    // let sans_serif = pref_manager.pref_to_string("CMU_SansSerif");
1943
    // let fraktur = pref_manager.pref_to_string("CMU_Fraktur");
1944
1945
    // debug!("Before remove mode changes: '{}'", &result);
1946
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
1947
372
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
1948
372
    let result = result.replace("𝑁N", "");
1949
    // debug!(" After remove mode changes: '{}'", &result);
1950
1951
2.58k
    let 
result372
=
REPLACE_INDICATORS372
.
replace_all372
(
&result372
, |cap: &Captures| {
1952
2.58k
        match CMU_INDICATOR_REPLACEMENTS.get(&cap[0]) {
1953
0
            None => {error!("REPLACE_INDICATORS and CMU_INDICATOR_REPLACEMENTS are not in sync"); ""},
1954
2.58k
            Some(&ch) => ch,
1955
        }
1956
2.58k
    });
1957
372
    let result = ADD_WHITE_SPACE.replace_all(&result, |cap: &Captures| 
{12
1958
12
        if cap.get(1).is_none() {
1959
2
            return "⠀".to_string();
1960
        } else {
1961
            // debug!("ADD_WHITE_SPACE match='{}', has left dots = {}", &cap[1], has_left_dots(cap[1].chars().next().unwrap()));
1962
10
            let mut next_chars = cap[1].chars();
1963
10
            let next_char = next_chars.next().unwrap();
1964
10
            assert!(next_chars.next().is_none());
1965
10
            return (if has_left_dots(next_char) {
"⠀"9
} else {
""1
}).to_string() + &cap[1];
1966
        }
1967
12
    });
1968
    
1969
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
1970
372
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
1971
372
    let result = result.trim_start_matches('⠀');            // don't trip end (e.g., see once::vector_11_2_5)
1972
372
    return result.to_string();
1973
1974
10
    fn has_left_dots(ch: char) -> bool {
1975
        // Unicode braille is set up so dot 1 is 2^0, dot 2 is 2^1, etc
1976
10
        return ( (ch as u32 - 0x2800) >> 4 ) > 0;
1977
10
    }
1978
372
}
1979
1980
1981
1982
/// Braille cells that replace each indicator character in the intermediate Swedish braille string.
///
/// `swedish_cleanup` looks up every character matched by `REPLACE_INDICATORS` in this table.
/// The "XXX" values are placeholders: `swedish_cleanup` substitutes "S", "𝔹", "D", and "V" with
/// strings read from the user preferences before this table is consulted.
static SWEDISH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
    // FIX: this needs cleaning up -- not all of these are used

    // ---- typeface indicators ----
    "S" => "XXX",   // sans-serif -- from prefs
    "B" => "⠨",     // bold
    "𝔹" => "XXX",   // blackboard -- from prefs
    "T" => "⠈",     // script
    "I" => "⠨",     // italic
    "R" => "",      // roman
    "s" => "",      // typeface single char indicator
    "e" => "",      // typeface & capital terminator

    // ---- language indicators ----
    "D" => "XXX",   // German (Deutsche) -- from prefs
    "G" => "⠰",     // Greek
    "V" => "XXX",   // Greek Variants
    // "H" => "⠠⠠",    // Hebrew
    // "U" => "⠈⠈",    // Russian

    // ---- capitals, letters, numbers, and modes ----
    "C" => "⠠",     // capital
    "𝑐" => "",      // second or later braille cell of a capital letter
    "𝐶" => "⠠",     // capital that never should get word indicator (from chemical element)
    "L" => "",      // Letter left in to assist in locating letters
    "N" => "⠼",     // number indicator
    "1" => "⠱",     // Grade 1 symbol (used for number followed by a letter)
    "E" => "⠱",     // empty base -- see index of radical

    // ---- whitespace and terminators ----
    "W" => "⠀",     // whitespace
    "𝐖" => "⠀",     // whitespace
    "w" => "⠀",     // whitespace after function name
    "t" => "⠱",     // shape terminator
    "#" => "",      // signals end of script

    // ---- flags used by the standing alone rule (no output of their own) ----
    "o" => "",      // what follows is an open indicator
    "c" => "",      // what follows is a close indicator
    "b" => "",      // what follows is an open or close indicator

    // ---- punctuation ----
    "," => "⠂",     // comma
    "." => "⠲",     // period
    "-" => "-",     // hyphen
    "—" => "⠠⠤",    // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
    "―" => "⠐⠠⠤",   // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
};
2019
2020
2021
/// Braille cells that replace each indicator character in the intermediate Finnish braille string.
///
/// `finnish_cleanup` looks up every character matched by its `REPLACE_INDICATORS` regex in this table.
/// The "XXX" values are placeholders: `finnish_cleanup` substitutes "S", "𝔹", "D", and "V" with
/// strings read from the user preferences before this table is consulted.
static FINNISH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
    // FIX: this needs cleaning up -- not all of these are used

    // ---- typeface indicators ----
    "S" => "XXX",   // sans-serif -- from prefs
    "B" => "⠨",     // bold
    "𝔹" => "XXX",   // blackboard -- from prefs
    "T" => "⠈",     // script
    "I" => "⠨",     // italic
    "R" => "",      // roman
    "s" => "⠆",     // typeface single char indicator
    "w" => "",      // typeface word indicator
    "e" => "",      // typeface & capital terminator

    // ---- language indicators ----
    "E" => "⠰",     // English
    "D" => "XXX",   // German (Deutsche) -- from prefs
    "G" => "⠨",     // Greek
    "V" => "XXX",   // Greek Variants
    // "H" => "⠠⠠",    // Hebrew
    // "U" => "⠈⠈",    // Russian

    // ---- capitals, letters, numbers, and modes ----
    "C" => "⠠",     // capital
    "𝑐" => "",      // second or later braille cell of a capital letter
    "𝐶" => "⠠",     // capital that never should get whitespace in front (from chemical element)
    "L" => "",      // Letter left in to assist in locating letters
    "N" => "⠼",     // number indicator
    "n" => "⠼",     // number indicator for drop numbers (special case with close parens)
    "1" => "⠀",     // Grade 1 symbol (used for number followed by a letter)

    // ---- whitespace, scripts, and terminators ----
    "W" => "⠀",     // whitespace
    "𝐖" => "⠀",     // whitespace
    "t" => "⠱",     // shape terminator
    "↑" => "⠬",     // superscript
    "↓" => "⠡",     // subscript
    "#" => "",      // signals end of script
    "Z" => "⠐",     // signals end of index of root, integrand/lim from function ("zone change")

    // ---- punctuation ----
    "," => "⠂",     // comma
    "." => "⠲",     // period
    "-" => "-",     // hyphen
    "—" => "⠠⠤",    // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
    "―" => "⠐⠠⠤",   // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
    "(" => "⠦",     // Not really needed, but done for consistency with ")"
    ")" => "⠴",     // Needed for rules with drop numbers to avoid mistaking for dropped 0
};
2061
2062
0
fn finnish_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2063
0
    static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏C𝐶LlMmb↑↓Nn𝑁WwZ,()])").unwrap());
2064
    // Numbers need to end with a space, but sometimes there is one there for other reasons
2065
0
    static DROP_NUMBER_SEPARATOR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(n.)\)").unwrap());
2066
0
    static NUMBER_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"((N.)+[^WN𝐶#↑↓Z])").unwrap());
2067
2068
    // debug!("finnish_cleanup: start={}", raw_braille);
2069
0
    let result = DROP_NUMBER_SEPARATOR.replace_all(&raw_braille, |cap: &Captures| {
2070
        // match includes the char after the number -- insert the whitespace before it
2071
        // debug!("DROP_NUMBER_SEPARATOR match='{}'", &cap[1]);
2072
0
        return cap[1].to_string() + "𝐶)";       // hack to use "𝐶" instead of dot 6 directly, but works for NUMBER_MATCH
2073
0
    });
2074
0
    let result = result.replace('n', "N");  // avoids having to modify remove_unneeded_mode_changes()
2075
0
    let result = NUMBER_MATCH.replace_all(&result, |cap: &Captures| {
2076
        // match includes the char after the number -- insert the whitespace before it
2077
        // debug!("NUMBER_MATCH match='{}'", &cap[1]);
2078
0
        let mut chars = cap[0].chars();
2079
0
        let last_char = chars.next_back().unwrap(); // unwrap safe since several chars were matched
2080
0
        return chars.as_str().to_string() + "W" + &last_char.to_string();
2081
0
    });
2082
2083
    // FIX: need to implement this -- this is just a copy of the Vietnam code
2084
0
    let result = result.replace("CG", "⠘")
2085
0
                                    .replace("𝔹C", "⠩")
2086
0
                                    .replace("DC", "⠰");
2087
2088
    // debug!("   after typeface/caps={}", &result);
2089
2090
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
2091
0
    let double_struck = pref_manager.pref_to_string("Vietnam_DoubleStruck");
2092
0
    let sans_serif = pref_manager.pref_to_string("Vietnam_SansSerif");
2093
0
    let fraktur = pref_manager.pref_to_string("Vietnam_Fraktur");
2094
0
    let greek_variant = pref_manager.pref_to_string("Vietnam_GreekVariant");
2095
2096
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
2097
0
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
2098
    // debug!("   remove_unneeded_mode_changes={}", &result);
2099
2100
2101
0
    let result = REPLACE_INDICATORS.replace_all(&result, |cap: &Captures| {
2102
0
        let matched_char = &cap[0];
2103
0
        match matched_char {
2104
0
            "𝔹" => &double_struck,
2105
0
            "S" => &sans_serif,
2106
0
            "D" => &fraktur,
2107
0
            "V" => &greek_variant,
2108
0
            _ => match FINNISH_INDICATOR_REPLACEMENTS.get(matched_char) {
2109
0
                None => {error!("REPLACE_INDICATORS and SWEDISH_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
2110
0
                Some(&ch) => ch,
2111
            },
2112
        }
2113
0
    });
2114
2115
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
2116
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
2117
0
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
2118
   
2119
0
    return result.to_string();
2120
0
}
2121
2122
2123
0
fn swedish_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2124
    // FIX: need to implement this -- this is just a copy of the Vietnam code
2125
    // Empty bases are ok if they follow whitespace
2126
0
    static EMPTY_BASE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(^|[W𝐖w])E").unwrap());
2127
    // debug!("swedish_cleanup: start={}", raw_braille);
2128
0
    let result = typeface_to_word_mode(&raw_braille);
2129
0
    let result = capitals_to_word_mode(&result);
2130
2131
0
    let result = result.replace("CG", "⠘")
2132
0
                                    .replace("𝔹C", "⠩")
2133
0
                                    .replace("DC", "⠰");
2134
2135
    // debug!("   after typeface/caps={}", &result);
2136
2137
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
2138
0
    let double_struck = pref_manager.pref_to_string("Vietnam_DoubleStruck");
2139
0
    let sans_serif = pref_manager.pref_to_string("Vietnam_SansSerif");
2140
0
    let fraktur = pref_manager.pref_to_string("Vietnam_Fraktur");
2141
0
    let greek_variant = pref_manager.pref_to_string("Vietnam_GreekVariant");
2142
2143
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
2144
0
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
2145
    // debug!("   after removing mode changes={}", &result);
2146
2147
2148
0
    let result = EMPTY_BASE.replace_all(&result, "$1");
2149
0
    let result = REPLACE_INDICATORS.replace_all(&result, |cap: &Captures| {
2150
0
        let matched_char = &cap[0];
2151
0
        match matched_char {
2152
0
            "𝔹" => &double_struck,
2153
0
            "S" => &sans_serif,
2154
0
            "D" => &fraktur,
2155
0
            "V" => &greek_variant,
2156
0
            _ => match SWEDISH_INDICATOR_REPLACEMENTS.get(matched_char) {
2157
0
                None => {error!("REPLACE_INDICATORS and SWEDISH_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
2158
0
                Some(&ch) => ch,
2159
            },
2160
        }
2161
0
    });
2162
2163
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
2164
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
2165
0
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
2166
   
2167
0
    return result.to_string();
2168
0
}
2169
2170
#[allow(non_snake_case)]
2171
50
fn LaTeX_cleanup(_pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2172
1
    static REMOVE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" ([\^_,;)\]}])").unwrap()); // '^', '_', ',', ';', ')', ']', '}'
2173
1
    static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" +").unwrap());
2174
    // debug!("LaTeX_cleanup: start={}", raw_braille);
2175
50
    let result = raw_braille.replace('𝐖', " ");
2176
    // let result = COLLAPSE_SPACES.replace_all(&raw_braille, "⠀");
2177
50
    let result = COLLAPSE_SPACES.replace_all(&result, " ");
2178
    // debug!("After collapse: {}", &result);
2179
50
    let result = REMOVE_SPACE.replace_all(&result, "$1");
2180
    // debug!("After remove: {}", &result);
2181
    // let result = result.trim_matches('⠀');
2182
50
    let result = result.trim_matches(' ');
2183
   
2184
50
    return result.to_string();
2185
50
}
2186
2187
#[allow(non_snake_case)]
2188
41
fn ASCIIMath_cleanup(_pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2189
1
    static REMOVE_SPACE_BEFORE_OP: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"([\w\d]) +([^\w\d"]|[\^_,;)\]}])"#).unwrap());
2190
1
    static REMOVE_SPACE_AFTER_OP: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"([^\^_,;)\]}\w\d"]) +([\w\d])"#).unwrap());
2191
1
    static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" +").unwrap());
2192
    // debug!("ASCIIMath_cleanup: start={}", raw_braille);
2193
41
    let result  = raw_braille.replace("|𝐖__|", "|𝐰__|");    // protect the whitespace to prevent misinterpretation as lfloor
2194
41
    let result = result.replace('𝐖', " ");
2195
41
    let result = COLLAPSE_SPACES.replace_all(&result, " ");
2196
    // debug!("After collapse: {}", &result);
2197
41
    let result = REMOVE_SPACE_BEFORE_OP.replace_all(&result, "$1$2");
2198
41
    let result = REMOVE_SPACE_AFTER_OP.replace_all(&result, "$1$2");
2199
41
    let result = result.replace('𝐰', " ");     // spaces around relational operators
2200
41
    let result = COLLAPSE_SPACES.replace_all(&result, " ");
2201
    // debug!("After remove: {}", &result);
2202
    // let result = result.trim_matches('⠀');
2203
41
    let result = result.trim_matches(' ');
2204
   
2205
41
    return result.to_string();
2206
41
}
2207
2208
2209
/************** Braille xpath functionality ***************/
2210
use crate::canonicalize::{as_element, as_text, name};
2211
use crate::xpath_functions::{is_leaf, validate_one_node, IsBracketed};
2212
use std::result::Result as StdResult;
2213
use sxd_document::dom::ParentOfChild;
2214
use sxd_xpath::function::Error as XPathError;
2215
use sxd_xpath::function::{Args, Function};
2216
use sxd_xpath::{context, nodeset::*, Value};
2217
2218
pub struct NemethNestingChars;
2219
const NEMETH_FRAC_LEVEL: &str = "data-nemeth-frac-level";    // name of attr where value is cached
2220
const FIRST_CHILD_ONLY: &[&str] = &["mroot", "msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts"];
2221
impl NemethNestingChars {
2222
    // returns a 'repeat_char' corresponding to the Nemeth rules for nesting
2223
    // note: this value is likely one char too long because the starting fraction is counted
2224
537
    fn nemeth_frac_value(node: Element, repeat_char: &str) -> String {
2225
537
        let children = node.children();
2226
537
        let name = name(node);
2227
537
        if is_leaf(node) {
2228
244
            return "".to_string();
2229
293
        } else if name == "mfrac" {
2230
            // have we already computed the value?
2231
221
            if let Some(
value152
) = node.attribute_value(NEMETH_FRAC_LEVEL) {
2232
152
                return value.to_string();
2233
69
            }
2234
2235
69
            let num_value = NemethNestingChars::nemeth_frac_value(as_element(children[0]), repeat_char);
2236
69
            let denom_value = NemethNestingChars::nemeth_frac_value(as_element(children[1]), repeat_char);
2237
69
            let mut max_value = if num_value.len() > denom_value.len() {
num_value8
} else {
denom_value61
};
2238
69
            max_value += repeat_char;
2239
69
            node.set_attribute_value(NEMETH_FRAC_LEVEL, &max_value);
2240
69
            return max_value;
2241
72
        } else if FIRST_CHILD_ONLY.contains(&name) {
2242
            // only look at the base -- ignore scripts/index
2243
10
            return NemethNestingChars::nemeth_frac_value(as_element(children[0]), repeat_char);
2244
        } else {
2245
62
            let mut result = "".to_string();
2246
197
            for child in 
children62
{
2247
197
                let value = NemethNestingChars::nemeth_frac_value(as_element(child), repeat_char);
2248
197
                if value.len() > result.len() {
2249
19
                    result = value;
2250
178
                }
2251
            }
2252
62
            return result;
2253
        }
2254
537
    }
2255
2256
0
    fn nemeth_root_value(node: Element, repeat_char: &str) -> StdResult<String, XPathError> {
2257
        // returns the correct number of repeat_chars to use
2258
        // note: because the highest count is toward the leaves and
2259
        //    because this is a loop and not recursive, caching doesn't work without a lot of overhead
2260
0
        let parent = node.parent().unwrap();
2261
0
        if let ParentOfChild::Element(e) =  parent {
2262
0
            let mut parent = e;
2263
0
            let mut result = "".to_string();
2264
            loop {
2265
0
                let name = name(parent);
2266
0
                if name == "math" {
2267
0
                    return Ok( result );
2268
0
                }
2269
0
                if name == "msqrt" || name == "mroot" {
2270
0
                    result += repeat_char;
2271
0
                }
2272
0
                let parent_of_child = parent.parent().unwrap();
2273
0
                if let ParentOfChild::Element(e) =  parent_of_child {
2274
0
                    parent = e;
2275
0
                } else {
2276
0
                    return Err( sxd_xpath::function::Error::Other("Internal error in nemeth_root_value: didn't find 'math' tag".to_string()) );
2277
                }
2278
            }
2279
0
        }
2280
0
        return Err( XPathError::Other("Internal error in nemeth_root_value: didn't find 'math' tag".to_string()) );
2281
0
    }
2282
}
2283
2284
impl Function for NemethNestingChars {
2285
/**
2286
 * Returns a string with the correct number of nesting chars (could be an empty string)
2287
 * @param(node) -- current node
2288
 * @param(char) -- char (string) that should be repeated
2289
 * Note: as a side effect, an attribute with the value so repeated calls to this or a child will be fast
2290
 */
2291
192
 fn evaluate<'d>(&self,
2292
192
                        _context: &context::Evaluation<'_, 'd>,
2293
192
                        args: Vec<Value<'d>>)
2294
192
                        -> StdResult<Value<'d>, XPathError>
2295
    {
2296
192
        let mut args = Args(args);
2297
192
        args.exactly(2)
?0
;
2298
192
        let repeat_char = args.pop_string()
?0
;
2299
192
        let node = crate::xpath_functions::validate_one_node(args.pop_nodeset()
?0
, "NestingChars")
?0
;
2300
192
        if let Node::Element(el) = node {
2301
192
            let name = name(el);
2302
            // it is likely a bug to call this one a non mfrac
2303
192
            if name == "mfrac" {
2304
                // because it is called on itself, the fraction is counted one too many times -- chop one off
2305
                // this is slightly messy because we are chopping off a char, not a byte
2306
                const BRAILLE_BYTE_LEN: usize = "⠹".len();      // all Unicode braille symbols have the same number of bytes
2307
192
                return Ok( Value::String( NemethNestingChars::nemeth_frac_value(el, &repeat_char)[BRAILLE_BYTE_LEN..].to_string() ) );
2308
0
            } else if name == "msqrt" || name == "mroot" {
2309
0
                return Ok( Value::String( NemethNestingChars::nemeth_root_value(el, &repeat_char)? ) );
2310
            } else {
2311
0
                return Err(XPathError::Other(format!("NestingChars chars should be used only on 'mfrac'. '{}' was passed in", name)));
2312
            }
2313
        } else {
2314
            // not an element, so nothing to do
2315
0
            return Ok( Value::String("".to_string()) );
2316
        }
2317
192
    }
2318
}
2319
2320
pub struct BrailleChars;
2321
impl BrailleChars {
2322
    // returns a string for the chars in the *leaf* node.
2323
    // this string follows the Nemeth rules typefaces and deals with mathvariant
2324
    //  which has partially turned chars to the alphanumeric block
2325
12.5k
    fn get_braille_chars(node: Element, code: &str, text_range: Option<Range<usize>>) -> StdResult<String, XPathError> {
2326
12.5k
        let result = match code {
2327
12.5k
            "Nemeth" => 
BrailleChars::get_braille_nemeth_chars5.99k
(
node5.99k
,
text_range5.99k
),
2328
6.52k
            "UEB" => 
BrailleChars:: get_braille_ueb_chars2.28k
(
node2.28k
,
text_range2.28k
),
2329
4.24k
            "CMU" => 
BrailleChars:: get_braille_cmu_chars3.70k
(
node3.70k
,
text_range3.70k
),
2330
536
            "Vietnam" => BrailleChars:: get_braille_vietnam_chars(node, text_range),
2331
0
            "Swedish" => BrailleChars:: get_braille_ueb_chars(node, text_range),    // FIX: need to figure out what to implement
2332
0
            "Finnish" => BrailleChars:: get_braille_ueb_chars(node, text_range),    // FIX: need to figure out what to implement
2333
0
            _ => return Err(sxd_xpath::function::Error::Other(format!("get_braille_chars: unknown braille code '{code}'")))
2334
        };
2335
12.5k
        return match result {
2336
12.5k
            Ok(string) => Ok(make_quoted_string(string)),
2337
0
            Err(err) => return Err(sxd_xpath::function::Error::Other(err.to_string())),
2338
        }
2339
12.5k
    }
2340
2341
5.99k
    fn get_braille_nemeth_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2342
        // To greatly simplify typeface/language generation, the chars have unique ASCII chars for them:
2343
        // Typeface: S: sans-serif, B: bold, 𝔹: blackboard, T: script, I: italic, R: Roman
2344
        // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian
2345
        // Indicators: C: capital, L: letter, N: number, P: punctuation, M: multipurpose
2346
2
        static PICK_APART_CHAR: LazyLock<Regex> = LazyLock::new(|| {
2347
2
            Regex::new(r"(?P<face>[SB𝔹TIR]*)(?P<lang>[EDGVHU]?)(?P<cap>C?)(?P<letter>L?)(?P<num>[N]?)(?P<char>.)").unwrap()
2348
2
        });
2349
5.99k
        let math_variant = node.attribute_value("mathvariant");
2350
        // FIX: cover all the options -- use phf::Map
2351
5.99k
        let  attr_typeface = match math_variant {
2352
5.76k
            None => "R",
2353
233
            Some(variant) => match variant {
2354
233
                "bold" => 
"B"42
,
2355
191
                "italic" => 
"I"2
,
2356
189
                "double-struck" => 
"𝔹"27
,
2357
162
                "script" => 
"T"5
,
2358
157
                "fraktur" => 
"D"0
,
2359
157
                "sans-serif" => 
"S"1
,
2360
156
                _ => "R",       // normal and unknown
2361
            },
2362
        };
2363
5.99k
        let text = BrailleChars::substring(as_text(node), &text_range);
2364
5.99k
        let braille_chars = braille_replace_chars(&text, node)
?0
;
2365
        // debug!("Nemeth chars: text='{}', braille_chars='{}'", &text, &braille_chars);
2366
        
2367
        // we want to pull the prefix (typeface, language) out to the front until a change happens
2368
        // the same is true for number indicator
2369
        // also true (sort of) for capitalization -- if all caps, use double cap in front (assume abbr or Roman Numeral)
2370
        
2371
        // we only care about this for numbers and identifiers/text, so we filter for only those
2372
5.99k
        let node_name = name(node);
2373
5.99k
        let is_in_enclosed_list = node_name != "mo" && 
BrailleChars::is_in_enclosed_list3.45k
(
node3.45k
);
2374
5.99k
        let is_mn_in_enclosed_list = is_in_enclosed_list && 
node_name == "mn"120
;
2375
5.99k
        let mut typeface = "R".to_string();     // assumption is "R" and if attr or letter is different, something happens
2376
5.99k
        let mut is_all_caps = true;
2377
5.99k
        let mut is_all_caps_valid = false;      // all_caps only valid if we did a replacement
2378
7.87k
        let 
result5.99k
=
PICK_APART_CHAR5.99k
.
replace_all5.99k
(
&braille_chars5.99k
, |caps: &Captures| {
2379
            // debug!("  face: {:?}, lang: {:?}, num {:?}, letter: {:?}, cap: {:?}, char: {:?}",
2380
            //        &caps["face"], &caps["lang"], &caps["num"], &caps["letter"], &caps["cap"], &caps["char"]);
2381
7.87k
            let mut nemeth_chars = "".to_string();
2382
7.87k
            let char_face = if caps["face"].is_empty() {
attr_typeface7.78k
} else {
&caps["face"]86
};
2383
7.87k
            let typeface_changed =  typeface != char_face;
2384
7.87k
            if typeface_changed {
2385
86
                typeface = char_face.to_string();   // needs to outlast this instance of the loop
2386
86
                nemeth_chars += &typeface;
2387
86
                nemeth_chars +=  &caps["lang"];
2388
7.78k
            } else {
2389
7.78k
                nemeth_chars +=  &caps["lang"];
2390
7.78k
            }
2391
            // debug!("  typeface changed: {}, is_in_list: {}; num: {}", typeface_changed, is_in_enclosed_list, !caps["num"].is_empty());
2392
7.87k
            if !caps["num"].is_empty() && (
typeface_changed2.74k
||
!is_mn_in_enclosed_list2.72k
) {
2393
2.58k
                nemeth_chars += "N";
2394
5.28k
            }
2395
7.87k
            is_all_caps_valid = true;
2396
7.87k
            is_all_caps &= !&caps["cap"].is_empty();
2397
7.87k
            nemeth_chars += &caps["cap"];       // will be stripped later if all caps
2398
7.87k
            if is_in_enclosed_list {
2399
228
                nemeth_chars += &caps["letter"].replace('L', "l");
2400
7.64k
            } else {
2401
7.64k
                nemeth_chars += &caps["letter"];
2402
7.64k
            }
2403
7.87k
            nemeth_chars += &caps["char"];
2404
7.87k
            return nemeth_chars;
2405
7.87k
        });
2406
        // debug!("  result: {}", &result);
2407
5.99k
        let mut text_chars = text.chars();     // see if more than one char
2408
5.99k
        if is_all_caps_valid && 
is_all_caps5.22k
&&
text_chars.next()369
.
is_some369
() &&
text_chars.next()369
.
is_some369
() {
2409
7
            return Ok( "CC".to_string() + &result.replace('C', ""));
2410
        } else {
2411
5.98k
            return Ok( result.to_string() );
2412
        }
2413
5.99k
    }
2414
2415
2.82k
    fn get_braille_ueb_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2416
        // Because in UEB typeforms and caps may extend for multiple tokens,
2417
        //   this routine merely deals with the mathvariant attr.
2418
        // Canonicalize has already transformed all chars it can to math alphanumerics, but not all have bold/italic 
2419
        // The typeform/caps transforms to (potentially) word mode are handled later.
2420
1
        static HAS_TYPEFACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap());
2421
1
        static PICK_APART_CHAR: LazyLock<Regex> = LazyLock::new(|| {
2422
1
            Regex::new(r"(?P<bold>B??)(?P<italic>I??)(?P<face>[S𝔹TD]??)s??(?P<cap>C??)(?P<greek>G??)(?P<char>[NL].)").unwrap()
2423
1
        });
2424
    
2425
2.82k
        let math_variant = node.attribute_value("mathvariant");
2426
2.82k
        let text = BrailleChars::substring(as_text(node), &text_range);
2427
2.82k
        let mut braille_chars = braille_replace_chars(&text, node)
?0
;
2428
2429
        // debug!("get_braille_ueb_chars: before/after unicode.yaml: '{}'/'{}'", text, braille_chars);
2430
2.82k
        if math_variant.is_none() {         // nothing we need to do
2431
2.71k
            return Ok(braille_chars);
2432
108
        }
2433
        // mathvariant could be "sans-serif-bold-italic" -- get the parts
2434
108
        let math_variant = math_variant.unwrap();
2435
108
        let italic = math_variant.contains("italic");
2436
108
        if italic & !braille_chars.contains('I') {
2437
0
            braille_chars = "I".to_string() + &braille_chars;
2438
108
        }
2439
108
        let bold = math_variant.contains("bold");
2440
108
        if bold & !braille_chars.contains('B') {
2441
0
            braille_chars = "B".to_string() + &braille_chars;
2442
108
        }
2443
108
        let typeface = match HAS_TYPEFACE.find(math_variant) {
2444
107
            None => "",
2445
1
            Some(m) => match m.as_str() {
2446
1
                "double-struck" => 
"𝔹"0
,
2447
1
                "script" => 
"T"0
,
2448
1
                "fraktur" => "D",
2449
0
                "sans-serif" => "S",
2450
                //  don't consider monospace as a typeform
2451
0
                _ => "",
2452
            },
2453
        };
2454
116
        let 
result108
=
PICK_APART_CHAR108
.
replace_all108
(
&braille_chars108
, |caps: &Captures| {
2455
            // debug!("captures: {:?}", caps);
2456
            // debug!("  bold: {:?}, italic: {:?}, face: {:?}, cap: {:?}, char: {:?}",
2457
            //        &caps["bold"], &caps["italic"], &caps["face"], &caps["cap"], &caps["char"]);
2458
116
            if bold || 
!caps["bold"].is_empty()111
{
"B"5
} else {
""111
}.to_string()
2459
116
                + if italic || !caps["italic"].is_empty() {
"I"0
} else {""}
2460
116
                + if !&caps["face"].is_empty() {
&caps["face"]1
} else {
typeface115
}
2461
116
                + &caps["cap"]
2462
116
                + &caps["greek"]
2463
116
                + &caps["char"]
2464
116
        });
2465
        // debug!("get_braille_ueb_chars: '{}'", &result);
2466
108
        return Ok(result.to_string())
2467
2.82k
    }
2468
2469
3.70k
    fn get_braille_cmu_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2470
        // In CMU, we need to replace spaces used for number blocks with "."
2471
        // For other numbers, we need to add "." to create digit blocks
2472
2473
1
        static HAS_TYPEFACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap());
2474
1
        static PICK_APART_CHAR: LazyLock<Regex> = LazyLock::new(|| {
2475
1
            Regex::new(r"(?P<bold>B??)(?P<italic>I??)(?P<face>[S𝔹TD]??)s??(?P<cap>C??)(?P<greek>G??)(?P<char>[NL].)").unwrap()
2476
1
        });
2477
    
2478
3.70k
        let math_variant = node.attribute_value("mathvariant");
2479
3.70k
        let text = BrailleChars::substring(as_text(node), &text_range);
2480
3.70k
        let text = add_separator(text);
2481
2482
3.70k
        let braille_chars = braille_replace_chars(&text, node)
?0
;
2483
2484
        // debug!("get_braille_ueb_chars: before/after unicode.yaml: '{}'/'{}'", text, braille_chars);
2485
3.70k
        if math_variant.is_none() {         // nothing we need to do
2486
3.70k
            return Ok(braille_chars);
2487
4
        }
2488
        // mathvariant could be "sans-serif-bold-italic" -- get the parts
2489
4
        let math_variant = math_variant.unwrap();
2490
4
        let bold = math_variant.contains("bold");
2491
4
        let italic = math_variant.contains("italic");
2492
4
        let typeface = match HAS_TYPEFACE.find(math_variant) {
2493
4
            None => "",
2494
0
            Some(m) => match m.as_str() {
2495
0
                "double-struck" => "𝔹",
2496
0
                "script" => "T",
2497
0
                "fraktur" => "D",
2498
0
                "sans-serif" => "S",
2499
                //  don't consider monospace as a typeform
2500
0
                _ => "",
2501
            },
2502
        };
2503
4
        let result = PICK_APART_CHAR.replace_all(&braille_chars, |caps: &Captures| {
2504
            // debug!("captures: {:?}", caps);
2505
            // debug!("  bold: {:?}, italic: {:?}, face: {:?}, cap: {:?}, char: {:?}",
2506
            //        &caps["bold"], &caps["italic"], &caps["face"], &caps["cap"], &caps["char"]);
2507
4
            if bold || !caps["bold"].is_empty() {
"B"0
} else {""}.to_string()
2508
4
                + if italic || !caps["italic"].is_empty() {
"I"0
} else {""}
2509
4
                + if !&caps["face"].is_empty() {
&caps["face"]0
} else {typeface}
2510
4
                + &caps["cap"]
2511
4
                + &caps["greek"]
2512
4
                + &caps["char"]
2513
4
        });
2514
4
        return Ok(result.to_string());
2515
2516
3.70k
        fn add_separator(text: String) -> String {
2517
            use crate::definitions::BRAILLE_DEFINITIONS;
2518
3.70k
            if let Some(
text_without_arc0
) = text.strip_prefix("arc") {
2519
                // "." after arc (7.5.3)
2520
0
                let is_function_name = BRAILLE_DEFINITIONS.with(|definitions| {
2521
0
                    let definitions = definitions.borrow();
2522
0
                    let set = definitions.get_hashset("CMUFunctionNames").unwrap();
2523
0
                    return set.contains(&text);
2524
0
                });
2525
0
                if is_function_name {
2526
0
                    return "arc.".to_string() + text_without_arc;
2527
0
                }
2528
3.70k
            } 
2529
3.70k
            return text;
2530
3.70k
        }
2531
3.70k
    }
2532
2533
536
    fn get_braille_vietnam_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2534
        // this is basically the same as for ueb except:
2535
        // 1. we deal with switching '.' and ',' if in English style for numbers
2536
        // 2. if it is identified as a Roman Numeral, we make all but the first char lower case because they shouldn't get a cap indicator
2537
        // 3. double letter chemical elements should NOT be part of a cap word sequence
2538
536
        if name(node) == "mn" {
2539
248
            // text of element is modified by these if needed
2540
248
            lower_case_roman_numerals(node);
2541
248
            switch_if_english_style_number(node);
2542
288
        }
2543
536
        let result = BrailleChars::get_braille_ueb_chars(node, text_range)
?0
;
2544
536
        return Ok(result);
2545
2546
248
        fn lower_case_roman_numerals(mn_node: Element) {
2547
248
            if mn_node.attribute("data-roman-numeral").is_some() {
2548
2
                // if a roman numeral, all ASCII so we can optimize
2549
2
                let text = as_text(mn_node);
2550
2
                let mut new_text = String::from(&text[..1]);
2551
2
                new_text.push_str(text[1..].to_ascii_lowercase().as_str());    // works for single char too
2552
2
                mn_node.set_text(&new_text);
2553
246
            }
2554
248
        }
2555
248
        fn switch_if_english_style_number(mn_node: Element) {
2556
248
            let text = as_text(mn_node);
2557
248
            let dot = text.find('.');
2558
248
            let comma = text.find(',');
2559
248
            match (dot, comma) {
2560
218
                (None, None) => (),
2561
4
                (Some(dot), Some(comma)) => {
2562
4
                    if comma < dot {
2563
2
                        // switch dot/comma -- using "\x01" as a temp when switching the two chars
2564
2
                        let switched = text.replace('.', "\x01").replace(',', ".").replace('\x01', ",");
2565
2
                        mn_node.set_text(&switched);
2566
2
                    }
2567
                },
2568
17
                (Some(dot), None) => {
2569
                    // If it starts with a '.', a leading 0, or if there is only one '.' and not three chars after it
2570
17
                    if dot==0 ||
2571
15
                       (dot==1 && 
text11
.
starts_with11
('0')) ||
2572
13
                       (text[dot+1..].find('.').is_none() && 
text[dot+1..].len()!=310
) {
2573
5
                        mn_node.set_text(&text.replace('.', ","));
2574
12
                    }
2575
                },
2576
9
                (None, Some(comma)) => {
2577
                    // if there is more than one ",", than it can't be a decimal separator
2578
9
                    if text[comma+1..].find(',').is_some() {
2579
1
                        mn_node.set_text(&text.replace(',', "."));
2580
8
                    }
2581
                },
2582
            }
2583
248
        }
2584
2585
536
    }
2586
2587
2588
3.45k
    fn is_in_enclosed_list(node: Element) -> bool {
2589
        // Nemeth Rule 10 defines an enclosed list:
2590
        // 1: begins and ends with fence
2591
        // 2: FIX: not implemented -- must contain no word, abbreviation, ordinal or plural ending
2592
        // 3: function names or signs of shape and the signs which follow them are a single item (not a word)
2593
        // 4: an item of the list may be an ellipsis or any sign used for omission
2594
        // 5: no relational operator may appear within the list
2595
        // 6: the list must have at least 2 items.
2596
        //       Items are separated by commas, can not have other punctuation (except ellipsis and dash)
2597
3.45k
        let mut parent = get_parent(node); // safe since 'math' is always at root
2598
7.37k
        while name(parent) == "mrow" {
2599
4.04k
            if IsBracketed::is_bracketed(parent, "", "", true, false) {
2600
388
                for child in 
parent134
.
children134
() {
2601
388
                    if !child_meets_conditions(as_element(child)) {
2602
14
                        return false;
2603
374
                    }
2604
                }
2605
120
                return true;
2606
3.91k
            }
2607
3.91k
            parent = get_parent(parent);
2608
        }
2609
3.32k
        return false;
2610
2611
1.55k
        fn child_meets_conditions(node: Element) -> bool {
2612
1.55k
            let name = name(node);
2613
1.55k
            return match name {
2614
1.55k
                "mi" | 
"mn"1.39k
=>
true476
,
2615
1.07k
                "mo"  => 
!crate::canonicalize::is_relational_op(node)664
,
2616
412
                "mtext" => {
2617
9
                    let text = as_text(node).trim();
2618
9
                    return text=="?" || text=="-?-" || text.is_empty();   // various forms of "fill in missing content" (see also Nemeth_RULEs.yaml, "omissions")
2619
                },
2620
403
                "mrow" => {
2621
385
                    if IsBracketed::is_bracketed(node, "", "", false, false) {
2622
125
                        return child_meets_conditions(as_element(node.children()[1]));
2623
                    } else {
2624
1.00k
                        for child in 
node260
.
children260
() {
2625
1.00k
                            if !child_meets_conditions(as_element(child)) {
2626
28
                                return false;
2627
975
                            }
2628
                        }
2629
                    }  
2630
232
                    true      
2631
                },
2632
18
                "menclose" => {
2633
0
                    if let Some(notation) = node.attribute_value("notation") {
2634
0
                        if notation != "bottom" || notation != "box" {
2635
0
                            return false;
2636
0
                        }
2637
0
                        let child = as_element(node.children()[0]);     // menclose has exactly one child
2638
0
                        return is_leaf(child) && as_text(child) == "?";
2639
0
                    }
2640
0
                    return false;
2641
                },
2642
                _ => {
2643
36
                    for child in 
node18
.
children18
() {
2644
36
                        if !child_meets_conditions(as_element(child)) {
2645
0
                            return false;
2646
36
                        }
2647
                    }
2648
18
                    true
2649
                },
2650
            }
2651
1.55k
        }
2652
3.45k
    }
2653
2654
    /// Extract the `char`s from `str` within `range` (these are chars, not byte offsets).
    ///
    /// With no range, the whole string is returned. A range that extends past the end of `str`
    /// is truncated, and an empty or inverted range (`end <= start`) yields an empty string
    /// rather than underflowing when the length is computed.
    fn substring(str: &str, text_range: &Option<Range<usize>>) -> String {
        match text_range {
            None => str.to_string(),
            Some(range) => {
                let n_chars = range.end.saturating_sub(range.start);
                str.chars().skip(range.start).take(n_chars).collect()
            },
        }
    }
2661
}
2662
2663
impl Function for BrailleChars {
2664
    /**
2665
     * Returns a string with the correct number of nesting chars (could be an empty string)
2666
     * @param(node) -- current node or string
2667
     * @param(char) -- char (string) that should be repeated
2668
     * Note: as a side effect, an attribute with the value so repeated calls to this or a child will be fast
2669
     */
2670
12.5k
    fn evaluate<'d>(&self,
2671
12.5k
                        context: &context::Evaluation<'_, 'd>,
2672
12.5k
                        args: Vec<Value<'d>>)
2673
12.5k
                        -> StdResult<Value<'d>, XPathError>
2674
    {
2675
        use crate::canonicalize::create_mathml_element;
2676
12.5k
        let mut args = Args(args);
2677
12.5k
        if let Err(
e0
) = args.exactly(2).or_else(|_|
args2.52k
.
exactly2.52k
(4)) {
2678
0
            return Err( XPathError::Other(format!("BrailleChars requires 2 or 4 args: {e}")));
2679
12.5k
        };
2680
2681
12.5k
        let range = if args.len() == 4 {
2682
2.52k
            let end = args.pop_number()
?0
as usize - 1; // non-inclusive at end, 0-based
2683
2.52k
            let start = args.pop_number()
?0
as usize - 1; // inclusive at start, a 0-based
2684
2.52k
            Some(start..end)
2685
        } else {
2686
9.99k
            None
2687
        };
2688
12.5k
        let braille_code = args.pop_string()
?0
;
2689
12.5k
        let v: Value<'_> = args.0.pop().ok_or(XPathError::ArgumentMissing)
?0
;
2690
12.5k
        let node = match v {
2691
11.8k
            Value::Nodeset(nodes) => {
2692
11.8k
                validate_one_node(nodes, "BrailleChars")
?0
.element().unwrap()
2693
            },
2694
2
            Value::Number(n) => {
2695
2
                let new_node = create_mathml_element(&context.node.document(), "mn");
2696
2
                new_node.set_text(&n.to_string());
2697
2
                new_node
2698
            },
2699
681
            Value::String(s) => {
2700
681
                let new_node = create_mathml_element(&context.node.document(), "mi");   // FIX: try to guess mi vs mo???
2701
681
                new_node.set_text(&s);
2702
681
                new_node
2703
            },
2704
            _ => {
2705
0
                return Ok( Value::String("".to_string()) ) // not an element, so nothing to do
2706
            },
2707
        };
2708
2709
12.5k
        if !is_leaf(node) {
2710
0
            return Err( XPathError::Other(format!("BrailleChars called on non-leaf element '{}'", mml_to_string(node))) );
2711
12.5k
        }
2712
12.5k
        return Ok( Value::String( BrailleChars::get_braille_chars(node, &braille_code, range)
?0
) );
2713
12.5k
    }
2714
}
2715
2716
/// Unit type that groups the per-braille-code `needs_grouping_for_…` predicates, each of which
/// reports (as a bool) whether a MathML element needs to be grouped.
pub struct NeedsToBeGrouped;
2717
impl NeedsToBeGrouped {
2718
    // ordinals often have an irregular start (e.g., "half") before becoming regular.
2719
    // if the number is irregular, return the ordinal form, otherwise return 'None'.
2720
805
    fn needs_grouping_for_cmu(element: Element, _is_base: bool) -> bool {
2721
805
        let node_name = name(element);
2722
805
        let children = element.children();
2723
805
        if node_name == "mrow" {
2724
            // check for bracketed exprs
2725
544
            if IsBracketed::is_bracketed(element, "", "", false, true) {
2726
0
                return false;
2727
544
            }
2728
2729
            // check for prefix and postfix ops at start or end (=> len()==2, prefix is first op, postfix is last op)
2730
544
            if children.len() == 2 &&
2731
9
                (name(as_element(children[0])) == "mo" || 
name5
(
as_element5
(children[1])) == "mo") {
2732
7
                return false;
2733
537
            }
2734
2735
537
            if children.len() != 3 {  // ==3, need to check if it a linear fraction
2736
4
                return true;
2737
533
            }
2738
533
            let operator = as_element(children[1]);
2739
533
            if name(operator) != "mo" || as_text(operator) != "/" {
2740
532
                return true;
2741
1
            }
2742
261
        }
2743
2744
262
        if !(node_name == "mrow" || 
node_name == "mfrac"261
) {
2745
258
            return false;
2746
4
        }
2747
        // check for numeric fractions (regular fractions need brackets, not numeric fractions), either as an mfrac or with "/"
2748
        // if the fraction starts with a "-", it is still a numeric fraction that doesn't need parens
2749
4
        let mut numerator = as_element(children[0]);
2750
4
        let denominator = as_element(children[children.len()-1]);
2751
4
        let decimal_separator = crate::interface::get_preference("DecimalSeparators").unwrap()
2752
4
                                                        .chars().next().unwrap_or('.');
2753
4
        if is_integer(denominator, decimal_separator) {
2754
            // check numerator being either an integer "- integer"
2755
2
            if name(numerator) == "mrow" {
2756
1
                let numerator_children = numerator.children();
2757
1
                if !(numerator_children.len() == 2 &&
2758
1
                        name(as_element(numerator_children[0])) == "mo" &&
2759
1
                        as_text(as_element(numerator_children[0])) == "-") {
2760
0
                    return true;
2761
1
                }
2762
1
                numerator = as_element(numerator_children[1]);
2763
1
            }
2764
2
            return !is_integer(numerator, decimal_separator);
2765
2
        }
2766
2
        return true;
2767
2768
6
        fn is_integer(mathml: Element, decimal_separator: char) -> bool {
2769
6
            return name(mathml) == "mn" && 
!4
as_text(mathml)4
.contains(decimal_separator)
2770
6
        }
2771
805
    }
2772
2773
    /// FIX: what needs to be implemented?
2774
0
    fn needs_grouping_for_finnish(mathml: Element, is_base: bool) -> bool {
2775
        use crate::xpath_functions::IsInDefinition;
2776
0
        let mut node_name = name(mathml);
2777
0
        if mathml.attribute_value("data-roman-numeral").is_some() {
2778
0
            node_name = "mi";           // roman numerals don't follow number rules
2779
0
        }
2780
2781
        // FIX: the leaf rules are from UEB -- check the Swedish rules
2782
0
        match node_name {
2783
0
            "mn" => {   
2784
0
                if !is_base {
2785
0
                    return false;
2786
0
                }                                                                                        // clause 1
2787
                // two 'mn's can be adjacent, in which case we need to group the 'mn' to make it clear it is separate (see bug #204)
2788
0
                let parent = get_parent(mathml);   // there is always a "math" node
2789
0
                let grandparent = if name(parent) == "math" {parent} else {get_parent(parent)};
2790
0
                if name(grandparent) != "mrow" {
2791
0
                    return false;
2792
0
                }
2793
0
                let preceding = parent.preceding_siblings();
2794
0
                if preceding.len()  < 2 {
2795
0
                    return false;
2796
0
                }
2797
                // any 'mn' would be separated from this node by invisible times
2798
0
                let previous_child = as_element(preceding[preceding.len()-1]);
2799
0
                if name(previous_child) == "mo" && as_text(previous_child) == "\u{2062}" {
2800
0
                    let previous_child = as_element(preceding[preceding.len()-2]);
2801
0
                    return name(previous_child) == "mn"
2802
                } else {
2803
0
                    return false;
2804
                }
2805
            },
2806
0
            "mi" | "mo" | "mtext" => {
2807
0
                let text = as_text(mathml);
2808
0
                let parent = get_parent(mathml);   // there is always a "math" node
2809
0
                let parent_name = name(parent);   // there is always a "math" node
2810
0
                if is_base && (parent_name == "msub" || parent_name == "msup" || parent_name == "msubsup") && !text.contains([' ', '\u{00A0}']) {
2811
0
                    return false;
2812
0
                }
2813
0
                let mut chars = text.chars();
2814
0
                let first_char = chars.next().unwrap();             // canonicalization assures it isn't empty;
2815
0
                let is_one_char = chars.next().is_none();
2816
                // '¨', etc., brailles as two chars -- there probably is some exception list but I haven't found it -- these are the ones I know about
2817
0
                return !((is_one_char && !['¨', '″', '‴', '⁗'].contains(&first_char)) ||                       // clause 8
2818
                            // "lim", "cos", etc., appear not to get parens, but the rules don't mention it (tests show it)
2819
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "FunctionNames").unwrap() ||
2820
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "Arrows").unwrap() ||          // clause 4
2821
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "GeometryShapes").unwrap());   // clause 5
2822
            },
2823
0
            "mrow" => {
2824
                // check for bracketed exprs
2825
0
                if IsBracketed::is_bracketed(mathml, "", "", false, true) {
2826
0
                    return false;
2827
0
                }
2828
2829
0
                let parent = get_parent(mathml); // safe since 'math' is always at root
2830
0
                if name(parent) == "mfrac" {
2831
0
                    let children = mathml.children();
2832
0
                    if mathml.preceding_siblings().is_empty() {
2833
                        // numerator: check for multiplication -- doesn't need grouping in numerator
2834
0
                        if children.len() >= 3 {
2835
0
                            let operator = as_element(children[1]);
2836
0
                            if name(operator) == "mo" {
2837
0
                                let ch = as_text(operator);
2838
0
                                if ch == "\u{2062}" || ch == "⋅" || ch == "×"  {
2839
0
                                    return false;
2840
0
                                }
2841
0
                            }
2842
0
                        }
2843
0
                        return true;
2844
                    } else {
2845
                        // denominator
2846
0
                        return true;
2847
                    }
2848
2849
0
                }
2850
                // check for prefix at start
2851
                // example 7.12 has "2-" in superscript and is grouped, so we don't consider postfix ops
2852
0
                let children = mathml.children();
2853
0
                if children.len() == 2 &&
2854
0
                    (name(as_element(children[0])) == "mo") {
2855
0
                    return false;
2856
0
                }
2857
0
                return true;
2858
            },
2859
0
            _ => return false,
2860
        }
2861
0
    }
2862
2863
    // ordinals often have an irregular start (e.g., "half") before becoming regular.
2864
    // if the number is irregular, return the ordinal form, otherwise return 'None'.
2865
0
    fn needs_grouping_for_swedish(mathml: Element, is_base: bool) -> bool {
2866
        use crate::xpath_functions::IsInDefinition;
2867
0
        let mut node_name = name(mathml);
2868
0
        if mathml.attribute_value("data-roman-numeral").is_some() {
2869
0
            node_name = "mi";           // roman numerals don't follow number rules
2870
0
        }
2871
2872
0
        match node_name {
2873
0
            "mn" => return false,
2874
0
            "mi" | "mo" | "mtext" => {
2875
0
                let text = as_text(mathml);
2876
0
                let parent = get_parent(mathml);   // there is always a "math" node
2877
0
                let parent_name = name(parent);   // there is always a "math" node
2878
0
                if is_base && (parent_name == "msub" || parent_name == "msup" || parent_name == "msubsup") && !text.contains([' ', '\u{00A0}']) {
2879
0
                    return false;
2880
0
                }
2881
0
                let mut chars = text.chars();
2882
0
                let first_char = chars.next().unwrap();             // canonicalization assures it isn't empty;
2883
0
                let is_one_char = chars.next().is_none();
2884
                // '¨', etc., brailles as two chars -- there probably is some exception list but I haven't found it -- these are the ones I know about
2885
0
                return !((is_one_char && !['¨', '″', '‴', '⁗'].contains(&first_char)) ||                       // clause 8
2886
                            // "lim", "cos", etc., appear not to get parens, but the rules don't mention it (tests show it)
2887
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "FunctionNames").unwrap() ||
2888
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "Arrows").unwrap() ||          // clause 4
2889
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "GeometryShapes").unwrap());   // clause 5
2890
            },
2891
0
            "mrow" => {
2892
                // check for bracketed exprs
2893
0
                if IsBracketed::is_bracketed(mathml, "", "", false, true) {
2894
0
                    return false;
2895
0
                }
2896
2897
                // check for prefix at start
2898
                // example 7.12 has "2-" in superscript and is grouped, so we don't consider postfix ops
2899
0
                let children = mathml.children();
2900
0
                if children.len() == 2 &&
2901
0
                    (name(as_element(children[0])) == "mo") {
2902
0
                    return false;
2903
0
                }
2904
0
                return true;
2905
            },
2906
0
            "mfrac" => {
2907
                // exclude simple fractions -- they are not bracketed with start/end marks
2908
0
                let children = mathml.children();
2909
0
                return !(NeedsToBeGrouped::needs_grouping_for_swedish(as_element(children[0]), true) ||
2910
0
                         NeedsToBeGrouped::needs_grouping_for_swedish(as_element(children[0]), true));
2911
            },
2912
            // At least for msup (Ex 7.7, and 7.32 and maybe more), spec seems to feel grouping is not needed.
2913
            // "msub" | "msup" | "msubsup" | "munder" | "mover" | "munderover" => return true,
2914
0
            "mtable" => return true,    // Fix: should check for trivial cases that don't need grouping
2915
0
            _ => return false,
2916
        }
2917
0
    }
2918
2919
    /// Returns true if the element needs grouping symbols
2920
    /// Bases need extra attention because if they are a number and the item to the left is one, that needs distinguishing
2921
538
    fn needs_grouping_for_ueb(mathml: Element, is_base: bool) -> bool {
2922
        // From GTM 7.1
2923
        // 1. An entire number, i.e. the initiating numeric symbol and all succeeding symbols within the numeric mode thus
2924
        //     established (which would include any interior decimal points, commas, separator spaces, or simple numeric fraction lines).
2925
        // 2. An entire general fraction, enclosed in fraction indicators.
2926
        // 3. An entire radical expression, enclosed in radical indicators.
2927
        // 4. An arrow.
2928
        // 5. An arbitrary shape.
2929
        // 6. Any expression enclosed in matching pairs of round parentheses, square brackets or curly braces.
2930
        // 7. Any expression enclosed in the braille grouping indicators.   [Note: not possible here]
2931
        // 8. If none of the foregoing apply, the item is simply the [this element's] individual symbol.
2932
2933
        use crate::xpath_functions::IsInDefinition;
2934
538
        let mut node_name = name(mathml);
2935
538
        if mathml.attribute_value("data-roman-numeral").is_some() {
2936
1
            node_name = "mi";           // roman numerals don't follow number rules
2937
537
        }
2938
538
        match node_name {
2939
538
            "mn" => {   
2940
250
                if !is_base {
2941
233
                    return false;
2942
17
                }                                                                                        // clause 1
2943
                // two 'mn's can be adjacent, in which case we need to group the 'mn' to make it clear it is separate (see bug #204)
2944
17
                let parent = get_parent(mathml);   // there is always a "math" node
2945
17
                let grandparent = if name(parent) == "math" {
parent0
} else {get_parent(parent)};
2946
17
                if name(grandparent) != "mrow" {
2947
2
                    return false;
2948
15
                }
2949
15
                let preceding = parent.preceding_siblings();
2950
15
                if preceding.len()  < 2 {
2951
6
                    return false;
2952
9
                }
2953
                // any 'mn' would be separated from this node by invisible times
2954
9
                let previous_child = as_element(preceding[preceding.len()-1]);
2955
9
                if name(previous_child) == "mo" && as_text(previous_child) == "\u{2062}" {
2956
6
                    let previous_child = as_element(preceding[preceding.len()-2]);
2957
6
                    return name(previous_child) == "mn"
2958
                } else {
2959
3
                    return false;
2960
                }
2961
            },
2962
288
            "mi" | 
"mo"44
|
"mtext"32
=> {
2963
258
                let text = as_text(mathml);
2964
258
                let parent = get_parent(mathml);   // there is always a "math" node
2965
258
                let parent_name = name(parent);   // there is always a "math" node
2966
258
                if is_base && (
parent_name == "msub"230
||
parent_name == "msup"224
||
parent_name == "msubsup"10
) &&
!224
text224
.contains([' ', '\u{00A0}']) {
2967
224
                    return false;
2968
34
                }
2969
34
                let mut chars = text.chars();
2970
34
                let first_char = chars.next().unwrap();             // canonicalization assures it isn't empty;
2971
34
                let is_one_char = chars.next().is_none();
2972
                // '¨', etc., brailles as two chars -- there probably is some exception list but I haven't found it -- these are the ones I know about
2973
34
                return !((is_one_char && 
!31
['¨', '″', '‴', '⁗']31
.contains(&first_char)) || // clause 8
2974
                            // "lim", "cos", etc., appear not to get parens, but the rules don't mention it (tests show it)
2975
4
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "FunctionNames").unwrap() ||
2976
3
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "Arrows").unwrap() ||          // clause 4
2977
3
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "GeometryShapes").unwrap());   // clause 5
2978
            },
2979
30
            "mfrac" => return 
false2
, // clause 2 (test GTM 8.2(4) shows numeric fractions are not special)
2980
28
            "msqrt" | "mroot" => return 
false0
, // clause 3
2981
                    // clause 6 only mentions three grouping chars, I'm a little suspicious of that, but that's what it says
2982
28
            "mrow" => return !(
IsBracketed::is_bracketed22
(
mathml22
,
"("22
,
")"22
, false, false) ||
2983
16
                                IsBracketed::is_bracketed(mathml, "[", "]", false, false) || 
2984
15
                                IsBracketed::is_bracketed(mathml, "{", "}", false, false) ),
2985
6
            "msub" | 
"msup"4
|
"msubsup"1
=> {
2986
                // I'm a little dubious about the false value, but see GTM 7.7(2)
2987
5
                if !is_base {
2988
3
                    return true;
2989
2
                } 
2990
                // need to group nested scripts in base -- see GTM 12.2(2)                                         
2991
2
                let parent = get_parent(mathml);   // there is always a "math" node
2992
2
                let parent_name = name(parent);   // there is always a "math" node
2993
2
                return parent_name == "munder" || parent_name == "mover" || 
parent_name == "munderover"1
;
2994
            },
2995
1
            _ => return true,
2996
        }
2997
2998
538
    }
2999
}
3000
3001
impl Function for NeedsToBeGrouped {
3002
    // convert a node to an ordinal number
3003
1.34k
    fn evaluate<'d>(&self,
3004
1.34k
                        _context: &context::Evaluation<'_, 'd>,
3005
1.34k
                        args: Vec<Value<'d>>)
3006
1.34k
                        -> StdResult<Value<'d>, XPathError>
3007
    {
3008
1.34k
        let mut args = Args(args);
3009
1.34k
        args.exactly(3)
?0
;
3010
1.34k
        let is_base = args.pop_boolean()
?0
;
3011
1.34k
        let braille_code = args.pop_string()
?0
;
3012
1.34k
        let node = validate_one_node(args.pop_nodeset()
?0
, "NeedsToBeGrouped")
?0
;
3013
1.34k
        if let Node::Element(e) = node {
3014
1.34k
            let answer = match braille_code.as_str() {
3015
1.34k
                "CMU" => 
NeedsToBeGrouped::needs_grouping_for_cmu805
(
e805
,
is_base805
),
3016
538
                "UEB" => NeedsToBeGrouped::needs_grouping_for_ueb(e, is_base),
3017
0
                "Finnish" => NeedsToBeGrouped::needs_grouping_for_finnish(e, is_base),
3018
0
                "Swedish" => NeedsToBeGrouped::needs_grouping_for_swedish(e, is_base),
3019
0
                _ => return Err(XPathError::Other(format!("NeedsToBeGrouped: braille code arg '{braille_code:?}' is not a known code ('UEB', 'CMU', or 'Swedish')"))),
3020
            };
3021
1.34k
            return Ok( Value::Boolean( answer ) );
3022
0
        }
3023
3024
0
        return Err(XPathError::Other(format!("NeedsToBeGrouped: first arg '{node:?}' is not a node")));
3025
1.34k
    }
3026
}
3027
    
3028
    
3029
    
3030
#[cfg(test)]
3031
mod tests {
3032
    use super::*;
3033
    #[allow(unused_imports)]
3034
    use crate::init_logger;
3035
    use crate::interface::*;
3036
    use log::debug;
3037
3038
    /// Regression test for issue 24: with `BrailleNavHighlight` set to "All", navigating to a
    /// node must highlight that node's cells and report the matching braille position range.
    #[test]
    fn ueb_highlight_24() -> Result<()> {       // issue 24
        let mathml_str = "<math display='block' id='id-0'>
            <mrow id='id-1'>
                <mn id='id-2'>4</mn>
                <mo id='id-3'>&#x2062;</mo>
                <mi id='id-4'>a</mi>
                <mo id='id-5'>&#x2062;</mo>
                <mi id='id-6'>c</mi>
            </mrow>
        </math>";
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        set_mathml(mathml_str).unwrap();
        set_preference("BrailleCode", "UEB").unwrap();
        set_preference("BrailleNavHighlight", "All").unwrap();

        // the number '4' (numeric indicator + digit) occupies cells 0..2
        let number_braille = get_braille("id-2")?;
        assert_eq!("⣼⣙⠰⠁⠉", number_braille);
        set_navigation_node("id-2", 0)?;
        assert_eq!( get_braille_position()?, (0,2));

        // the letter 'a' (grade 1 indicator + letter) occupies cells 2..4
        let letter_braille = get_braille("id-4")?;
        assert_eq!("⠼⠙⣰⣁⠉", letter_braille);
        set_navigation_node("id-4", 0)?;
        assert_eq!( get_braille_position()?, (2,4));

        Ok(())
    }
3064
    
3065
    /// For the Nemeth, UEB, and CMU codes, checks that every braille cell position of the
    /// quadratic formula maps back to the expected MathML node id.
    #[test]
    // This test probably should be repeated for each braille code and be taken out of here
    fn find_mathml_from_braille() -> Result<()> {
        use std::time::Instant;
        let mathml_str = "<math id='id-0'>
        <mrow data-changed='added' id='id-1'>
          <mi id='id-2'>x</mi>
          <mo id='id-3'>=</mo>
          <mfrac id='id-4'>
            <mrow id='id-5'>
              <mrow data-changed='added' id='id-6'>
                <mo id='id-7'>-</mo>
                <mi id='id-8'>b</mi>
              </mrow>
              <mo id='id-9'>±</mo>
              <msqrt id='id-10'>
                <mrow data-changed='added' id='id-11'>
                  <msup id='id-12'>
                    <mi id='id-13'>b</mi>
                    <mn id='id-14'>2</mn>
                  </msup>
                  <mo id='id-15'>-</mo>
                  <mrow data-changed='added' id='id-16'>
                    <mn id='id-17'>4</mn>
                    <mo data-changed='added' id='id-18'>&#x2062;</mo>
                    <mi id='id-19'>a</mi>
                    <mo data-changed='added' id='id-20'>&#x2062;</mo>
                    <mi id='id-21'>c</mi>
                  </mrow>
                </mrow>
              </msqrt>
            </mrow>
            <mrow id='id-22'>
              <mn id='id-23'>2</mn>
              <mo data-changed='added' id='id-24'>&#x2062;</mo>
              <mi id='id-25'>a</mi>
            </mrow>
          </mfrac>
        </mrow>
       </math>";
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        set_mathml(mathml_str).unwrap();
        set_preference("BrailleNavHighlight", "Off").unwrap();

        // ---- Nemeth: one expected node id per braille cell ----
        set_preference("BrailleCode", "Nemeth").unwrap();
        let _braille = get_braille("")?;
        let nemeth_ids = [2, 3, 3, 3, 3, 4, 7, 8, 9, 9,   10, 13, 12, 14, 12, 15, 17, 19, 21, 10,   4, 23, 25, 4]
            .map(|num| format!("id-{}", num));
        debug!("\n*** Testing Nemeth ***");
        for (i, expected_id) in nemeth_ids.iter().enumerate() {
            debug!("\n===  i={}  ===", i);
            let instant = Instant::now();
            let (id, _offset) = crate::interface::get_navigation_node_from_braille_position(i)?;
            N_PROBES.with(|n| {debug!("test {:2} #probes = {}", i, n.borrow())});
            debug!("Time taken: {}ms", instant.elapsed().as_millis());
            assert_eq!(*expected_id, id, "\nNemeth test ith position={}", i);
        }

        // ---- UEB ----
        set_preference("BrailleCode", "UEB").unwrap();
        let _braille = get_braille("")?;
        let ueb_ids = [0, 0, 0, 2, 3, 3, 3, 3, 4, 7,   7, 8, 9, 9, 10, 13, 12, 14, 14, 15,   15, 17, 17, 19, 19, 21, 10, 4, 4, 23,   23, 25, 25, 4, 0, 0]
            .map(|num| format!("id-{}", num));
        debug!("\n\n*** Testing UEB ***");
        for (i, expected_id) in ueb_ids.iter().enumerate() {
            debug!("\n===  i={}  ===", i);
            let instant = Instant::now();
            let (id, _offset) = crate::interface::get_navigation_node_from_braille_position(i)?;
            N_PROBES.with(|n| {debug!("test {:2} #probes = {}", i, n.borrow())});
            debug!("Time taken: {}ms", instant.elapsed().as_millis());
            assert_eq!(*expected_id, id, "\nUEB test ith position={}", i);
        }

        // ---- CMU ----
        set_preference("BrailleCode", "CMU").unwrap();
        let cmu_braille = get_braille("")?;
        let cmu_ids = [2, 3, 5, 7, 8, 9, 9, 9, 10, 10,   11, 13, 12, 14, 14, 15, 17, 17, 19, 19,   21, 11, 5, 4, 22, 23, 23, 25, 25, 22,]
            .map(|num| format!("id-{}", num));
        debug!("\n\n*** Testing CMU ***");
        debug!("Braille: {}", cmu_braille);
        for (i, expected_id) in cmu_ids.iter().enumerate() {
            debug!("\n===  i={}  ===", i);
            let instant = Instant::now();
            let (id, _offset) = crate::interface::get_navigation_node_from_braille_position(i)?;
            N_PROBES.with(|n| {debug!("test {:2} #probes = {}", i, n.borrow())});
            debug!("Time taken: {}ms", instant.elapsed().as_millis());
            assert_eq!(*expected_id, id, "\nCMU test ith position={}", i);
        }

        Ok(())
    }
3152
    
3153
    /// Checks that the `UEB_START_MODE` preference changes the braille produced for `x^n`:
    /// the two modes are expected to differ by one grade 1 indicator cell.
    #[test]
    #[allow(non_snake_case)]
    fn test_UEB_start_mode() -> Result<()> {
        let mathml_str = "<math><msup><mi>x</mi><mi>n</mi></msup></math>";
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        set_mathml(mathml_str).unwrap();
        set_preference("BrailleCode", "UEB").unwrap();

        set_preference("UEB_START_MODE", "Grade2").unwrap();
        let grade2_braille = get_braille("")?;
        assert_eq!("⠭⠰⠔⠝", grade2_braille, "Grade2");

        set_preference("UEB_START_MODE", "Grade1").unwrap();
        let grade1_braille = get_braille("")?;
        assert_eq!("⠭⠔⠝", grade1_braille, "Grade1");

        Ok(())
    }
3168
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/braille.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
use strum_macros::Display;
3
use sxd_document::dom::{Element, ChildOfElement};
4
use sxd_document::Package;
5
use crate::definitions::SPEECH_DEFINITIONS;
6
use crate::errors::*;
7
use crate::pretty_print::mml_to_string;
8
use crate::prefs::PreferenceManager;
9
use std::cell::Ref;
10
use regex::{Captures, Regex, RegexSet};
11
use phf::{phf_map, phf_set};
12
use crate::speech::{BRAILLE_RULES, SpeechRulesWithContext, braille_replace_chars, make_quoted_string};
13
use crate::canonicalize::get_parent;
14
use std::borrow::Cow;
15
use std::ops::Range;
16
use std::sync::LazyLock;
17
use log::{debug, error};
18
19
320
/// Returns true if `ch` is one of the eight braille cells this module treats as a UEB prefix.
fn is_ueb_prefix(ch: char) -> bool {
    const UEB_PREFIX_CELLS: [char; 8] = ['⠼', '⠈', '⠘', '⠸', '⠐', '⠨', '⠰', '⠠'];
    UEB_PREFIX_CELLS.contains(&ch)
}
22
23
/// Returns the braille *char* that starts at byte offset `index` of `braille`.
///
/// Panics if `index..index+3` is out of bounds or does not fall on char boundaries.
fn braille_at(braille: &str, index: usize) -> char {
    // braille is always 3 bytes per char, so the cell is exactly this 3-byte slice
    let cell = &braille[index..index + 3];
    cell.chars().next().unwrap()
}
29
30
/// braille the MathML
31
/// If 'nav_node_id' is not an empty string, then the element with that id will have dots 7 & 8 turned on as per the pref
32
/// Returns the braille string (highlighted) along with the *character* start/end of the highlight (whole string if no highlight)
33
1.82k
pub fn braille_mathml(mathml: Element, nav_node_id: &str) -> Result<(String, usize, usize)> {
34
1.82k
    return BRAILLE_RULES.with(|rules| {
35
1.82k
        rules.borrow_mut().read_files()
?0
;
36
1.82k
        let rules = rules.borrow();
37
1.82k
        let new_package = Package::new();
38
1.82k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), nav_node_id, 0);
39
1.82k
        let braille_string = rules_with_context.match_pattern::<String>(mathml)
40
1.82k
                        .context("Pattern match/replacement failure!")
?0
;
41
        // debug!("braille_mathml: braille string: {}", &braille_string);
42
1.82k
        let braille_string = braille_string.replace(' ', "");
43
1.82k
        let pref_manager = rules_with_context.get_rules().pref_manager.borrow();
44
1.82k
        let highlight_style = pref_manager.pref_to_string("BrailleNavHighlight");
45
1.82k
        let braille_code = pref_manager.pref_to_string("BrailleCode");
46
1.82k
        let braille = match braille_code.as_str() {
47
1.82k
            "Nemeth" => 
nemeth_cleanup888
(
pref_manager888
,
braille_string888
),
48
941
            "UEB" => 
ueb_cleanup366
(
pref_manager366
,
braille_string366
),
49
575
            "Vietnam" => 
vietnam_cleanup112
(
pref_manager112
,
braille_string112
),
50
463
            "CMU" => 
cmu_cleanup372
(
pref_manager372
,
braille_string372
),
51
91
            "Finnish" => 
finnish_cleanup0
(
pref_manager0
,
braille_string0
),
52
91
            "Swedish" => 
swedish_cleanup0
(
pref_manager0
,
braille_string0
),
53
91
            "LaTeX" => 
LaTeX_cleanup50
(
pref_manager50
,
braille_string50
),
54
41
            "ASCIIMath" => ASCIIMath_cleanup(pref_manager, braille_string),
55
0
            "ASCIIMath-fi" => ASCIIMath_cleanup(pref_manager, braille_string),
56
0
            _ => braille_string.trim_matches('⠀').to_string(),    // probably needs cleanup if someone has another code, but this will have to get added by hand
57
        };
58
59
        return Ok(
60
1.82k
            if highlight_style != "Off" {
61
520
                highlight_braille_chars(braille, &braille_code, highlight_style == "All")
62
            } else {
63
1.30k
                let end = braille.len()/3;
64
1.30k
                (braille, 0, end)
65
            }
66
        );
67
1.82k
    });
68
69
    /// highlight with dots 7 & 8 based on the highlight style
70
    /// both the start and stop points will be extended to deal with indicators such as capitalization
71
    /// if 'fill_range' is true, the interior will be highlighted
72
    /// Returns the braille string (highlighted) along with the [start, end) *character* of the highlight (whole string if no highlight)
73
520
    fn highlight_braille_chars(braille: String, braille_code: &str, fill_range: bool) -> (String, usize, usize) {
74
520
        let mut braille = braille;
75
        // some special (non-braille) chars weren't converted to having dots 7 & 8 to indicate navigation position
76
        // they need to be added to the start
77
78
        // find start and end (byte) indexes of the highlighted region (braille chars have length=3 bytes)
79
520
        let start = braille.find(is_highlighted);
80
520
        let end = braille.rfind(is_highlighted);
81
520
        if start.is_none() {
82
57
            assert!(end.is_none());
83
57
            let end = braille.len();
84
57
            return (braille, 0, end/3);
85
463
        };
86
87
463
        let start = start.unwrap();
88
463
        let mut end = end.unwrap() + 3;         // always exists if start exists ('end' is exclusive)
89
        // debug!("braille highlight: start/end={}/{}; braille={}", start/3, end/3, braille);
90
463
        let mut start = highlight_first_indicator(&mut braille, braille_code, start, end);
91
463
        if let Some(
new_range45
) = expand_highlight(&mut braille, braille_code, start, end) {
92
45
            (start, end) = new_range
93
418
        }
94
95
463
        if start == end {
96
0
            return (braille, start/3, end/3);
97
463
        }
98
99
463
        if !fill_range {
100
459
            return (braille, start/3, end/3);
101
4
        }
102
103
4
        let mut result = String::with_capacity(braille.len());
104
4
        result.push_str(&braille[..start]);
105
4
        let highlight_region =&mut braille[start..end];
106
8
        for ch in 
highlight_region4
.
chars4
() {
107
8
            result.push( highlight(ch) );
108
8
        };
109
4
        result.push_str(&braille[end..]);
110
4
        return (result, start/3, end/3);
111
112
        /// Return the byte index of the first place to highlight
113
463
        fn highlight_first_indicator(braille: &mut String, braille_code: &str, start_index: usize, end_index: usize) -> usize {
114
            // chars in the braille block range use 3 bytes -- we can use that to optimize the code some
115
463
            let first_ch = unhighlight(braille_at(braille, start_index));
116
117
            // need to highlight (optional) capital/number, language, and style (max 2 chars) also in that (rev) order
118
463
            let mut prefix_ch_index = std::cmp::max(0, start_index as isize - 5*3) as usize;
119
463
            if prefix_ch_index == 0 && 
braille_code == "UEB"194
{
120
                // don't count the word or passage mode as part of a indicator
121
46
                if braille.starts_with("⠰⠰⠰") {
122
42
                    prefix_ch_index = 9;
123
42
                } else if 
braille.starts_with("⠰⠰")4
{
124
0
                    prefix_ch_index = 6;
125
4
                }
126
417
            }
127
463
            let indicators = &braille[prefix_ch_index..start_index];   // chars to be examined
128
463
            let i_byte_start = start_index - 3 * match braille_code {
129
463
                "Nemeth" => 
i_start_nemeth129
(
indicators129
,
first_ch129
),
130
334
                _ => i_start_ueb(indicators),               // treat all the other like UEB because they probably have similar number and letter prefixes
131
            };
132
463
            if i_byte_start < start_index {
133
                // remove old highlight as long as we don't wipe out the end highlight
134
59
                if start_index < end_index {
135
59
                    let old_first_char_bytes = start_index..start_index+3;
136
59
                    let replacement_str = unhighlight(braille_at(braille, start_index)).to_string();
137
59
                    braille.replace_range(old_first_char_bytes, &replacement_str);
138
59
                
}0
139
140
                // add new highlight
141
59
                let new_first_char_bytes = i_byte_start..i_byte_start+3;
142
59
                let replacement_str = highlight(braille_at(braille, i_byte_start)).to_string();
143
59
                braille.replace_range(new_first_char_bytes, &replacement_str);
144
404
            }
145
146
463
            return i_byte_start;
147
463
        }
148
149
        /// Return the byte indexes of the first and last place to highlight
150
        /// Currently, this only does something for CMU braille
151
463
        fn expand_highlight(braille: &mut String, braille_code: &str, start_index: usize, end_index: usize) -> Option<(usize, usize)> {
152
            // For CMU, we want to expand mrows to include the opening and closing grouping indicators if they exist
153
463
            if start_index == 0 || 
end_index402
== braille.len() ||
braille_code != "CMU"352
{
154
358
                return None;
155
105
            }
156
157
105
            let first_ch = unhighlight(braille_at(braille, start_index));
158
105
            let last_ch = unhighlight(braille_at(braille, end_index-3));
159
            // We need to be careful not to expand the selection if we are already on a grouping indicator
160
105
            if first_ch == '⠢' && 
last_ch == '⠔'0
{
161
0
                return None;
162
105
            }
163
105
            let preceding_ch = braille_at(braille, start_index-3);
164
105
            if preceding_ch != '⠢' {
165
43
                return None;
166
62
            }
167
168
62
            let following_ch = braille_at(braille, end_index);
169
62
            if following_ch != '⠔' {
170
17
                return None;
171
45
            }
172
173
45
            let preceding_ch = highlight(preceding_ch);
174
45
            braille.replace_range(start_index-3..start_index+3, format!("{preceding_ch}{first_ch}").as_str());
175
45
            let following_ch = highlight(following_ch);
176
45
            braille.replace_range(end_index-3..end_index+3, format!("{last_ch}{following_ch}").as_str());
177
45
            return Some( (start_index-3, end_index + 3) );
178
463
        }
179
520
    }
180
181
    /// Given a position in a Nemeth string, what is the position character that starts it (e.g, the prev char for capital letter)
182
129
    fn i_start_nemeth(braille_prefix: &str, first_ch: char) -> usize {
183
0
        fn is_nemeth_number(ch: char) -> bool {
184
0
            matches!(ch, '⠂' | '⠆' | '⠒' | '⠲' | '⠢' | '⠖' | '⠶' | '⠦' | '⠔' | '⠴' | '⠨')
185
0
        }
186
129
        let mut n_chars = 0;
187
129
        let prefix = &mut braille_prefix.chars().rev().peekable();
188
129
        if prefix.peek() == Some(&'⠠') ||  // cap indicator
189
129
           (prefix.peek() == Some(&'⠼') && 
is_nemeth_number0
(
first_ch0
)) || // number indicator
190
129
           [Some(&'⠸'), Some(&'⠈'), Some(&'⠨')].contains(&prefix.peek()) {         // bold, script/blackboard, italic indicator
191
1
            n_chars += 1;
192
1
            prefix.next();
193
128
        } 
194
195
129
        if [Some(&'⠰'), Some(&'⠸'), Some(&'⠨')].contains(&prefix.peek()) {   // English, German, Greek
196
0
            n_chars += 1;
197
129
        } else if prefix.peek() == Some(&'⠈') {  
198
0
            let ch = prefix.next();                              // Russian/Greek Variant
199
0
            if ch == Some('⠈') || ch == Some('⠨') {
200
0
                n_chars += 2;
201
0
            }
202
129
        } else if prefix.peek() == Some(&'⠠')  { // Hebrew 
203
0
            let ch = prefix.next();                              // Russian/Greek Variant
204
0
            if ch == Some('⠠') {
205
0
                n_chars += 2;
206
0
            }
207
129
        };
208
129
        return n_chars;
209
129
    }
210
211
    /// Given a position in a UEB string, what is the position character that starts it (e.g, the prev char for capital letter)
212
334
    fn i_start_ueb(braille_prefix: &str) -> usize {
213
334
        let prefix = &mut braille_prefix.chars().rev().peekable();
214
334
        let mut n_chars = 0;
215
392
        while let Some(
ch320
) = prefix.next() {
216
320
            if is_ueb_prefix(ch) {
217
58
                n_chars += 1;
218
262
            } else if ch == '⠆' {
219
0
                let n_typeform_chars = check_for_typeform(prefix);
220
0
                if n_typeform_chars > 0 {
221
0
                    n_chars += n_typeform_chars;
222
0
                } else {
223
0
                    break;
224
                }
225
            } else {
226
262
                break;
227
            }
228
        }
229
334
        return n_chars;
230
334
    }
231
232
    
233
0
    fn check_for_typeform(prefix: &mut dyn std::iter::Iterator<Item=char>) -> usize {
234
0
        fn is_ueb_typeform_prefix(ch: char) -> bool {
235
0
            matches!(ch, '⠈' | '⠘' | '⠸' | '⠨')
236
0
        }
237
238
0
        if let Some(typeform_indicator) = prefix.next() {
239
0
            if is_ueb_typeform_prefix(typeform_indicator) {
240
0
                return 2;
241
0
            } else if typeform_indicator == '⠼' &&
242
0
                      let Some(user_defined_typeform_indicator) = prefix.next() &&
243
0
                      (is_ueb_typeform_prefix(user_defined_typeform_indicator) || user_defined_typeform_indicator == '⠐') {
244
0
                        return 3;
245
0
                    }
246
0
        }
247
0
        return 0;
248
0
    }
249
1.82k
}
250
251
// FIX: if 8-dot braille is needed, perhaps the highlights can be shifted to a "highlighted" 256 char block in private space
//   they would need to be unshifted for the external world
/// Returns true if `ch` is a braille cell with dots 7 & 8 on (U+28C0..=U+28FF), or the
/// special marker char '𝑏'.
///
/// The range is inclusive: U+28FF ('⣿') is the highlighted form of '⠿' and must be recognized.
fn is_highlighted(ch: char) -> bool {
    return ('\u{28C0}'..='\u{28FF}').contains(&ch) || ch == '𝑏';           // 0x28C0..=0x28FF all have dots 7 & 8 on
}
257
258
159
/// Turns on dots 7 and 8 of a braille cell by setting the two bits 0xC0 of its code point,
/// which maps U+2800..=U+283F onto U+28C0..=U+28FF.
///
/// This function does not check that `ch` is a braille cell -- that is up to the caller.
/// Setting those two bits can never turn a valid `char` into a surrogate or push it past
/// U+10FFFF, so the checked conversion always succeeds and no `unsafe` is needed; the fallback
/// to `ch` is only there to avoid an `unwrap()`.
fn highlight(ch: char) -> char {
    char::from_u32(ch as u32 | 0xC0).unwrap_or(ch)
}
262
263
3.12k
/// Removes the highlight (dots 7 and 8) from a highlighted braille cell.
///
/// Chars in U+28C0..=U+28FF (the cells with both dot 7 and dot 8 on) are mapped back to
/// U+2800..=U+283F; every other char is returned unchanged. The range is inclusive so that
/// U+28FF, the highlighted form of '⠿', is handled as well.
fn unhighlight(ch: char) -> char {
    if !matches!(ch, '\u{28C0}'..='\u{28FF}') {
        return ch;
    }
    // clearing bits 0xC0 yields a code point in U+2800..=U+283F, which is always a valid char
    char::from_u32(ch as u32 & 0x283F).unwrap_or(ch)
}
271
272
use std::cell::RefCell;
273
thread_local!{
274
    /// Count number of probes -- get a sense of how well algorithm is working (for debugging)
    ///
    /// This is a per-thread counter: `get_navigation_node_from_braille_position()` resets it to 0
    /// before searching and its inner `find_navigation_node()` adds 1 on every call.
275
    static N_PROBES: RefCell<usize> = const { RefCell::new(0) };
276
}
277
278
279
/// Given a 0-based braille position, return the id of the smallest MathML node enclosing it.
280
/// This node might be a leaf with an offset.
281
91
pub fn get_navigation_node_from_braille_position(mathml: Element, position: usize) -> Result<(String, usize)> {
282
    // This works via a "smart" binary search (the trees aren't binary or balanced, we estimate the child to look in):
283
    //   braille the mathml with a nav node and see where 'position' is in relation to the start/end of the nav node
284
    // Each call to find_navigation_node() returns a search state that tell us where to look next if not found
285
    #[derive(Debug, Display)]
286
    enum SearchStatus {
287
        LookInParent,       // look up a level for exact match
288
        LookLeft,           // went too far, backup
289
        LookRight,          // continue searching right
290
        Found,
291
    }
292
293
    struct SearchState<'e> {
294
        status: SearchStatus,
295
        node: Element<'e>,
296
        highlight_start: usize,     // if status is Found, then this is the offset within a leaf node
297
        highlight_end: usize,       // if status is Found, this is ignored
298
    }
299
300
    // save the current highlight state, set the state to be the end points so we can find the braille, then restore the state
301
    // FIX: this can fail if there is 8-dot braille
302
    use crate::interface::{get_preference, set_preference};
303
91
    let saved_highlight_style = get_preference("BrailleNavHighlight").unwrap();
304
91
    set_preference("BrailleNavHighlight", "EndPoints").unwrap();
305
306
91
    N_PROBES.with(|n| {*n.borrow_mut() = 0});
307
    // dive into the child of the <math> element (should only be one)
308
91
    let search_state = find_navigation_node(mathml, as_element(mathml.children()[0]), position)
?0
;
309
91
    set_preference("BrailleNavHighlight", saved_highlight_style.as_str()).unwrap();
310
311
    // we know the attr value exists because it was found internally
312
    // FIX: what should be done if we never did the search?
313
91
    match search_state.status {
314
        SearchStatus::Found | SearchStatus::LookInParent => {
315
86
            return Ok( (search_state.node.attribute_value("id").unwrap().to_string(), search_state.highlight_start) )
316
        },
317
        _ => {
318
            // weird state -- return the entire expr
319
5
            match mathml.attribute_value("id") {
320
0
                None => bail!("'id' is not present on mathml: {}", mml_to_string(mathml)),
321
5
                Some(id) => return Ok( (id.to_string(), 0) ),
322
            }
323
        }
324
    } 
325
326
    /// find the navigation node that most tightly encapsulates the target position (0-based)
327
    /// 'node' is the current node we are on inside of 'mathml'
328
465
    fn find_navigation_node<'e>(mathml: Element<'e>, node: Element<'e>, target_position: usize) -> Result<SearchState<'e>> {
329
465
        let node_id = match node.attribute_value("id") {
330
465
            Some(id) => id,
331
0
            None => bail!("'id' is not present on mathml: {}", mml_to_string(node)),
332
        };
333
465
        N_PROBES.with(|n| {*n.borrow_mut() += 1});
334
465
        let (braille, char_start, char_end) = braille_mathml(mathml, node_id)
?0
;
335
465
        let mut status = None;
336
        // debug!("find_navigation_node ({}, id={}): highlight=[{}, {});  target={}", name(node), node_id, char_start, char_end, target_position);
337
465
        if is_leaf(node) {
338
100
            if char_start == 0 && 
char_end10
== braille.len()/3 {
339
6
                // nothing highlighted -- probably invisible char not represented in braille -- continue looking to the right
340
6
                // debug!("  return due invisible char (?)' ");
341
6
                status = Some(SearchStatus::LookRight);
342
94
            } else if char_start <= target_position && 
target_position < char_end88
{
343
                // FIX: need to handle multi-char leaves and set the offset (char_start) appropriately
344
                // debug!("  return due to target_position inside leaf: {} <= {} < {}", char_start, target_position, char_end);
345
58
                return Ok( SearchState {
346
58
                    status: SearchStatus::Found,
347
58
                    node,
348
58
                    highlight_start: target_position - char_start,
349
58
                    highlight_end: 0,
350
58
                });
351
36
            } else if name(node) == "mo" {
352
                // if there is whitespace before or after the operator, consider the operator to be a match
353
18
                if (char_start > 0 && target_position == char_start - 1 && 
354
2
                    braille_at(&braille, 3*(char_start - 1)) == '⠀' && is_operator_that_adds_whitespace(node)) ||
355
16
                   (3*char_end < braille.len() && target_position == char_end &&
356
11
                    braille_at(&braille, 3*char_end) == '⠀' && 
is_operator_that_adds_whitespace2
(
node2
)) {
357
4
                    return Ok( SearchState {
358
4
                        status: SearchStatus::Found,
359
4
                        node,
360
4
                        highlight_start: 0,
361
4
                        highlight_end: 0,
362
4
                    } );
363
14
                }
364
18
            }
365
365
        }
366
403
        if status.is_none() {
367
397
            if target_position < char_start {
368
23
                // debug!("  return due to target_position {} < start {}", target_position, char_start);
369
23
                status = Some(SearchStatus::LookLeft);
370
374
            } else if target_position >= char_end {
371
49
                // debug!("  return due to target_position {} >= end {}", target_position, char_end);
372
49
                status = Some(SearchStatus::LookRight);
373
325
            }
374
6
        }
375
403
        if let Some(
status78
) = status {
376
78
            return Ok( SearchState {
377
78
                status,
378
78
                node,
379
78
                highlight_start: char_start,
380
78
                highlight_end: char_end,
381
78
            } );
382
325
        }
383
384
325
        let children = node.children();
385
325
        let mut i_left_child = 0;                         // inclusive
386
325
        let mut i_right_child = children.len();           // exclusive
387
325
        let mut call_start = char_start;
388
325
        let mut guess_fn: Box<dyn Fn(usize, usize, usize, usize) -> usize> = Box::new(|i_left, i_right, start, target: usize| guess_child_node_ltr(&children, i_left, i_right, start, target));
389
398
        while i_left_child < i_right_child {
390
374
            let i_guess_child = guess_fn(i_left_child, i_right_child, call_start, target_position);
391
374
            let status = find_navigation_node(mathml, as_element(children[i_guess_child]), target_position)
?0
;
392
            // debug!("  in {} loop: status: {}, child: left/guess/right {}/({},{})/{}; highlight=[{}, {})", 
393
            //         name(node), status.status,
394
            //         i_left_child, i_guess_child, name(as_element(children[i_guess_child])),i_right_child,
395
            //         status.highlight_start, status.highlight_end);
396
374
            match status.status {
397
                SearchStatus::Found => {
398
301
                    return Ok(status);
399
                },
400
                SearchStatus::LookInParent => {
401
0
                    let (_, start, end) = braille_mathml(mathml, node_id)?;
402
                    // debug!("  parent ({}) braille: start/end={}/{};  target_position={}", name(node), start, end, target_position);
403
0
                    if start <= target_position && target_position < end {
404
                        // debug!("  ..found: id={}", node_id);
405
0
                        return Ok( SearchState{
406
0
                            status: SearchStatus::Found,
407
0
                            node,
408
0
                            highlight_start: 0,
409
0
                            highlight_end: 0,
410
0
                        } );      // done or look up another level
411
0
                    }
412
0
                    return Ok(status);  // look up a level
413
                },
414
                SearchStatus::LookLeft => {
415
20
                    i_right_child = if i_guess_child == 0 {
09
} else {
i_guess_child11
}; // exclusive
416
20
                    call_start = status.highlight_start-1;
417
20
                    guess_fn = Box::new(|i_left, i_right, start, target| 
guess_child_node_rtl7
(
&children7
,
i_left7
,
i_right7
,
start7
,
target7
));
418
                },
419
                SearchStatus::LookRight => {
420
53
                    i_left_child = i_guess_child+1;
421
53
                    call_start = status.highlight_end+1;
422
53
                    guess_fn = Box::new(|i_left, i_right, start, target| 
guess_child_node_ltr42
(
&children42
,
i_left42
,
i_right42
,
start42
,
target42
));
423
                },
424
            }
425
        }
426
        // debug!("Didn't child in node {}: left/right={}/{};  target_position={}", name(node), i_left_child, i_right_child, target_position);
427
428
        // if we get here, we didn't find it in the children
429
        // debug!("..end of loop: look in parent of {} has start/end={}/{}", name(node), char_start, char_end);
430
        return Ok( SearchState{
431
24
            status: if char_start <= target_position && target_position <= char_end {SearchStatus::Found} else {
SearchStatus::LookInParent0
},
432
24
            node,
433
            highlight_start: 0,
434
            highlight_end: 0,
435
        } );
436
465
    }
437
438
4
    fn is_operator_that_adds_whitespace(node: Element) -> bool {
439
        use crate::definitions::BRAILLE_DEFINITIONS;
440
4
        if PreferenceManager::get().borrow().pref_to_string("UseSpacesAroundAllOperators") == "true" {
441
0
            return true;
442
4
        } 
443
444
4
        return BRAILLE_DEFINITIONS.with(|definitions| {
445
4
            let definitions = definitions.borrow();
446
4
            let comparison_operators = definitions.get_hashset("ComparisonOperators").unwrap();
447
4
            return comparison_operators.contains(as_text(node));
448
4
        });        
449
4
    }
450
451
    /// look in children[i_left..i_right] for a count that exceeds target
452
367
    fn guess_child_node_ltr(children: &[ChildOfElement], i_left: usize, i_right: usize, start: usize, target: usize) -> usize {
453
367
        let mut estimated_position = start;
454
        // number of chars to add for number indicators
455
367
        let n_number_indicator = if PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth" {
0106
} else {
1261
}; // Nemeth doesn't typically need number or letter indicators
456
        #[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
457
666
        for i in 
i_left..i_right367
{
458
666
            estimated_position += estimate_braille_chars(children[i], n_number_indicator);
459
666
            if estimated_position >= target {
460
344
                return i;
461
322
            }
462
        }
463
23
        return i_right-1;       // estimate was too large, return the last child as a guess
464
367
    }
465
466
    /// look in children[i_left..i_right].rev for a count that is less than target
467
7
    fn guess_child_node_rtl(children: &[ChildOfElement], i_left: usize, i_right: usize, start: usize, target: usize) -> usize {
468
7
        let mut estimated_position = start;
469
7
        let n_number_indicator = if PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth" {
01
} else {
16
}; // Nemeth doesn't typically need number or letter indicators
470
7
        for i in (i_left..i_right).rev() {
471
7
            estimated_position -= estimate_braille_chars(children[i], n_number_indicator);
472
7
            if estimated_position <= target {
473
7
                return i;
474
0
            }
475
        }
476
0
        return i_left;       // estimate was too small, return the first child as a guess
477
7
    }
478
479
4.58k
    fn estimate_braille_chars(child: ChildOfElement, n_number_indicator: usize) -> usize {
480
4.58k
        let node = as_element(child);
481
4.58k
        let leaf_name = name(node);
482
4.58k
        if is_leaf(node) {
483
3.13k
            let text = as_text(node);
484
            // len() is close since mn's probably have ASCII digits and lower case vars are common (count as) and other chars need extra braille chars
485
            // don't want to count invisible chars since they don't display and would give a length = 3
486
3.13k
            if text == "\u{2061}" || text == "\u{2062}"  {       // invisible function apply/times (most common by far)
487
597
                return 0;
488
2.53k
            }
489
            // FIX: this assumption is bad for 8-dot braille
490
2.53k
            return match leaf_name {
491
2.53k
                "mn" => 
n_number_indicator632
+ text.len(),
492
1.90k
                "mo" => 
2741
, // could do better by actually brailling char, but that is more expensive
493
1.16k
                _ => text.len(),
494
            }
495
1.45k
        }
496
1.45k
        let mut estimate = if leaf_name == "mrow" {
0924
} else {
node.children().len() + 1526
}; // guess extra chars need for mfrac, msub, etc (start+intermediate+end).
497
1.45k
        if leaf_name == "msup" || 
leaf_name == "msub"1.19k
||
leaf_name == "msubsup"1.19k
{
498
260
            estimate -= 1;   // opening superscript/subscript indicator not needed
499
1.19k
        }
500
3.91k
        for child in 
node1.45k
.
children1.45k
() {
501
3.91k
            estimate += estimate_braille_chars(child, n_number_indicator);
502
3.91k
        }
503
        // debug!("estimate_braille_chars for {}: {}", crate::canonicalize::element_summary(as_element(child)), estimate);
504
1.45k
        return estimate;
505
4.58k
    }
506
91
}
507
508
888
fn nemeth_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
509
    // Typeface: S: sans-serif, B: bold, T: script/blackboard, I: italic, R: Roman
510
    // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian
511
    // Indicators: C: capital, N: number, P: punctuation, M: multipurpose
512
    // Others:
513
    //      W -- whitespace that should be kept (e.g, in a numeral)
514
    //      𝑁 -- hack for special case of a lone decimal pt -- not considered a number but follows rules mostly 
515
    // SRE doesn't have H: Hebrew or U: Russian, so not encoded (yet)
516
    // Note: some "positive" patterns find cases to keep the char and transform them to the lower case version
517
    static NEMETH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
518
        "S" => "⠠⠨",    // sans-serif
519
        "B" => "⠸",     // bold
520
        "𝔹" => "⠨",     // blackboard
521
        "T" => "⠈",     // script
522
        "I" => "⠨",     // italic (mapped to be the same a blackboard)
523
        "R" => "",      // roman
524
        "E" => "⠰",     // English
525
        "D" => "⠸",     // German (Deutsche)
526
        "G" => "⠨",     // Greek
527
        "V" => "⠨⠈",    // Greek Variants
528
        "H" => "⠠⠠",    // Hebrew
529
        "U" => "⠈⠈",    // Russian
530
        "C" => "⠠",     // capital
531
        "P" => "⠸",     // punctuation
532
        "𝐏" => "⠸",     // hack for punctuation after a roman numeral -- never removed
533
        "L" => "",      // letter
534
        "l" => "",      // letter inside enclosed list
535
        "M" => "",      // multipurpose indicator
536
        "m" => "⠐",     // required multipurpose indicator
537
        "N" => "",      // potential number indicator before digit
538
        "n" => "⠼",     // required number indicator before digit
539
        "𝑁" => "",      // hack for special case of a lone decimal pt -- not considered a number but follows rules mostly
540
        "W" => "⠀",     // whitespace
541
        "w" => "⠀",     // whitespace from comparison operator
542
        "," => "⠠⠀",    // comma
543
        "b" => "⠐",     // baseline
544
        "𝑏" => "⣐",     // highlight baseline (it's a hack)
545
        "↑" => "⠘",     // superscript
546
        "↓" => "⠰",     // subscript
547
    };
548
549
    // Add an English Letter indicator. This involves finding "single letters".
550
    // The green book has a complicated set of cases, but the Nemeth UEB Rule book (May 2020), 4.10 has a much shorter explanation:
551
    //   punctuation or whitespace on the left and right ignoring open/close chars
552
    //   https://nfb.org/sites/www.nfb.org/files/files-pdf/braille-certification/lesson-4--provisional-5-9-20.pdf
553
2
    static ADD_ENGLISH_LETTER_INDICATOR: LazyLock<Regex> = LazyLock::new(|| {
554
2
        Regex::new(r"(?P<start>^|W|P.[\u2800-\u28FF]?|,)(?P<open>[\u2800-\u28FF]?⠷)?(?P<letter>C?L.)(?P<close>[\u2800-\u28FF]?⠾)?(?P<end>W|P|,|$)").unwrap()
555
2
    });
556
        
557
    // Trim braille spaces before and after braille indicators
558
    // In order: fraction, /, cancellation, letter, baseline
559
    // Note: fraction over is not listed due to example 42(4) which shows a space before the "/"
560
    static REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: LazyLock<Regex> = 
561
2
        LazyLock::new(|| Regex::new(r"(⠄⠄⠄|⠤⠤⠤⠤)[Ww]+([⠼⠸⠪])").unwrap());
562
    static REMOVE_SPACE_AFTER_BRAILLE_INDICATORS: LazyLock<Regex> =
563
2
        LazyLock::new(|| Regex::new(r"([⠹⠻Llb])[Ww]+(⠄⠄⠄|⠤⠤⠤⠤)").unwrap());
564
565
    // Hack to convert non-numeric '.' to numeric '.'
566
    // The problem is that the numbers are hidden inside of mover -- this might be more general than rule 99_2.
567
2
    static DOTS_99_A_2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"𝑁⠨mN").unwrap());
568
569
    // Punctuation is one or two chars. There are (currently) only 3 2-char punct chars (—‘’) -- we explicitly list them below
570
    static REMOVE_SPACE_BEFORE_PUNCTUATION_151: LazyLock<Regex> =
571
2
        LazyLock::new(|| Regex::new(r"w(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠾)").unwrap());
572
    static REMOVE_SPACE_AFTER_PUNCTUATION_151: LazyLock<Regex> =
573
2
        LazyLock::new(|| Regex::new(r"(P.[⠤⠦⠠]?|[\u2800-\u28FF]?⠷)w").unwrap());
574
575
    // Multipurpose indicator insertion
576
    // 149 -- consecutive comparison operators have no space -- instead a multipurpose indicator is used (doesn't require a regex)
577
578
    // 177.2 -- add after a letter and before a digit (or decimal pt) -- digits will start with N
579
2
    static MULTI_177_2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([Ll].)[N𝑁]").unwrap());
580
581
    // keep between numeric subscript and digit ('M' added by subscript rule)
582
2
    static MULTI_177_3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([N𝑁].)M([N𝑁].)").unwrap());
583
584
    // Add after decimal pt for non-digits except for comma and punctuation
585
    // Note: since "." can be in the middle of a number, there is not necessarily a "N"
586
    // Although not mentioned in 177_5, don't add an 'M' before an 'm'
587
    static MULTI_177_5: LazyLock<Regex> =
588
2
        LazyLock::new(|| Regex::new(r"([N𝑁]⠨)([^⠂⠆⠒⠲⠢⠖⠶⠦⠔N𝑁,Pm])").unwrap());
589
590
    // Pattern for rule II.9a (add numeric indicator at start of line or after a space)
591
    // 1. start of line
592
    // 2. optional minus sign (⠤)
593
    // 3. optional typeface indicator
594
    // 4. number (N)
595
2
    static NUM_IND_9A: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<start>^|[,Ww])(?P<minus>⠤?)N").unwrap());
596
597
    // Needed after section mark(§), paragraph mark(¶), #, or *
598
2
    static NUM_IND_9C: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(⠤?)(⠠⠷|⠠⠳|⠠⠈⠷)N").unwrap());
599
600
    // Needed after section mark(§), paragraph mark(¶), #, or *
601
2
    static NUM_IND_9D: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(⠈⠠⠎|⠈⠠⠏|⠨⠼|⠈⠼)N").unwrap());
602
603
    // Needed after a typeface change or interior shape modifier indicator
604
2
    static NUM_IND_9E: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<face>[SB𝔹TIR]+?)N").unwrap());
605
2
    static NUM_IND_9E_SHAPE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<mod>⠸⠫)N").unwrap());
606
607
    // Needed after hyphen that follows a word, abbreviation, or punctuation (caution about rule 11d)
608
    // Note -- hyphen might encode as either "P⠤" or "⠤" depending on the tag used
609
2
    static NUM_IND_9F: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([Ll].[Ll].|P.)(P?⠤)N").unwrap());
610
611
    // Enclosed list exception
612
    // Normally we don't add numeric indicators in enclosed lists (done in get_braille_nemeth_chars).
613
    // The green book says "at the start" of an item, don't add the numeric indicator.
614
    // The NFB list exceptions after function abbreviations and angles, but what this really means is "after a space"
615
2
    static NUM_IND_ENCLOSED_LIST: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"w([⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴])").unwrap());
616
617
    // Punctuation chars (Rule 38.6 says don't use before ",", "hyphen", "-", "…")
618
    // Never use punctuation indicator before these (38-6)
619
    //      "…": "⠀⠄⠄⠄"
620
    //      "-": "⠸⠤" (hyphen and dash)
621
    //      ",": "⠠⠀"     -- spacing already added
622
    // Rule II.9b (add numeric indicator after punctuation [optional minus[optional .][digit]
623
    //  because this is run after the above rule, some cases are already caught, so don't
624
    //  match if there is already a numeric indicator
625
2
    static NUM_IND_9B: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?P<punct>P..?)(?P<minus>⠤?)N").unwrap());
626
627
    // Before 79b (punctuation)
628
2
    static REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?:[↑↓]+[b𝑏]?|[b𝑏])([Ww,P]|$)").unwrap());
629
630
    // Most commas have a space after them, but not when followed by a close quote (others?)
631
2
    static NO_SPACE_AFTER_COMMA: LazyLock<Regex> = LazyLock::new(|| Regex::new(r",P⠴").unwrap()); // captures both single and double close quote
632
2
    static REMOVE_LEVEL_IND_BEFORE_BASELINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?:[↑↓mb𝑏]+)([b𝑏])").unwrap());
633
634
    // Except for the four chars above, the unicode rules always include a punctuation indicator.
635
    // The cases to remove them (that seem relevant to MathML) are:
636
    //   Beginning of line or after a space (V 38.1)
637
    //   After a word (38.4)
638
    //   2nd or subsequent punctuation (includes, "-", etc) (38.7)
639
2
    static REMOVE_AFTER_PUNCT_IND: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(^|[Ww]|[Ll].[Ll].)P(.)").unwrap());
640
2
    static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏CLlMmb𝑏↑↓Nn𝑁Ww,])").unwrap());
641
2
    static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap());
642
643
//   debug!("Before:  \"{}\"", raw_braille);
644
    // replacements might overlap at boundaries (e.g., whitespace) -- need to repeat
645
888
    let mut start = 0;
646
888
    let mut result = String::with_capacity(raw_braille.len()+ raw_braille.len()/4);  // likely upper bound
647
923
    while let Some(
matched35
) = ADD_ENGLISH_LETTER_INDICATOR.find_at(&raw_braille, start) {
648
35
        result.push_str(&raw_braille[start..matched.start()]);
649
35
        let replacement = ADD_ENGLISH_LETTER_INDICATOR.replace(
650
35
                &raw_braille[matched.start()..matched.end()], "${start}${open}E${letter}${close}");
651
35
        // debug!("matched='{}', start/end={}/{}; replacement: {}", &raw_braille[matched.start()..matched.end()], matched.start(), matched.end(), replacement);
652
35
        result.push_str(&replacement);
653
35
        // put $end back on because needed for next match (e.g., whitespace at end and then start of next match)
654
35
        // but it could also match because it was at the end, in which case "-1" is wrong -- tested after loop for that
655
35
        start = matched.end() - 1;
656
35
    }
657
888
    if !raw_braille.is_empty() && ( start < raw_braille.len()-1 || 
"WP,"8
.
contains8
(
raw_braille.chars()8
.
nth_back8
(0).
unwrap8
()) ) { // see comment about $end above
658
882
        result.push_str(&raw_braille[start..]);
659
882
    
}6
660
//   debug!("ELIs:    \"{}\"", result);
661
662
888
    let result = NUM_IND_ENCLOSED_LIST.replace_all(&result, "wn${1}");
663
664
    // Remove blanks before and after braille indicators
665
888
    let result = REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS.replace_all(&result, "$1$2");
666
888
    let result = REMOVE_SPACE_AFTER_BRAILLE_INDICATORS.replace_all(&result, "$1$2");
667
668
888
    let result = REMOVE_SPACE_BEFORE_PUNCTUATION_151.replace_all(&result, "$1");
669
888
    let result = REMOVE_SPACE_AFTER_PUNCTUATION_151.replace_all(&result, "$1");
670
//   debug!("spaces:  \"{}\"", result);
671
672
888
    let result = DOTS_99_A_2.replace_all(&result, "N⠨mN");
673
674
    // Multipurpose indicator
675
888
    let result = result.replace("ww", "m"); // 149
676
888
    let result = MULTI_177_2.replace_all(&result, "${1}m${2}");
677
888
    let result = MULTI_177_3.replace_all(&result, "${1}m$2");
678
888
    let result = MULTI_177_5.replace_all(&result, "${1}m$2");
679
//   debug!("MULTI:   \"{}\"", result);
680
681
888
    let result = NUM_IND_9A.replace_all(&result, "${start}${minus}n");
682
    // debug!("IND_9A:  \"{}\"", result);
683
888
    let result = NUM_IND_9C.replace_all(&result, "${1}${2}n");
684
888
    let result = NUM_IND_9D.replace_all(&result, "${1}n");
685
888
    let result = NUM_IND_9E.replace_all(&result, "${face}n");
686
888
    let result = NUM_IND_9E_SHAPE.replace_all(&result, "${mod}n");
687
888
    let result = NUM_IND_9F.replace_all(&result, "${1}${2}n");
688
689
//   debug!("IND_9F:  \"{}\"", result);
690
691
    // 9b: insert after punctuation (optional minus sign)
692
    // common punctuation adds a space, so 9a handled it. Here we deal with other "punctuation" 
693
    // FIX other punctuation and reference symbols (9d)
694
888
    let result = NUM_IND_9B.replace_all(&result, "$punct${minus}n");
695
//   debug!("A PUNCT: \"{}\"", &result);
696
697
    // strip level indicators
698
    // check first to remove level indicators before baseline, then potentially remove the baseline
699
888
    let mut result = REMOVE_LEVEL_IND_BEFORE_BASELINE.replace_all(&result, "$1");
700
//   debug!("Punct  : \"{}\"", &result);
701
    // checks for punctuation char, so needs to before punctuation is stripped.
702
    // if '𝑏' is removed, then the highlight needs to be shifted to the left in some cases
703
888
    let result = remove_baseline_before_space_or_punctuation(&mut result);
704
//   debug!("Removed: \"{}\"", &result);
705
706
888
    let result = NO_SPACE_AFTER_COMMA.replace_all(&result, "⠠P⠴");
707
708
888
    let result = REMOVE_AFTER_PUNCT_IND.replace_all(&result, "$1$2");
709
//   debug!("Punct38: \"{}\"", &result);
710
711
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
712
888
    let sans_serif = pref_manager.pref_to_string("Nemeth_SansSerif");
713
888
    let bold = pref_manager.pref_to_string("Nemeth_Bold");
714
888
    let double_struck = pref_manager.pref_to_string("Nemeth_DoubleStruck");
715
888
    let script = pref_manager.pref_to_string("Nemeth_Script");
716
888
    let italic = pref_manager.pref_to_string("Nemeth_Italic");
717
718
7.57k
    let 
result888
=
REPLACE_INDICATORS888
.
replace_all888
(
&result888
, |cap: &Captures| {
719
7.57k
        let matched_char = &cap[0];
720
7.57k
        match matched_char {
721
7.57k
            "S" => 
&sans_serif2
,
722
7.57k
            "B" => 
&bold47
,
723
7.52k
            "𝔹" => 
&double_struck28
,
724
7.49k
            "T" => 
&script6
,
725
7.49k
            "I" => 
&italic2
,
726
7.48k
            _ => match NEMETH_INDICATOR_REPLACEMENTS.get(&cap[0]) {
727
0
                None => {error!("REPLACE_INDICATORS and NEMETH_INDICATOR_REPLACEMENTS are not in sync"); ""},
728
7.48k
                Some(&ch) => ch,
729
            }
730
        }
731
7.57k
    });
732
733
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
734
888
    let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
735
888
    let result = COLLAPSE_SPACES.replace_all(result, "⠀");
736
   
737
888
    return result.to_string();
738
739
888
    fn remove_baseline_before_space_or_punctuation<'a>(braille: &'a mut Cow<'a, str>) -> Cow<'a, str> {
740
        // If the baseline highlight is at the end of the string and it is going to be deleted by the regex,
741
        //   then we need to shift the highlight to the left if what is to it's left is not whitespace (which should never be a highlight end)
742
        // This only happens when BrailleNavHighlight == "EndPoints".
743
888
        let highlight_style = PreferenceManager::get().borrow().pref_to_string("BrailleNavHighlight");
744
888
        if highlight_style == "EndPoints" &&
745
132
            let Some(
last_highlighted129
) = braille.rfind(is_highlighted) &&
746
129
            braille[last_highlighted..].starts_with('𝑏') {
747
7
                    let i_after_baseline = last_highlighted + '𝑏'.len_utf8();
748
7
                    if i_after_baseline == braille.len() || 
braille[i_after_baseline..]5
.
starts_with5
(
['W', 'w', ',', 'P']5
) {
749
                        // shift the highlight to the left after doing just the replacement (if any) that the regex below does
750
                        // the shift runs until a non blank braille char is found
751
2
                        let mut bytes_deleted = 0;
752
2
                        let mut char_to_highlight = "".to_string();   // illegal value
753
2
                        for ch in braille[..last_highlighted].chars().rev() {
754
2
                            bytes_deleted += ch.len_utf8();
755
2
                            if (0x2801..0x28FF).contains(&(ch as u32)) {
756
2
                                char_to_highlight = highlight(ch).to_string();
757
2
                                break;
758
0
                            }
759
                        }
760
2
                        braille.to_mut().replace_range(last_highlighted-bytes_deleted..last_highlighted+'𝑏'.len_utf8(),
761
2
                                                        &char_to_highlight);
762
5
                    }
763
881
                }
764
888
        return REMOVE_LEVEL_IND_BEFORE_SPACE_COMMA_PUNCT.replace_all(braille, "$1");
765
766
888
    }
767
888
}
768
769
// Typeface: S: sans-serif, B: bold, T: script/blackboard, I: italic, R: Roman
770
// Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian
771
// Indicators: C: capital, N: number, P: punctuation, M: multipurpose
772
// Others:
773
//      W -- whitespace that should be kept (e.g., in a numeral)
774
//      𝑁 -- hack for special case of a lone decimal pt -- not considered a number but follows rules mostly 
775
// Note: some "positive" patterns find cases to keep the char and transform them to the lower case version
776
// Maps each single-character indicator/flag used in the intermediate braille string to the
// braille cells that replace it in `ueb_cleanup`.
// Entries whose value is "XXX" are placeholders: `ueb_cleanup` substitutes a user preference for
// "𝔹", "S" and "D" (and also for "V") before this table is consulted.
// An empty value means the indicator is simply removed from the output.
static UEB_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
    "S" => "XXX",    // sans-serif -- from prefs
    "B" => "⠘",     // bold
    "𝔹" => "XXX",     // blackboard -- from prefs
    "T" => "⠈",     // script
    "I" => "⠨",     // italic
    "R" => "",      // roman
    // "E" => "⠰",     // English
    "1" => "⠰",      // Grade 1 symbol
    "𝟙" => "⠰⠰",     // Grade 1 word
    "L" => "",       // Letter left in to assist in locating letters
    "D" => "XXX",    // German (Deutsche) -- from prefs
    "G" => "⠨",      // Greek
    "V" => "⠨⠈",     // Greek Variants
    // "H" => "⠠⠠",  // Hebrew
    // "U" => "⠈⠈",  // Russian
    "C" => "⠠",      // capital
    "𝐶" => "⠠",      // capital that never should get word indicator (from chemical element)
    "N" => "⠼",     // number indicator
    "t" => "⠱",     // shape terminator
    "W" => "⠀",     // whitespace
    "𝐖"=> "⠀",     // whitespace (hard break -- basically, it separates exprs)
    "s" => "⠆",     // typeface single char indicator
    "w" => "⠂",     // typeface word indicator
    "e" => "⠄",     // typeface & capital terminator
    "o" => "",       // flag that what follows is an open indicator (used for standing alone rule)
    "c" => "",       // flag that what follows is a close indicator (used for standing alone rule)
    "b" => "",       // flag that what follows is an open or close indicator (used for standing alone rule)
    "," => "⠂",     // comma
    "." => "⠲",     // period
    "-" => "-",     // hyphen
    "—" => "⠠⠤",   // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
    "―" => "⠐⠠⠤",  // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
    "#" => "",      // signals end of script
    // '(', '{', '[', '"', '\'', '“', '‘', '«',    // opening chars
    // ')', '}', ']', '\"', '\'', '”', '’', '»',           // closing chars
    // ',', ';', ':', '.', '…', '!', '?'                    // punctuation

};
815
816
// static LETTERS: phf::Set<char> = phf_set! {
817
//     '⠁', '⠃', '⠉', '⠙', '⠑', '⠋', '⠛', '⠓', '⠊', '⠚', '⠅', '⠇', '⠍', 
818
//     '⠝', '⠕', '⠏', '⠟', '⠗', '⠎', '⠞', '⠥', '⠧', '⠺', '⠭', '⠽', '⠵',
819
// };
820
821
2.39k
/// Returns true if `ch` is one of the ten braille cells that double as digits
/// (the cells used for the letters a-j).
fn is_letter_number(ch: char) -> bool {
    const DIGIT_CELLS: [char; 10] = ['⠁', '⠃', '⠉', '⠙', '⠑', '⠋', '⠛', '⠓', '⠊', '⠚'];
    DIGIT_CELLS.contains(&ch)
}
824
825
// Set of letter sequences looked up by `is_short_form`.
// Each braille letter is preceded by the 'L' (letter) marker used in the intermediate braille string.
// NOTE(review): presumably these are the UEB shortform words -- confirm against the UEB rule book.
static SHORT_FORMS: phf::Set<&str> = phf_set! {
    "L⠁L⠃", "L⠁L⠃L⠧", "L⠁L⠉", "L⠁L⠉L⠗", "L⠁L⠋",
    "L⠁L⠋L⠝", "L⠁L⠋L⠺", "L⠁L⠛", "L⠁L⠛L⠌", "L⠁L⠇",
     "L⠁L⠇L⠍", "L⠁L⠇L⠗", "L⠁L⠇L⠞", "L⠁L⠇L⠹", "L⠁L⠇L⠺",
     "L⠃L⠇", "L⠃L⠗L⠇", "L⠉L⠙", "L⠙L⠉L⠇", "L⠙L⠉L⠇L⠛",
     "L⠙L⠉L⠧", "L⠙L⠉L⠧L⠛", "L⠑L⠊", "L⠋L⠗", "L⠋L⠌", "L⠛L⠙",
     "L⠛L⠗L⠞", "L⠓L⠍", "L⠓L⠍L⠋", "L⠓L⠻L⠋", "L⠊L⠍L⠍", "L⠇L⠇", "L⠇L⠗",
     "L⠍L⠽L⠋", "L⠍L⠡", "L⠍L⠌", "L⠝L⠑L⠉", "L⠝L⠑L⠊", "L⠏L⠙",
     "L⠏L⠻L⠉L⠧", "L⠏L⠻L⠉L⠧L⠛", "L⠏L⠻L⠓", "L⠟L⠅", "L⠗L⠉L⠧",
     "L⠗L⠉L⠧L⠛", "L⠗L⠚L⠉", "L⠗L⠚L⠉L⠛", "L⠎L⠙", "L⠎L⠡", "L⠞L⠙",
     "L⠞L⠛L⠗", "L⠞L⠍", "L⠞L⠝", "L⠭L⠋", "L⠭L⠎", "L⠽L⠗", "L⠽L⠗L⠋",
     "L⠽L⠗L⠧L⠎", "L⠮L⠍L⠧L⠎", "L⠡L⠝", "L⠩L⠙", "L⠹L⠽L⠋", "L⠳L⠗L⠧L⠎",
     "L⠺L⠙", "L⠆L⠉", "L⠆L⠋", "L⠆L⠓", "L⠆L⠇", "L⠆L⠝", "L⠆L⠎", "L⠆L⠞",
     "L⠆L⠽", "L⠒L⠉L⠧", "L⠒L⠉L⠧L⠛", "L⠐L⠕L⠋"
};
840
841
1.75k
/// Returns true if `ch` is an indicator that may sit in front of a letter:
/// a typeface indicator (B, I, 𝔹, S, T, D) or one of the capital indicators (C, 𝐶, 𝑐).
fn is_letter_prefix(ch: char) -> bool {
    const LETTER_PREFIXES: [char; 9] = ['B', 'I', '𝔹', 'S', 'T', 'D', 'C', '𝐶', '𝑐'];
    LETTER_PREFIXES.contains(&ch)
}
844
845
// Trim braille spaces before and after braille indicators
846
// In order: fraction, /, cancellation, letter, baseline
847
// Note: fraction over is not listed due to example 42(4) which shows a space before the "/"
848
// static ref REMOVE_SPACE_BEFORE_BRAILLE_INDICATORS: Regex =
849
//     Regex::new(r"(⠄⠄⠄|⠤⠤⠤)W+([⠼⠸⠪])").unwrap();
850
2
// Matches every single-character indicator/flag that `ueb_cleanup` replaces via `UEB_INDICATOR_REPLACEMENTS`
// (or via user preferences for 𝔹, S, D and V).
// The hyphen is escaped so that it is a literal '-': unescaped, ".-—" is the character *range* U+002E..=U+2014,
// which matched every ASCII letter and digit instead of just '.', '-' and '—'.
// 't' (shape terminator) is listed explicitly because it was previously matched only through that accidental range.
static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swet,.\-—―#ocb])").unwrap());
851
2
// Matches a run of two or more braille blank cells ('⠀'); `ueb_cleanup` replaces each run with a single blank.
static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"⠀⠀+").unwrap());
852
853
35
fn is_short_form(chars: &[char]) -> bool {
854
204
    let 
chars_as_string35
=
chars35
.
iter35
().
map35
(|ch| ch.to_string()).
collect35
::<String>();
855
35
    return SHORT_FORMS.contains(&chars_as_string);
856
35
}
857
858
366
fn ueb_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
859
    // debug!("ueb_cleanup: start={}", raw_braille);
860
366
    let result = typeface_to_word_mode(&raw_braille);
861
366
    let result = capitals_to_word_mode(&result);
862
863
366
    let use_only_grade1 = pref_manager.pref_to_string("UEB_START_MODE").as_str() == "Grade1";
864
    
865
    // '𝐖' is a hard break -- basically, it separates exprs
866
366
    let mut result = result.split('𝐖')
867
370
                        .
map366
(|str| pick_start_mode(str, use_only_grade1) + "W")
868
366
                        .collect::<String>();
869
366
    result.pop();   // we added a 'W' at the end that needs to be removed.
870
871
366
    let result = result.replace("tW", "W");
872
873
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
874
366
    let double_struck = pref_manager.pref_to_string("UEB_DoubleStruck");
875
366
    let sans_serif = pref_manager.pref_to_string("UEB_SansSerif");
876
366
    let fraktur = pref_manager.pref_to_string("UEB_Fraktur");
877
366
    let greek_variant = pref_manager.pref_to_string("UEB_GreekVariant");
878
879
3.77k
    let 
result366
=
REPLACE_INDICATORS366
.
replace_all366
(
&result366
, |cap: &Captures| {
880
3.77k
        let matched_char = &cap[0];
881
3.77k
        match matched_char {
882
3.77k
            "𝔹" => 
&double_struck0
,
883
3.77k
            "S" => 
&sans_serif0
,
884
3.77k
            "D" => 
&fraktur2
,
885
3.77k
            "V" => 
&greek_variant0
,
886
3.77k
            _ => match UEB_INDICATOR_REPLACEMENTS.get(matched_char) {
887
0
                None => {error!("REPLACE_INDICATORS and UEB_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
888
3.77k
                Some(&ch) => ch,
889
            },
890
        }
891
3.77k
    });
892
893
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
894
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
895
366
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
896
   
897
366
    return result.to_string();
898
899
370
    fn pick_start_mode(raw_braille: &str, use_only_grade1: bool) -> String {
900
        // Need to decide what the start mode should be
901
        // From http://www.brailleauthority.org/ueb/ueb_math_guidance/final_for_posting_ueb_math_guidance_may_2019_102419.pdf
902
        //   Unless a math expression can be correctly represented with only a grade 1 symbol indicator in the first three cells
903
        //   or before a single letter standing alone anywhere in the expression,
904
        //   begin the expression with a grade 1 word indicator (or a passage indicator if the expression includes spaces)
905
        // Apparently "only a grade 1 symbol..." means at most one grade 1 symbol based on some examples (GTM 6.4, example 4)
906
        // debug!("before determining mode:  '{}'", raw_braille);
907
908
        // a bit ugly because we need to store the string if we have cap passage mode
909
370
        let raw_braille_string = if is_cap_passage_mode_good(raw_braille) {
convert_to_cap_passage_mode3
(
raw_braille3
)} else {
String::default367
()};
910
370
        let raw_braille = if raw_braille_string.is_empty() {
raw_braille367
} else {
&raw_braille_string3
};
911
370
        if use_only_grade1 {
912
1
            return remove_unneeded_mode_changes(raw_braille, UEB_Mode::Grade1, UEB_Duration::Passage);
913
369
        }
914
369
        let grade2 = remove_unneeded_mode_changes(raw_braille, UEB_Mode::Grade2, UEB_Duration::Symbol);
915
369
        debug!("Symbol mode:  '{}'", grade2);
916
917
369
        if is_grade2_string_ok(&grade2) {
918
143
            return grade2;
919
        } else {
920
            // BANA says use g1 word mode if spaces are present, but that's not what their examples do
921
            // A conversation with Ms. DeAndrea from BANA said that they mean use passage mode if ≥3 "segments" (≥2 blanks)
922
            // The G1 Word mode might not be at the start (iceb.rs:omission_3_6_7)
923
226
            let grade1_word = try_grade1_word_mode(raw_braille);
924
226
            debug!("Word mode:    '{}'", grade1_word);
925
226
            if !grade1_word.is_empty() {
926
36
                return grade1_word;
927
            } else {
928
190
                let grade1_passage = remove_unneeded_mode_changes(raw_braille, UEB_Mode::Grade1, UEB_Duration::Passage);
929
190
                return "⠰⠰⠰".to_string() + &grade1_passage + "⠰⠄";
930
            }
931
        }
932
933
        /// Return true if at least five (= # of cap passage indicators) cap indicators and no lower case letters
934
370
        fn is_cap_passage_mode_good(braille: &str) -> bool {
935
370
            let mut n_caps = 0;
936
370
            let mut is_cap_mode = false;
937
370
            let mut cap_mode = UEB_Duration::Symbol;    // real value set when is_cap_mode is set to true
938
370
            let mut chars = braille.chars();
939
940
            // look CL or CCL for caps (CC runs until we get whitespace)
941
            // if we find an L not in caps mode, we return false
942
            // Note: caps can be C𝐶, whitespace can be W𝐖
943
2.03k
            while let Some(
ch1.96k
) = chars.next() {
944
1.96k
                if ch == 'L' {
945
401
                    if !is_cap_mode {
946
288
                        return false;
947
113
                    }
948
113
                    chars.next();       // skip letter
949
113
                    if cap_mode == UEB_Duration::Symbol {
950
79
                        is_cap_mode = false;
951
79
                    
}34
952
1.55k
                } else if ch == 'C' || 
ch == '𝐶'1.49k
{
953
107
                    if is_cap_mode {
954
16
                        if cap_mode == UEB_Duration::Symbol {
955
12
                            cap_mode = UEB_Duration::Word;
956
12
                        
}4
957
91
                    } else {
958
91
                        is_cap_mode = true;
959
91
                        cap_mode = UEB_Duration::Symbol;
960
91
                    }
961
107
                    n_caps += 1;
962
1.45k
                } else if ch == 'W' || 
ch == '𝐖'1.33k
{
963
119
                    if is_cap_mode {
964
2
                        assert!(cap_mode == UEB_Duration::Word);
965
117
                    }
966
119
                    is_cap_mode = false;
967
1.33k
                } else if ch == '1' && 
is_cap_mode117
{
968
3
                    break;
969
1.33k
                }
970
            }
971
82
            return n_caps > 4;
972
370
        }
973
974
3
        /// Drops every capital indicator ('C' and '𝐶') from `braille` and wraps what is left
        /// in "⠠⠠⠠" ... "⠠⠄".
        fn convert_to_cap_passage_mode(braille: &str) -> String {
            let without_caps: String = braille.chars()
                .filter(|&ch| !matches!(ch, 'C' | '𝐶'))
                .collect();
            format!("⠠⠠⠠{without_caps}⠠⠄")
        }
977
978
        /// Return true if the BANA or ICEB guidelines say it is ok to start with grade 2
        ///
        /// `grade2_braille` is the intermediate string (still containing indicator chars such as '1', 'L', 'W').
        /// The check is done in two passes that share the index `i`:
        /// 1. scan up to and including the third "real" char, allowing at most one unforced '1';
        /// 2. scan the rest, where a further unforced '1' is tolerated only once and only under the
        ///    conditions tested in that loop.
        fn is_grade2_string_ok(grade2_braille: &str) -> bool {
            // BANA says use grade 2 if there is not more than one grade one symbol or single letter standing alone.
            // The exact quote from their guidance:
            //    Unless a math expression can be correctly represented with only a grade 1 symbol indicator in the first three cells
            //    or before a single letter standing alone anywhere in the expression,
            //    begin the expression with a grade 1 word indicator
            // Note: I modified this slightly to exclude the cap indicator in the count. That allows three more ICEB rule to pass and seems
            //    like it is a reasonable thing to do.
            // Another modification is allow a single G1 indicator to occur after whitespace later on
            //    because ICEB examples show it and it seems better than going to passage mode if it is the only G1 indicator

            // Because of the 'L's which go away, we have to put a little more work into finding the first three chars
            let chars = grade2_braille.chars().collect::<Vec<char>>();
            let mut n_real_chars = 0;  // actually number of chars
            let mut found_g1 = false;
            let mut i = 0;
            while i < chars.len() {
                let ch = chars[i];
                if ch == '1' && !is_forced_grade1(&chars, i) {
                    if found_g1 {
                        return false;       // a second unforced '1' within the first three real chars
                    }
                    found_g1 = true;
                } else if !"𝐶CLobc".contains(ch) {
                    // cap indicators, the letter marker, and the open/close flags don't count as real chars
                    // (a forced '1' does land here and is counted)
                    if n_real_chars == 2 {
                        i += 1;
                        break;              // this is the third real char
                    };
                    n_real_chars += 1;
                }
                i += 1
            }

            // if we find *another* g1 that isn't forced and isn't standing alone, we are done
            // I've added a 'follows whitespace' clause for test iceb.rs:omission_3_6_2 to the standing alone rule
            // we only allow one standing alone example -- not sure if BANA guidance has this limit, but GTM 11_5_5_3 seems better with it
            // Same for GTM 1_7_3_1 (passage mode is mentioned also)
            let mut is_standing_alone_already_encountered = false;
            let mut is_after_whitespace = false;
            while i < chars.len() {
                let ch = chars[i];
                if ch == 'W' {
                    is_after_whitespace = true;
                } else if ch == '1' && !is_forced_grade1(&chars, i) {
                    // reject if a later '1' was already accepted, or if this '1' is neither
                    // (the first g1 and after whitespace) nor followed by a single letter
                    if is_standing_alone_already_encountered ||
                       ((found_g1 || !is_after_whitespace) && !is_single_letter_on_right(&chars, i)) {
                        return false;
                    }
                    found_g1 = true;
                    is_standing_alone_already_encountered = true;
                }
                i += 1;
            }
            return true;
        }
1034
1035
        /// Return true if the sequence of chars forces a '1' at the `i`th position
1036
        /// Note: `chars[i]` should be '1'
1037
930
        fn is_forced_grade1(chars: &[char], i: usize) -> bool {
1038
            // A '1' is forced if 'a-j' follows a digit
1039
930
            assert_eq!(chars[i], '1', "'is_forced_grade1' didn't start with '1'");
1040
            // check that a-j follows the '1' -- we have '1Lx' where 'x' is the letter to check
1041
930
            if i+2 < chars.len() && 
is_letter_number927
(
unhighlight927
(
chars[i+2]927
)) {
1042
                // check for a number before the '1'
1043
                // this will be 'N' followed by LETTER_NUMBERS or the number ".", ",", or " "
1044
25
                for j in (
0..i12
).
rev12
() {
1045
25
                    let ch = chars[j];
1046
25
                    if !(is_letter_number(unhighlight(ch)) || 
".,W𝐖"14
.
contains14
(
ch14
)) {
1047
12
                        return ch == 'N'
1048
13
                    }
1049
                }
1050
918
            }
1051
918
            return false;
1052
930
        }
1053
1054
203
        /// Return true if what follows position `i` is exactly one letter ('L' + letter), optionally
        /// mixed with typeface/capital indicators, before some other char or the end of `chars`.
        ///
        /// Reaching the end of `chars` always yields true (even if no letter was seen);
        /// a second letter in the sequence yields false.
        fn is_single_letter_on_right(chars: &[char], i: usize) -> bool {
            // indicators that may appear in front of a letter and are ignored by the scan
            const SKIPPED: [char; 10] = ['B', 'I', '𝔹', 'S', 'T', 'D', 'C', '𝐶', 's', 'w'];

            let mut seen_letter = false;
            let mut pos = i + 1;
            while let Some(&ch) = chars.get(pos) {
                if SKIPPED.contains(&ch) {
                    pos += 1;
                    continue;
                }
                if ch != 'L' {
                    return seen_letter;     // the sequence ended -- ok only if one letter was seen
                }
                if seen_letter {
                    return false;           // found a second letter in the sequence
                }
                seen_letter = true;
                pos += 2;                   // eat 'L' and actual letter
            }
            true
        }
1080
1081
226
        fn try_grade1_word_mode(raw_braille: &str) -> String {
1082
            // this isn't quite right, but pretty close -- try splitting at 'W' (words)
1083
            // only one of the parts can be in word mode and none of the others can have '1' unless forced
1084
226
            let mut g1_words = Vec::default();
1085
226
            let mut found_word_mode = false;
1086
622
            for raw_word in 
raw_braille226
.
split226
('W') {
1087
622
                let word = remove_unneeded_mode_changes(raw_word, UEB_Mode::Grade2, UEB_Duration::Symbol);
1088
                // debug!("try_grade1_word_mode: word='{}'", word);
1089
622
                let word_chars = word.chars().collect::<Vec<char>>();
1090
622
                let needs_word_mode = word_chars.iter().enumerate()
1091
1.12k
                    .
any622
(|(i, &ch) | ch == '1' &&
!416
is_forced_grade1416
(&word_chars, i));
1092
622
                if needs_word_mode {
1093
416
                    if found_word_mode {
1094
190
                        return "".to_string();
1095
226
                    }
1096
226
                    found_word_mode = true;
1097
226
                    g1_words.push("⠰⠰".to_string() + &remove_unneeded_mode_changes(raw_word, UEB_Mode::Grade1, UEB_Duration::Word)
1098
                    );
1099
206
                } else {
1100
206
                    g1_words.push(word);
1101
206
                }
1102
            }
1103
36
            return if found_word_mode {g1_words.join("W")} else {
""0
.
to_string0
()};
1104
226
        }
1105
370
    }
1106
366
}
1107
1108
478
fn typeface_to_word_mode(braille: &str) -> String {
1109
2
    static HAS_TYPEFACE: LazyLock<Regex> = LazyLock::new(|| Regex::new("[BI𝔹STD]").unwrap());
1110
    // debug!("before typeface fix:  '{}'", braille);
1111
1112
478
    let mut result = "".to_string();
1113
478
    let chars = braille.chars().collect::<Vec<char>>();
1114
478
    let mut word_mode = Vec::with_capacity(5);
1115
478
    let mut word_mode_end = Vec::with_capacity(5);
1116
478
    let mut i = 0;
1117
11.5k
    while i < chars.len() {
1118
11.0k
        let ch = chars[i];
1119
11.0k
        if HAS_TYPEFACE.is_match(ch.to_string().as_str()) {
1120
8
            let i_next_char_target = find_next_char(&chars[i+1..], ch);
1121
8
            if word_mode.contains(&ch) {
1122
3
                if i_next_char_target.is_none() {
1123
2
                    word_mode.retain(|&item| item!=ch);  // drop the char since word mode is done
1124
2
                    word_mode_end.push(ch);   // add the char to signal to add end sequence
1125
1
                }
1126
            } else {
1127
5
                result.push(ch);
1128
5
                if i_next_char_target.is_some() {
1129
2
                    result.push('w');     // typeface word indicator
1130
2
                    word_mode.push(ch);      // starting word mode for this char
1131
3
                } else {
1132
3
                    result.push('s');     // typeface single char indicator
1133
3
                }
1134
            }
1135
8
            i += 1; // eat "B", etc
1136
11.0k
        } else if ch == 'L' || 
ch == 'N'8.72k
{
1137
3.70k
            result.push(chars[i]);
1138
3.70k
            result.push(chars[i+1]);
1139
3.70k
            if !word_mode_end.is_empty() && 
i+22
< chars.len() && !(
chars[i+2] == 'W'1
||
chars[i+2] == '𝐖'1
) {
1140
                // add terminator unless word sequence is terminated by end of string or whitespace
1141
1
                for &ch in &word_mode_end {
1142
1
                    result.push(ch);
1143
1
                    result.push('e');
1144
1
                };
1145
1
                word_mode_end.clear();
1146
3.70k
            }
1147
3.70k
            i += 2; // eat Ll/Nd
1148
7.30k
        } else {
1149
7.30k
            result.push(ch);
1150
7.30k
            i += 1;
1151
7.30k
        }
1152
    }
1153
478
    return result;
1154
1155
478
}
1156
1157
478
fn capitals_to_word_mode(braille: &str) -> String {
1158
    use std::iter::FromIterator;
1159
    // debug!("before capitals fix:  '{}'", braille);
1160
1161
478
    let mut result = "".to_string();
1162
478
    let chars = braille.chars().collect::<Vec<char>>();
1163
478
    let mut is_word_mode = false;
1164
478
    let mut i = 0;
1165
    // look for a sequence of CLxCLy... and create CCLxLy...
1166
12.6k
    while i < chars.len() {
1167
12.1k
        let ch = chars[i];
1168
12.1k
        if ch == 'C' {
1169
            // '𝑐' should only occur after a 'C', so we don't have top-level check for it
1170
256
            let mut next_non_cap = i+1;
1171
257
            while let Some(
i_next1
) = find_next_char(&chars[next_non_cap..], '𝑐') {
1172
1
                next_non_cap += i_next + 1; // C/𝑐, L, letter
1173
1
            }
1174
256
            if find_next_char(&chars[next_non_cap..], 'C').is_some() { // next letter sequence "C..."
1175
63
                if is_next_char_start_of_section_12_modifier(&chars[next_non_cap+1..]) {
1176
                    // to me this is tricky -- section 12 modifiers apply to the previous item
1177
                    // the last clause of the "item" def is the previous indivisible symbol" which ICEB 2.1 say is:
1178
                    //   braille sign: one or more consecutive braille characters comprising a unit,
1179
                    //     consisting of a root on its own or a root preceded by one or more
1180
                    //     prefixes (also referred to as braille symbol)
1181
                    // this means the capital indicator needs to be stated and can't be part of a word or passage
1182
1
                    is_word_mode = false;
1183
1
                    result.push_str(String::from_iter(&chars[i..next_non_cap]).as_str());
1184
1
                    i = next_non_cap;
1185
1
                    continue;
1186
62
                }
1187
62
                if is_word_mode {
1188
12
                    i += 1;     // skip the 'C'
1189
50
                } else {
1190
50
                    // start word mode -- need an extra 'C'
1191
50
                    result.push('C');
1192
50
                    is_word_mode = true;
1193
50
                }
1194
193
            } else if is_word_mode {
1195
50
                i += 1;         // skip the 'C'
1196
143
            }
1197
255
            if chars[next_non_cap] == 'G' {
1198
8
                // Greek letters are a bit exceptional in that the pattern is "CGLx" -- bump 'i'
1199
8
                next_non_cap += 1;
1200
247
            }
1201
255
            if chars[next_non_cap] != 'L' {
1202
0
                error!("capitals_to_word_mode: internal error: didn't find L after C in '{}'.",
1203
0
                       chars[i..next_non_cap+2].iter().collect::<String>().as_str());
1204
255
            }
1205
255
            let i_braille_char = next_non_cap + 2;
1206
255
            result.push_str(String::from_iter(&chars[i..i_braille_char]).as_str());
1207
255
            i = i_braille_char;
1208
11.9k
        } else if ch == 'L' {       // must be lowercase -- uppercase consumed above
1209
            // assert!(LETTERS.contains(&unhighlight(chars[i+1]))); not true for other alphabets
1210
2.03k
            if is_word_mode {
1211
2
                result.push('e');       // terminate Word mode (letter after caps)
1212
2
                is_word_mode = false;
1213
2.03k
            }
1214
2.03k
            result.push('L');
1215
2.03k
            result.push(chars[i+1]);
1216
2.03k
            i += 2; // eat L, letter
1217
9.88k
        } else {
1218
9.88k
            is_word_mode = false;   // non-letters terminate cap word mode
1219
9.88k
            result.push(ch);
1220
9.88k
            i += 1;
1221
9.88k
        }
1222
    }
1223
478
    return result;
1224
1225
63
    fn is_next_char_start_of_section_12_modifier(chars: &[char]) -> bool {
1226
        // first find the L and eat the char so that we are at the potential start of where the target lies
1227
63
        let chars_len = chars.len();
1228
63
        let mut i_cap = 0;
1229
126
        while chars[i_cap] != 'C' {     // we know 'C' is in the string, so no need to check for exceeding chars_len
1230
63
            i_cap += 1;
1231
63
        }
1232
73
        for i_end in 
i_cap+1..chars_len63
{
1233
73
            if chars[i_end] == 'L' {
1234
                // skip the next char to get to the real start, and then look for the modifier string or next L/N
1235
                // debug!("   after L '{}'", chars[i_end+2..].iter().collect::<String>());
1236
65
                for i in 
i_end+2..chars_len63
{
1237
65
                    let ch = chars[i];
1238
65
                    if ch == '1' {
1239
                        // Fix: there's probably a much better way to check if we have a match against one of "⠱", "⠘⠱", "⠘⠲", "⠸⠱", "⠐⠱ ", "⠨⠸⠱"
1240
5
                        if chars[i+1] == '⠱' {
1241
0
                            return true;
1242
5
                        } else if i+2 < chars_len {
1243
5
                            let mut str = chars[i+1].to_string();
1244
5
                            str.push(chars[i+2]);
1245
5
                            if str == "⠘⠱" || str == "⠘⠲" || str == "⠸⠱" || str == "⠐⠱" {
1246
1
                                return true;
1247
4
                            } else if i+3 < chars_len {
1248
4
                                str.push(chars[i+3]);
1249
4
                                return str == "⠨⠸⠱";
1250
0
                            }
1251
0
                            return false;
1252
0
                        }
1253
60
                    }
1254
60
                    if ch == 'L' || 
ch == 'N'46
||
!is_letter_prefix(ch)46
{
1255
48
                        return false;
1256
12
                    }
1257
                }
1258
10
            }
1259
        }
1260
10
        return false;
1261
63
    }    
1262
478
}
1263
1264
521
fn find_next_char(chars: &[char], target: char) -> Option<usize> {        
1265
    // first find the L or N and eat the char so that we are at the potential start of where the target lies
1266
    // debug!("Looking for '{}' in '{}'", target, chars.iter().collect::<String>());
1267
610
    for i_end in 
0..chars.len()521
{
1268
610
        if chars[i_end] == 'L' || 
chars[i_end] == 'N'95
{
1269
            // skip the next char to get to the real start, and then look for the target
1270
            // stop when L/N signals past potential target or we hit some non L/N char (actual braille)
1271
            // debug!("   after L/N '{}'", chars[i_end+2..].iter().collect::<String>());
1272
521
            for (
i515
, &
ch515
) in chars.iter().enumerate().skip(i_end+2) {
1273
515
                if ch == 'L' || 
ch == 'N'368
||
!is_letter_prefix(ch)366
{
1274
383
                    return None;
1275
132
                } else if ch == target {
1276
                    // debug!("   found target");
1277
67
                    return Some(i);
1278
65
                }
1279
            }
1280
89
        }
1281
    }
1282
71
    return None;
1283
521
}
1284
1285
/// The braille mode that `remove_unneeded_mode_changes` starts in and tracks while scanning.
// The non-camel-case name is intentional ("UEB" is an acronym), hence the `allow`.
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq, Copy, Clone)]
enum UEB_Mode {
    Numeric,        // also includes Grade1
    Grade1,
    Grade2,
}
1292
1293
/// How far an indicator (or mode) extends: a single symbol, a word, or a passage.
// The non-camel-case name is intentional ("UEB" is an acronym), hence the `allow`.
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq, Copy, Clone)]
enum UEB_Duration {
    // Standing alone: A braille symbol that is standing alone may have a contracted (grade 2) meaning.
    // A letter or unbroken sequence of letters is “standing alone” if the symbols before and after the letter or
    //   sequence are spaces, hyphens, dashes or any combination thereof, including some common punctuation.
    // Item: An “item” is defined as the next symbol or one of seven groupings listed in Rules of Unified English Braille, §11.4.1.
    Symbol,

    // The grade 1 word indicator sets grade 1 mode for the next word or symbol sequence.
    // A symbol sequence in UEB is defined as an unbroken string of braille signs,
    //   whether alphabetic or non-alphabetic, preceded and followed by a space.
    Word,
    // Used with the passage indicators that `pick_start_mode` wraps around a whole expression.
    Passage,
}
1308
1309
/// Returns true if `ch` is a typeface indicator (B, I, 𝔹, S, T, D), a capital indicator (C, 𝐶),
/// or a typeface single-char/word indicator (s, w).
// used to determine standing alone (on left side)
fn is_left_intervening_char(ch: char) -> bool {
    const INTERVENING: [char; 10] = ['B', 'I', '𝔹', 'S', 'T', 'D', 'C', '𝐶', 's', 'w'];
    INTERVENING.contains(&ch)
}
1313
1314
/// Return value for use_g1_word_mode()
///
/// `remove_unneeded_mode_changes` starts in grade 1 mode when the result is `InWord`.
#[derive(Debug, PartialEq)]
enum Grade1WordIndicator {
    NotInWord,        // no '𝟙' in the current/next word
    InWord,           // '𝟙' in the current/next word
    NotInChars,       // no '𝟙' in the entire string (optimization for common case)
}
1321
1322
1.89k
fn remove_unneeded_mode_changes(raw_braille: &str, start_mode: UEB_Mode, start_duration: UEB_Duration) -> String {
1323
    // FIX: need to be smarter about moving on wrt to typeforms/typefaces, caps, bold/italic. [maybe just let them loop through the default?]
1324
1.89k
    let mut mode = start_mode;
1325
1.89k
    let mut duration = start_duration;
1326
1.89k
    let mut start_g2_letter = None;    // used for start of contraction checks
1327
1.89k
    let mut i_g2_start = None;  // set to 'i' when entering G2 mode; None in other modes. '1' indicator goes here if standing alone
1328
1.89k
    let mut cap_word_mode = false;     // only set to true in G2 to prevent contractions
1329
1.89k
    let mut result = String::default();
1330
1.89k
    let chars = raw_braille.chars().collect::<Vec<char>>();
1331
1.89k
    let mut g1_word_indicator = Grade1WordIndicator::NotInChars;        // almost always true (and often irrelevant)
1332
1.89k
    if mode == UEB_Mode::Grade2 || 
duration == UEB_Duration::Symbol901
{
1333
991
        g1_word_indicator = use_g1_word_mode(&chars);
1334
991
        if g1_word_indicator == Grade1WordIndicator::InWord {
1335
1
            mode = UEB_Mode::Grade1;
1336
1
            if duration == UEB_Duration::Symbol {
1337
1
                duration = UEB_Duration::Word;     // if Passage mode, leave as is
1338
1
                result.push('𝟙')
1339
0
            }
1340
990
        }
1341
901
    }
1342
1.89k
    let mut i = 0;
1343
37.0k
    while i < chars.len() {
1344
35.1k
        let ch = chars[i];
1345
35.1k
        match mode {
1346
            UEB_Mode::Numeric => {
1347
                // Numeric Mode: (from https://uebmath.aphtech.org/lesson1.0 and lesson4.0)
1348
                // Symbols that can appear within numeric mode include the ten digits, comma, period, simple fraction line,
1349
                // line continuation indicator, and numeric space digit symbols.
1350
                // A space or any other symbol not listed here terminates numeric mode.
1351
                // Numeric mode is also terminated by the "!" -- used after a script
1352
                //
1353
                // The numeric indicator also turns on grade 1 mode.
1354
                // When grade 1 mode is set by the numeric indicator,
1355
                //   grade 1 indicators are not used unless a single lower-case letter a-j immediately follows a digit.
1356
                // Grade 1 mode when set by the numeric indicator is terminated by a space, hyphen, dash, or a grade 1 indicator.
1357
3.31k
                i_g2_start = None;
1358
                // debug!("Numeric: ch={}, duration: {:?}", ch, duration);
1359
3.31k
                match ch {
1360
                    'L' => {
1361
                        // terminate numeric mode -- duration doesn't change
1362
                        // let the default case handle pushing on the chars for the letter
1363
1.42k
                        if is_letter_number(unhighlight(chars[i+1])) {
1364
1.37k
                            result.push('1');   // need to distinguish a-j from a digit
1365
1.37k
                        
}44
1366
1.42k
                        result.push(ch);
1367
1.42k
                        i += 1;
1368
1.42k
                        mode = UEB_Mode::Grade1;
1369
                        // duration remains Word
1370
                    },
1371
                    '1' | '𝟙' => {
1372
                        // numeric mode implies grade 1, so don't output indicator;
1373
107
                        i += 1;
1374
107
                        mode = UEB_Mode::Grade1;
1375
107
                        if start_duration == UEB_Duration::Passage {
1376
15
                            duration = UEB_Duration::Passage;      // otherwise it remains at Word
1377
92
                        }
1378
                    },
1379
                    '#' => {
1380
                        // terminate numeric mode -- duration doesn't change
1381
738
                        i += 1;
1382
738
                        if i+1 < chars.len() && 
chars[i] == 'L'691
&&
is_letter_number22
(
unhighlight22
(
chars[i+1]22
)) {
1383
9
                            // special case where the script was numeric and a letter follows, so need to put out G1 indicator
1384
9
                            result.push('1');
1385
9
                            // the G1 case should work with 'L' now
1386
729
                        }
1387
738
                        mode = UEB_Mode::Grade1;
1388
                    },
1389
521
                    'N' => {
1390
521
                        // stay in the same mode (includes numeric "," and "." space) -- don't let default get these chars
1391
521
                        result.push(chars[i+1]);
1392
521
                        i += 2;
1393
521
                    },
1394
                    _ => {
1395
                        // moving out of numeric mode
1396
524
                        result.push(ch);
1397
524
                        i += 1;
1398
524
                        if "W𝐖-—―".contains(ch) {
1399
94
                            mode = start_mode;
1400
94
                            if mode == UEB_Mode::Grade2 {
1401
47
                                start_g2_letter = None;        // will be set to real letter
1402
47
                            }
1403
94
                            if start_duration != UEB_Duration::Passage {
1404
47
                                duration = UEB_Duration::Symbol;
1405
47
                            }
1406
                        } else {
1407
430
                            mode = UEB_Mode::Grade1
1408
                        }
1409
                    },
1410
                }
1411
            },
1412
            UEB_Mode::Grade1 => {
1413
                // Grade 1 Mode:
1414
                // The numeric indicator also sets grade 1 mode.
1415
                // Grade 1 mode, when initiated by the numeric indicator, is terminated by a space, hyphen, dash or grade 1 terminator.
1416
                // Grade 1 mode is also set by grade 1 indicators.
1417
25.0k
                i_g2_start = None;
1418
                // debug!("Grade 1: ch={}, duration: {:?}", ch, duration);
1419
25.0k
                match ch {
1420
3.34k
                    'L' => {
1421
3.34k
                        // note: be aware of '#' case for Numeric because '1' might already be generated
1422
3.34k
                        // let prev_ch = if i > 1 {chars[i-1]} else {'1'};   // '1' -- anything beside ',' or '.'
1423
3.34k
                        // if duration == UEB_Duration::Symbol || 
1424
3.34k
                        //     ( ",. ".contains(prev_ch) && LETTER_NUMBERS.contains(&unhighlight(chars[i+1])) ) {
1425
3.34k
                        //     result.push('1');        // need to retain grade 1 indicator (RUEB 6.5.2)
1426
3.34k
                        // }
1427
3.34k
                        // let the default case handle pushing on the chars for the letter
1428
3.34k
                        result.push(ch);
1429
3.34k
                        i += 1;
1430
3.34k
                    },
1431
                    '1' | '𝟙' => {
1432
2.35k
                        assert!(ch == '1' || 
duration != UEB_Duration::Symbol2
); // if '𝟙', should be Word or Passage duration
1433
                        // nothing to do -- let the default case handle the following chars
1434
2.35k
                        i += 1;
1435
                    },
1436
2.36k
                    'N' => {
1437
2.36k
                        result.push(ch);
1438
2.36k
                        result.push(chars[i+1]);
1439
2.36k
                        i += 2;
1440
2.36k
                        mode = UEB_Mode::Numeric;
1441
2.36k
                        duration = UEB_Duration::Word;
1442
2.36k
                    },
1443
                    'W' | '𝐖' => {
1444
                        // this terminates a word mode if there was one
1445
711
                        result.push(ch);
1446
711
                        i += 1;
1447
711
                        if start_duration != UEB_Duration::Passage {
1448
224
                            duration = UEB_Duration::Symbol;
1449
224
                            mode = UEB_Mode::Grade2;
1450
487
                        }
1451
                    },
1452
                    _ => {
1453
16.3k
                        result.push(ch);
1454
16.3k
                        i += 1;
1455
16.3k
                        if duration == UEB_Duration::Symbol && 
!is_letter_prefix(ch)1.34k
{
1456
1.34k
                            mode = start_mode;
1457
14.9k
                        }
1458
                    }
1459
                }
1460
25.0k
                if mode == UEB_Mode::Grade2 {
1461
1.56k
                    start_g2_letter = None;        // will be set to real letter
1462
23.5k
                }
1463
1464
            },
1465
            UEB_Mode::Grade2 => {
1466
                // note: if we ended up using a '1', it only extends to the next char, which is also dealt with, so mode doesn't change
1467
6.79k
               if i_g2_start.is_none() {
1468
2.58k
                   i_g2_start = Some(i);
1469
2.58k
                   cap_word_mode = false;
1470
4.21k
               }
1471
                // debug!("Grade 2: ch={}, duration: {:?}", ch, duration);
1472
6.79k
                match ch {
1473
                    'L' => {
1474
1.44k
                        if start_g2_letter.is_none() {
1475
1.34k
                            start_g2_letter = Some(i);
1476
1.34k
                        
}97
1477
1.44k
                        let (is_alone, right_matched_chars, n_letters) = stands_alone(&chars, i);
1478
                        // GTM 1.2.1 says we only need to use G1 for single letters or sequences that are a shortform (e.g, "ab")
1479
1.44k
                        if is_alone && (
n_letters == 1400
||
is_short_form28
(
&right_matched_chars[..2*n_letters]28
)) {
1480
373
                            // debug!("  is_alone -- pushing '1'");
1481
373
                            result.push('1');
1482
373
                            mode = UEB_Mode::Grade1;
1483
1.07k
                        }
1484
                        // debug!("  pushing {:?}", right_matched_chars);
1485
3.13k
                        
right_matched_chars1.44k
.
iter1.44k
().
for_each1.44k
(|&ch| result.push(ch));
1486
1.44k
                        i += right_matched_chars.len();
1487
                    },
1488
                    'C' => {
1489
                        // Want 'C' before 'L'; Could be CC for word cap -- if so, eat it and move on
1490
                        // Note: guaranteed that there is a char after the 'C', so chars[i+1] is safe
1491
99
                        if chars[i+1] == 'C' {
1492
14
                            cap_word_mode = true;
1493
14
                            i += 1;
1494
14
                        } else {
1495
85
                            let is_greek = chars[i+1] == 'G';
1496
85
                            let (is_alone, right_matched_chars, n_letters) = stands_alone(&chars, if is_greek {
i+22
} else {
i+183
});
1497
                            // GTM 1.2.1 says we only need to use G1 for single letters or sequences that are a shortform (e.g, "ab")
1498
85
                            if is_alone && (
n_letters == 122
||
is_short_form7
(
&right_matched_chars[..2*n_letters]7
)) {
1499
16
                                // debug!("  is_alone -- pushing '1'");
1500
16
                                result.push('1');
1501
16
                                mode = UEB_Mode::Grade1;
1502
69
                            }
1503
85
                            if cap_word_mode {
1504
14
                                result.push('C');   // first 'C' if cap word
1505
71
                            }
1506
85
                            result.push('C');
1507
85
                            if is_greek {
1508
2
                                result.push('G');
1509
2
                                i += 1;
1510
83
                            }
1511
85
                            start_g2_letter = Some(i);
1512
                            // debug!("  pushing 'C' + {:?}", right_matched_chars);
1513
256
                            
right_matched_chars85
.
iter85
().
for_each85
(|&ch| result.push(ch));
1514
85
                            i += 1 + right_matched_chars.len();
1515
                        }
1516
                    },
1517
1.34k
                    '1' => {
1518
1.34k
                        result.push(ch);
1519
1.34k
                        i += 1;
1520
1.34k
                        mode = UEB_Mode::Grade1;
1521
1.34k
                        duration = UEB_Duration::Symbol;
1522
1.34k
                    },
1523
                    '𝟙' => {
1524
                        // '𝟙' should have forced G1 Word mode
1525
0
                        error!("Internal error: '𝟙' found in G2 mode: index={i} in '{raw_braille}'");
1526
0
                        i += 1;
1527
                    }
1528
582
                    'N' => {
1529
582
                        result.push(ch);
1530
582
                        result.push(chars[i+1]);
1531
582
                        i += 2;
1532
582
                        mode = UEB_Mode::Numeric;
1533
582
                        duration = UEB_Duration::Word;
1534
582
                    },
1535
                    _ => {
1536
3.32k
                        if let Some(
start505
) = start_g2_letter {
1537
505
                            if !cap_word_mode {
1538
504
                                result = handle_contractions(&chars[start..i], result);
1539
504
                            
}1
1540
505
                            cap_word_mode = false;
1541
505
                            start_g2_letter = None;     // not start of char sequence
1542
2.81k
                        }
1543
3.32k
                        result.push(ch);
1544
3.32k
                        i += 1;
1545
3.32k
                        if !is_left_intervening_char(ch) {
1546
3.29k
                            cap_word_mode = false;
1547
3.29k
                            i_g2_start = Some(i);
1548
3.29k
                        
}29
1549
1550
                    }
1551
                }
1552
6.79k
                if mode != UEB_Mode::Grade2 && 
!cap_word_mode2.31k
&&
1553
2.30k
                   let Some(
start883
) = start_g2_letter {
1554
883
                        result = handle_contractions(&chars[start..i], result);
1555
883
                        start_g2_letter = None;     // not start of char sequence
1556
5.91k
                    }
1557
            },
1558
        }
1559
1560
35.1k
        if (ch == 'W' || 
ch == '𝐖'34.0k
) &&
g1_word_indicator != Grade1WordIndicator::NotInChars1.13k
&&
1561
602
           (mode == UEB_Mode::Grade2 || 
duration == UEB_Duration::Symbol0
) {
1562
602
            g1_word_indicator = use_g1_word_mode(&chars[i..]);
1563
602
            if g1_word_indicator == Grade1WordIndicator::InWord {
1564
1
                mode = UEB_Mode::Grade1;
1565
1
                if duration == UEB_Duration::Symbol {
1566
1
                    duration = UEB_Duration::Word;     // if Passage mode, leave as is
1567
1
                    result.push('𝟙')
1568
0
                }
1569
601
            }
1570
34.5k
        }
1571
    }
1572
1.89k
    if mode == UEB_Mode::Grade2 &&
1573
289
       let Some(
start31
) = start_g2_letter {
1574
31
            result = handle_contractions(&chars[start..i], result);
1575
1.86k
        }
1576
1577
1.89k
    return result;
1578
1579
1580
1.59k
    fn use_g1_word_mode(chars: &[char]) -> Grade1WordIndicator {
1581
        // debug!("use_g1_word_mode: chars='{:?}'", chars);
1582
19.5k
        for &ch in 
chars1.59k
{
1583
19.5k
            if ch == 'W' || 
ch == '𝐖'18.9k
{
1584
601
                return Grade1WordIndicator::NotInWord;       // reached a word boundary
1585
18.9k
            }
1586
18.9k
            if ch == '𝟙' {
1587
2
                return Grade1WordIndicator::InWord;        // need word mode in this "word"
1588
18.9k
            }
1589
        }
1590
990
        return Grade1WordIndicator::NotInChars;               // 
1591
1.59k
    }
1592
1.89k
}
1593
1594
/// Returns a tuple:
1595
///   true if the ith char "stands alone" (UEB 2.6)
1596
///   the chars on the right that are part of the standing alone sequence
1597
///   the number of letters in that sequence
1598
/// This basically means a letter sequence surrounded by white space with some potentially intervening chars
1599
/// The intervening chars can be typeform/cap indicators, along with various forms of punctuation
1600
/// The ith char should be an "L"
1601
/// This assumes that there is whitespace before and after the character string
1602
1.52k
fn stands_alone(chars: &[char], i: usize) -> (bool, &[char], usize) {
1603
    // scan backward and check the conditions for "standing-alone"
1604
    // we scan forward and check the conditions for "standing-alone"
1605
1.52k
    assert_eq!(chars[i], 'L', "'stands_alone' starts with non 'L'");
1606
    // debug!("stands_alone: i={}, chars: {:?}", i, chars);
1607
1.52k
    if !left_side_stands_alone(&chars[0..i]) {
1608
977
        return (false, &chars[i..i+2], 0);
1609
552
    }
1610
1611
552
    let (mut is_alone, n_letters, n_right_matched) = right_side_stands_alone(&chars[i+2..]);
1612
    // debug!("left is alone, right is alone: {}, : n_letters={}, n_right_matched={}", is_alone, n_letters, n_right_matched);
1613
1614
552
    if is_alone && 
n_letters == 1425
{
1615
390
        let ch = chars[i+1];
1616
390
        if ch=='⠁' || 
ch=='⠊'389
||
ch=='⠕'387
{ // a, i, o
1617
3
            is_alone = false;
1618
387
        }
1619
162
    }
1620
552
    return (is_alone, &chars[i..i+2+n_right_matched], n_letters);
1621
1622
    /// chars before 'L'
1623
1.52k
    fn left_side_stands_alone(chars: &[char]) -> bool {
1624
        // scan backwards to skip letters and intervening chars
1625
        // once we hit an intervening char, only intervening chars are allowed if standing alone
1626
1.52k
        let mut intervening_chars_mode = false; // true when we are on the final stretch
1627
1.52k
        let mut i = chars.len();
1628
1.86k
        while i > 0 {
1629
1.38k
            i -= 1;
1630
1.38k
            let ch = chars[i];
1631
1.38k
            let prev_ch = if i > 0 {
chars[i-1]1.34k
} else {
' '45
}; // ' ' is a char not in input
1632
            // debug!("  left alone: prev/ch {}/{}", prev_ch, ch);
1633
1.38k
            if (!intervening_chars_mode && 
prev_ch == 'L'1.10k
) ||
1634
1.30k
               (prev_ch == 'o' || 
prev_ch == 'b'1.21k
) {
1635
174
                intervening_chars_mode = true;
1636
174
                i -= 1;       // ignore 'Lx' and also ignore 'ox'
1637
1.21k
            } else if is_left_intervening_char(ch) {
1638
161
                intervening_chars_mode = true;
1639
161
            } else {
1640
1.05k
                return "W𝐖-—―".contains(ch);
1641
            }
1642
        }
1643
1644
475
        return true;
1645
1.52k
    }
1646
1647
    // chars after character we are testing
1648
552
    fn right_side_stands_alone(chars: &[char]) -> (bool, usize, usize) {
1649
        // see RUEB 2.6.3
1650
355
        fn is_right_intervening_char(ch: char) -> bool {
1651
355
            
matches!342
(ch, 'B' | 'I' | '𝔹' | 'S' | 'T' | 'D' | 'C' | '𝐶' | 's' | 'w' | 'e')
1652
355
        }
1653
        // scan forward to skip letters and intervening chars
1654
        // once we hit an intervening char, only intervening chars are allowed if standing alone ('c' and 'b' are part of them)
1655
552
        let mut intervening_chars_mode = false; // true when we are on the final stretch
1656
552
        let mut i = 0;
1657
552
        let mut n_letters = 1;      // we have skipped the first letter
1658
725
        while i < chars.len() {
1659
515
            let ch = chars[i];
1660
            // debug!("  right alone: ch/next {}/{}", ch, if i+1<chars.len() {chars[i+1]} else {' '});
1661
515
            if !intervening_chars_mode && 
ch == 'L'502
{
1662
140
                n_letters += 1;
1663
140
                i += 1;       // ignore 'Lx' and also ignore 'ox'
1664
375
            } else if ch == 'c' || 
ch == 'b'355
{
1665
20
                i += 1;       // ignore 'Lx' and also ignore 'ox'
1666
355
            } else if is_right_intervening_char(ch) {  
1667
13
                intervening_chars_mode = true;
1668
13
            } else {
1669
342
                return if "W𝐖-—―".contains(ch) {
(true, n_letters, i)215
} else {
(false, n_letters, i)127
};
1670
            }
1671
173
            i += 1;
1672
        }
1673
1674
210
        return (true, n_letters, chars.len());
1675
552
    }
1676
1.52k
}
1677
1678
1679
/// Return a modified result if chars can be contracted.
1680
/// Otherwise, the original string is returned
1681
1.41k
fn handle_contractions(chars: &[char], mut result: String) -> String {
1682
    struct Replacement {
1683
        pattern: String,
1684
        replacement: &'static str
1685
    }
1686
1687
    const ASCII_TO_UNICODE: &[char] = &[
1688
        '⠀', '⠮', '⠐', '⠼', '⠫', '⠩', '⠯', '⠄', '⠷', '⠾', '⠡', '⠬', '⠠', '⠤', '⠨', '⠌',
1689
        '⠴', '⠂', '⠆', '⠒', '⠲', '⠢', '⠖', '⠶', '⠦', '⠔', '⠱', '⠰', '⠣', '⠿', '⠜', '⠹',
1690
        '⠈', '⠁', '⠃', '⠉', '⠙', '⠑', '⠋', '⠛', '⠓', '⠊', '⠚', '⠅', '⠇', '⠍', '⠝', '⠕',
1691
        '⠏', '⠟', '⠗', '⠎', '⠞', '⠥', '⠧', '⠺', '⠭', '⠽', '⠵', '⠪', '⠳', '⠻', '⠘', '⠸',
1692
    ];
1693
1694
36
    fn to_unicode_braille(ascii: &str) -> String {
1695
36
        let mut unicode = String::with_capacity(4*ascii.len());   // 'L' + 3 bytes for braille char
1696
82
        for ch in 
ascii36
.
as_bytes36
() {
1697
82
            unicode.push('L');
1698
82
            unicode.push(ASCII_TO_UNICODE[(ch.to_ascii_uppercase() - 32) as usize])
1699
        }
1700
36
        return unicode;
1701
36
    }
1702
1703
    // It would be much better from an extensibility point of view to read the table in from a file
1704
2
    static CONTRACTIONS: LazyLock<Vec<Replacement>> = LazyLock::new(|| { vec![
1705
            // 10.3: Strong contractions
1706
2
            Replacement{ pattern: to_unicode_braille("and"), replacement: "L⠯"},
1707
2
            Replacement{ pattern: to_unicode_braille("for"), replacement: "L⠿"},
1708
2
            Replacement{ pattern: to_unicode_braille("of"), replacement: "L⠷"},
1709
2
            Replacement{ pattern: to_unicode_braille("the"), replacement: "L⠮"},
1710
2
            Replacement{ pattern: to_unicode_braille("with"), replacement: "L⠾"},
1711
            
1712
            // 10.8: final-letter group signs (this need to precede 'en' and any other shorter contraction)
1713
2
            Replacement{ pattern: "(?P<s>L.)L⠍L⠑L⠝L⠞".to_string(), replacement: "${s}L⠰L⠞" }, // ment
1714
2
            Replacement{ pattern: "(?P<s>L.)L⠞L⠊L⠕L⠝".to_string(), replacement: "${s}L⠰L⠝" } ,// tion
1715
1716
            // 10.4: Strong group signs
1717
2
            Replacement{ pattern: to_unicode_braille("ch"), replacement: "L⠡"},
1718
2
            Replacement{ pattern: to_unicode_braille("gh"), replacement: "L⠣"},
1719
2
            Replacement{ pattern: to_unicode_braille("sh"), replacement: "L⠩"},
1720
2
            Replacement{ pattern: to_unicode_braille("th"), replacement: "L⠹"},
1721
2
            Replacement{ pattern: to_unicode_braille("wh"), replacement: "L⠱"},
1722
2
            Replacement{ pattern: to_unicode_braille("ed"), replacement: "L⠫"},
1723
2
            Replacement{ pattern: to_unicode_braille("er"), replacement: "L⠻"},
1724
2
            Replacement{ pattern: to_unicode_braille("ou"), replacement: "L⠳"},
1725
2
            Replacement{ pattern: to_unicode_braille("ow"), replacement: "L⠪"},
1726
2
            Replacement{ pattern: to_unicode_braille("st"), replacement: "L⠌"},
1727
2
            Replacement{ pattern: "(?P<s>L.)L⠊L⠝L⠛".to_string(), replacement: "${s}L⠬" },  // 'ing', not at start
1728
2
            Replacement{ pattern: to_unicode_braille("ar"), replacement: "L⠜"},
1729
1730
            // 10.6.5: Lower group signs preceded and followed by letters
1731
            // FIX: don't match if after/before a cap letter -- can't use negative pattern (?!...) in regex package
1732
            // Note: removed cc because "arccos" shouldn't be contracted (10.11.1), but there is no way to know about compound words
1733
            // Add it back after implementing a lookup dictionary of exceptions
1734
2
            Replacement{ pattern: "(?P<s>L.)L⠑L⠁(?P<e>L.)".to_string(), replacement: "${s}L⠂${e}" },  // ea
1735
2
            Replacement{ pattern: "(?P<s>L.)L⠃L⠃(?P<e>L.)".to_string(), replacement: "${s}L⠆${e}" },  // bb
1736
            // Replacement{ pattern: "(?P<s>L.)L⠉L⠉(?P<e>L.)".to_string(), replacement: "${s}L⠒${e}" },  // cc
1737
2
            Replacement{ pattern: "(?P<s>L.)L⠋L⠋(?P<e>L.)".to_string(), replacement: "${s}L⠖${e}" },  // ff
1738
2
            Replacement{ pattern: "(?P<s>L.)L⠛L⠛(?P<e>L.)".to_string(), replacement: "${s}L⠶${e}" },  // gg
1739
1740
            // 10.6.8: Lower group signs ("in" also 10.5.4 lower word signs)
1741
            // FIX: these need restrictions about only applying when upper dots are present
1742
2
            Replacement{ pattern: to_unicode_braille("en"), replacement: "⠢"},
1743
2
            Replacement{ pattern: to_unicode_braille("in"), replacement: "⠔"},
1744
           
1745
        ]
1746
2
    });
1747
1748
2
    static CONTRACTION_PATTERNS: LazyLock<RegexSet> = LazyLock::new(|| init_patterns(&CONTRACTIONS));
1749
1
    static CONTRACTION_REGEX: LazyLock<Vec<Regex>> = LazyLock::new(|| init_regex(&CONTRACTIONS));
1750
1751
1.41k
    let mut chars_as_str = chars.iter().collect::<String>();
1752
    // debug!("  handle_contractions: examine '{}'", &chars_as_str);
1753
1.41k
    let matches = CONTRACTION_PATTERNS.matches(&chars_as_str);
1754
1.41k
    for 
i35
in matches.iter() {
1755
35
        let element = &CONTRACTIONS[i];
1756
35
        // debug!("  replacing '{}' with '{}' in '{}'", element.pattern, element.replacement, &chars_as_str);
1757
35
        result.truncate(result.len() - chars_as_str.len());
1758
35
        chars_as_str = CONTRACTION_REGEX[i].replace_all(&chars_as_str, element.replacement).to_string();
1759
35
        result.push_str(&chars_as_str);
1760
35
        // debug!("  result after replace '{}'", result);
1761
35
    }
1762
1.41k
    return result;
1763
1764
1765
1766
2
    fn init_patterns(contractions: &[Replacement]) -> RegexSet {
1767
2
        let mut vec: Vec<&str> = Vec::with_capacity(contractions.len());
1768
50
        for contraction in 
contractions2
{
1769
50
            vec.push(&contraction.pattern);
1770
50
        }
1771
2
        return RegexSet::new(&vec).unwrap();
1772
2
    }
1773
1774
1
    fn init_regex(contractions: &[Replacement]) -> Vec<Regex> {
1775
1
        let mut vec = Vec::with_capacity(contractions.len());
1776
25
        for contraction in 
contractions1
{
1777
25
            vec.push(Regex::new(&contraction.pattern).unwrap());
1778
25
        }
1779
1
        return vec;
1780
1
    }
1781
1.41k
}
1782
1783
1784
1785
1786
static VIETNAM_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
1787
    "S" => "XXX",    // sans-serif -- from prefs
1788
    "B" => "⠘",     // bold
1789
    "𝔹" => "XXX",     // blackboard -- from prefs
1790
    "T" => "⠈",     // script
1791
    "I" => "⠨",     // italic
1792
    "R" => "",      // roman
1793
    // "E" => "⠰",     // English
1794
    "1" => "⠠",     // Grade 1 symbol
1795
    "L" => "",     // Letter left in to assist in locating letters
1796
    "D" => "XXX",     // German (Deutsche) -- from prefs
1797
    "G" => "⠰",     // Greek
1798
    "V" => "XXX",    // Greek Variants
1799
    // "H" => "⠠⠠",    // Hebrew
1800
    // "U" => "⠈⠈",    // Russian
1801
    "C" => "⠨",      // capital
1802
    "𝑐" => "",       // second or latter braille cell of a capital letter
1803
    "𝐶" => "⠨",      // capital that never should get word indicator (from chemical element)
1804
    "N" => "⠼",     // number indicator
1805
    "t" => "⠱",     // shape terminator
1806
    "W" => "⠀",     // whitespace"
1807
    "𝐖"=> "⠀",     // whitespace
1808
    "s" => "⠆",     // typeface single char indicator
1809
    "w" => "",     // typeface word indicator
1810
    "e" => "",     // typeface & capital terminator 
1811
    "o" => "",       // flag that what follows is an open indicator (used for standing alone rule)
1812
    "c" => "",     // flag that what follows is an close indicator (used for standing alone rule)
1813
    "b" => "",       // flag that what follows is an open or close indicator (used for standing alone rule)
1814
    "," => "⠂",     // comma
1815
    "." => "⠲",     // period
1816
    "-" => "-",     // hyphen
1817
    "—" => "⠠⠤",   // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
1818
    "―" => "⠐⠠⠤",  // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
1819
    "#" => "",      // signals end of script
1820
    "!" => "",      // Hack used to prevent some regular expression matches
1821
};
1822
1823
112
fn vietnam_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
1824
    // Deal with Vietnamese "rhymes" -- moving accents around
1825
    // See "Vietnamese Uncontracted Braille Update in MathCAT" or maybe https://icanreadvietnamese.com/blog/14-rule-of-tone-mark-placement
1826
    // Note: I don't know how to write (for example) I_E_RULE so that it excludes "qu" and "gi", so I use two rules
1827
    // The first rule rewrites the patterns with "qu" and "gi" to add "!" to prevent a match of the second rule -- "!" is dropped later
1828
1
    static QU_GI_RULE_EXCEPTION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(L⠟L⠥|L⠛L⠊)").unwrap());
1829
1
    static IUOY_E_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L(⠊|⠥|⠕|⠽)(L[⠔⠰⠢⠤⠠])L(⠑|⠣)").unwrap()); // ie, ue, oe, and ye rule
1830
1
    static UO_A_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L(⠥|⠕)(L[⠔⠰⠢⠤⠠])L(⠁|⠡|⠜)").unwrap()); // ua, oa rule
1831
1
    static UU_O_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L(⠥|⠳)(L[⠔⠰⠢⠤⠠])L(⠪|⠹)").unwrap()); // uo, ưo rule
1832
1
    static UYE_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽L⠣").unwrap()); // uo, ưo rule
1833
1
    static UY_RULE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"L⠥L([⠔⠰⠢⠤⠠])L⠽").unwrap()); // uo, ưo rule
1834
1
    static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([1𝟙SB𝔹TIREDGVHP𝐶𝑐CLMNW𝐖swe,.-—―#ocb!])").unwrap());
1835
    // debug!("vietnam_cleanup: start={}", raw_braille);
1836
112
    let result = typeface_to_word_mode(&raw_braille);
1837
112
    let result = capitals_to_word_mode(&result);
1838
1839
112
    let result = result.replace("tW", "W");
1840
112
    let result = result.replace("CG", "⠸");    // capital Greek letters are problematic in Vietnam braille
1841
112
    let result = result.replace("CC", "⠸");    // capital word more is the same as capital Greek letters
1842
    // debug!("   after typeface/caps={}", &result);
1843
1844
    // deal with "rhymes"
1845
112
    let result = QU_GI_RULE_EXCEPTION.replace_all(&result, "${1}!");
1846
    // debug!("          after except={}", &result);
1847
112
    let result = IUOY_E_RULE.replace_all(&result, "${2}L${1}L${3}");
1848
    // debug!("          after IUOY_E={}", &result);
1849
112
    let result = UO_A_RULE.replace_all(&result, "${2}L${1}L${3}");
1850
    // debug!("          after   UO_A={}", &result);
1851
112
    let result = UU_O_RULE.replace_all(&result, "${2}L${1}L${3}");
1852
    // debug!("          after   UO_O={}", &result);
1853
112
    let result = UYE_RULE.replace_all(&result, "${1}L⠥L⠽L⠣");  // longer match first
1854
    // debug!("          after    UYE={}", &result);
1855
112
    let result = UY_RULE.replace_all(&result, "${1}L⠥L⠽");
1856
    // debug!("          after     UY={}", &result);
1857
1858
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
1859
112
    let double_struck = pref_manager.pref_to_string("Vietnam_DoubleStruck");
1860
112
    let sans_serif = pref_manager.pref_to_string("Vietnam_SansSerif");
1861
112
    let fraktur = pref_manager.pref_to_string("Vietnam_Fraktur");
1862
112
    let greek_variant = pref_manager.pref_to_string("Vietnam_GreekVariant");
1863
1864
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
1865
112
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
1866
1867
1868
1.23k
    let 
result112
=
REPLACE_INDICATORS112
.
replace_all112
(
&result112
, |cap: &Captures| {
1869
1.23k
        let matched_char = &cap[0];
1870
1.23k
        match matched_char {
1871
1.23k
            "𝔹" => 
&double_struck0
,
1872
1.23k
            "S" => 
&sans_serif0
,
1873
1.23k
            "D" => 
&fraktur0
,
1874
1.23k
            "V" => 
&greek_variant0
,
1875
1.23k
            _ => match VIETNAM_INDICATOR_REPLACEMENTS.get(matched_char) {
1876
0
                None => {error!("REPLACE_INDICATORS and VIETNAM_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
1877
1.23k
                Some(&ch) => ch,
1878
            },
1879
        }
1880
1.23k
    });
1881
1882
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
1883
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
1884
112
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
1885
   
1886
112
    return result.to_string();
1887
112
}
1888
1889
1890
/// Braille output for each single-character indicator that `cmu_cleanup()` replaces
/// when producing CMU braille.
///
/// Several indicators present in the tables of other braille codes are deliberately absent here;
/// they are listed in the comments so the difference is visible.
static CMU_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
    // ---- typeface indicators ----
    "B" => "⠔",     // bold
    "I" => "⠔",     // italic -- same as bold
    "𝔹" => "⠬",     // blackboard
    // not mapped:  "S" => "XXX" (sans-serif, from prefs),  "T" => "⠈" (script),  "R" => "" (roman)

    // ---- language indicators ----
    "D" => "⠠",     // German (Gothic)
    "G" => "⠈",     // Greek
    "V" => "⠈⠬",    // Greek variants
    // not mapped:  "E" => "⠰" (English),  "H" => "⠠⠠" (Hebrew),  "U" => "⠈⠈" (Russian)

    // ---- capitals, numbers, letters, grade 1 ----
    "C" => "⠨",     // capital
    "𝐶" => "⠨",     // capital that never should get word indicator (from chemical element)
    "N" => "⠼",     // number indicator
    "𝑁" => "",      // continue number
    "1" => "⠐",     // Grade 1 symbol -- used here for a-j after number
    "L" => "",      // letter marker left in to assist in locating letters

    // ---- whitespace ----
    "W" => "⠀",     // whitespace
    "𝐖" => "⠀",     // whitespace
    // "𝘄" (conditional whitespace) follows a special rule that cmu_cleanup() handles separately
    // not mapped:  "t" => "⠱" (shape terminator)

    // ---- typeface scope markers ----
    "s" => "",      // typeface single char indicator
    // not mapped:  "w" => "⠂" (typeface word indicator),  "e" => "⠄" (typeface & capital terminator)
    // not mapped:  "o", "c", "b" (open / close / open-or-close flags of the standing alone rule)

    // ---- punctuation ----
    "," => "⠂",     // comma
    "." => "⠄",     // period
    "-" => "⠤",     // hyphen
    "—" => "⠤⠤",    // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
    // not mapped:  "―" => "⠐⠤⠤" (long dash, 2015)

    // ---- numeric mode ----
    "#" => "⠼",     // signals end/restart of numeric mode (mixed fractions)
};
1926
1927
1928
372
fn cmu_cleanup(_pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
1929
2
    static ADD_WHITE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"𝘄(.)|𝘄$").unwrap());
1930
1931
    // debug!("cmu_cleanup: start={}", raw_braille);
1932
    // let result = typeface_to_word_mode(&raw_braille);
1933
1934
    // let result = result.replace("tW", "W");
1935
372
    let result = raw_braille.replace("CG", "⠘")
1936
372
                                .replace("𝔹C", "⠩")
1937
372
                                .replace("DC", "⠰");
1938
    // let result = result.replace("CC", "⠸");
1939
1940
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
1941
    // let double_struck = pref_manager.pref_to_string("CMU_DoubleStruck");
1942
    // let sans_serif = pref_manager.pref_to_string("CMU_SansSerif");
1943
    // let fraktur = pref_manager.pref_to_string("CMU_Fraktur");
1944
1945
    // debug!("Before remove mode changes: '{}'", &result);
1946
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
1947
372
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
1948
372
    let result = result.replace("𝑁N", "");
1949
    // debug!(" After remove mode changes: '{}'", &result);
1950
1951
2.58k
    let 
result372
=
REPLACE_INDICATORS372
.
replace_all372
(
&result372
, |cap: &Captures| {
1952
2.58k
        match CMU_INDICATOR_REPLACEMENTS.get(&cap[0]) {
1953
0
            None => {error!("REPLACE_INDICATORS and CMU_INDICATOR_REPLACEMENTS are not in sync"); ""},
1954
2.58k
            Some(&ch) => ch,
1955
        }
1956
2.58k
    });
1957
372
    let result = ADD_WHITE_SPACE.replace_all(&result, |cap: &Captures| 
{12
1958
12
        if cap.get(1).is_none() {
1959
2
            return "⠀".to_string();
1960
        } else {
1961
            // debug!("ADD_WHITE_SPACE match='{}', has left dots = {}", &cap[1], has_left_dots(cap[1].chars().next().unwrap()));
1962
10
            let mut next_chars = cap[1].chars();
1963
10
            let next_char = next_chars.next().unwrap();
1964
10
            assert!(next_chars.next().is_none());
1965
10
            return (if has_left_dots(next_char) {
"⠀"9
} else {
""1
}).to_string() + &cap[1];
1966
        }
1967
12
    });
1968
    
1969
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
1970
372
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
1971
372
    let result = result.trim_start_matches('⠀');            // don't trip end (e.g., see once::vector_11_2_5)
1972
372
    return result.to_string();
1973
1974
10
    fn has_left_dots(ch: char) -> bool {
1975
        // Unicode braille is set up so dot 1 is 2^0, dot 2 is 2^1, etc
1976
10
        return ( (ch as u32 - 0x2800) >> 4 ) > 0;
1977
10
    }
1978
372
}
1979
1980
1981
1982
/// Braille output for each single-character indicator that `swedish_cleanup()` replaces
/// when producing Swedish braille.
///
/// Values of "XXX" are placeholders: `swedish_cleanup()` handles "S", "𝔹", "D" and "V" itself
/// (using preference values) before it consults this table.
// FIX: this needs cleaning up -- not all of these are used
static SWEDISH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
    // ---- typeface indicators ----
    "B" => "⠨",     // bold
    "I" => "⠨",     // italic
    "T" => "⠈",     // script
    "R" => "",      // roman
    "S" => "XXX",   // sans-serif -- from prefs
    "𝔹" => "XXX",   // blackboard -- from prefs

    // ---- language indicators ----
    "G" => "⠰",     // Greek
    "D" => "XXX",   // German (Deutsche) -- from prefs
    "V" => "XXX",   // Greek variants -- from prefs
    // not mapped for this code:  "H" => "⠠⠠" (Hebrew),  "U" => "⠈⠈" (Russian)

    // ---- capitals, numbers, letters, grade 1 ----
    "C" => "⠠",     // capital
    "𝑐" => "",      // second or later braille cell of a capital letter
    "𝐶" => "⠠",     // capital that never should get word indicator (from chemical element)
    "N" => "⠼",     // number indicator
    "1" => "⠱",     // Grade 1 symbol (used for number followed by a letter)
    "L" => "",      // letter marker left in to assist in locating letters

    // ---- whitespace, shapes, empty bases ----
    "W" => "⠀",     // whitespace
    "𝐖" => "⠀",     // whitespace
    "w" => "⠀",     // whitespace after function name
    "t" => "⠱",     // shape terminator
    "E" => "⠱",     // empty base -- see index of radical

    // ---- typeface scope markers ----
    "s" => "",      // typeface single char indicator
    "e" => "",      // typeface & capital terminator

    // ---- flags used by the standing alone rule ----
    "o" => "",      // what follows is an open indicator
    "c" => "",      // what follows is a close indicator
    "b" => "",      // what follows is an open or close indicator

    // ---- punctuation ----
    "," => "⠂",     // comma
    "." => "⠲",     // period
    "-" => "-",     // hyphen
    "—" => "⠠⠤",    // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
    "―" => "⠐⠠⠤",   // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]

    // ---- internal markers that produce no output ----
    "#" => "",      // signals end of script
};
2019
2020
2021
/// Braille output for each single-character indicator that `finnish_cleanup()` replaces
/// when producing Finnish braille.
///
/// Values of "XXX" are placeholders: `finnish_cleanup()` handles "S", "𝔹", "D" and "V" itself
/// (using preference values) before it consults this table.
// FIX: this needs cleaning up -- not all of these are used
static FINNISH_INDICATOR_REPLACEMENTS: phf::Map<&str, &str> = phf_map! {
    // ---- typeface indicators ----
    "B" => "⠨",     // bold
    "I" => "⠨",     // italic
    "T" => "⠈",     // script
    "R" => "",      // roman
    "S" => "XXX",   // sans-serif -- from prefs
    "𝔹" => "XXX",   // blackboard -- from prefs

    // ---- language indicators ----
    "E" => "⠰",     // English
    "G" => "⠨",     // Greek
    "D" => "XXX",   // German (Deutsche) -- from prefs
    "V" => "XXX",   // Greek variants -- from prefs
    // not mapped for this code:  "H" => "⠠⠠" (Hebrew),  "U" => "⠈⠈" (Russian)

    // ---- capitals, numbers, letters, grade 1 ----
    "C" => "⠠",     // capital
    "𝑐" => "",      // second or later braille cell of a capital letter
    "𝐶" => "⠠",     // capital that never should get whitespace in front (from chemical element)
    "N" => "⠼",     // number indicator
    "n" => "⠼",     // number indicator for drop numbers (special case with close parens)
    "1" => "⠀",     // Grade 1 symbol (used for number followed by a letter)
    "L" => "",      // letter marker left in to assist in locating letters

    // ---- whitespace and shapes ----
    "W" => "⠀",     // whitespace
    "𝐖" => "⠀",     // whitespace
    "t" => "⠱",     // shape terminator

    // ---- typeface scope markers ----
    "s" => "⠆",     // typeface single char indicator
    "w" => "",      // typeface word indicator
    "e" => "",      // typeface & capital terminator

    // ---- punctuation and grouping ----
    "," => "⠂",     // comma
    "." => "⠲",     // period
    "-" => "-",     // hyphen
    "—" => "⠠⠤",    // normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
    "―" => "⠐⠠⠤",   // long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
    "(" => "⠦",     // not really needed, but done for consistency with ")"
    ")" => "⠴",     // needed for rules with drop numbers to avoid mistaking for dropped 0

    // ---- scripts and zones ----
    "↑" => "⠬",     // superscript
    "↓" => "⠡",     // subscript
    "#" => "",      // signals end of script
    "Z" => "⠐",     // signals end of index of root, integrand/lim from function ("zone change")
};
2061
2062
0
fn finnish_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2063
0
    static REPLACE_INDICATORS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([SB𝔹TIREDGVHUP𝐏C𝐶LlMmb↑↓Nn𝑁WwZ,()])").unwrap());
2064
    // Numbers need to end with a space, but sometimes there is one there for other reasons
2065
0
    static DROP_NUMBER_SEPARATOR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(n.)\)").unwrap());
2066
0
    static NUMBER_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"((N.)+[^WN𝐶#↑↓Z])").unwrap());
2067
2068
    // debug!("finnish_cleanup: start={}", raw_braille);
2069
0
    let result = DROP_NUMBER_SEPARATOR.replace_all(&raw_braille, |cap: &Captures| {
2070
        // match includes the char after the number -- insert the whitespace before it
2071
        // debug!("DROP_NUMBER_SEPARATOR match='{}'", &cap[1]);
2072
0
        return cap[1].to_string() + "𝐶)";       // hack to use "𝐶" instead of dot 6 directly, but works for NUMBER_MATCH
2073
0
    });
2074
0
    let result = result.replace('n', "N");  // avoids having to modify remove_unneeded_mode_changes()
2075
0
    let result = NUMBER_MATCH.replace_all(&result, |cap: &Captures| {
2076
        // match includes the char after the number -- insert the whitespace before it
2077
        // debug!("NUMBER_MATCH match='{}'", &cap[1]);
2078
0
        let mut chars = cap[0].chars();
2079
0
        let last_char = chars.next_back().unwrap(); // unwrap safe since several chars were matched
2080
0
        return chars.as_str().to_string() + "W" + &last_char.to_string();
2081
0
    });
2082
2083
    // FIX: need to implement this -- this is just a copy of the Vietnam code
2084
0
    let result = result.replace("CG", "⠘")
2085
0
                                    .replace("𝔹C", "⠩")
2086
0
                                    .replace("DC", "⠰");
2087
2088
    // debug!("   after typeface/caps={}", &result);
2089
2090
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
2091
0
    let double_struck = pref_manager.pref_to_string("Vietnam_DoubleStruck");
2092
0
    let sans_serif = pref_manager.pref_to_string("Vietnam_SansSerif");
2093
0
    let fraktur = pref_manager.pref_to_string("Vietnam_Fraktur");
2094
0
    let greek_variant = pref_manager.pref_to_string("Vietnam_GreekVariant");
2095
2096
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
2097
0
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
2098
    // debug!("   remove_unneeded_mode_changes={}", &result);
2099
2100
2101
0
    let result = REPLACE_INDICATORS.replace_all(&result, |cap: &Captures| {
2102
0
        let matched_char = &cap[0];
2103
0
        match matched_char {
2104
0
            "𝔹" => &double_struck,
2105
0
            "S" => &sans_serif,
2106
0
            "D" => &fraktur,
2107
0
            "V" => &greek_variant,
2108
0
            _ => match FINNISH_INDICATOR_REPLACEMENTS.get(matched_char) {
2109
0
                None => {error!("REPLACE_INDICATORS and SWEDISH_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
2110
0
                Some(&ch) => ch,
2111
            },
2112
        }
2113
0
    });
2114
2115
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
2116
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
2117
0
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
2118
   
2119
0
    return result.to_string();
2120
0
}
2121
2122
2123
0
fn swedish_cleanup(pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2124
    // FIX: need to implement this -- this is just a copy of the Vietnam code
2125
    // Empty bases are ok if they follow whitespace
2126
0
    static EMPTY_BASE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(^|[W𝐖w])E").unwrap());
2127
    // debug!("swedish_cleanup: start={}", raw_braille);
2128
0
    let result = typeface_to_word_mode(&raw_braille);
2129
0
    let result = capitals_to_word_mode(&result);
2130
2131
0
    let result = result.replace("CG", "⠘")
2132
0
                                    .replace("𝔹C", "⠩")
2133
0
                                    .replace("DC", "⠰");
2134
2135
    // debug!("   after typeface/caps={}", &result);
2136
2137
    // these typeforms need to get pulled from user-prefs as they are transcriber-defined
2138
0
    let double_struck = pref_manager.pref_to_string("Vietnam_DoubleStruck");
2139
0
    let sans_serif = pref_manager.pref_to_string("Vietnam_SansSerif");
2140
0
    let fraktur = pref_manager.pref_to_string("Vietnam_Fraktur");
2141
0
    let greek_variant = pref_manager.pref_to_string("Vietnam_GreekVariant");
2142
2143
    // This reuses the code just for getting rid of unnecessary "L"s and "N"s
2144
0
    let result = remove_unneeded_mode_changes(&result, UEB_Mode::Grade1, UEB_Duration::Passage);
2145
    // debug!("   after removing mode changes={}", &result);
2146
2147
2148
0
    let result = EMPTY_BASE.replace_all(&result, "$1");
2149
0
    let result = REPLACE_INDICATORS.replace_all(&result, |cap: &Captures| {
2150
0
        let matched_char = &cap[0];
2151
0
        match matched_char {
2152
0
            "𝔹" => &double_struck,
2153
0
            "S" => &sans_serif,
2154
0
            "D" => &fraktur,
2155
0
            "V" => &greek_variant,
2156
0
            _ => match SWEDISH_INDICATOR_REPLACEMENTS.get(matched_char) {
2157
0
                None => {error!("REPLACE_INDICATORS and SWEDISH_INDICATOR_REPLACEMENTS are not in sync: missing '{matched_char}'"); ""},
2158
0
                Some(&ch) => ch,
2159
            },
2160
        }
2161
0
    });
2162
2163
    // Remove unicode blanks at start and end -- do this after the substitutions because ',' introduces spaces
2164
    // let result = result.trim_start_matches('⠀').trim_end_matches('⠀');
2165
0
    let result = COLLAPSE_SPACES.replace_all(&result, "⠀");
2166
   
2167
0
    return result.to_string();
2168
0
}
2169
2170
#[allow(non_snake_case)]
2171
50
fn LaTeX_cleanup(_pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2172
1
    static REMOVE_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" ([\^_,;)\]}])").unwrap()); // '^', '_', ',', ';', ')', ']', '}'
2173
1
    static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" +").unwrap());
2174
    // debug!("LaTeX_cleanup: start={}", raw_braille);
2175
50
    let result = raw_braille.replace('𝐖', " ");
2176
    // let result = COLLAPSE_SPACES.replace_all(&raw_braille, "⠀");
2177
50
    let result = COLLAPSE_SPACES.replace_all(&result, " ");
2178
    // debug!("After collapse: {}", &result);
2179
50
    let result = REMOVE_SPACE.replace_all(&result, "$1");
2180
    // debug!("After remove: {}", &result);
2181
    // let result = result.trim_matches('⠀');
2182
50
    let result = result.trim_matches(' ');
2183
   
2184
50
    return result.to_string();
2185
50
}
2186
2187
#[allow(non_snake_case)]
2188
41
fn ASCIIMath_cleanup(_pref_manager: Ref<PreferenceManager>, raw_braille: String) -> String {
2189
1
    static REMOVE_SPACE_BEFORE_OP: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"([\w\d]) +([^\w\d"]|[\^_,;)\]}])"#).unwrap());
2190
1
    static REMOVE_SPACE_AFTER_OP: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"([^\^_,;)\]}\w\d"]) +([\w\d])"#).unwrap());
2191
1
    static COLLAPSE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r" +").unwrap());
2192
    // debug!("ASCIIMath_cleanup: start={}", raw_braille);
2193
41
    let result  = raw_braille.replace("|𝐖__|", "|𝐰__|");    // protect the whitespace to prevent misinterpretation as lfloor
2194
41
    let result = result.replace('𝐖', " ");
2195
41
    let result = COLLAPSE_SPACES.replace_all(&result, " ");
2196
    // debug!("After collapse: {}", &result);
2197
41
    let result = REMOVE_SPACE_BEFORE_OP.replace_all(&result, "$1$2");
2198
41
    let result = REMOVE_SPACE_AFTER_OP.replace_all(&result, "$1$2");
2199
41
    let result = result.replace('𝐰', " ");     // spaces around relational operators
2200
41
    let result = COLLAPSE_SPACES.replace_all(&result, " ");
2201
    // debug!("After remove: {}", &result);
2202
    // let result = result.trim_matches('⠀');
2203
41
    let result = result.trim_matches(' ');
2204
   
2205
41
    return result.to_string();
2206
41
}
2207
2208
2209
/************** Braille xpath functionality ***************/
2210
use crate::canonicalize::{as_element, as_text, name};
2211
use crate::xpath_functions::{is_leaf, validate_one_node, IsBracketed};
2212
use std::result::Result as StdResult;
2213
use sxd_document::dom::ParentOfChild;
2214
use sxd_xpath::function::Error as XPathError;
2215
use sxd_xpath::function::{Args, Function};
2216
use sxd_xpath::{context, nodeset::*, Value};
2217
2218
pub struct NemethNestingChars;
2219
const NEMETH_FRAC_LEVEL: &str = "data-nemeth-frac-level";    // name of attr where value is cached
2220
const FIRST_CHILD_ONLY: &[&str] = &["mroot", "msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts"];
2221
impl NemethNestingChars {
2222
    // returns a 'repeat_char' corresponding to the Nemeth rules for nesting
2223
    // note: this value is likely one char too long because the starting fraction is counted
2224
537
    fn nemeth_frac_value(node: Element, repeat_char: &str) -> String {
2225
537
        let children = node.children();
2226
537
        let name = name(node);
2227
537
        if is_leaf(node) {
2228
244
            return "".to_string();
2229
293
        } else if name == "mfrac" {
2230
            // have we already computed the value?
2231
221
            if let Some(
value152
) = node.attribute_value(NEMETH_FRAC_LEVEL) {
2232
152
                return value.to_string();
2233
69
            }
2234
2235
69
            let num_value = NemethNestingChars::nemeth_frac_value(as_element(children[0]), repeat_char);
2236
69
            let denom_value = NemethNestingChars::nemeth_frac_value(as_element(children[1]), repeat_char);
2237
69
            let mut max_value = if num_value.len() > denom_value.len() {
num_value8
} else {
denom_value61
};
2238
69
            max_value += repeat_char;
2239
69
            node.set_attribute_value(NEMETH_FRAC_LEVEL, &max_value);
2240
69
            return max_value;
2241
72
        } else if FIRST_CHILD_ONLY.contains(&name) {
2242
            // only look at the base -- ignore scripts/index
2243
10
            return NemethNestingChars::nemeth_frac_value(as_element(children[0]), repeat_char);
2244
        } else {
2245
62
            let mut result = "".to_string();
2246
197
            for child in 
children62
{
2247
197
                let value = NemethNestingChars::nemeth_frac_value(as_element(child), repeat_char);
2248
197
                if value.len() > result.len() {
2249
19
                    result = value;
2250
178
                }
2251
            }
2252
62
            return result;
2253
        }
2254
537
    }
2255
2256
0
    fn nemeth_root_value(node: Element, repeat_char: &str) -> StdResult<String, XPathError> {
2257
        // returns the correct number of repeat_chars to use
2258
        // note: because the highest count is toward the leaves and
2259
        //    because this is a loop and not recursive, caching doesn't work without a lot of overhead
2260
0
        let parent = node.parent().unwrap();
2261
0
        if let ParentOfChild::Element(e) =  parent {
2262
0
            let mut parent = e;
2263
0
            let mut result = "".to_string();
2264
            loop {
2265
0
                let name = name(parent);
2266
0
                if name == "math" {
2267
0
                    return Ok( result );
2268
0
                }
2269
0
                if name == "msqrt" || name == "mroot" {
2270
0
                    result += repeat_char;
2271
0
                }
2272
0
                let parent_of_child = parent.parent().unwrap();
2273
0
                if let ParentOfChild::Element(e) =  parent_of_child {
2274
0
                    parent = e;
2275
0
                } else {
2276
0
                    return Err( sxd_xpath::function::Error::Other("Internal error in nemeth_root_value: didn't find 'math' tag".to_string()) );
2277
                }
2278
            }
2279
0
        }
2280
0
        return Err( XPathError::Other("Internal error in nemeth_root_value: didn't find 'math' tag".to_string()) );
2281
0
    }
2282
}
2283
2284
impl Function for NemethNestingChars {
2285
/**
2286
 * Returns a string with the correct number of nesting chars (could be an empty string)
2287
 * @param(node) -- current node
2288
 * @param(char) -- char (string) that should be repeated
2289
 * Note: as a side effect, an attribute with the value so repeated calls to this or a child will be fast
2290
 */
2291
192
 fn evaluate<'d>(&self,
2292
192
                        _context: &context::Evaluation<'_, 'd>,
2293
192
                        args: Vec<Value<'d>>)
2294
192
                        -> StdResult<Value<'d>, XPathError>
2295
    {
2296
192
        let mut args = Args(args);
2297
192
        args.exactly(2)
?0
;
2298
192
        let repeat_char = args.pop_string()
?0
;
2299
192
        let node = crate::xpath_functions::validate_one_node(args.pop_nodeset()
?0
, "NestingChars")
?0
;
2300
192
        if let Node::Element(el) = node {
2301
192
            let name = name(el);
2302
            // it is likely a bug to call this one a non mfrac
2303
192
            if name == "mfrac" {
2304
                // because it is called on itself, the fraction is counted one too many times -- chop one off
2305
                // this is slightly messy because we are chopping off a char, not a byte
2306
                const BRAILLE_BYTE_LEN: usize = "⠹".len();      // all Unicode braille symbols have the same number of bytes
2307
192
                return Ok( Value::String( NemethNestingChars::nemeth_frac_value(el, &repeat_char)[BRAILLE_BYTE_LEN..].to_string() ) );
2308
0
            } else if name == "msqrt" || name == "mroot" {
2309
0
                return Ok( Value::String( NemethNestingChars::nemeth_root_value(el, &repeat_char)? ) );
2310
            } else {
2311
0
                return Err(XPathError::Other(format!("NestingChars chars should be used only on 'mfrac'. '{}' was passed in", name)));
2312
            }
2313
        } else {
2314
            // not an element, so nothing to do
2315
0
            return Ok( Value::String("".to_string()) );
2316
        }
2317
192
    }
2318
}
2319
2320
pub struct BrailleChars;
2321
impl BrailleChars {
2322
    // returns a string for the chars in the *leaf* node.
2323
    // this string follows the Nemeth rules typefaces and deals with mathvariant
2324
    //  which has partially turned chars to the alphanumeric block
2325
12.5k
    fn get_braille_chars(node: Element, code: &str, text_range: Option<Range<usize>>) -> StdResult<String, XPathError> {
2326
12.5k
        let result = match code {
2327
12.5k
            "Nemeth" => 
BrailleChars::get_braille_nemeth_chars5.99k
(
node5.99k
,
text_range5.99k
),
2328
6.52k
            "UEB" => 
BrailleChars:: get_braille_ueb_chars2.28k
(
node2.28k
,
text_range2.28k
),
2329
4.24k
            "CMU" => 
BrailleChars:: get_braille_cmu_chars3.70k
(
node3.70k
,
text_range3.70k
),
2330
536
            "Vietnam" => BrailleChars:: get_braille_vietnam_chars(node, text_range),
2331
0
            "Swedish" => BrailleChars:: get_braille_ueb_chars(node, text_range),    // FIX: need to figure out what to implement
2332
0
            "Finnish" => BrailleChars:: get_braille_ueb_chars(node, text_range),    // FIX: need to figure out what to implement
2333
0
            _ => return Err(sxd_xpath::function::Error::Other(format!("get_braille_chars: unknown braille code '{code}'")))
2334
        };
2335
12.5k
        return match result {
2336
12.5k
            Ok(string) => Ok(make_quoted_string(string)),
2337
0
            Err(err) => return Err(sxd_xpath::function::Error::Other(err.to_string())),
2338
        }
2339
12.5k
    }
2340
2341
5.99k
    fn get_braille_nemeth_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2342
        // To greatly simplify typeface/language generation, the chars have unique ASCII chars for them:
2343
        // Typeface: S: sans-serif, B: bold, 𝔹: blackboard, T: script, I: italic, R: Roman
2344
        // Language: E: English, D: German, G: Greek, V: Greek variants, H: Hebrew, U: Russian
2345
        // Indicators: C: capital, L: letter, N: number, P: punctuation, M: multipurpose
2346
2
        static PICK_APART_CHAR: LazyLock<Regex> = LazyLock::new(|| {
2347
2
            Regex::new(r"(?P<face>[SB𝔹TIR]*)(?P<lang>[EDGVHU]?)(?P<cap>C?)(?P<letter>L?)(?P<num>[N]?)(?P<char>.)").unwrap()
2348
2
        });
2349
5.99k
        let math_variant = node.attribute_value("mathvariant");
2350
        // FIX: cover all the options -- use phf::Map
2351
5.99k
        let  attr_typeface = match math_variant {
2352
5.76k
            None => "R",
2353
233
            Some(variant) => match variant {
2354
233
                "bold" => 
"B"42
,
2355
191
                "italic" => 
"I"2
,
2356
189
                "double-struck" => 
"𝔹"27
,
2357
162
                "script" => 
"T"5
,
2358
157
                "fraktur" => 
"D"0
,
2359
157
                "sans-serif" => 
"S"1
,
2360
156
                _ => "R",       // normal and unknown
2361
            },
2362
        };
2363
5.99k
        let text = BrailleChars::substring(as_text(node), &text_range);
2364
5.99k
        let braille_chars = braille_replace_chars(&text, node)
?0
;
2365
        // debug!("Nemeth chars: text='{}', braille_chars='{}'", &text, &braille_chars);
2366
        
2367
        // we want to pull the prefix (typeface, language) out to the front until a change happens
2368
        // the same is true for number indicator
2369
        // also true (sort of) for capitalization -- if all caps, use double cap in front (assume abbr or Roman Numeral)
2370
        
2371
        // we only care about this for numbers and identifiers/text, so we filter for only those
2372
5.99k
        let node_name = name(node);
2373
5.99k
        let is_in_enclosed_list = node_name != "mo" && 
BrailleChars::is_in_enclosed_list3.45k
(
node3.45k
);
2374
5.99k
        let is_mn_in_enclosed_list = is_in_enclosed_list && 
node_name == "mn"120
;
2375
5.99k
        let mut typeface = "R".to_string();     // assumption is "R" and if attr or letter is different, something happens
2376
5.99k
        let mut is_all_caps = true;
2377
5.99k
        let mut is_all_caps_valid = false;      // all_caps only valid if we did a replacement
2378
7.87k
        let 
result5.99k
=
PICK_APART_CHAR5.99k
.
replace_all5.99k
(
&braille_chars5.99k
, |caps: &Captures| {
2379
            // debug!("  face: {:?}, lang: {:?}, num {:?}, letter: {:?}, cap: {:?}, char: {:?}",
2380
            //        &caps["face"], &caps["lang"], &caps["num"], &caps["letter"], &caps["cap"], &caps["char"]);
2381
7.87k
            let mut nemeth_chars = "".to_string();
2382
7.87k
            let char_face = if caps["face"].is_empty() {
attr_typeface7.78k
} else {
&caps["face"]86
};
2383
7.87k
            let typeface_changed =  typeface != char_face;
2384
7.87k
            if typeface_changed {
2385
86
                typeface = char_face.to_string();   // needs to outlast this instance of the loop
2386
86
                nemeth_chars += &typeface;
2387
86
                nemeth_chars +=  &caps["lang"];
2388
7.78k
            } else {
2389
7.78k
                nemeth_chars +=  &caps["lang"];
2390
7.78k
            }
2391
            // debug!("  typeface changed: {}, is_in_list: {}; num: {}", typeface_changed, is_in_enclosed_list, !caps["num"].is_empty());
2392
7.87k
            if !caps["num"].is_empty() && (
typeface_changed2.74k
||
!is_mn_in_enclosed_list2.72k
) {
2393
2.58k
                nemeth_chars += "N";
2394
5.28k
            }
2395
7.87k
            is_all_caps_valid = true;
2396
7.87k
            is_all_caps &= !&caps["cap"].is_empty();
2397
7.87k
            nemeth_chars += &caps["cap"];       // will be stripped later if all caps
2398
7.87k
            if is_in_enclosed_list {
2399
228
                nemeth_chars += &caps["letter"].replace('L', "l");
2400
7.64k
            } else {
2401
7.64k
                nemeth_chars += &caps["letter"];
2402
7.64k
            }
2403
7.87k
            nemeth_chars += &caps["char"];
2404
7.87k
            return nemeth_chars;
2405
7.87k
        });
2406
        // debug!("  result: {}", &result);
2407
5.99k
        let mut text_chars = text.chars();     // see if more than one char
2408
5.99k
        if is_all_caps_valid && 
is_all_caps5.22k
&&
text_chars.next()369
.
is_some369
() &&
text_chars.next()369
.
is_some369
() {
2409
7
            return Ok( "CC".to_string() + &result.replace('C', ""));
2410
        } else {
2411
5.98k
            return Ok( result.to_string() );
2412
        }
2413
5.99k
    }
2414
2415
2.82k
    fn get_braille_ueb_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2416
        // Because in UEB typeforms and caps may extend for multiple tokens,
2417
        //   this routine merely deals with the mathvariant attr.
2418
        // Canonicalize has already transformed all chars it can to math alphanumerics, but not all have bold/italic 
2419
        // The typeform/caps transforms to (potentially) word mode are handled later.
2420
1
        static HAS_TYPEFACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap());
2421
1
        static PICK_APART_CHAR: LazyLock<Regex> = LazyLock::new(|| {
2422
1
            Regex::new(r"(?P<bold>B??)(?P<italic>I??)(?P<face>[S𝔹TD]??)s??(?P<cap>C??)(?P<greek>G??)(?P<char>[NL].)").unwrap()
2423
1
        });
2424
    
2425
2.82k
        let math_variant = node.attribute_value("mathvariant");
2426
2.82k
        let text = BrailleChars::substring(as_text(node), &text_range);
2427
2.82k
        let mut braille_chars = braille_replace_chars(&text, node)
?0
;
2428
2429
        // debug!("get_braille_ueb_chars: before/after unicode.yaml: '{}'/'{}'", text, braille_chars);
2430
2.82k
        if math_variant.is_none() {         // nothing we need to do
2431
2.71k
            return Ok(braille_chars);
2432
108
        }
2433
        // mathvariant could be "sans-serif-bold-italic" -- get the parts
2434
108
        let math_variant = math_variant.unwrap();
2435
108
        let italic = math_variant.contains("italic");
2436
108
        if italic & !braille_chars.contains('I') {
2437
0
            braille_chars = "I".to_string() + &braille_chars;
2438
108
        }
2439
108
        let bold = math_variant.contains("bold");
2440
108
        if bold & !braille_chars.contains('B') {
2441
0
            braille_chars = "B".to_string() + &braille_chars;
2442
108
        }
2443
108
        let typeface = match HAS_TYPEFACE.find(math_variant) {
2444
107
            None => "",
2445
1
            Some(m) => match m.as_str() {
2446
1
                "double-struck" => 
"𝔹"0
,
2447
1
                "script" => 
"T"0
,
2448
1
                "fraktur" => "D",
2449
0
                "sans-serif" => "S",
2450
                //  don't consider monospace as a typeform
2451
0
                _ => "",
2452
            },
2453
        };
2454
116
        let 
result108
=
PICK_APART_CHAR108
.
replace_all108
(
&braille_chars108
, |caps: &Captures| {
2455
            // debug!("captures: {:?}", caps);
2456
            // debug!("  bold: {:?}, italic: {:?}, face: {:?}, cap: {:?}, char: {:?}",
2457
            //        &caps["bold"], &caps["italic"], &caps["face"], &caps["cap"], &caps["char"]);
2458
116
            if bold || 
!caps["bold"].is_empty()111
{
"B"5
} else {
""111
}.to_string()
2459
116
                + if italic || !caps["italic"].is_empty() {
"I"0
} else {""}
2460
116
                + if !&caps["face"].is_empty() {
&caps["face"]1
} else {
typeface115
}
2461
116
                + &caps["cap"]
2462
116
                + &caps["greek"]
2463
116
                + &caps["char"]
2464
116
        });
2465
        // debug!("get_braille_ueb_chars: '{}'", &result);
2466
108
        return Ok(result.to_string())
2467
2.82k
    }
2468
2469
3.70k
    fn get_braille_cmu_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2470
        // In CMU, we need to replace spaces used for number blocks with "."
2471
        // For other numbers, we need to add "." to create digit blocks
2472
2473
1
        static HAS_TYPEFACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(".*?(double-struck|script|fraktur|sans-serif).*").unwrap());
2474
1
        static PICK_APART_CHAR: LazyLock<Regex> = LazyLock::new(|| {
2475
1
            Regex::new(r"(?P<bold>B??)(?P<italic>I??)(?P<face>[S𝔹TD]??)s??(?P<cap>C??)(?P<greek>G??)(?P<char>[NL].)").unwrap()
2476
1
        });
2477
    
2478
3.70k
        let math_variant = node.attribute_value("mathvariant");
2479
3.70k
        let text = BrailleChars::substring(as_text(node), &text_range);
2480
3.70k
        let text = add_separator(text);
2481
2482
3.70k
        let braille_chars = braille_replace_chars(&text, node)
?0
;
2483
2484
        // debug!("get_braille_ueb_chars: before/after unicode.yaml: '{}'/'{}'", text, braille_chars);
2485
3.70k
        if math_variant.is_none() {         // nothing we need to do
2486
3.70k
            return Ok(braille_chars);
2487
4
        }
2488
        // mathvariant could be "sans-serif-bold-italic" -- get the parts
2489
4
        let math_variant = math_variant.unwrap();
2490
4
        let bold = math_variant.contains("bold");
2491
4
        let italic = math_variant.contains("italic");
2492
4
        let typeface = match HAS_TYPEFACE.find(math_variant) {
2493
4
            None => "",
2494
0
            Some(m) => match m.as_str() {
2495
0
                "double-struck" => "𝔹",
2496
0
                "script" => "T",
2497
0
                "fraktur" => "D",
2498
0
                "sans-serif" => "S",
2499
                //  don't consider monospace as a typeform
2500
0
                _ => "",
2501
            },
2502
        };
2503
4
        let result = PICK_APART_CHAR.replace_all(&braille_chars, |caps: &Captures| {
2504
            // debug!("captures: {:?}", caps);
2505
            // debug!("  bold: {:?}, italic: {:?}, face: {:?}, cap: {:?}, char: {:?}",
2506
            //        &caps["bold"], &caps["italic"], &caps["face"], &caps["cap"], &caps["char"]);
2507
4
            if bold || !caps["bold"].is_empty() {
"B"0
} else {""}.to_string()
2508
4
                + if italic || !caps["italic"].is_empty() {
"I"0
} else {""}
2509
4
                + if !&caps["face"].is_empty() {
&caps["face"]0
} else {typeface}
2510
4
                + &caps["cap"]
2511
4
                + &caps["greek"]
2512
4
                + &caps["char"]
2513
4
        });
2514
4
        return Ok(result.to_string());
2515
2516
3.70k
        fn add_separator(text: String) -> String {
2517
            use crate::definitions::BRAILLE_DEFINITIONS;
2518
3.70k
            if let Some(
text_without_arc0
) = text.strip_prefix("arc") {
2519
                // "." after arc (7.5.3)
2520
0
                let is_function_name = BRAILLE_DEFINITIONS.with(|definitions| {
2521
0
                    let definitions = definitions.borrow();
2522
0
                    let set = definitions.get_hashset("CMUFunctionNames").unwrap();
2523
0
                    return set.contains(&text);
2524
0
                });
2525
0
                if is_function_name {
2526
0
                    return "arc.".to_string() + text_without_arc;
2527
0
                }
2528
3.70k
            } 
2529
3.70k
            return text;
2530
3.70k
        }
2531
3.70k
    }
2532
2533
536
    fn get_braille_vietnam_chars(node: Element, text_range: Option<Range<usize>>) -> Result<String> {
2534
        // this is basically the same as for ueb except:
2535
        // 1. we deal with switching '.' and ',' if in English style for numbers
2536
        // 2. if it is identified as a Roman Numeral, we make all but the first char lower case because they shouldn't get a cap indicator
2537
        // 3. double letter chemical elements should NOT be part of a cap word sequence
2538
536
        if name(node) == "mn" {
2539
248
            // text of element is modified by these if needed
2540
248
            lower_case_roman_numerals(node);
2541
248
            switch_if_english_style_number(node);
2542
288
        }
2543
536
        let result = BrailleChars::get_braille_ueb_chars(node, text_range)
?0
;
2544
536
        return Ok(result);
2545
2546
248
        fn lower_case_roman_numerals(mn_node: Element) {
2547
248
            if mn_node.attribute("data-roman-numeral").is_some() {
2548
2
                // if a roman numeral, all ASCII so we can optimize
2549
2
                let text = as_text(mn_node);
2550
2
                let mut new_text = String::from(&text[..1]);
2551
2
                new_text.push_str(text[1..].to_ascii_lowercase().as_str());    // works for single char too
2552
2
                mn_node.set_text(&new_text);
2553
246
            }
2554
248
        }
2555
248
        fn switch_if_english_style_number(mn_node: Element) {
2556
248
            let text = as_text(mn_node);
2557
248
            let dot = text.find('.');
2558
248
            let comma = text.find(',');
2559
248
            match (dot, comma) {
2560
218
                (None, None) => (),
2561
4
                (Some(dot), Some(comma)) => {
2562
4
                    if comma < dot {
2563
2
                        // switch dot/comma -- using "\x01" as a temp when switching the two chars
2564
2
                        let switched = text.replace('.', "\x01").replace(',', ".").replace('\x01', ",");
2565
2
                        mn_node.set_text(&switched);
2566
2
                    }
2567
                },
2568
17
                (Some(dot), None) => {
2569
                    // If it starts with a '.', a leading 0, or if there is only one '.' and not three chars after it
2570
17
                    if dot==0 ||
2571
15
                       (dot==1 && 
text11
.
starts_with11
('0')) ||
2572
13
                       (text[dot+1..].find('.').is_none() && 
text[dot+1..].len()!=310
) {
2573
5
                        mn_node.set_text(&text.replace('.', ","));
2574
12
                    }
2575
                },
2576
9
                (None, Some(comma)) => {
2577
                    // if there is more than one ",", than it can't be a decimal separator
2578
9
                    if text[comma+1..].find(',').is_some() {
2579
1
                        mn_node.set_text(&text.replace(',', "."));
2580
8
                    }
2581
                },
2582
            }
2583
248
        }
2584
2585
536
    }
2586
2587
2588
3.45k
    fn is_in_enclosed_list(node: Element) -> bool {
2589
        // Nemeth Rule 10 defines an enclosed list:
2590
        // 1: begins and ends with fence
2591
        // 2: FIX: not implemented -- must contain no word, abbreviation, ordinal or plural ending
2592
        // 3: function names or signs of shape and the signs which follow them are a single item (not a word)
2593
        // 4: an item of the list may be an ellipsis or any sign used for omission
2594
        // 5: no relational operator may appear within the list
2595
        // 6: the list must have at least 2 items.
2596
        //       Items are separated by commas, can not have other punctuation (except ellipsis and dash)
2597
3.45k
        let mut parent = get_parent(node); // safe since 'math' is always at root
2598
7.37k
        while name(parent) == "mrow" {
2599
4.04k
            if IsBracketed::is_bracketed(parent, "", "", true, false) {
2600
388
                for child in 
parent134
.
children134
() {
2601
388
                    if !child_meets_conditions(as_element(child)) {
2602
14
                        return false;
2603
374
                    }
2604
                }
2605
120
                return true;
2606
3.91k
            }
2607
3.91k
            parent = get_parent(parent);
2608
        }
2609
3.32k
        return false;
2610
2611
1.55k
        fn child_meets_conditions(node: Element) -> bool {
2612
1.55k
            let name = name(node);
2613
1.55k
            return match name {
2614
1.55k
                "mi" | 
"mn"1.39k
=>
true476
,
2615
1.07k
                "mo"  => 
!crate::canonicalize::is_relational_op(node)664
,
2616
412
                "mtext" => {
2617
9
                    let text = as_text(node).trim();
2618
9
                    return text=="?" || text=="-?-" || text.is_empty();   // various forms of "fill in missing content" (see also Nemeth_RULEs.yaml, "omissions")
2619
                },
2620
403
                "mrow" => {
2621
385
                    if IsBracketed::is_bracketed(node, "", "", false, false) {
2622
125
                        return child_meets_conditions(as_element(node.children()[1]));
2623
                    } else {
2624
1.00k
                        for child in 
node260
.
children260
() {
2625
1.00k
                            if !child_meets_conditions(as_element(child)) {
2626
28
                                return false;
2627
975
                            }
2628
                        }
2629
                    }  
2630
232
                    true      
2631
                },
2632
18
                "menclose" => {
2633
0
                    if let Some(notation) = node.attribute_value("notation") {
2634
0
                        if notation != "bottom" || notation != "box" {
2635
0
                            return false;
2636
0
                        }
2637
0
                        let child = as_element(node.children()[0]);     // menclose has exactly one child
2638
0
                        return is_leaf(child) && as_text(child) == "?";
2639
0
                    }
2640
0
                    return false;
2641
                },
2642
                _ => {
2643
36
                    for child in 
node18
.
children18
() {
2644
36
                        if !child_meets_conditions(as_element(child)) {
2645
0
                            return false;
2646
36
                        }
2647
                    }
2648
18
                    true
2649
                },
2650
            }
2651
1.55k
        }
2652
3.45k
    }
2653
2654
    /// Extract the `char`s from `str` within `text_range` (these are chars, not byte offsets).
    ///
    /// Returns all of `str` when `text_range` is `None`. A range that extends past the end of
    /// `str` is clipped, and a range whose end precedes its start yields an empty string
    /// (rather than overflowing the subtraction).
    fn substring(str: &str, text_range: &Option<Range<usize>>) -> String {
        match text_range {
            None => str.to_string(),
            Some(range) => {
                let char_count = range.end.saturating_sub(range.start);
                str.chars().skip(range.start).take(char_count).collect()
            },
        }
    }
2661
}
2662
2663
impl Function for BrailleChars {
2664
    /**
2665
     * Returns a string with the correct number of nesting chars (could be an empty string)
2666
     * @param(node) -- current node or string
2667
     * @param(char) -- char (string) that should be repeated
2668
     * Note: as a side effect, an attribute with the value so repeated calls to this or a child will be fast
2669
     */
2670
12.5k
    fn evaluate<'d>(&self,
2671
12.5k
                        context: &context::Evaluation<'_, 'd>,
2672
12.5k
                        args: Vec<Value<'d>>)
2673
12.5k
                        -> StdResult<Value<'d>, XPathError>
2674
    {
2675
        use crate::canonicalize::create_mathml_element;
2676
12.5k
        let mut args = Args(args);
2677
12.5k
        if let Err(
e0
) = args.exactly(2).or_else(|_|
args2.52k
.
exactly2.52k
(4)) {
2678
0
            return Err( XPathError::Other(format!("BrailleChars requires 2 or 4 args: {e}")));
2679
12.5k
        };
2680
2681
12.5k
        let range = if args.len() == 4 {
2682
2.52k
            let end = args.pop_number()
?0
as usize - 1; // non-inclusive at end, 0-based
2683
2.52k
            let start = args.pop_number()
?0
as usize - 1; // inclusive at start, a 0-based
2684
2.52k
            Some(start..end)
2685
        } else {
2686
9.99k
            None
2687
        };
2688
12.5k
        let braille_code = args.pop_string()
?0
;
2689
12.5k
        let v: Value<'_> = args.0.pop().ok_or(XPathError::ArgumentMissing)
?0
;
2690
12.5k
        let node = match v {
2691
11.8k
            Value::Nodeset(nodes) => {
2692
11.8k
                validate_one_node(nodes, "BrailleChars")
?0
.element().unwrap()
2693
            },
2694
2
            Value::Number(n) => {
2695
2
                let new_node = create_mathml_element(&context.node.document(), "mn");
2696
2
                new_node.set_text(&n.to_string());
2697
2
                new_node
2698
            },
2699
681
            Value::String(s) => {
2700
681
                let new_node = create_mathml_element(&context.node.document(), "mi");   // FIX: try to guess mi vs mo???
2701
681
                new_node.set_text(&s);
2702
681
                new_node
2703
            },
2704
            _ => {
2705
0
                return Ok( Value::String("".to_string()) ) // not an element, so nothing to do
2706
            },
2707
        };
2708
2709
12.5k
        if !is_leaf(node) {
2710
0
            return Err( XPathError::Other(format!("BrailleChars called on non-leaf element '{}'", mml_to_string(node))) );
2711
12.5k
        }
2712
12.5k
        return Ok( Value::String( BrailleChars::get_braille_chars(node, &braille_code, range)
?0
) );
2713
12.5k
    }
2714
}
2715
2716
/// Unit struct whose associated functions decide, per braille code (CMU, Finnish, Swedish, ...),
/// whether a MathML element needs to be grouped.
pub struct NeedsToBeGrouped;
2717
impl NeedsToBeGrouped {
2718
    // ordinals often have an irregular start (e.g., "half") before becoming regular.
2719
    // if the number is irregular, return the ordinal form, otherwise return 'None'.
2720
805
    fn needs_grouping_for_cmu(element: Element, _is_base: bool) -> bool {
2721
805
        let node_name = name(element);
2722
805
        let children = element.children();
2723
805
        if node_name == "mrow" {
2724
            // check for bracketed exprs
2725
544
            if IsBracketed::is_bracketed(element, "", "", false, true) {
2726
0
                return false;
2727
544
            }
2728
2729
            // check for prefix and postfix ops at start or end (=> len()==2, prefix is first op, postfix is last op)
2730
544
            if children.len() == 2 &&
2731
9
                (name(as_element(children[0])) == "mo" || 
name5
(
as_element5
(children[1])) == "mo") {
2732
7
                return false;
2733
537
            }
2734
2735
537
            if children.len() != 3 {  // ==3, need to check if it a linear fraction
2736
4
                return true;
2737
533
            }
2738
533
            let operator = as_element(children[1]);
2739
533
            if name(operator) != "mo" || as_text(operator) != "/" {
2740
532
                return true;
2741
1
            }
2742
261
        }
2743
2744
262
        if !(node_name == "mrow" || 
node_name == "mfrac"261
) {
2745
258
            return false;
2746
4
        }
2747
        // check for numeric fractions (regular fractions need brackets, not numeric fractions), either as an mfrac or with "/"
2748
        // if the fraction starts with a "-", it is still a numeric fraction that doesn't need parens
2749
4
        let mut numerator = as_element(children[0]);
2750
4
        let denominator = as_element(children[children.len()-1]);
2751
4
        let decimal_separator = crate::interface::get_preference("DecimalSeparators").unwrap()
2752
4
                                                        .chars().next().unwrap_or('.');
2753
4
        if is_integer(denominator, decimal_separator) {
2754
            // check numerator being either an integer "- integer"
2755
2
            if name(numerator) == "mrow" {
2756
1
                let numerator_children = numerator.children();
2757
1
                if !(numerator_children.len() == 2 &&
2758
1
                        name(as_element(numerator_children[0])) == "mo" &&
2759
1
                        as_text(as_element(numerator_children[0])) == "-") {
2760
0
                    return true;
2761
1
                }
2762
1
                numerator = as_element(numerator_children[1]);
2763
1
            }
2764
2
            return !is_integer(numerator, decimal_separator);
2765
2
        }
2766
2
        return true;
2767
2768
6
        fn is_integer(mathml: Element, decimal_separator: char) -> bool {
2769
6
            return name(mathml) == "mn" && 
!4
as_text(mathml)4
.contains(decimal_separator)
2770
6
        }
2771
805
    }
2772
2773
    /// FIX: what needs to be implemented?
2774
0
    fn needs_grouping_for_finnish(mathml: Element, is_base: bool) -> bool {
2775
        use crate::xpath_functions::IsInDefinition;
2776
0
        let mut node_name = name(mathml);
2777
0
        if mathml.attribute_value("data-roman-numeral").is_some() {
2778
0
            node_name = "mi";           // roman numerals don't follow number rules
2779
0
        }
2780
2781
        // FIX: the leaf rules are from UEB -- check the Swedish rules
2782
0
        match node_name {
2783
0
            "mn" => {   
2784
0
                if !is_base {
2785
0
                    return false;
2786
0
                }                                                                                        // clause 1
2787
                // two 'mn's can be adjacent, in which case we need to group the 'mn' to make it clear it is separate (see bug #204)
2788
0
                let parent = get_parent(mathml);   // there is always a "math" node
2789
0
                let grandparent = if name(parent) == "math" {parent} else {get_parent(parent)};
2790
0
                if name(grandparent) != "mrow" {
2791
0
                    return false;
2792
0
                }
2793
0
                let preceding = parent.preceding_siblings();
2794
0
                if preceding.len()  < 2 {
2795
0
                    return false;
2796
0
                }
2797
                // any 'mn' would be separated from this node by invisible times
2798
0
                let previous_child = as_element(preceding[preceding.len()-1]);
2799
0
                if name(previous_child) == "mo" && as_text(previous_child) == "\u{2062}" {
2800
0
                    let previous_child = as_element(preceding[preceding.len()-2]);
2801
0
                    return name(previous_child) == "mn"
2802
                } else {
2803
0
                    return false;
2804
                }
2805
            },
2806
0
            "mi" | "mo" | "mtext" => {
2807
0
                let text = as_text(mathml);
2808
0
                let parent = get_parent(mathml);   // there is always a "math" node
2809
0
                let parent_name = name(parent);   // there is always a "math" node
2810
0
                if is_base && (parent_name == "msub" || parent_name == "msup" || parent_name == "msubsup") && !text.contains([' ', '\u{00A0}']) {
2811
0
                    return false;
2812
0
                }
2813
0
                let mut chars = text.chars();
2814
0
                let first_char = chars.next().unwrap();             // canonicalization assures it isn't empty;
2815
0
                let is_one_char = chars.next().is_none();
2816
                // '¨', etc., brailles as two chars -- there probably is some exception list but I haven't found it -- these are the ones I know about
2817
0
                return !((is_one_char && !['¨', '″', '‴', '⁗'].contains(&first_char)) ||                       // clause 8
2818
                            // "lim", "cos", etc., appear not to get parens, but the rules don't mention it (tests show it)
2819
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "FunctionNames").unwrap() ||
2820
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "Arrows").unwrap() ||          // clause 4
2821
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "GeometryShapes").unwrap());   // clause 5
2822
            },
2823
0
            "mrow" => {
2824
                // check for bracketed exprs
2825
0
                if IsBracketed::is_bracketed(mathml, "", "", false, true) {
2826
0
                    return false;
2827
0
                }
2828
2829
0
                let parent = get_parent(mathml); // safe since 'math' is always at root
2830
0
                if name(parent) == "mfrac" {
2831
0
                    let children = mathml.children();
2832
0
                    if mathml.preceding_siblings().is_empty() {
2833
                        // numerator: check for multiplication -- doesn't need grouping in numerator
2834
0
                        if children.len() >= 3 {
2835
0
                            let operator = as_element(children[1]);
2836
0
                            if name(operator) == "mo" {
2837
0
                                let ch = as_text(operator);
2838
0
                                if ch == "\u{2062}" || ch == "⋅" || ch == "×"  {
2839
0
                                    return false;
2840
0
                                }
2841
0
                            }
2842
0
                        }
2843
0
                        return true;
2844
                    } else {
2845
                        // denominator
2846
0
                        return true;
2847
                    }
2848
2849
0
                }
2850
                // check for prefix at start
2851
                // example 7.12 has "2-" in superscript and is grouped, so we don't consider postfix ops
2852
0
                let children = mathml.children();
2853
0
                if children.len() == 2 &&
2854
0
                    (name(as_element(children[0])) == "mo") {
2855
0
                    return false;
2856
0
                }
2857
0
                return true;
2858
            },
2859
0
            _ => return false,
2860
        }
2861
0
    }
2862
2863
    // ordinals often have an irregular start (e.g., "half") before becoming regular.
2864
    // if the number is irregular, return the ordinal form, otherwise return 'None'.
2865
0
    fn needs_grouping_for_swedish(mathml: Element, is_base: bool) -> bool {
2866
        use crate::xpath_functions::IsInDefinition;
2867
0
        let mut node_name = name(mathml);
2868
0
        if mathml.attribute_value("data-roman-numeral").is_some() {
2869
0
            node_name = "mi";           // roman numerals don't follow number rules
2870
0
        }
2871
2872
0
        match node_name {
2873
0
            "mn" => return false,
2874
0
            "mi" | "mo" | "mtext" => {
2875
0
                let text = as_text(mathml);
2876
0
                let parent = get_parent(mathml);   // there is always a "math" node
2877
0
                let parent_name = name(parent);   // there is always a "math" node
2878
0
                if is_base && (parent_name == "msub" || parent_name == "msup" || parent_name == "msubsup") && !text.contains([' ', '\u{00A0}']) {
2879
0
                    return false;
2880
0
                }
2881
0
                let mut chars = text.chars();
2882
0
                let first_char = chars.next().unwrap();             // canonicalization assures it isn't empty;
2883
0
                let is_one_char = chars.next().is_none();
2884
                // '¨', etc., brailles as two chars -- there probably is some exception list but I haven't found it -- these are the ones I know about
2885
0
                return !((is_one_char && !['¨', '″', '‴', '⁗'].contains(&first_char)) ||                       // clause 8
2886
                            // "lim", "cos", etc., appear not to get parens, but the rules don't mention it (tests show it)
2887
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "FunctionNames").unwrap() ||
2888
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "Arrows").unwrap() ||          // clause 4
2889
0
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "GeometryShapes").unwrap());   // clause 5
2890
            },
2891
0
            "mrow" => {
2892
                // check for bracketed exprs
2893
0
                if IsBracketed::is_bracketed(mathml, "", "", false, true) {
2894
0
                    return false;
2895
0
                }
2896
2897
                // check for prefix at start
2898
                // example 7.12 has "2-" in superscript and is grouped, so we don't consider postfix ops
2899
0
                let children = mathml.children();
2900
0
                if children.len() == 2 &&
2901
0
                    (name(as_element(children[0])) == "mo") {
2902
0
                    return false;
2903
0
                }
2904
0
                return true;
2905
            },
2906
0
            "mfrac" => {
2907
                // exclude simple fractions -- they are not bracketed with start/end marks
2908
0
                let children = mathml.children();
2909
0
                return !(NeedsToBeGrouped::needs_grouping_for_swedish(as_element(children[0]), true) ||
2910
0
                         NeedsToBeGrouped::needs_grouping_for_swedish(as_element(children[0]), true));
2911
            },
2912
            // At least for msup (Ex 7.7, and 7.32 and maybe more), spec seems to feel grouping is not needed.
2913
            // "msub" | "msup" | "msubsup" | "munder" | "mover" | "munderover" => return true,
2914
0
            "mtable" => return true,    // Fix: should check for trivial cases that don't need grouping
2915
0
            _ => return false,
2916
        }
2917
0
    }
2918
2919
    /// Returns true if the element needs grouping symbols
2920
    /// Bases need extra attention because if they are a number and the item to the left is one, that needs distinguishing
2921
538
    fn needs_grouping_for_ueb(mathml: Element, is_base: bool) -> bool {
2922
        // From GTM 7.1
2923
        // 1. An entire number, i.e. the initiating numeric symbol and all succeeding symbols within the numeric mode thus
2924
        //     established (which would include any interior decimal points, commas, separator spaces, or simple numeric fraction lines).
2925
        // 2. An entire general fraction, enclosed in fraction indicators.
2926
        // 3. An entire radical expression, enclosed in radical indicators.
2927
        // 4. An arrow.
2928
        // 5. An arbitrary shape.
2929
        // 6. Any expression enclosed in matching pairs of round parentheses, square brackets or curly braces.
2930
        // 7. Any expression enclosed in the braille grouping indicators.   [Note: not possible here]
2931
        // 8. If none of the foregoing apply, the item is simply the [this element's] individual symbol.
2932
2933
        use crate::xpath_functions::IsInDefinition;
2934
538
        let mut node_name = name(mathml);
2935
538
        if mathml.attribute_value("data-roman-numeral").is_some() {
2936
1
            node_name = "mi";           // roman numerals don't follow number rules
2937
537
        }
2938
538
        match node_name {
2939
538
            "mn" => {   
2940
250
                if !is_base {
2941
233
                    return false;
2942
17
                }                                                                                        // clause 1
2943
                // two 'mn's can be adjacent, in which case we need to group the 'mn' to make it clear it is separate (see bug #204)
2944
17
                let parent = get_parent(mathml);   // there is always a "math" node
2945
17
                let grandparent = if name(parent) == "math" {
parent0
} else {get_parent(parent)};
2946
17
                if name(grandparent) != "mrow" {
2947
2
                    return false;
2948
15
                }
2949
15
                let preceding = parent.preceding_siblings();
2950
15
                if preceding.len()  < 2 {
2951
6
                    return false;
2952
9
                }
2953
                // any 'mn' would be separated from this node by invisible times
2954
9
                let previous_child = as_element(preceding[preceding.len()-1]);
2955
9
                if name(previous_child) == "mo" && as_text(previous_child) == "\u{2062}" {
2956
6
                    let previous_child = as_element(preceding[preceding.len()-2]);
2957
6
                    return name(previous_child) == "mn"
2958
                } else {
2959
3
                    return false;
2960
                }
2961
            },
2962
288
            "mi" | 
"mo"44
|
"mtext"32
=> {
2963
258
                let text = as_text(mathml);
2964
258
                let parent = get_parent(mathml);   // there is always a "math" node
2965
258
                let parent_name = name(parent);   // there is always a "math" node
2966
258
                if is_base && (
parent_name == "msub"230
||
parent_name == "msup"224
||
parent_name == "msubsup"10
) &&
!224
text224
.contains([' ', '\u{00A0}']) {
2967
224
                    return false;
2968
34
                }
2969
34
                let mut chars = text.chars();
2970
34
                let first_char = chars.next().unwrap();             // canonicalization assures it isn't empty;
2971
34
                let is_one_char = chars.next().is_none();
2972
                // '¨', etc., brailles as two chars -- there probably is some exception list but I haven't found it -- these are the ones I know about
2973
34
                return !((is_one_char && 
!31
['¨', '″', '‴', '⁗']31
.contains(&first_char)) || // clause 8
2974
                            // "lim", "cos", etc., appear not to get parens, but the rules don't mention it (tests show it)
2975
4
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "FunctionNames").unwrap() ||
2976
3
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "Arrows").unwrap() ||          // clause 4
2977
3
                            IsInDefinition::is_defined_in(text, &SPEECH_DEFINITIONS, "GeometryShapes").unwrap());   // clause 5
2978
            },
2979
30
            "mfrac" => return 
false2
, // clause 2 (test GTM 8.2(4) shows numeric fractions are not special)
2980
28
            "msqrt" | "mroot" => return 
false0
, // clause 3
2981
                    // clause 6 only mentions three grouping chars, I'm a little suspicious of that, but that's what it says
2982
28
            "mrow" => return !(
IsBracketed::is_bracketed22
(
mathml22
,
"("22
,
")"22
, false, false) ||
2983
16
                                IsBracketed::is_bracketed(mathml, "[", "]", false, false) || 
2984
15
                                IsBracketed::is_bracketed(mathml, "{", "}", false, false) ),
2985
6
            "msub" | 
"msup"4
|
"msubsup"1
=> {
2986
                // I'm a little dubious about the false value, but see GTM 7.7(2)
2987
5
                if !is_base {
2988
3
                    return true;
2989
2
                } 
2990
                // need to group nested scripts in base -- see GTM 12.2(2)                                         
2991
2
                let parent = get_parent(mathml);   // there is always a "math" node
2992
2
                let parent_name = name(parent);   // there is always a "math" node
2993
2
                return parent_name == "munder" || parent_name == "mover" || 
parent_name == "munderover"1
;
2994
            },
2995
1
            _ => return true,
2996
        }
2997
2998
538
    }
2999
}
3000
3001
impl Function for NeedsToBeGrouped {
3002
    // convert a node to an ordinal number
3003
1.34k
    fn evaluate<'d>(&self,
3004
1.34k
                        _context: &context::Evaluation<'_, 'd>,
3005
1.34k
                        args: Vec<Value<'d>>)
3006
1.34k
                        -> StdResult<Value<'d>, XPathError>
3007
    {
3008
1.34k
        let mut args = Args(args);
3009
1.34k
        args.exactly(3)
?0
;
3010
1.34k
        let is_base = args.pop_boolean()
?0
;
3011
1.34k
        let braille_code = args.pop_string()
?0
;
3012
1.34k
        let node = validate_one_node(args.pop_nodeset()
?0
, "NeedsToBeGrouped")
?0
;
3013
1.34k
        if let Node::Element(e) = node {
3014
1.34k
            let answer = match braille_code.as_str() {
3015
1.34k
                "CMU" => 
NeedsToBeGrouped::needs_grouping_for_cmu805
(
e805
,
is_base805
),
3016
538
                "UEB" => NeedsToBeGrouped::needs_grouping_for_ueb(e, is_base),
3017
0
                "Finnish" => NeedsToBeGrouped::needs_grouping_for_finnish(e, is_base),
3018
0
                "Swedish" => NeedsToBeGrouped::needs_grouping_for_swedish(e, is_base),
3019
0
                _ => return Err(XPathError::Other(format!("NeedsToBeGrouped: braille code arg '{braille_code:?}' is not a known code ('UEB', 'CMU', or 'Swedish')"))),
3020
            };
3021
1.34k
            return Ok( Value::Boolean( answer ) );
3022
0
        }
3023
3024
0
        return Err(XPathError::Other(format!("NeedsToBeGrouped: first arg '{node:?}' is not a node")));
3025
1.34k
    }
3026
}
3027
    
3028
    
3029
    
3030
#[cfg(test)]
mod tests {
    use super::*;
    #[allow(unused_imports)]
    use crate::init_logger;
    use crate::interface::*;
    use log::debug;

    /// Points MathCAT at the rules directory and makes `mathml` the current expression.
    fn load_expression(mathml: &str) {
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        set_mathml(mathml).unwrap();
    }

    /// For every braille cell position `i`, checks that navigating from that position lands on
    /// the node `id-<expected_ids[i]>`. `code` is only used to label assertion failures.
    fn check_braille_positions(code: &str, expected_ids: &[i32]) -> Result<()> {
        use std::time::Instant;
        for (i, num) in expected_ids.iter().enumerate() {
            let expected = format!("id-{}", num);
            debug!("\n===  i={}  ===", i);
            let timer = Instant::now();
            let (id, _offset) = crate::interface::get_navigation_node_from_braille_position(i)?;
            N_PROBES.with(|probes| {debug!("test {:2} #probes = {}", i, probes.borrow())});
            debug!("Time taken: {}ms", timer.elapsed().as_millis());
            assert_eq!(expected, id, "\n{} test ith position={}", code, i);
        }
        return Ok( () );
    }

    #[test]
    fn ueb_highlight_24() -> Result<()> {       // issue 24
        let mathml_str = "<math display='block' id='id-0'>
            <mrow id='id-1'>
                <mn id='id-2'>4</mn>
                <mo id='id-3'>&#x2062;</mo>
                <mi id='id-4'>a</mi>
                <mo id='id-5'>&#x2062;</mo>
                <mi id='id-6'>c</mi>
            </mrow>
        </math>";
        load_expression(mathml_str);
        set_preference("BrailleCode", "UEB").unwrap();
        set_preference("BrailleNavHighlight", "All").unwrap();

        // (node to highlight, expected braille with that node highlighted, expected braille span of the node)
        let cases: [(&str, &str, (usize, usize)); 2] = [
            ("id-2", "⣼⣙⠰⠁⠉", (0,2)),
            ("id-4", "⠼⠙⣰⣁⠉", (2,4)),
        ];
        for (id, expected_braille, expected_span) in cases {
            let braille = get_braille(id)?;
            assert_eq!(expected_braille, braille);
            set_navigation_node(id, 0)?;
            assert_eq!( get_braille_position()?, expected_span);
        }
        return Ok( () );
    }

    #[test]
    // This test probably should be repeated for each braille code and be taken out of here
    fn find_mathml_from_braille() -> Result<()> {
        let mathml_str = "<math id='id-0'>
        <mrow data-changed='added' id='id-1'>
          <mi id='id-2'>x</mi>
          <mo id='id-3'>=</mo>
          <mfrac id='id-4'>
            <mrow id='id-5'>
              <mrow data-changed='added' id='id-6'>
                <mo id='id-7'>-</mo>
                <mi id='id-8'>b</mi>
              </mrow>
              <mo id='id-9'>±</mo>
              <msqrt id='id-10'>
                <mrow data-changed='added' id='id-11'>
                  <msup id='id-12'>
                    <mi id='id-13'>b</mi>
                    <mn id='id-14'>2</mn>
                  </msup>
                  <mo id='id-15'>-</mo>
                  <mrow data-changed='added' id='id-16'>
                    <mn id='id-17'>4</mn>
                    <mo data-changed='added' id='id-18'>&#x2062;</mo>
                    <mi id='id-19'>a</mi>
                    <mo data-changed='added' id='id-20'>&#x2062;</mo>
                    <mi id='id-21'>c</mi>
                  </mrow>
                </mrow>
              </msqrt>
            </mrow>
            <mrow id='id-22'>
              <mn id='id-23'>2</mn>
              <mo data-changed='added' id='id-24'>&#x2062;</mo>
              <mi id='id-25'>a</mi>
            </mrow>
          </mfrac>
        </mrow>
       </math>";
        load_expression(mathml_str);
        set_preference("BrailleNavHighlight", "Off").unwrap();

        set_preference("BrailleCode", "Nemeth").unwrap();
        let _braille = get_braille("")?;
        debug!("\n*** Testing Nemeth ***");
        check_braille_positions("Nemeth",
            &[2, 3, 3, 3, 3, 4, 7, 8, 9, 9,   10, 13, 12, 14, 12, 15, 17, 19, 21, 10,   4, 23, 25, 4])?;

        set_preference("BrailleCode", "UEB").unwrap();
        let _braille = get_braille("")?;
        debug!("\n\n*** Testing UEB ***");
        check_braille_positions("UEB",
            &[0, 0, 0, 2, 3, 3, 3, 3, 4, 7,   7, 8, 9, 9, 10, 13, 12, 14, 14, 15,   15, 17, 17, 19, 19, 21, 10, 4, 4, 23,   23, 25, 25, 4, 0, 0])?;

        set_preference("BrailleCode", "CMU").unwrap();
        let braille = get_braille("")?;
        debug!("\n\n*** Testing CMU ***");
        debug!("Braille: {}", braille);
        check_braille_positions("CMU",
            &[2, 3, 5, 7, 8, 9, 9, 9, 10, 10,   11, 13, 12, 14, 14, 15, 17, 17, 19, 19,   21, 11, 5, 4, 22, 23, 23, 25, 25, 22,])?;
        return Ok( () );
    }

    #[test]
    #[allow(non_snake_case)]
    fn test_UEB_start_mode() -> Result<()> {
        let mathml_str = "<math><msup><mi>x</mi><mi>n</mi></msup></math>";
        load_expression(mathml_str);
        set_preference("BrailleCode", "UEB").unwrap();
        // the same expression brailles differently depending on the mode UEB starts in
        for (start_mode, expected_braille) in [("Grade2", "⠭⠰⠔⠝"), ("Grade1", "⠭⠔⠝")] {
            set_preference("UEB_START_MODE", start_mode).unwrap();
            let braille = get_braille("")?;
            assert_eq!(expected_braille, braille, "{}", start_mode);
        }
        return Ok( () );
    }
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/canonicalize.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/canonicalize.rs.html index 9682013f..329675c2 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/canonicalize.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/canonicalize.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/canonicalize.rs
Line
Count
Source
1
//! Converts the MathML to some sort of canonical MathML.
2
//!
3
//! Some changes made:
4
//! * extra whitespace at the start/end of tokens is trimmed.
5
//! * "equivalent" characters are converted to a chosen character.
6
//! * known "bad" MathML is cleaned up (this will likely be an ongoing effort)
7
//! * mrows are added based on operator priorities from the MathML Operator Dictionary
8
#![allow(clippy::needless_return)]
9
use crate::errors::*;
10
use std::rc::Rc;
11
use std::cell::RefCell;
12
use sxd_document::dom::{Element, Document, ChildOfElement, Attribute};
13
use sxd_document::QName;
14
use phf::{phf_map, phf_set};
15
use crate::xpath_functions::{IsBracketed, is_leaf, IsNode};
16
use std::ptr::eq as ptr_eq;
17
use crate::pretty_print::*;
18
use regex::Regex;
19
use std::fmt;
20
use crate::chemistry::*;
21
use unicode_script::Script;
22
use roman_numerals_rs::RomanNumeral;
23
use std::sync::LazyLock;
24
use log::{debug};
25
use bitflags::bitflags;
26
27
// FIX: DECIMAL_SEPARATOR should be set by env, or maybe language
28
const DECIMAL_SEPARATOR: &str = ".";
29
pub const CHANGED_ATTR: &str = "data-changed";
30
pub const ADDED_ATTR_VALUE: &str = "added";
31
pub const INTENT_ATTR: &str = "intent";
32
pub const MATHML_FROM_NAME_ATTR: &str = "data-from-mathml";
33
const MFENCED_ATTR_VALUE: &str = "from_mfenced";
34
const EMPTY_IN_2D: &str = "data-empty-in-2D";
35
const SPACE_AFTER: &str = "data-space-after";
36
const ACT_AS_OPERATOR: &str = "data-acts_as_operator";
37
// character to use instead of the text content for priority, etc.
38
pub const CHEMICAL_BOND: &str ="data-chemical-bond";
39
40
41
/// Used when mhchem is detected and we should favor postscripts rather than prescripts in constructing an mmultiscripts
42
const MHCHEM_MMULTISCRIPTS_HACK: &str = "MHCHEM_SCRIPT_HACK";
43
44
// (perfect) hash of operators built from MathML's operator dictionary
45
static OPERATORS: phf::Map<&str, OperatorInfo> = include!("operator-info.in");
46
47
48
// The set of fence operators that can be either a left or right fence (or infix). For example: "|".
49
static AMBIGUOUS_OPERATORS: phf::Set<&str> = phf_set! {
50
  "|", "∥", "\u{2016}"
51
};
52
53
// static vars used when canonicalizing
54
// lowest priority operator so it is never popped off the stack
55
static LEFT_FENCEPOST: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::LEFT_FENCE, priority: 0, next: &None };
56
57
3
static INVISIBLE_FUNCTION_APPLICATION: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2061}").unwrap());
58
3
static IMPLIED_TIMES: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2062}").unwrap());
59
2
static IMPLIED_INVISIBLE_COMMA: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2063}").unwrap());
60
3
static IMPLIED_INVISIBLE_PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2064}").unwrap());
61
62
// FIX: any other operators that should act the same (e.g, plus-minus and minus-plus)?
63
3
static PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("+").unwrap());
64
3
static MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("-").unwrap());
65
3
static PREFIX_MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| MINUS.next.as_ref().unwrap());
66
67
3
static TIMES_SIGN: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("×").unwrap());
68
69
// IMPLIED_TIMES_HIGH_PRIORITY -- used in trig functions for things like sin 2x cos 2x where want > function app priority
70
static IMPLIED_TIMES_HIGH_PRIORITY: OperatorInfo = OperatorInfo{
71
  op_type: OperatorTypes::INFIX, priority: 851, next: &None
72
};
73
// IMPLIED_SEPARATOR_HIGH_PRIORITY -- used for Geometry points like ABC
74
static IMPLIED_SEPARATOR_HIGH_PRIORITY: OperatorInfo = OperatorInfo{
75
  op_type: OperatorTypes::INFIX, priority: 901, next: &None
76
};
77
// IMPLIED_CHEMICAL_BOND -- used for implicit and explicit bonds
78
static IMPLIED_CHEMICAL_BOND: OperatorInfo = OperatorInfo{
79
  op_type: OperatorTypes::INFIX, priority: 905, next: &None
80
};
81
static IMPLIED_PLUS_SLASH_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ // (linear) mixed fraction 2 3/4
82
  op_type: OperatorTypes::INFIX, priority: 881, next: &None
83
};
84
85
// Useful static defaults to have available if there is no character match
86
static DEFAULT_OPERATOR_INFO_PREFIX: OperatorInfo = OperatorInfo{
87
  op_type: OperatorTypes::PREFIX, priority: 260, next: &None
88
};
89
static DEFAULT_OPERATOR_INFO_INFIX: OperatorInfo = OperatorInfo{
90
  op_type: OperatorTypes::INFIX, priority: 260, next:& None
91
};
92
static DEFAULT_OPERATOR_INFO_POSTFIX: OperatorInfo = OperatorInfo{
93
  op_type: OperatorTypes::POSTFIX, priority: 260, next: &None
94
};
95
96
// avoids having to use Option<OperatorInfo> in some cases
97
static ILLEGAL_OPERATOR_INFO: OperatorInfo = OperatorInfo{
98
  op_type: OperatorTypes::INFIX, priority: 999, next: &None
99
};
100
101
// used to tell if an operator is a relational operator
102
1
static EQUAL_PRIORITY: LazyLock<usize> = LazyLock::new(|| OPERATORS.get("=").unwrap().priority);
103
104
// useful for detecting whitespace
105
3
static IS_WHITESPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s+$").unwrap());    // only Unicode whitespace
106
107
// Operators are either PREFIX, INFIX, or POSTFIX, but can also have other properties such as LEFT_FENCE
108
bitflags! {
109
  #[derive(Clone, Debug, Copy, PartialEq, Eq)]
110
  struct OperatorTypes: u32 {
111
    const NONE    = 0x0;
112
    const PREFIX  = 0x1;
113
    const INFIX   = 0x2;
114
    const POSTFIX = 0x4;
115
    const FENCE   = 0x8;
116
    const LEFT_FENCE= 0x9;
117
    const RIGHT_FENCE=0xc;
118
    const UNSPECIFIED=0xf;    // 'and-ing will match anything
119
  }
120
}
121
// OperatorInfo is a key structure for parsing.
122
// The OperatorInfo is this program's representation of MathML's Operator Dictionary.
123
// The OperatorTypes say how the operator can group (can be overridden with @form="..." on an element).
124
//   Basically, it says the operator can be at the start, middle, or end of an mrow.
125
// The priority field gives the relationships between operators so that lower priority operators are towards the root of the tree.
126
//   E.g.,  '=' is lower priority than (infix) '+', which in turn is lower priority than multiplication.
127
// The operator info is a linked list because some operators (not many) have alternatives (e.g, '+' is both prefix and infix)
128
// All OperatorInfo is static info, with some special static defaults to capture when it is not listed in the operator dictionary.
129
#[derive(Clone, Debug)]
130
struct OperatorInfo {
131
  op_type: OperatorTypes,   // can be set on <mo>
132
  priority: usize,      // not settable on an element
133
  next: &'static Option<OperatorInfo>,  // can be both prefix & infix (etc) -- chain of options
134
}
135
136
// The character is separated out from the OperatorInfo as this allows the OperatorInfo to be static (can use default values)
137
#[derive(Clone, Debug)]
138
struct OperatorPair<'op> {
139
  ch: &'op str,
140
  op: &'static OperatorInfo
141
}
142
143
impl<'op> OperatorPair<'op> {
144
57.3k
  fn new() -> OperatorPair<'op> {
145
57.3k
    return OperatorPair{
146
57.3k
      ch: "illegal",          // value 'illegal' used only in debugging, if then
147
57.3k
      op: &ILLEGAL_OPERATOR_INFO,   // ILLEGAL_OPERATOR_INFO avoids using <Option>
148
57.3k
    };
149
57.3k
  }
150
}
151
152
// OperatorVersions is a convenient data structure when looking to see whether the operator should be prefix, infix, or postfix.
153
// It is only used in one place in the code, so this could maybe be eliminated and the code localized to where it is used.
154
#[derive(Debug)]
155
struct OperatorVersions {
156
  prefix: Option<&'static OperatorInfo>,
157
  infix: Option<&'static OperatorInfo>,
158
  postfix: Option<&'static OperatorInfo>,
159
}
160
161
impl OperatorVersions {
162
401
  fn new(op: &'static OperatorInfo) -> OperatorVersions {
163
401
    let mut op = op;
164
401
    let mut prefix = None;
165
401
    let mut infix = None;
166
401
    let mut postfix = None;
167
    loop {
168
1.10k
      if op.is_prefix() {
169
360
        prefix = Some( op );
170
745
      } else if op.is_infix() {
171
385
        infix = Some( op )
172
360
      } else if op.is_postfix() {
173
360
        postfix = Some( op );
174
360
      } else {
175
0
        panic!("OperatorVersions::new: operator is not prefix, infix, or postfix")
176
      }
177
      //let another_op = op.next;
178
1.10k
      match &op.next {
179
401
        None => break,
180
704
        Some(alt_op) => op = alt_op,
181
      }
182
    }
183
401
    return OperatorVersions{prefix, infix, postfix};
184
401
  }
185
}
186
187
188
impl OperatorInfo {
189
13.1k
  fn is_prefix(&self) -> bool {
190
13.1k
    return (self.op_type & OperatorTypes::PREFIX) != OperatorTypes::NONE;
191
13.1k
  }
192
193
805
  fn is_infix(&self) -> bool {
194
805
    return (self.op_type & OperatorTypes::INFIX) != OperatorTypes::NONE;
195
805
  }
196
197
14.2k
  fn is_postfix(&self) -> bool {
198
14.2k
    return (self.op_type & OperatorTypes::POSTFIX) != OperatorTypes::NONE;
199
14.2k
  }
200
201
13.9k
  fn is_left_fence(&self) -> bool {
202
13.9k
    return self.op_type & OperatorTypes::LEFT_FENCE == OperatorTypes::LEFT_FENCE;
203
13.9k
  }
204
205
12.9k
  fn is_right_fence(&self) -> bool {
206
12.9k
    return self.op_type & OperatorTypes::RIGHT_FENCE ==OperatorTypes::RIGHT_FENCE;
207
12.9k
  }
208
209
4.84k
  fn is_fence(&self) -> bool {
210
4.84k
    return (self.op_type & (OperatorTypes::LEFT_FENCE | OperatorTypes::RIGHT_FENCE)) != OperatorTypes::NONE;
211
4.84k
  }
212
213
21.3k
  fn is_operator_type(&self, op_type: OperatorTypes) -> bool {
214
21.3k
    return self.op_type & op_type != OperatorTypes::NONE;
215
21.3k
  }
216
217
13.5k
  fn is_plus_or_minus(&self) -> bool {
218
13.5k
    return ptr_eq(self, *PLUS) || 
ptr_eq13.0k
(
self13.0k
,
*MINUS13.0k
);
219
13.5k
  }
220
221
13.2k
  fn is_times(&self) -> bool {
222
13.2k
    return ptr_eq(self, *IMPLIED_TIMES) || 
ptr_eq13.0k
(
self13.0k
,
*TIMES_SIGN13.0k
);
223
13.2k
  }
224
225
17.7k
  fn is_nary(&self, previous_op: &OperatorInfo) -> bool {
226
17.7k
    return  ptr_eq(previous_op,self) ||
227
13.0k
        (previous_op.is_plus_or_minus() && 
self506
.
is_plus_or_minus506
()) ||
228
13.0k
        (previous_op.is_times() && 
self163
.
is_times163
());
229
17.7k
  }
230
}
231
232
// StackInfo contains all the needed information for deciding shift/reduce during parsing.
233
// The stack itself is just a Vec of StackInfo (since we only push, pop, and look at the top)
234
// There are a number of useful functions defined on StackInfo. 
235
struct StackInfo<'a, 'op>{
236
  mrow: Element<'a>,      // mrow being built
237
  op_pair: OperatorPair<'op>, // last operator placed on stack
238
  is_operand: bool,     // true if child at end of mrow is an operand (as opposed to an operator)
239
}
240
241
impl fmt::Display for StackInfo<'_, '_> {
242
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
243
0
        write!(f, "StackInfo(op={}/{}, is_operand={}, mrow({}",
244
0
        show_invisible_op_char(self.op_pair.ch), self.op_pair.op.priority, self.is_operand,
245
0
        if self.mrow.children().is_empty() {")"} else {""})?;
246
0
    for child in self.mrow.children() {
247
0
      let child = as_element(child);
248
0
      write!(f, "{}{}", name(child), if child.following_siblings().is_empty() {")"} else {","})?;
249
    }
250
0
        return Ok( () );
251
0
    }
252
}
253
254
impl<'a, 'op:'a> StackInfo<'a, 'op> {
255
10.6k
  fn new(doc: Document<'a>) -> StackInfo<'a, 'op> {
256
    // debug!("  new empty StackInfo");
257
10.6k
    let mrow = create_mathml_element(&doc, "mrow") ;
258
10.6k
    mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
259
10.6k
    return StackInfo{
260
10.6k
      mrow,
261
10.6k
      op_pair: OperatorPair{ ch: "\u{E000}", op: &LEFT_FENCEPOST },
262
10.6k
      is_operand: false,
263
10.6k
    }
264
10.6k
  }
265
266
10.9k
  fn with_op<'d>(doc: &'d Document<'a>, node: Element<'a>, op_pair: OperatorPair<'op>) -> StackInfo<'a, 'op> {
267
    // debug!("  new StackInfo with '{}' and operator {}/{}", name(node), show_invisible_op_char(op_pair.ch), op_pair.op.priority);
268
10.9k
    let mrow = create_mathml_element(doc, "mrow");
269
10.9k
    mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
270
10.9k
    mrow.append_child(node);
271
10.9k
    return StackInfo {
272
10.9k
      mrow,
273
10.9k
      op_pair,
274
10.9k
      is_operand: false,
275
10.9k
    }
276
10.9k
  }
277
278
37.2k
  fn priority(&self) -> usize {
279
37.2k
    return self.op_pair.op.priority;
280
37.2k
  }
281
282
37.1k
  fn last_child_in_mrow(&self) -> Option<Element<'a>> {
283
37.1k
    let children = self.mrow.children();
284
37.1k
    for &
child29.5k
in children.iter().rev() {
285
29.5k
      let child = as_element(child);
286
29.5k
      if let Some(
value687
) = child.attribute_value(CHANGED_ATTR)
287
687
        && value == "empty_content" {
288
0
          continue;
289
29.5k
        }
290
29.5k
      return Some(child);
291
    }
292
7.58k
    return None;
293
37.1k
  }
294
295
57.6k
  fn add_child_to_mrow(&mut self, child: Element<'a>, child_op: OperatorPair<'op>) {
296
    // debug!("  adding '{}' to mrow[{}], operator '{}/{}'",
297
    //    element_summary(child), self.mrow.children().len(), show_invisible_op_char(child_op.ch), child_op.op.priority);
298
57.6k
    self.mrow.append_child(child);
299
57.6k
    if ptr_eq(child_op.op, &ILLEGAL_OPERATOR_INFO) {
300
36.8k
      assert!(!self.is_operand);  // should not have two operands in a row (ok to add whitespace)
301
36.8k
      self.is_operand = true;
302
20.7k
    } else {
303
20.7k
      self.op_pair = child_op;
304
20.7k
      self.is_operand = false;
305
20.7k
    }
306
57.6k
  }
307
308
18.4k
  fn remove_last_operand_from_mrow(&mut self) -> Element<'a> {
309
18.4k
    let children = self.mrow.children();
310
18.4k
    assert!( !children.is_empty() );
311
18.4k
    assert!( self.is_operand || 
children.len()==163
); // could be operator that is forced to be interpreted as operand -- eg, bad input like "x+("
312
18.4k
    self.is_operand = false;
313
18.4k
    let last_operand = as_element(children[children.len()-1]);
314
    // debug!("  Removing last element '{}' from mrow[{}]",element_summary(last_operand), children.len());
315
18.4k
    last_operand.remove_from_parent();
316
18.4k
    return last_operand;
317
18.4k
  }
318
319
}
320
321
322
117k
pub fn create_mathml_element<'a>(doc: &Document<'a>, name: &str) -> Element<'a> {
323
117k
  return doc.create_element(sxd_document::QName::with_namespace_uri(
324
117k
    Some("http://www.w3.org/1998/Math/MathML"),
325
117k
    name));
326
117k
}
327
328
4.84k
pub fn is_fence(mo: Element) -> bool {
329
4.84k
  return CanonicalizeContext::find_operator(None, mo, None, None, None).is_fence();
330
4.84k
}
331
332
664
pub fn is_relational_op(mo: Element) -> bool {
333
664
  return CanonicalizeContext::find_operator(None, mo, None, None, None).priority == *EQUAL_PRIORITY;
334
664
}
335
336
113k
/// Renames `element` to `new_name`, keeping it in the MathML namespace.
pub fn set_mathml_name(element: Element, new_name: &str) {
  let mathml_namespace = Some("http://www.w3.org/1998/Math/MathML");
  let qualified_name = QName::with_namespace_uri(mathml_namespace, new_name);
  element.set_name(qualified_name);
}
339
340
/// Replace 'mathml' in the parent (must exist since this only happens for leaves) with the 'replacements' (new children).
341
/// This handles adding mrows if needed.
342
/// 
343
/// Returns first replacement
344
2.47k
pub fn replace_children<'a>(mathml: Element<'a>, replacements: Vec<Element<'a>>) -> Element<'a> {
345
2.47k
  let parent = get_parent(mathml);
346
2.47k
  let parent_name = name(parent);
347
  // debug!("\nreplace_children: mathml\n{}", mml_to_string(mathml));
348
  // debug!("replace_children: parent before replace\n{}", mml_to_string(parent));
349
  // debug!("{} replacements:\n{}", replacements.len(), replacements.iter().map(|e| mml_to_string(e)).collect::<Vec<String>>().join("\n"));
350
2.47k
  if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(parent_name) ||
351
2.44k
     parent_name == "mmultiscripts" {     // each child acts like the parent has a fixed number of children
352
    // gather up the preceding/following siblings before mucking with the tree structure (mrow.append_children below)
353
32
    let mut new_children = mathml.preceding_siblings();
354
32
    let mut following_siblings = mathml.following_siblings();
355
356
    // debug!("\nreplace_children: mathml\n{}", mml_to_string(mathml));
357
    // debug!("replace_children: parent before replace\n{}", mml_to_string(parent));
358
    // wrap an mrow around the replacements and then replace 'mathml' with that
359
32
    let mrow = create_mathml_element(&mathml.document(), "mrow");
360
32
    add_attrs(mrow, &replacements[0].attributes());
361
32
    mrow.append_children(replacements);
362
32
    new_children.push(ChildOfElement::Element(mrow));
363
32
    new_children.append(&mut following_siblings);
364
32
    parent.replace_children(new_children);
365
    // debug!("replace_children parent after: parent\n{}", mml_to_string(parent));
366
    // debug!("replace_children: returned mrow\n{}", mml_to_string(mrow));
367
32
    return mrow;
368
  } else {
369
    // replace the children of the parent with 'replacements' inserted in place of 'mathml'
370
2.44k
    let mut new_children = mathml.preceding_siblings();
371
2.44k
    let i_first_new_child = new_children.len();
372
6.54k
    let 
mut replacements2.44k
=
replacements.iter()2.44k
.
map2.44k
(|&el| ChildOfElement::Element(el)).
collect2.44k
::<Vec<ChildOfElement>>();
373
2.44k
    new_children.append(&mut replacements);
374
2.44k
    new_children.append(&mut mathml.following_siblings());
375
2.44k
    parent.replace_children(new_children);
376
    // debug!("replace_children: (will return child[{}]) parent after replace\n{}", i_first_new_child, mml_to_string(parent));
377
2.44k
    return as_element(parent.children()[i_first_new_child]);
378
  }
379
2.47k
}
380
381
// returns the presentation element of a "semantics" element
382
22
pub fn get_presentation_element(element: Element) -> (usize, Element) {
383
22
  assert_eq!(name(element), "semantics");
384
22
  let children = element.children();
385
22
  if let Some( (
i20
,
child20
) ) = children.iter().enumerate().find(|&(_, &child)|
386
48
      if let Some(
encoding46
) = as_element(child).attribute_value("encoding") {
387
46
        encoding == "MathML-Presentation"
388
      } else {
389
2
        false
390
48
      })
391
  {
392
20
    let presentation_annotation = as_element(*child);
393
    // debug!("get_presentation_element:\n{}", mml_to_string(presentation_annotation));
394
20
    assert_eq!(presentation_annotation.children().len(), 1);
395
20
    return (i, as_element(presentation_annotation.children()[0]));
396
  } else {
397
2
    return (0, as_element(children[0]));
398
  }
399
22
}
400
401
/// Canonicalize does several things:
402
/// 1. cleans up the tree so all extra white space is removed (should only have element and text nodes)
403
/// 2. normalize the characters
404
/// 3. clean up "bad" MathML based on known output from some converters (TODO: still a work in progress)
405
/// 4. the tree is "parsed" based on the mo (priority)/mi/mn's in an mrow
406
///    *  this adds mrows and some invisible operators (implied times, function app, ...)
407
///    * extra mrows are removed
408
///    * implicit mrows are turned into explicit mrows (e.g, there will be a single child of 'math')
409
///
410
/// Canonicalize is pretty conservative in adding new mrows and won't do it if:
411
/// * there is an intent attr
412
/// * if the mrow starts and ends with a fence (e.g, French open interval "]0,1[")
413
///
414
/// An mrow is never deleted unless it is redundant.
415
/// 
416
/// Whitespace handling:
417
/// Whitespace complicates parsing and also pattern matching (e.g., is it a mixed number which tests for a number preceding a fraction)
418
/// The first attempt which mostly worked was to shove whitespace into adjacent mi/mn/mtext. That has a problem with distinguish different uses for whitespace
419
/// The second attempt was to leave it in the parse and make it an mo when appropriate, but there were some cases where it should be prefix and wasn't caught
420
/// The third attempt (and the current one) is to make it an attribute on adjacent elements.
421
///   This preserves the data-width attr (with new name) added in the second attempt that helps resolve whether something is tweaking, a real space, or an omission.
422
///   It adds data-previous-space-width/data-following-space-width with values to indicate with the space was on the left or right (typically it placed on the previous token because that's easier)
423
5.06k
pub fn canonicalize(mathml: Element) -> Result<Element> {
424
5.06k
  let context = CanonicalizeContext::new();
425
5.06k
  return context.canonicalize(mathml);
426
5.06k
}
427
428
/// Three-valued answer: yes, possibly, or no.
// NOTE(review): presumably the answer to "is this a function name?" -- the uses are outside this part of the file; confirm.
#[derive(Debug, PartialEq)]
enum FunctionNameCertainty {
  /// definitely
  True,
  /// can't tell
  Maybe,
  /// definitely not
  False,
}
434
435
436
static ELEMENTS_WITH_ONE_CHILD: phf::Set<&str> = phf_set! {
437
  "math", "msqrt", "merror", "mpadded", "mphantom", "menclose", "mtd", "mscarry"
438
};
439
440
static ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN: phf::Set<&str> = phf_set! {
441
  "mfrac", "mroot", "msub", "msup", "msubsup","munder", "mover", "munderover"
442
};
443
444
static EMPTY_ELEMENTS: phf::Set<&str> = phf_set! {
445
  "mspace", "none", "mprescripts", "mglyph", "malignmark", "maligngroup", "msline",
446
};
447
448
// turns out Roman Numerals tests aren't needed, but we do want to block VII from being a chemical match
449
// two cases because we don't want to have a match for 'Cl', etc.
450
3
static UPPER_ROMAN_NUMERAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s*$").unwrap());
451
3
static LOWER_ROMAN_NUMERAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*^m{0,3}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s*$").unwrap());
452
453
454
struct CanonicalizeContextPatterns {
455
  decimal_separator: Regex,
456
  block_separator: Regex,
457
  digit_only_decimal_number: Regex,
458
  block_3digit_pattern: Regex,
459
  block_3_5digit_pattern: Regex,
460
  block_4digit_hex_pattern: Regex,
461
  block_1digit_pattern: Regex,    // used when generator puts each digit into a single mn
462
}
463
464
impl CanonicalizeContextPatterns {
465
4.10k
  fn new(block_separator_pref: &str, decimal_separator_pref: &str) -> CanonicalizeContextPatterns {
466
4.10k
    let block_separator = Regex::new(&format!("[{}]", regex::escape(block_separator_pref))).unwrap();
467
4.10k
    let decimal_separator = Regex::new(&format!("[{}]", regex::escape(decimal_separator_pref))).unwrap();
468
    // allows just "." and also matches an empty string, but those are ruled out elsewhere
469
4.10k
    let digit_only_decimal_number = Regex::new(&format!(r"^\d*{}?\d*$", regex::escape(decimal_separator_pref))).unwrap();
470
4.10k
    let block_3digit_pattern = get_number_pattern_regex(block_separator_pref, decimal_separator_pref, 3, 3);
471
4.10k
    let block_3_5digit_pattern = get_number_pattern_regex(block_separator_pref, decimal_separator_pref, 3, 5);
472
    // Note: on en.wikipedia.org/wiki/Decimal_separator, show '3.14159 26535 89793 23846'
473
4.10k
    let block_4digit_hex_pattern =  Regex::new(r"^[0-9a-fA-F]{4}([ \u00A0\u202F][0-9a-fA-F]{4})*$").unwrap();
474
4.10k
    let block_1digit_pattern =  Regex::new(r"^((\d(\uFFFF\d)?)(\d([, \u00A0\u202F]\d){2})*)?([\.](\d(\uFFFF\d)*)?)?$").unwrap();
475
476
4.10k
    return CanonicalizeContextPatterns {
477
4.10k
      block_separator,
478
4.10k
      decimal_separator,
479
4.10k
      digit_only_decimal_number,
480
4.10k
      block_3digit_pattern,
481
4.10k
      block_3_5digit_pattern,
482
4.10k
      block_4digit_hex_pattern,
483
4.10k
      block_1digit_pattern
484
4.10k
    };
485
486
    
487
8.21k
    fn get_number_pattern_regex(block_separator: &str, decimal_separator: &str, n_sep_before: usize, n_sep_after: usize) -> Regex {
488
      // the following is a generalization of a regex like ^(\d*|\d{1,3}([, ]?\d{3})*)(\.(\d*|(\d{3}[, ])*\d{1,3}))?$
489
      // that matches something like '1 234.567 8' and '1,234.', but not '1,234.12,34
490
8.21k
      return Regex::new(&format!(r"^(\d*|\d{{1,{}}}([{}]?\d{{{}}})*)([{}](\d*|(\d{{{}}}[{}])*\d{{1,{}}}))?$",
491
8.21k
              n_sep_before, regex::escape(block_separator), n_sep_before, regex::escape(decimal_separator),
492
8.21k
              n_sep_after, regex::escape(block_separator), n_sep_after) ).unwrap();
493
8.21k
    }
494
4.10k
  }
495
}
496
497
/// Profiling showed that creating new contexts was very time consuming because creating the RegExs is very expensive
498
/// Profiling set_mathml (which does the canonicalization) spends 65% of the time in Regex::new, of which half of it is spent in this initialization.
499
struct CanonicalizeContextPatternsCache {
500
  block_separator_pref: String,
501
  decimal_separator_pref: String,
502
  patterns: Rc<CanonicalizeContextPatterns>,
503
}
504
505
thread_local!{
506
    static PATTERN_CACHE: RefCell<CanonicalizeContextPatternsCache> = RefCell::new(CanonicalizeContextPatternsCache::new());
507
}
508
509
impl CanonicalizeContextPatternsCache {
510
4.10k
  fn new() -> CanonicalizeContextPatternsCache {
511
4.10k
    let pref_manager = crate::prefs::PreferenceManager::get();
512
4.10k
    let pref_manager = pref_manager.borrow();
513
4.10k
    let block_separator_pref = pref_manager.pref_to_string("BlockSeparators");
514
4.10k
    let decimal_separator_pref = pref_manager.pref_to_string("DecimalSeparators");
515
4.10k
    return CanonicalizeContextPatternsCache {
516
4.10k
      patterns: Rc::new( CanonicalizeContextPatterns::new(&block_separator_pref, &decimal_separator_pref) ),
517
4.10k
      block_separator_pref,
518
4.10k
      decimal_separator_pref
519
4.10k
    }
520
4.10k
  }
521
522
5.06k
  fn get() -> Rc<CanonicalizeContextPatterns> {
523
5.06k
    return PATTERN_CACHE.with( |cache| {
524
5.06k
      let pref_manager_rc = crate::prefs::PreferenceManager::get();
525
5.06k
      let pref_manager = pref_manager_rc.borrow();
526
5.06k
      let block_separator_pref = pref_manager.pref_to_string("BlockSeparators");
527
5.06k
      let decimal_separator_pref = pref_manager.pref_to_string("DecimalSeparators");
528
529
5.06k
      let mut cache = cache.borrow_mut();
530
5.06k
      if block_separator_pref != cache.block_separator_pref || decimal_separator_pref != cache.decimal_separator_pref {
531
0
        // update the cache
532
0
        cache.patterns = Rc::new( CanonicalizeContextPatterns::new(&block_separator_pref, &decimal_separator_pref) );
533
0
        cache.block_separator_pref = block_separator_pref;
534
0
        cache.decimal_separator_pref = decimal_separator_pref;
535
5.06k
      }
536
5.06k
      return cache.patterns.clone();
537
5.06k
    })
538
5.06k
  }
539
}
540
541
struct CanonicalizeContext {
542
  patterns: Rc<CanonicalizeContextPatterns>,
543
}
544
545
546
impl CanonicalizeContext {
547
5.06k
  fn new() -> CanonicalizeContext {
548
5.06k
    return CanonicalizeContext {
549
5.06k
      patterns: CanonicalizeContextPatternsCache::get(),
550
5.06k
    };
551
5.06k
  }
552
553
5.06k
  fn canonicalize<'a>(&self, mut mathml: Element<'a>) -> Result<Element<'a>> {
554
    // debug!("MathML before canonicalize:\n{}", mml_to_string(mathml));
555
  
556
5.06k
    if name(mathml) != "math" {
557
0
      // debug!("Didn't start with <math> element -- attempting repair");
558
0
      let math_element = create_mathml_element(&mathml.document(), "math");
559
0
      math_element.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
560
0
      math_element.append_child(mathml);
561
0
      let root = math_element.document().root();
562
0
      root.clear_children();
563
0
      root.append_child(math_element);
564
0
      mathml = root.children()[0].element().unwrap();
565
5.06k
    }
566
5.06k
    CanonicalizeContext::assure_mathml(mathml, 0)
?4
;
567
5.05k
    let mathml = self.clean_mathml(mathml).unwrap(); // 'math' is never removed
568
5.05k
    self.assure_nary_tag_has_one_child(mathml);
569
    // debug!("Not chemistry -- retry:\n{}", mml_to_string(mathml));
570
5.05k
    let mut converted_mathml = self.canonicalize_mrows(mathml)
571
5.05k
        .with_context(|| 
format!0
("while processing\n{}",
mml_to_string0
(
mathml0
)))
?0
;
572
    // debug!("canonicalize before canonicalize_mrows:\n{}", mml_to_string(converted_mathml));
573
5.05k
    if !crate::chemistry::scan_and_mark_chemistry(converted_mathml) {
574
869
      self.assure_nary_tag_has_one_child(converted_mathml);
575
869
      converted_mathml = self.canonicalize_mrows(mathml)
576
869
        .with_context(|| 
format!0
("while processing\n{}",
mml_to_string0
(
mathml0
)))
?0
;
577
4.18k
    }
578
5.05k
    debug!("\nMathML after canonicalize:\n{}", 
mml_to_string0
(
converted_mathml0
));
579
5.05k
    return Ok(converted_mathml);
580
5.06k
  }
581
    
582
  /// Make sure there is exactly one child
583
19.1k
  fn assure_nary_tag_has_one_child(&self, mathml: Element) {
584
19.1k
    let children = mathml.children();
585
19.1k
    if !ELEMENTS_WITH_ONE_CHILD.contains(name(mathml)) {
586
6.43k
      return;
587
12.7k
    }
588
589
12.7k
    if children.is_empty() {
590
3
      // make sure there is content
591
3
      let child = CanonicalizeContext::create_empty_element(&mathml.document());
592
3
      mathml.append_child(child);
593
12.7k
    } else if children.len() > 1 {
594
2.34k
      // wrap the children in an mrow
595
2.34k
      let mrow = create_mathml_element(&mathml.document(), "mrow");
596
2.34k
      mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
597
2.34k
      mrow.append_children(children);
598
2.34k
      mathml.replace_children(vec![ChildOfElement::Element(mrow)]);
599
10.3k
    }
600
19.1k
  }
601
602
  /// Return an error if some element is not MathML (only look at first child of <semantics>) or if it has the wrong number of children
603
52.9k
  fn assure_mathml(mathml: Element, depth: usize) -> Result<()> {
604
52.9k
    if depth > crate::interface::MAX_DEPTH {
605
1
      bail!("MathML is too deeply nested to process");
606
52.9k
    }
607
52.9k
    let n_children = mathml.children().len();
608
52.9k
    let element_name = name(mathml);
609
52.9k
    if is_leaf(mathml) {
610
33.1k
      if EMPTY_ELEMENTS.contains(element_name) {
611
464
        if n_children != 0 {
612
0
          bail!("{} should only have one child:\n{}", element_name, mml_to_string(mathml));
613
464
        }
614
32.7k
      } else if element_name == "annotation" {
615
0
        bail!("'annotation' element is not child of 'semantics' element");
616
32.7k
      } else if (n_children == 1 && 
mathml.children()[0].text()32.6k
.
is_some32.6k
()) ||
n_children == 018
{ // allow empty children such as mtext
617
32.7k
        return Ok( () );
618
      } else {
619
0
        bail!("Not a valid MathML leaf element:\n{}", mml_to_string(mathml));
620
      };
621
19.7k
    }
622
623
20.2k
    if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(element_name) {
624
3.90k
      match element_name {
625
3.90k
        "munderover" | 
"msubsup"3.84k
=> if
n_children != 3277
{
626
0
          bail!("{} should have 3 children:\n{}", element_name, mml_to_string(mathml));
627
277
        },
628
3.62k
        _ => if n_children != 2 {
629
0
          bail!("{} should have 2 children:\n{}", element_name, mml_to_string(mathml));
630
3.62k
        },
631
      }
632
16.3k
    } else if 
matches!2.18k
(element_name, "mtd" |
"mtr"14.8k
|
"mlabeledtr"14.1k
) {
633
2.18k
      let parent_name = name(get_parent(mathml));
634
2.18k
      if (element_name == "mtr" || 
element_name == "mlabeledtr"1.47k
) &&
parent_name != "mtable"722
{
635
0
        bail!("Illegal MathML: {} is not a child of mtable. Parent is {}", element_name, mml_to_string(get_parent(mathml)));
636
2.18k
      } else if element_name == "mtd" && !(
parent_name == "mtr"1.45k
||
parent_name == "mlabeledtr"57
) {
637
1
        bail!("Illegal MathML: mtd is not a child of {}. Parent is {}", parent_name, mml_to_string(get_parent(mathml)));
638
2.17k
      }
639
    }
640
14.1k
    else if element_name == "mmultiscripts" {
641
182
      let has_prescripts = mathml.children().iter()
642
649
          .
any182
(|&child| name(as_element(child)) == "mprescripts");
643
182
      if has_prescripts ^ (n_children.is_multiple_of(2)) {
644
1
        bail!("{} has the wrong number of children:\n{}", element_name, mml_to_string(mathml));
645
181
      }
646
13.9k
    } else if element_name == "mlongdiv" {
647
0
      if n_children < 3 {
648
0
        bail!("{} should have at least 3 children:\n{}", element_name, mml_to_string(mathml));
649
0
      }
650
13.9k
    } else if element_name == "semantics" {
651
11
      let children = mathml.children();
652
11
      if children.is_empty() {
653
0
        return Ok( () );
654
      } else {
655
11
        let (i_presentation, presentation_element) = get_presentation_element(mathml);
656
        // make sure only 'annotation' and 'annotation-xml' elements are children of the non-presentation element
657
24
        for (i, child) in 
children.iter()11
.
enumerate11
() {
658
24
          if i != i_presentation {
659
13
            let child = as_element(*child);
660
13
            if name(child)!="annotation" && 
name(child)!="annotation-xml"1
{
661
0
              bail!("Illegal MathML: {} is child of 'semantic'", name(child));
662
13
            }
663
11
          }
664
        }
665
11
        return CanonicalizeContext::assure_mathml(presentation_element, depth + 1);
666
      }
667
13.9k
    } else if !IsNode::is_mathml(mathml) {
668
1
      if element_name == "annotation-xml" {
669
0
        bail!("'annotation-xml' element is not child of 'semantics' element");
670
      } else {
671
1
        bail!("'{}' is not a valid MathML element", element_name);
672
      }
673
13.9k
    }
674
675
    // valid MathML element and not a leaf -- check the children
676
47.8k
    for child in 
mathml20.2k
.
children20.2k
() {
677
47.8k
      CanonicalizeContext::assure_mathml( as_element(child), depth + 1)
?520
;
678
    }
679
19.6k
    return Ok( () );
680
52.9k
  }
681
682
283
  fn make_empty_element(mathml: Element) -> Element {
683
283
    set_mathml_name(mathml, "mtext");
684
283
    mathml.clear_children();
685
283
    mathml.set_text("\u{00A0}");
686
283
    mathml.set_attribute_value("data-changed", "empty_content");
687
283
    mathml.set_attribute_value("data-width", "0");
688
283
    return mathml;
689
283
  }
690
  
691
24
  fn create_empty_element<'a>(doc: &Document<'a>) -> Element<'a> {
692
24
    let mtext = create_mathml_element(doc, "mtext");
693
24
    mtext.set_text("\u{00A0}");
694
24
    mtext.set_attribute_value("data-added", "missing-content");
695
24
    mtext.set_attribute_value("data-width", "0");
696
24
    return mtext;
697
24
  }
698
  
699
11.5k
  fn is_empty_element(el: Element) -> bool {
700
11.5k
    return (is_leaf(el) && 
as_text(el).trim()7.55k
.
is_empty7.55k
()) ||
701
11.0k
         (name(el) == "mrow" && 
el.children()1.33k
.
is_empty1.33k
() &&
el.attribute(INTENT_ATTR)0
.
is_none0
());
702
11.5k
  }
703
704
705
  // this should only be called for 2D elements
706
4.48k
  fn mark_empty_content(two_d_element: Element) {
707
7.32k
    for child in 
two_d_element4.48k
.
children4.48k
() {
708
7.32k
      let child = as_element(child);
709
7.32k
      if CanonicalizeContext::is_empty_element(child) {
710
20
        child.set_attribute_value(EMPTY_IN_2D, "true");
711
7.30k
      }
712
    }
713
4.48k
  }
714
715
  /// Turn leaf into an 'mn' and set attributes appropriately
716
34
  fn make_roman_numeral(leaf: Element) {
717
34
    assert!(is_leaf(leaf));
718
34
    set_mathml_name(leaf, "mn");
719
34
    leaf.set_attribute_value("data-roman-numeral", "true");  // mark for easy detection
720
34
    let as_number = match as_text(leaf).parse::<RomanNumeral>() {
721
34
      Ok(roman) => roman.as_u16().to_string(),
722
0
      Err(_) => as_text(leaf).to_string(),
723
    };
724
34
    leaf.set_attribute_value("data-number", &as_number);
725
34
  }
726
727
  /// most of the time it is ok to merge the mrow with its singleton child, but there are some exceptions:
728
  ///   mrow has 'intent' -- this might reference the child and you aren't allowed to self reference
729
2.82k
  fn is_ok_to_merge_mrow_child(mrow: Element) -> bool {
730
2.82k
    assert_eq!(name(mrow), "mrow");
731
2.82k
    assert!(mrow.children().len() == 1);
732
2.82k
    return mrow.attribute(INTENT_ATTR).is_none();   // could check if child is referenced, but that's a chunk of code
733
2.82k
  }
734
735
  /// This function does some cleanup of MathML (mostly fixing bad MathML)
736
  /// Unlike the main canonicalization routine, significant tree changes happen here
737
  /// Changes to "good" MathML:
738
  /// 1. mfenced -> mrow, a => mrow
739
  /// 2. mspace and mtext with only whitespace are canonicalized to a non-breaking space and merged in with 
740
  ///    an adjacent non-mo element unless in a required element position (need to keep for braille)
741
  /// 
742
  /// Note: mspace that is potentially part of a number that was split apart is merged into a number as a single space char
743
  /// 
744
  /// mstyle, mpadded, and mphantom, malignmark, maligngroup are removed (but children might be kept)
745
  /// 
746
  /// Significant changes are made cleaning up empty bases of scripts, looking for chemistry, merging numbers with commas,
747
  ///   "arg trig" functions, pseudo scripts, and others
748
  /// 
749
  /// Returns 'None' if the element should not be in the tree.
750
52.3k
  fn clean_mathml<'a>(&self, mathml: Element<'a>) -> Option<Element<'a>> {
751
    // Note: this works bottom-up (clean the children first, then this element)
752
3
    static IS_PRIME: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"['′″‴⁗]").unwrap());
753
754
    // Note: including intervening spaces in what is likely a symbol of omission preserves any notion of separate digits (e.g., "_ _ _")
755
3
    static IS_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[_\u{00A0}]+$").unwrap());
756
757
      
758
23.5k
    fn is_currency_symbol(ch: char) -> bool {
759
23.5k
      
matches!23.5k
(ch, '$' | '¢' | '€' | '£' | '₡' | '₤' | '₨' | '₩' | '₪' | '₱' | '₹' | '₺' | '₿')
760
23.5k
    }
761
762
20.0k
    fn contains_currency(s: &str) -> bool {
763
20.0k
      s.chars().any(is_currency_symbol)
764
20.0k
    }    
765
    
766
    // begin by cleaning up empty elements
767
    // debug!("clean_mathml\n{}", mml_to_string(mathml));
768
52.3k
    let element_name = name(mathml);
769
52.3k
    let parent_name = if element_name == "math" {
770
5.09k
      "math".to_string()
771
    } else {
772
47.2k
      let parent = get_parent(mathml);
773
47.2k
      name(parent).to_string()
774
    };
775
52.3k
    let parent_requires_child = ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(&parent_name) ||
776
44.0k
                      
matches!2.18k
(parent_name.as_ref(), "mtr" |
"mlabeledtr"42.6k
|
"mtable"42.5k
) ||
777
41.8k
                      parent_name == "mmultiscripts";
778
779
    // handle empty leaves -- leaving it empty causes problems with the speech rules
780
52.3k
    if is_leaf(mathml) && 
!32.8k
EMPTY_ELEMENTS32.8k
.contains(element_name) &&
as_text(mathml)32.3k
.
is_empty32.3k
() {
781
32
      return if parent_requires_child {
Some( CanonicalizeContext::make_empty_element(mathml) )4
} else {
None28
};
782
52.3k
    };
783
    
784
52.3k
    if mathml.children().is_empty() && 
!734
EMPTY_ELEMENTS734
.contains(element_name) {
785
158
      if element_name == "mrow" && 
mathml.attribute(INTENT_ATTR)143
.
is_none143
() {
786
        // if it is an empty mrow that doesn't need to be there, get rid of it. Otherwise, replace it with an mtext
787
142
        if parent_name == "mmultiscripts" && 
!mathml.preceding_siblings().is_empty()5
{
788
          // MathML Core dropped "none" in favor of <mrow/>, but MathCAT is written with <none/>
789
          // Do substitutions for the scripts, not the base
790
4
          set_mathml_name(mathml, "none");
791
4
          return Some(mathml);
792
138
        }
793
138
        if parent_requires_child {
794
14
          return Some( CanonicalizeContext::make_empty_element(mathml) );
795
        } else {
796
124
          return None;
797
        }
798
16
      } else {
799
16
        // create some content so that speech rules don't require special cases
800
16
        let mtext = CanonicalizeContext::create_empty_element(&mathml.document());
801
16
        mathml.append_child(mtext);
802
16
        // return Some(mathml);
803
16
      }
804
52.1k
    };
805
806
52.1k
    match element_name {
807
52.1k
      "mn" => {
808
9.08k
        let text = as_text(mathml);
809
9.08k
        let mut chars = text.chars();
810
9.08k
        let first_char = chars.next().unwrap();   // we have already made sure it is non-empty
811
9.08k
        if !text.trim().is_empty() && is_roman_number_match(text) {
812
2
          // people tend to set them in a non-italic font and software makes that 'mtext'
813
2
          CanonicalizeContext::make_roman_numeral(mathml);
814
9.08k
        } else if 
matches!9.08k
(first_char, '-' | '\u{2212}') {
815
5
          let doc = mathml.document();
816
5
          let mo = create_mathml_element(&doc, "mo");
817
5
          let mn = create_mathml_element(&doc, "mn");
818
5
          mo.set_text("-");
819
5
          mn.set_text(&text[first_char.len_utf8()..]);
820
5
          set_mathml_name(mathml, "mrow");
821
5
          mathml.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
822
5
          mathml.replace_children([mo,mn]);
823
9.08k
        } else if contains_currency(text) && let Some(
result3
) =
split_currency_symbol(mathml)3
{
824
3
          return Some(result);
825
9.07k
        }
826
9.08k
        if let Some((idx, last_char)) = text.char_indices().next_back() {
827
          // look for something like 12°
828
9.08k
          if is_pseudo_script_char(last_char) {
829
1
            let doc = mathml.document();
830
1
            let mn = create_mathml_element(&doc, "mn");
831
1
            let mo = create_mathml_element(&doc, "mo");
832
1
            mn.set_text(&text[..idx]);
833
1
            mo.set_text(last_char.to_string().as_str());
834
1
            set_mathml_name(mathml, "msup");
835
1
            mathml.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
836
1
            mathml.replace_children([mn, mo]);
837
9.08k
          }
838
0
        }
839
9.08k
        return Some(mathml);
840
      },
841
43.0k
      "ms" | 
"mglyph"43.0k
=> {
842
3
        return Some(mathml);
843
      },
844
43.0k
      "mi" => {
845
11.6k
        let text = as_text(mathml);
846
11.6k
        if !text.trim().is_empty() && is_roman_number_match(text) && 
is_roman_numeral_number_context3.32k
(
mathml3.32k
) {
847
          // people tend to set them in a non-italic font and software makes that 'mtext'
848
28
          CanonicalizeContext::make_roman_numeral(mathml);
849
28
          return Some(mathml);
850
11.5k
        }
851
11.5k
        if let Some(
dash1
) = canonicalize_dash(text) { // needs to be before OPERATORS.get due to "--"
852
1
          mathml.set_text(dash);
853
1
          return Some(mathml);
854
11.5k
        } else if text.contains('_') {
855
          // if left or right are an mo, leave as is. Otherwise convert to an mo.
856
6
          let preceding_siblings = mathml.preceding_siblings();
857
6
          let following_siblings = mathml.following_siblings();
858
6
          if preceding_siblings.is_empty() || following_siblings.is_empty() {
859
4
            return Some(mathml);
860
2
          }
861
2
          if name(as_element(preceding_siblings[preceding_siblings.len()-1])) != "mo" &&
862
2
             name(as_element(following_siblings[0])) != "mo" {
863
2
            set_mathml_name(mathml, "mo");
864
2
          
}0
865
2
          return Some(mathml);
866
11.5k
        } else if OPERATORS.get(text).is_some() {
867
118
          if  let Some(
intent_value88
) = mathml.attribute_value(INTENT_ATTR) {
868
            // if it is a unit, it might be seconds, minutes, feet, ... not an operator
869
88
            if intent_value.contains(":unit") {
870
88
              return Some(mathml);
871
0
            }
872
30
          }
873
30
          set_mathml_name(mathml, "mo");
874
875
          // For at least pandoc, ∇ is an 'mi' and it sometimes adds an invisible times -- remove them
876
30
          let op = OPERATORS.get(text).unwrap();
877
30
          let preceding_siblings = mathml.preceding_siblings();
878
30
          if (op.is_infix() || 
op17
.
is_postfix17
()) &&
879
16
             !preceding_siblings.is_empty() && 
CanonicalizeContext::is_invisible_char_element15
(
as_element15
(
preceding_siblings[0]15
)) {
880
0
            as_element(preceding_siblings[0]).remove_from_parent();
881
30
          }
882
30
          let following_siblings = mathml.following_siblings();
883
30
          if (op.is_infix() || 
op17
.
is_prefix17
()) &&
884
27
             !following_siblings.is_empty() && CanonicalizeContext::is_invisible_char_element(as_element(following_siblings[0])) {
885
0
            as_element(following_siblings[0]).remove_from_parent();
886
30
          }
887
30
          return Some(mathml);
888
11.4k
        } else if let Some(
result1
) = split_apart_pseudo_scripts(mathml) {
889
1
            return Some(result);
890
11.4k
        } else if let Some(
result0
) = merge_arc_trig(mathml) {
891
0
            return Some(result);
892
11.4k
        } else if IS_PRIME.is_match(text) {
893
0
          let new_text = merge_prime_text(text);
894
0
          mathml.set_text(&new_text);
895
0
          return Some(mathml);
896
11.4k
        } else if text == "..." {
897
1
          mathml.set_text("…");
898
1
          return Some(mathml);
899
11.4k
        } else if let Some(
result27
) = split_points(mathml) {
900
27
          return Some(result);
901
11.4k
        } else if let Some(
result11
) = merge_mi_sequence(mathml) {
902
11
          return Some(result);
903
        } else {
904
11.4k
          return Some(mathml);
905
        };
906
      },
907
31.4k
      "mtext" => {
908
        // debug!("before merge_arc_trig: {}", mml_to_string(mathml));
909
910
401
        if let Some(
result2
) = merge_arc_trig(mathml) {
911
2
          return Some(result);
912
399
        } else if let Some(
result11
) = split_points(mathml) {
913
11
          return Some(result);
914
388
        }
915
916
388
        let text = as_text(mathml);
917
388
        if !text.trim().is_empty() && 
is_roman_number_match317
(
text317
) &&
is_roman_numeral_number_context33
(
mathml33
) {
918
          // people tend to set them in a non-italic font and software makes that 'mtext'
919
4
          CanonicalizeContext::make_roman_numeral(mathml);
920
4
          return Some(mathml);
921
449
        } else if 
text.chars()384
.
all384
(|c| c.is_ascii_digit() ||
matches!332
(
c445
, '.' | ',' | ' ' | '\u{00A0}')) &&
922
58
                  
text.chars()52
.
any52
(|c| c.is_ascii_digit()){ // does it look like a number?
923
1
          mathml.set_name("mn");
924
1
          return Some(mathml);
925
383
        } else if contains_currency(text) && let Some(
result0
) =
split_currency_symbol(mathml)0
{
926
0
          return Some(result);
927
383
        }
928
        // common bug: trig functions, lim, etc., should be mi
929
383
        if ["…", "⋯", "∞"].contains(&text) ||
930
383
           crate::definitions::SPEECH_DEFINITIONS.with(|definitions| 
931
383
          if let Some(
hashset382
) = definitions.borrow().get_hashset("FunctionNames") {
932
382
            hashset.contains(text)
933
          } else {
934
1
            false
935
383
          }
936
        ) {
937
6
          set_mathml_name(mathml, "mi");
938
6
          return Some(mathml);
939
377
        }
940
941
        // allow non-breaking whitespace to stay -- needed by braille
942
377
        if IS_WHITESPACE.is_match(text) {
943
          // normalize to just a single non-breaking space
944
71
          mathml.set_attribute_value("data-width", &format!("{:.3}", white_space_em_width(text)));
945
71
          mathml.set_text("\u{00A0}");
946
71
          return Some(mathml);
947
306
        } else if let Some(
dash2
) = canonicalize_dash(text) {
948
2
          mathml.set_text(dash);
949
304
        } else if OPERATORS.get(text).is_some() {
950
11
          set_mathml_name(mathml, "mo");
951
11
          return Some(mathml);
952
293
        }
953
295
        return if parent_requires_child || 
!text.is_empty()220
{Some(mathml)} else {
None0
};
954
      },
955
31.0k
      "mo" => {
956
        // WIRIS editor puts non-breaking whitespace as standalone in 'mo'
957
11.2k
        let text = as_text(mathml);
958
11.2k
        if !text.is_empty() && IS_WHITESPACE.is_match(text) {
959
          // can't throw it out because it is needed by braille -- change to what it really is
960
78
          set_mathml_name(mathml, "mtext");
961
78
          mathml.set_attribute_value("data-width", &format!("{:.3}", white_space_em_width(text)));
962
78
          mathml.set_text("\u{00A0}");
963
78
          mathml.set_attribute_value(CHANGED_ATTR, "data-was-mo");
964
78
          return Some(mathml);
965
        } else {
966
11.1k
          match text {
967
11.1k
            "arc" | "arc " | "arc " /* non-breaking space */ => {
968
0
              if let Some(result) = merge_arc_trig(mathml) {
969
0
                return Some(result);
970
0
              }
971
            },
972
11.1k
            "..." => 
{0
mathml0
.set_text("…");}, // name might need to change -- checked below
973
11.1k
            ":" => {
974
94
              if is_ratio(mathml) {
975
8
                mathml.set_text("∶"); // ratio U+2236
976
86
              }
977
94
              return Some(mathml);
978
            },
979
11.0k
            "::" =>
{9
mathml9
.set_text("∷");},
980
11.0k
            "│" => 
{0
mathml0
.set_text("|");}, // ASCII vertical bar
981
11.0k
            "|" | 
"||"10.7k
=> if let Some(
result6
) =
merge_vertical_bars(mathml)305
{
982
6
              return Some(result);
983
            } else {
984
299
              return Some(mathml);
985
            },
986
10.7k
            _ => (),
987
          }
988
        }
989
990
        // common bug: trig functions, lim, etc., should be mi
991
        // same for ellipsis ("…")
992
10.7k
        return crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
993
10.7k
          if ["…", "⋯", "∞"].contains(&text) ||
994
10.7k
             definitions.borrow().get_hashset("FunctionNames").unwrap().contains(text) ||
995
10.6k
             definitions.borrow().get_hashset("GeometryShapes").unwrap().contains(text) {
996
83
            set_mathml_name(mathml, "mi");
997
83
            return Some(mathml);
998
10.6k
          }
999
10.6k
          if IS_PRIME.is_match(text) {
1000
66
            let new_text = merge_prime_text(text);
1001
66
            mathml.set_text(&new_text);
1002
66
            return Some(mathml);
1003
10.5k
          }
1004
10.5k
          if contains_currency(text) && let Some(
result9
) =
split_currency_symbol(mathml)9
{
1005
9
            return Some(result);
1006
10.5k
          }
1007
10.5k
          return Some(mathml);
1008
10.7k
        });
1009
        // note: chemistry test is done later as part of another phase of chemistry cleanup
1010
      },
1011
19.8k
      "mfenced" => {return 
self40
.
clean_mathml40
(
convert_mfenced_to_mrow40
(
mathml40
) )},
1012
19.8k
      "a" => {
1013
        // convert 'a' into 'mrow'
1014
2
        set_mathml_name(mathml, "mrow");
1015
2
        return self.clean_mathml(mathml);
1016
      }
1017
19.8k
      "mstyle" | 
"mpadded"19.7k
=> {
1018
        // Throw out mstyle and mpadded -- to do this, we need to avoid mstyle being the arg of clean_mathml
1019
        // FIX: should probably push the attrs down to the children (set in 'self')
1020
714
        merge_adjacent_similar_mstyles(mathml);
1021
714
        let children = mathml.children();
1022
714
        if children.is_empty() {
1023
0
          return if parent_requires_child {Some( CanonicalizeContext::make_empty_element(mathml) )} else {None};
1024
714
        } else if children.len() == 1 {
1025
678
          let is_from_mhchem = element_name == "mpadded" && 
is_from_mhchem_hack588
(
mathml588
);
1026
678
          if let Some(
new_mathml269
) = self.clean_mathml( as_element(children[0]) ) {
1027
            // "lift" the child up so all the links (e.g., siblings) are correct
1028
269
            mathml.replace_children(new_mathml.children());
1029
269
            set_mathml_name(mathml, name(new_mathml));
1030
269
            add_attrs(mathml, &new_mathml.attributes());
1031
269
            return Some(mathml);
1032
409
          } else if parent_requires_child {
1033
            // need a placeholder -- make it empty mtext
1034
31
            let empty = CanonicalizeContext::make_empty_element(mathml);
1035
31
            if is_from_mhchem {
1036
27
              empty.set_attribute_value(MHCHEM_MMULTISCRIPTS_HACK, "true");
1037
27
            
}4
1038
31
            return Some(empty);
1039
          } else {
1040
378
            return None;
1041
          }
1042
        } else {
1043
          // wrap the children in an mrow, but maintain tree siblings by changing mpadded/mstyle to mrow
1044
36
          set_mathml_name(mathml, "mrow");
1045
36
          mathml.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1046
36
          return self.clean_mathml(mathml);  // now it's an mrow so a different path next time
1047
        }
1048
      },
1049
19.0k
      "mphantom" | 
"malignmark"18.7k
|
"maligngroup"18.7k
=> {
1050
364
        return if parent_requires_child {
Some( CanonicalizeContext::make_empty_element(mathml) )0
} else {None};
1051
      },
1052
18.7k
      "mspace" => {
1053
        // need to hold onto space for braille
1054
224
        set_mathml_name(mathml, "mtext");
1055
224
        mathml.set_text("\u{00A0}");
1056
224
        mathml.set_attribute_value(CHANGED_ATTR, "was-mspace");
1057
1058
        // normalize width ems
1059
224
        let width = mathml.attribute_value("width").unwrap_or("0em");
1060
224
        let normalized_width = crate::xpath_functions::FontSizeGuess::em_from_value(width);
1061
224
        mathml.set_attribute_value("data-width", &normalized_width.to_string());
1062
224
        return Some(mathml);
1063
      },
1064
18.5k
      "semantics" => {
1065
        // The semantics tag, like the style tag, can mess with pattern matching.
1066
        // However, it may be the case that having the annotations could aid in determining intent, so we want to keep them.
1067
        // The compromise is to move the annotations into an attr named data-annotation[-xml]-<encoding-name>
1068
        // The attribute is put on presentation element root
1069
11
        let presentation = get_presentation_element(mathml).1;
1070
11
        let new_presentation = if let Some(presentation) = self.clean_mathml(presentation) {
1071
11
          presentation
1072
        } else {
1073
          // probably shouldn't happen, but just in case
1074
0
          CanonicalizeContext::create_empty_element(&mathml.document())
1075
        };
1076
11
        set_annotation_attrs(new_presentation, mathml);
1077
11
        return Some(new_presentation);
1078
      },
1079
      _  => {
1080
18.5k
        let children = mathml.children();
1081
18.5k
        if element_name == "mrow" {
1082
          // handle special cases of empty mrows and mrows which just one element
1083
6.04k
          if children.is_empty() && 
mathml.attribute(INTENT_ATTR)0
.
is_none0
() {
1084
0
            return if parent_requires_child {Some(mathml)} else {None};
1085
6.04k
          } else if children.len() == 1 && 
CanonicalizeContext::is_ok_to_merge_mrow_child2.58k
(
mathml2.58k
) {
1086
2.56k
            let is_from_mhchem = is_from_mhchem_hack(mathml);
1087
2.56k
            if let Some(
new_mathml1.95k
) = self.clean_mathml(as_element(children[0])) {
1088
              // "lift" the child up so all the links (e.g., siblings) are correct
1089
1.95k
              mathml.replace_children(new_mathml.children());
1090
1.95k
              set_mathml_name(mathml, name(new_mathml));
1091
1.95k
              add_attrs(mathml, &new_mathml.attributes());
1092
1.95k
              return Some(mathml);
1093
607
            } else if parent_requires_child {
1094
234
              let empty = CanonicalizeContext::make_empty_element(mathml);
1095
234
              if is_from_mhchem {
1096
142
                empty.set_attribute_value(MHCHEM_MMULTISCRIPTS_HACK, "true");
1097
142
              
}92
1098
234
              return Some(empty);
1099
            } else {
1100
373
              return None;
1101
            }
1102
3.48k
          }
1103
12.4k
        }
1104
1105
        // FIX: this should be setting children, not mathml
1106
15.9k
        let mathml =  if element_name == "mrow" ||
1107
12.4k
              (children.len() > 1 && 
ELEMENTS_WITH_ONE_CHILD7.31k
.
contains7.31k
(
element_name7.31k
)) {
1108
5.90k
          let merged = merge_dots(mathml);  // FIX -- switch to passing in children
1109
5.90k
          let merged = merge_primes(merged);
1110
5.90k
          let merged = merge_degrees_C_F(merged);
1111
5.90k
          let merged = merge_chars(merged, &IS_UNDERSCORE);
1112
5.90k
          handle_pseudo_scripts(merged)
1113
        } else {
1114
10.0k
          mathml
1115
        };
1116
1117
        // cleaning children can add or delete subsequent children, so we need to constantly update the children (and mathml)
1118
15.9k
        let mut children = mathml.children();
1119
15.9k
        let mut i = 0;
1120
1121
59.7k
        while i < children.len() {
1122
43.9k
          if let Some(child) = children[i].element() {
1123
43.9k
            match self.clean_mathml(child) {
1124
299
              None => {
1125
299
                mathml.remove_child(child);
1126
299
                // don't increment 'i' because there is one less child now and so everything shifted left
1127
299
              },
1128
43.6k
              Some(new_child) => {
1129
                // debug!("new_child (i={})\n{}", i, mml_to_string(new_child));
1130
43.6k
                let new_child_name = name(new_child);
1131
43.6k
                children = mathml.children();       // clean_mathml(child) may have changed following siblings
1132
43.6k
                children[i] = ChildOfElement::Element(new_child);
1133
43.6k
                mathml.replace_children(children);
1134
43.6k
                if new_child_name == "mi" || 
new_child_name == "mtext"31.9k
{
1135
12.5k
                  // can't do this above in 'match' because this changes the tree and
1136
12.5k
                  // lifting single element mrows messes with structure in a conflicting way
1137
12.5k
                  // Note: if clean_chemistry_leaf() made changes, they don't need cleaning because they will be "ok" mi's
1138
12.5k
                  clean_chemistry_leaf(as_element(mathml.children()[i]));
1139
12.5k
                } else {
1140
                  // If the attach call does something, children are inserted *before* child (i.e., into parent)
1141
                  // We return the new start at the expense of re-cleaning the script
1142
                  // This is needed because anything before the returned element will be lost
1143
31.0k
                  let start_of_change = attach_scripts_to_split_element(new_child);
1144
31.0k
                  if name(start_of_change) == "mrow" {
1145
3.43k
                    start_of_change.remove_attribute(MAYBE_CHEMISTRY);   // was lifted, and not set -- remove and it will be computed later
1146
27.6k
                  }
1147
                  // crate::canonicalize::assure_mathml(get_parent(start_of_change)).unwrap();    // FIX: find a recovery -- we're in deep trouble if this isn't true
1148
31.0k
                  if start_of_change != child {
1149
                    // debug!("clean_mathml: start_of_change != mathml -- mathml={}", mml_to_string(mathml));
1150
49
                    return self.clean_mathml(mathml);  // restart cleaning
1151
30.9k
                  }
1152
                }                   
1153
43.5k
                i += 1;
1154
              }
1155
            }
1156
43.8k
            children = mathml.children();           // 'children' moved above, so need new values
1157
0
          } else {
1158
0
            // bad mathml such as '<annotation-xml> </annotation-xml>' -- don't add to new_children
1159
0
            i += 1;
1160
0
          }
1161
        }
1162
1163
        // could have deleted children so only one child remains -- need to lift it
1164
15.8k
        if element_name == "mrow" && 
children.len() == 13.47k
&&
CanonicalizeContext::is_ok_to_merge_mrow_child122
(
mathml122
) {
1165
          // "lift" the child up so all the links (e.g., siblings) are correct
1166
108
          let child = as_element(children[0]);
1167
108
          mathml.replace_children(child.children());
1168
108
          set_mathml_name(mathml, name(child));
1169
108
          add_attrs(mathml, &child.attributes());
1170
108
          return Some(mathml);   // child has already been cleaned, so we can return
1171
15.7k
        }
1172
1173
15.7k
        if element_name == "mrow" || 
ELEMENTS_WITH_ONE_CHILD12.4k
.
contains12.4k
(
element_name12.4k
) {
1174
10.1k
          merge_number_blocks(self, mathml, &mut children);
1175
10.1k
          merge_whitespace(&mut children);
1176
10.1k
          merge_cross_or_dot_product_elements(&mut children);
1177
10.1k
          handle_convert_to_mmultiscripts(&mut children);
1178
10.1k
        } else if 
element_name == "msub"5.59k
||
element_name == "msup"4.81k
||
1179
3.48k
              element_name == "msubsup" || 
element_name == "mmultiscripts"3.25k
{
1180
2.52k
          if element_name != "mmultiscripts" {
1181
            // mhchem emits some cases that boil down to a completely empty script -- see test mhchem_beta_decay
1182
2.33k
            let mut is_empty_script = CanonicalizeContext::is_empty_element(as_element(children[0])) &&
1183
181
                              CanonicalizeContext::is_empty_element(as_element(children[1]));
1184
2.33k
            if element_name == "msubsup" && 
is_empty_script228
{
1185
51
              is_empty_script = CanonicalizeContext::is_empty_element(as_element(children[2]));
1186
2.28k
            }
1187
2.33k
            if is_empty_script {
1188
48
              if parent_requires_child {
1189
                // need a placeholder -- make it empty mtext
1190
0
                return Some( as_element(children[0]) ); // pick one of the empty elements
1191
              } else {
1192
48
                return None;
1193
              }
1194
2.29k
            }
1195
185
          }
1196
2.47k
          let mathml = if element_name == "mmultiscripts" {
clean_mmultiscripts185
(
mathml185
).
unwrap185
()} else {
mathml2.29k
};
1197
2.47k
          if !is_chemistry_off(mathml) {
1198
2.47k
            let likely_chemistry = likely_adorned_chem_formula(mathml);
1199
            // debug!("likely_chemistry={}, {}", likely_chemistry, mml_to_string(mathml));
1200
2.47k
            if likely_chemistry >= 0 {
1201
553
              mathml.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
1202
1.92k
            }
1203
0
          }
1204
1205
2.47k
          if element_name == "msubsup" {
1206
180
            return Some( clean_msubsup(mathml) );
1207
          } else {
1208
2.29k
            return Some(mathml);
1209
          }
1210
3.06k
        }
1211
1212
13.2k
        mathml.replace_children(children);
1213
        // debug!("clean_mathml: after loop\n{}", mml_to_string(mathml));
1214
13.2k
        if element_name == "mrow" || 
ELEMENTS_WITH_ONE_CHILD9.88k
.
contains9.88k
(
element_name9.88k
) {
1215
10.1k
          clean_chemistry_mrow(mathml);
1216
10.1k
        
}3.06k
1217
13.2k
        self.assure_nary_tag_has_one_child(mathml);
1218
13.2k
        if crate::xpath_functions::IsNode::is_2D(mathml) {
1219
4.48k
          CanonicalizeContext::mark_empty_content(mathml);
1220
8.77k
        }
1221
1222
13.2k
        return Some(mathml);       
1223
      }
1224
    }
1225
1226
    /// Maps a run of ASCII hyphens onto the dash character it stands for.
    ///
    /// * `"--"` becomes an em dash (U+2014).
    /// * `"---"` and `"----"` become a horizontal bar (U+2015).
    ///
    /// Any other text yields `None`, meaning "leave the text alone".
    fn canonicalize_dash(text: &str)  -> Option<&str> {
      match text {
        "--" => Some("—"),             // U+2014 (em dash)
        // use a regexp to catch a longer sequence?
        "---" | "----" => Some("―"),   // U+2015 (Horizontal bar)
        _ => None,
      }
    }
1236
1237
11
    fn  set_annotation_attrs(new_presentation: Element, semantics: Element) {
1238
24
      for child in 
semantics11
.
children11
() {
1239
24
        let child = as_element(child);
1240
24
        let child_name = name(child);
1241
24
        if child == new_presentation {
1242
1
          continue;
1243
23
        }
1244
23
        let attr_name = match child.attribute_value("encoding") {
1245
23
          Some(encoding_name) => format!("data-{}-{}", child_name, encoding_name.replace('/', "_slash_")),
1246
0
          None => format!("data-{child_name}"),    // probably shouldn't happen
1247
        };
1248
23
        let attr_name = attr_name.as_str();
1249
23
        if child_name == "annotation" {
1250
12
          new_presentation.set_attribute_value(attr_name, as_text(child));
1251
12
        } else {
1252
11
          new_presentation.set_attribute_value(attr_name, &mml_to_string(child));
1253
11
        }
1254
      }
1255
1256
11
    }
1257
1258
    /// Hack to try and guess if a colon should be a ratio -- this affects parsing because of different precedences
1259
    /// It also guesses on the spacing after the colon and adds a space attr if it looks like set building or function mapping notation.
1260
    /// These conditions are really not well thought out and are just a first cut -- they do cause the braille tests to pass
1261
    /// If 'intent' is given, it must be intent='ratio'
1262
    /// 2. It must be infix and there is a proportion (∷) mo as a sibling, or
1263
    /// 3. It is the only mo and has numbers on each side
1264
    /// 
1265
    /// Need to rule out field extensions "[K:F]" and trilinear coordinates "a:b:c" (Nemeth doesn't consider these to be ratios)
1266
94
    fn is_ratio(mathml: Element) -> bool {
1267
94
      assert_eq!(name(mathml), "mo");
1268
94
      let parent = get_parent(mathml);  // must exist
1269
94
      if name(parent) != "mrow" && 
name(parent) != "math"81
{
1270
0
        return false;
1271
94
      }
1272
1273
94
      if let Some(
intent_value1
) = mathml.attribute_value(INTENT_ATTR)
1274
1
        && (intent_value != "ratio" || 
!intent_value.starts_with('_')0
) {
1275
1
          return false;
1276
93
        }
1277
1278
93
      if let Some(
value0
) = mathml.attribute_value("data-mjx-texclass")
1279
0
        && value ==  "PUNCT" {
1280
0
          mathml.remove_attribute("data-mjx-texclass");
1281
0
          mathml.set_attribute_value(SPACE_AFTER, "true");  // signal to at least Nemeth rules that this is punctuation
1282
93
        }
1283
1284
93
      let preceding = mathml.preceding_siblings();
1285
93
      let following = mathml.following_siblings();
1286
93
      if preceding.is_empty() || 
following92
.
is_empty92
() {
1287
2
        return false;
1288
91
      }
1289
91
      let preceding_child = as_element( preceding[preceding.len()-1] );
1290
91
      let following_child = as_element(following[0]);
1291
91
      if preceding.len() == 1 && 
name(preceding_child) == "mn"34
&&
1292
8
         following.len() == 1 && 
name(following_child) == "mn"2
{
1293
2
        return true;
1294
89
      }
1295
      // only want one "∷"
1296
89
      let is_before = is_proportional_before_colon(preceding.iter().rev());
1297
89
      if let Some(
is_before3
) = is_before
1298
3
        && !is_before {
1299
0
          return false;
1300
89
        }
1301
89
      let is_before = is_before.is_some();   // move this to true/false (found/not found)
1302
89
      let is_after = is_proportional_before_colon(following.iter());
1303
89
      if let Some(
is_after3
) = is_after
1304
3
        && !is_after {
1305
0
          return false;
1306
89
        }
1307
89
      let is_after = is_after.is_some();   // move this to true/false (found/not found)
1308
89
      return is_before ^ is_after;
1309
1310
178
      fn is_proportional_before_colon<'a>(siblings: impl Iterator<Item = &'a ChildOfElement<'a>>) -> Option<bool> {
1311
        // unparsed, so we look at relative priorities to make sure the proportional operator is really the next operator
1312
3
        static PROPORTIONAL_PRIORITY: LazyLock<usize> = LazyLock::new(|| OPERATORS.get("∷").unwrap().priority);
1313
461
        for sibling in 
siblings178
{
1314
461
          let child = as_element(*sibling);
1315
461
          if name(child) == "mo" {
1316
203
            let text = as_text(child);
1317
203
            match text {
1318
203
              "∷" | 
"::"198
=> return
Some(true)6
, // "::" might not be canonicalized yet
1319
197
              "∶" => return 
Some(false)0
,
1320
              _ => {
1321
197
                if let Some(
op191
) = OPERATORS.get(text)
1322
191
                  && op.priority < *PROPORTIONAL_PRIORITY {
1323
109
                    return None;   // no "∷"
1324
88
                  }
1325
              },
1326
            }
1327
258
          }
1328
        }
1329
63
        return None;
1330
178
      }
1331
94
    }
1332
1333
1334
    /// Returns true if it detects that this is likely coming from mhchem:
1335
    /// v3: msub/msup/msubsup with mpadded width=0/mphantom/mi=A)
1336
    /// v4: msub/msup/msubsup with mrow/mrow/mpadded width=0/mphantom/mi=A)
1337
    /// This should be called with 'mrow' being the outer mrow
1338
3.15k
    fn is_from_mhchem_hack(mathml: Element) -> bool {
1339
3.15k
      assert!(name(mathml) == "mrow" || 
name(mathml) == "mpadded"588
);
1340
3.15k
      assert_eq!(mathml.children().len(), 1);
1341
3.15k
      let parent = get_parent(mathml);
1342
3.15k
      let parent_name = name(parent);
1343
3.15k
      if !(parent_name == "msub" || 
parent_name == "msup"2.99k
||
parent_name == "msubsup"2.80k
) {
1344
2.56k
        return false;
1345
594
      }
1346
1347
594
      let 
mpadded315
= if name(mathml) == "mrow" {
1348
545
        let mrow = as_element(mathml.children()[0]);
1349
545
        if !(name(mrow) == "mrow" && 
mrow.children().len() == 1347
) {
1350
255
          return false;
1351
290
        }
1352
290
        let child = as_element(mrow.children()[0]);
1353
290
        if name(child) != "mpadded" {
1354
24
          return false;
1355
266
        }
1356
266
        child
1357
      } else {
1358
49
        mathml
1359
      };
1360
315
      if let Some(
width169
) = mpadded.attribute_value("width") {
1361
169
        if width != "0" {
1362
0
          return false;
1363
169
        }
1364
      } else {
1365
146
        return false;
1366
      }
1367
1368
169
      let mphantom = as_element(mpadded.children()[0]);
1369
169
      if !(name(mphantom) == "mphantom" && mphantom.children().len() == 1) {
1370
0
        return false;
1371
169
      }
1372
1373
169
      let child = as_element(mphantom.children()[0]);
1374
169
      return name(child) == "mi" && as_text(child) == "A";
1375
3.15k
    }
1376
1377
    /// 'text' is potentially one of the many Unicode whitespace chars. Estimate the width in ems
1378
149
    fn white_space_em_width(text: &str) -> f64 {
1379
149
      assert!(IS_WHITESPACE.is_match(text));
1380
149
      let mut width = 0.0;
1381
163
      for ch in 
text149
.
chars149
() {
1382
163
        width += match ch {
1383
137
          ' ' | '\u{00A0}' | '\u{1680}' | ' ' => 0.7, // space, non-breaking space, Ogham space mark, figure space
1384
0
          ' ' | ' ' => 0.5,           // en quad, en space
1385
0
          ' ' | ' ' => 1.0,           // em quad, em space
1386
0
          ' ' => 1.0/3.0,             // three per em space
1387
0
          ' ' | ' ' => 0.25,           // four per em space, punctuation space (wild guess)
1388
22
          ' ' | ' ' => 3.0/18.0,         // six per em space, thin space
1389
0
          ' ' => 1.0/18.0,           // hair space
1390
0
          ' ' => 0.3,               // narrow no-break space (half a regular space?)
1391
4
          ' ' => 4.0/18.0,           // medium math space
1392
0
          ' ' => 1.5,             // Ideographic Space
1393
0
          _ => 0.7,               // shouldn't happen
1394
        }
1395
      }
1396
149
      return width;
1397
149
    }
1398
1399
    /// Splits the leaf element into chemical elements if needed
1400
12.5k
    fn clean_chemistry_leaf(mathml: Element) -> Element {
1401
12.5k
      if !(is_chemistry_off(mathml) || mathml.attribute(MAYBE_CHEMISTRY).is_some()) {
1402
12.3k
        assert!(name(mathml)=="mi" || 
name(mathml)=="mtext"942
);
1403
        // this is a hack -- VII is more likely to be roman numeral than the molecule V I I so prevent that from happening
1404
        // FIX: come up with a less hacky way to prevent chem element misinterpretation
1405
12.3k
        let text = as_text(mathml);
1406
12.3k
        if text.len() > 2 && 
is_roman_number_match3.09k
(
text3.09k
) {
1407
0
          return mathml;
1408
12.3k
        }
1409
12.3k
        if let Some(
elements135
) = convert_leaves_to_chem_elements(mathml) {
1410
          // children are already marked as chemical elements         
1411
135
          let answer = replace_children(mathml, elements);
1412
135
          if name(answer) == "mrow" {
1413
29
            answer.remove_attribute(MAYBE_CHEMISTRY);   // was lifted, and not set -- remove and it will be computed later
1414
106
          }
1415
135
          return answer;
1416
        } else {
1417
12.1k
          let likely_chemistry = likely_chem_element(mathml);
1418
12.1k
          if likely_chemistry >= 0 {
1419
2.59k
            mathml.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
1420
9.57k
          }
1421
        };
1422
259
      }
1423
12.4k
      return mathml;
1424
12.5k
    }
1425
1426
1427
    /// looks for pairs of (letter, pseudo-script) such as x' or p'q' all inside of a single token element
1428
11.4k
    fn split_apart_pseudo_scripts<'a>(mi: Element<'a>) -> Option<Element<'a>> {
1429
2
      static IS_DEGREES_C_OR_F: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[°º][CF]").unwrap());
1430
1431
11.4k
      let text = as_text(mi);
1432
      // debug!("split_apart_pseudo_scripts: start text=\"{text}\"");
1433
11.4k
      if !text.chars().any(is_pseudo_script_char) || 
IS_DEGREES_C_OR_F14
.is_match(text) {
1434
11.4k
        return None;
1435
1
      }
1436
1437
1
      let document = mi.document();
1438
      // create pairs of text
1439
1
      let chars = text.chars();
1440
1
        let next_chars = text.chars().skip(1);
1441
1
      let result = chars.zip(next_chars).map(|(a, b)|
1442
1
            if a.is_alphabetic() && is_pseudo_script_char(b) {
1443
              // create msup
1444
1
              let base = create_mathml_element(&document, "mi");
1445
1
              base.set_text(&a.to_string());
1446
1
              let script = create_mathml_element(&document, "mo");
1447
1
              script.set_text(&b.to_string());
1448
1
              let msup = create_mathml_element(&document, "msup");
1449
1
              msup.append_child(base);
1450
1
              msup.append_child(script);
1451
1
              msup
1452
            } else {
1453
              // create an mi "ab"
1454
0
              let new_mi = create_mathml_element(&document, "mi");
1455
0
              let mut new_mi_text = String::with_capacity(6);    // likely will fit almost all cases
1456
0
              new_mi_text.push(a);
1457
0
              new_mi_text.push(b);
1458
0
              new_mi.set_text(&new_mi_text);
1459
0
              new_mi
1460
1
            } )
1461
1
        .collect::<Vec<Element>>();
1462
1
      if result.len() == 1 {
1463
1
        return Some( result[0] );
1464
      } else {
1465
0
        return Some( replace_children(mi, result) );
1466
      }
1467
11.4k
    }
1468
1469
1470
    /// If 'mathml' is a scripted element and has an mrow for a base,
    ///   attach any prescripts to the first element in mrow
    ///   attach any postscript to the last element in mrow
    /// Return the modified element (which might now be an mrow)
    ///
    /// `mathml` is returned untouched unless it is scripted, its first child is an `mrow`,
    /// and the last child of that `mrow` carries the `SPLIT_TOKEN` attribute.
    /// Otherwise the result is whatever `replace_children` returns for the new sequence.
    fn attach_scripts_to_split_element(mathml: Element) -> Element {
      if !IsNode::is_scripted(mathml) {
        return mathml;
      }
      let base = as_element(mathml.children()[0]);
      if name(base) != "mrow" {
        return mathml;
      }
      let base_children = base.children();
      // NOTE(review): an empty 'mrow' base would underflow here -- presumably callers never pass one; confirm
      let i_last_base = base_children.len()-1;
      let last_child = as_element(base_children[i_last_base]);
      if last_child.attribute(SPLIT_TOKEN).is_none() {
        return mathml;
      }
      // debug!("attach_scripts_to_split_element -- start: \n{}", mml_to_string(mathml));
      // the elements that will take the place of 'mathml'
      let mut mathml_replacement = Vec::with_capacity(base_children.len());
      if name(mathml) == "mmultiscripts" {
        // pull any prescript (should be at most one prefix pair) into the first child
        let multiscripts_children = mathml.children();
        let n_multiscripts_children = multiscripts_children.len();
        // only looks for 'mprescripts' as the third child from the end (i.e., a single prescript pair)
        // NOTE(review): assumes at least three children -- fewer would underflow the index; confirm
        let potential_mprescripts_element = as_element(multiscripts_children[n_multiscripts_children-3]);
        if name(potential_mprescripts_element) == "mprescripts" {    // we have potential chem prescripts
          // create a new mmultiscripts elements with first child as its base mathml's prescripts as the new element's prescripts
          let mut new_mmultiscripts_children = Vec::with_capacity(4);
          new_mmultiscripts_children.push(base_children[0]);
          // the first child of the base moves to the new mmultiscripts, so take it out of the base mrow
          base.remove_child(as_element(base_children[0]));
          new_mmultiscripts_children.push(multiscripts_children[n_multiscripts_children-3]);
          new_mmultiscripts_children.push(multiscripts_children[n_multiscripts_children-2]);
          new_mmultiscripts_children.push(multiscripts_children[n_multiscripts_children-1]);

          let new_mmultiscripts = create_mathml_element(&base.document(), "mmultiscripts");
          new_mmultiscripts.append_children(new_mmultiscripts_children);
          let likely = likely_adorned_chem_formula(new_mmultiscripts);
          new_mmultiscripts.set_attribute_value(MAYBE_CHEMISTRY, &likely.to_string());
          // debug!("attach_scripts_to_split_element -- new_mmultiscripts: \n{}", mml_to_string(new_mmultiscripts));
          if n_multiscripts_children == 4 {
            // we stripped all the children so only the (modified) base exists
            // create mrow(new_mmultiscripts, mathml[0])
            let children = vec![new_mmultiscripts, base];
            return replace_children(mathml, children);
          }
          mathml_replacement.push(new_mmultiscripts);
        }
      }

      // Add all the middle children of the base to the mrow
      // ('base.children()' is read again here because the first child may have been removed above)
      base.children().iter().take(base.children().len()-1).for_each(|&child| mathml_replacement.push(as_element(child)));

      // create a new script element with last child as its base
      // NOTE(review): the code stores 'base' (the mrow), not 'last_child', as the first child -- confirm this is intended
      let mut new_mathml_children = mathml.children();
      new_mathml_children[0] = ChildOfElement::Element(base);
      mathml.replace_children(new_mathml_children);
      mathml_replacement.push(mathml);
      // debug!("attach_scripts_to_split_element -- after adjusting ({} replacement children): \n{}", mathml_replacement.len(), mml_to_string(mathml));
      return replace_children(mathml, mathml_replacement);
    }
1530
1531
    /// makes sure the structure is correct and also eliminates <none/> pairs
1532
    /// MathML core changed <none/> to <mrow/>. For now (since MathCAT has lots of "none" tests), <mrow/> => <mtext> => <none/>
1533
    /// (used https://chem.libretexts.org/Courses/Saint_Francis_University/CHEM_113%3A_Human_Chemistry_I_(Muino)/13%3A_Nuclear_Chemistry12/13.04%3A_Nuclear_Decay)
1534
    ///
1535
    /// This does some dubious repairs when the structure is bad, but not sure what else to do
1536
185
    fn clean_mmultiscripts(mathml: Element) -> Option<Element> {
1537
185
      let mut mathml = mathml;
1538
185
      let children = mathml.children();
1539
185
      let n = children.len();
1540
185
      let i_mprescripts =
1541
185
        if let Some((
i108
,_)) = children.iter().enumerate()
1542
659
          .
find185
(|&(_,&el)| name(as_element(el)) == "mprescripts") {
i108
} else {
n77
};
1543
185
      let has_misplaced_mprescripts = i_mprescripts & 1 == 0;  // should be first, third, ... child
1544
185
      let mut has_proper_number_of_children = if i_mprescripts == n { 
n & 1 == 077
} else {
n & 1 != 0108
}; // should be odd else even #
1545
185
      if has_misplaced_mprescripts || !has_proper_number_of_children || 
has_none_none_script_pair0
(
&children0
) {
1546
        // need to reset the children
1547
185
        let mut new_children = Vec::with_capacity(n+2); // adjusting position of mprescripts might add two children
1548
185
        new_children.push(children[0]);
1549
        // drop none, none script pairs
1550
185
        let mut i = 1;
1551
604
        while i < n {
1552
419
          let child = as_element(children[i]);
1553
419
          let child_name = name(child);
1554
419
          if child_name == "mprescripts" {
1555
108
            if has_misplaced_mprescripts {
1556
0
              let mtext = CanonicalizeContext::create_empty_element(&mathml.document());
1557
0
              new_children.push(ChildOfElement::Element(mtext));
1558
0
              has_proper_number_of_children = !has_proper_number_of_children;
1559
108
            }
1560
108
            new_children.push(children[i]);
1561
108
            i += 1;
1562
311
          } else if i+1 < n && child_name == "none" && 
name85
(
as_element85
(children[i+1])) == "none" {
1563
2
            i += 2;   // found none, none pair
1564
309
          } else {
1565
309
            // copy pair
1566
309
            new_children.push(children[i]);
1567
309
            new_children.push(children[i+1]);
1568
309
            i += 2;
1569
309
          }
1570
        }
1571
185
        if new_children.len() <= 2 {  // base only, or base and </mprescripts>
1572
1
          mathml = as_element(new_children[0]);
1573
184
        } else {
1574
184
          mathml.replace_children(new_children);
1575
184
        }
1576
0
      }
1577
1578
185
      return Some(mathml);
1579
1580
0
      fn has_none_none_script_pair(children: &[ChildOfElement]) -> bool {
1581
0
        let mut i = 1;
1582
0
        let n = children.len();
1583
0
        while i < n {
1584
0
          let child = as_element(children[i]);
1585
0
          let child_name = name(child);
1586
0
          if child_name == "mprescripts" {
1587
0
            i += 1;
1588
0
          } else if i+1 < n && child_name == "none" && name(as_element(children[i+1])) == "none" {
1589
0
            return true;   // found none, none pair
1590
0
          } else {
1591
0
            i += 2;
1592
0
          }
1593
        }
1594
0
        return false;
1595
0
      }
1596
185
    }
1597
1598
    /// converts element if there is an empty subscript or superscript
1599
180
    fn clean_msubsup(mathml: Element) -> Element {
1600
180
      let children = mathml.children();
1601
180
      let subscript = as_element(children[1]);
1602
180
      let has_subscript = !(name(subscript) == "mtext" && 
as_text(subscript).trim()3
.
is_empty3
());
1603
180
      let superscript = as_element(children[2]);
1604
180
      let has_superscript = !(name(superscript) == "mtext" && 
as_text(superscript).trim()6
.
is_empty6
());
1605
180
      if has_subscript && 
has_superscript177
{
1606
171
        return mathml;
1607
9
      } else if has_subscript {
1608
6
        set_mathml_name(mathml, "msub");
1609
6
        let children = vec!(children[0], children[1]);
1610
6
        mathml.replace_children(children);
1611
6
        return mathml;
1612
3
      } else if has_superscript {
1613
3
        set_mathml_name(mathml, "msup");
1614
3
        let children = vec!(children[0], children[2]);
1615
3
        mathml.replace_children(children);
1616
3
        return mathml;
1617
      } else {
1618
0
        return as_element(children[0]);  // no scripts
1619
      }
1620
180
    }
1621
1622
    /// Split off the currency symbol from the rest of the text and return an mrow with the result
1623
    /// Assumes it has already checked and that we have a leaf
1624
12
    fn split_currency_symbol(leaf: Element) -> Option<Element> {
1625
12
      assert!(is_leaf(leaf));
1626
12
      let text = as_text(leaf);
1627
12
      assert!(contains_currency(text));
1628
12
      let mut iter = text.chars();
1629
12
      match (iter.next(), iter.next()) {
1630
0
        (None, _) => return None,
1631
        (Some(_), None) => {  // 1 char
1632
9
          leaf.set_name("mi");
1633
9
          return Some(leaf);       }
1634
        (Some(_), Some(_)) => { // 2 or more chars
1635
          // WARNING: don't use 'leaf' in the mrow -- that detaches it from its parent and could shrink the number of children causing problems
1636
4
          if 
text.chars()3
.
any3
(|c| c.is_ascii_digit()) { // might be a number with a currency symbol
1637
3
            leaf.set_name("mn");  // make sure we create an mn (might be one already)
1638
3
          
}0
1639
3
          let first_ch = text.char_indices().next().map(|(i, ch)| &text[i..i + ch.len_utf8()]).unwrap();
1640
3
          if is_currency_symbol(first_ch.chars().next().unwrap()) {
1641
1
            let mrow = create_mathml_element(&leaf.document(), "mrow");
1642
1
            mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1643
1
            let currency_symbol = create_mathml_element(&leaf.document(), "mi");
1644
1
            currency_symbol.set_text(first_ch);
1645
1
            mrow.append_child(currency_symbol);
1646
1
            let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1647
1
            mrow.append_child(implied_times);
1648
1
            let currency_amount = create_mathml_element(&leaf.document(), name(leaf));
1649
1
            currency_amount.set_text(&text[first_ch.len()..]);
1650
1
            mrow.append_child(currency_amount);
1651
1
            return Some(mrow);
1652
2
          }
1653
2
          let last_ch = text.char_indices().last().map(|(i, _)| &text[i..]).unwrap();
1654
2
          if is_currency_symbol(last_ch.chars().next().unwrap()) {
1655
1
            let mrow = create_mathml_element(&leaf.document(), "mrow");
1656
1
            mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1657
1
            let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1658
1
            mrow.append_child(implied_times);
1659
1
            let currency_amount = create_mathml_element(&leaf.document(), name(leaf));
1660
1
            currency_amount.set_text(&text[..text.len()-last_ch.len()]);
1661
1
            mrow.append_child(currency_amount);
1662
1
            let currency_symbol = create_mathml_element(&leaf.document(), "mi");
1663
1
            currency_symbol.set_text(last_ch);
1664
1
            mrow.append_child(currency_symbol);
1665
1
            return Some(mrow);
1666
1
          }
1667
          // try to find it in the middle
1668
2
          for (byte_idx, ch) in 
text1
.
char_indices1
() {
1669
2
            if contains_currency(&text[byte_idx .. byte_idx + ch.len_utf8()]) {
1670
              // get all the substrings
1671
1
              let first_part = &text[..byte_idx];
1672
1
              let currency_symbol = &text[byte_idx .. byte_idx + ch.len_utf8()];
1673
1
              let second_part = &text[byte_idx + ch.len_utf8() ..];
1674
1
              let mrow = create_mathml_element(&leaf.document(), "mrow");
1675
1
              mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1676
1
              let first_part_element = create_mathml_element(&leaf.document(), name(leaf));
1677
1
              first_part_element.set_text(first_part);
1678
1
              mrow.append_child(first_part_element);
1679
1
              let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1680
1
              mrow.append_child(implied_times);
1681
1
              let currency_symbol_element = create_mathml_element(&leaf.document(), "mi");
1682
1
              currency_symbol_element.set_text(currency_symbol);
1683
1
              mrow.append_child(currency_symbol_element);
1684
1
              let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1685
1
              mrow.append_child(implied_times);
1686
1
              let second_part_element = create_mathml_element(&leaf.document(), name(leaf));
1687
1
              second_part_element.set_text(second_part);
1688
1
              mrow.append_child(second_part_element);
1689
1
              return Some(mrow);
1690
1
            }
1691
          }
1692
0
          return None
1693
        }
1694
      }
1695
12
    }
1696
1697
    /// If arg is "arc" (with optional space), merge the following element in if a trig function (sibling is deleted)
1698
11.8k
    fn merge_arc_trig(leaf: Element) -> Option<Element> {
1699
11.8k
      assert!(is_leaf(leaf));
1700
11.8k
      let leaf_text = as_text(leaf);
1701
11.8k
      if !(leaf_text == "arc" || 
leaf_text == "arc "11.8k
||
leaf_text == "arc "11.8k
/* non-breaking space */ ) {
1702
11.8k
        return None;
1703
2
      }
1704
1705
2
      let following_siblings = leaf.following_siblings();
1706
2
      if following_siblings.is_empty() {
1707
0
        return None;
1708
2
      }
1709
1710
2
      let following_sibling = as_element(following_siblings[0]);
1711
2
      let following_sibling_name = name(following_sibling);
1712
2
      if !(following_sibling_name == "mi" || 
following_sibling_name == "mo"0
||
following_sibling_name == "mtext"0
) {
1713
0
        return None;
1714
2
      }
1715
1716
2
      return crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
1717
        // change "arc" "cos" to "arccos" -- we look forward because calling loop stores previous node
1718
2
        let following_text = as_text(following_sibling);
1719
2
        if definitions.borrow().get_hashset("TrigFunctionNames").unwrap().contains(following_text) {
1720
2
          let new_text = "arc".to_string() + following_text;
1721
2
          set_mathml_name(leaf, "mi");
1722
2
          leaf.set_text(&new_text);
1723
2
          following_sibling.remove_from_parent();
1724
2
          return Some(leaf);
1725
0
        }
1726
0
        return None;
1727
2
      })
1728
11.8k
    }
1729
1730
    /// Convert "||" to "‖", if in single element or in repeated 'mo's (but not "|x||y|" or "{x ||x|>0}")
1731
305
    fn merge_vertical_bars(leaf: Element) -> Option<Element> {
1732
305
      assert!(is_leaf(leaf));
1733
305
      let leaf_text = as_text(leaf);
1734
305
      if leaf_text == "||" {
1735
4
        leaf.set_text("‖");    // U+2016
1736
4
        return Some(leaf);
1737
301
      } else if leaf_text != "|" {
1738
0
        return None;
1739
301
      }
1740
301
      let following_siblings = leaf.following_siblings();
1741
301
      if following_siblings.is_empty() {
1742
96
        return None;
1743
205
      }
1744
1745
205
      let following_sibling = as_element(following_siblings[0]);
1746
205
      if name(following_sibling) != "mo" || 
as_text(following_sibling) != "|"18
{
1747
201
        return None
1748
4
      }
1749
1750
      // have "||" -- if there a single "|" on left, rule out merge
1751
4
      let preceding_siblings = leaf.preceding_siblings();
1752
5
      if 
preceding_siblings.iter()4
.
any4
(|&child| {
1753
5
        let child = as_element(child);
1754
5
        return name(child) == "mo" && 
as_text(child) == "|"3
;
1755
5
      }) {
1756
1
        return None;   // found "|" on left
1757
3
      }
1758
1759
3
      if following_siblings.len() > 1 {
1760
2
        let following_siblings = &following_siblings[1..];
1761
        // if there are an odd number of "|"s to the right, rule out the merge
1762
8
        if !(
following_siblings2
.
iter2
().
filter2
(|&&child| {
1763
8
          let child = as_element(child);
1764
8
          return name(child) == "mo" && 
as_text(child) == "|"5
;
1765
8
        }).
count2
()).
is_multiple_of2
(2) {
1766
1
          return None;
1767
1
        }
1768
1
      }
1769
1770
      // didn't find any
1771
2
      leaf.set_text("‖");    // U+2016
1772
2
      following_sibling.remove_from_parent();
1773
2
      return Some(leaf);
1774
305
    }
1775
1776
    /// merge a following mstyle that has the same attrs
1777
714
    fn merge_adjacent_similar_mstyles(mathml: Element) {
1778
714
      if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(name(get_parent(mathml))) {
1779
        // FIX: look to see if all of the children (might be more than just the adjacent one) have the same attr and then pull them up to the parent
1780
65
        return;   // can't remove subsequent child 
1781
649
      }
1782
649
      let following_siblings = mathml.following_siblings();
1783
649
      if following_siblings.is_empty() {
1784
579
        return;
1785
70
      }
1786
70
      let following_element = as_element(following_siblings[0]);
1787
70
      if name(following_element) != "mstyle" {
1788
66
        return;
1789
4
      }
1790
4
      let are_same = mathml.attributes().iter()
1791
4
              .zip( following_element.attributes() )
1792
5
              .
all4
(|(first, second)| first.name()==second.name() && first.value()==second.value());
1793
4
      if are_same {
1794
4
        mathml.append_children(following_element.children());
1795
4
        following_element.remove_from_parent();
1796
4
      
}0
1797
714
    }
1798
1799
40
    fn convert_mfenced_to_mrow(mfenced: Element) -> Element {
1800
      // The '<'/'>' replacements are because WIRIS uses them out instead of the correct chars in its template
1801
40
      let open = mfenced.attribute_value("open").unwrap_or("(").replace('<', "⟨");
1802
40
      let close = mfenced.attribute_value("close").unwrap_or(")").replace('>', "⟩");
1803
      // debug!("open={}, close={}", open, close);
1804
40
      let mut separators= mfenced.attribute_value("separators").unwrap_or(",").chars();
1805
40
      set_mathml_name(mfenced, "mrow");
1806
40
      mfenced.remove_attribute("open");
1807
40
      mfenced.remove_attribute("close");
1808
40
      mfenced.remove_attribute("separators");
1809
40
      let children = mfenced.children();
1810
40
      let mut new_children = Vec::with_capacity(2*children.len() + 1);
1811
40
      if !open.is_empty() {
1812
40
        new_children.push(ChildOfElement::Element( create_mo(mfenced.document(), &open, MFENCED_ATTR_VALUE)) );
1813
40
      
}0
1814
40
      if !children.is_empty() {
1815
40
        new_children.push(children[0]);
1816
40
        for 
child3
in &children[1..] {
1817
3
          let sep = separators.next().unwrap_or(',').to_string();
1818
3
          new_children.push( ChildOfElement::Element( create_mo(mfenced.document(), &sep, MFENCED_ATTR_VALUE)) );
1819
3
          new_children.push(*child);
1820
3
        }
1821
0
      }
1822
40
      if !close.is_empty() {
1823
38
        new_children.push(ChildOfElement::Element( create_mo(mfenced.document(), &close, MFENCED_ATTR_VALUE)) );
1824
38
      
}2
1825
40
      mfenced.replace_children(new_children);
1826
40
      return mfenced;
1827
40
    }
1828
1829
30.4k
    fn is_roman_number_match(text: &str) -> bool {
1830
30.4k
      return UPPER_ROMAN_NUMERAL.is_match(text) || 
LOWER_ROMAN_NUMERAL29.6k
.is_match(text);
1831
30.4k
    }
1832
1833
    /// Return true if 'element' (which is syntactically a roman numeral) is only inside mrows and
1834
    ///  if its length is < 3 chars, then there is another roman numeral near it (separated by an operator).
1835
    /// We want to rule out something like 'm' or 'cm' being a roman numeral.
1836
    /// Note: this function assumes 'mathml' is a Roman Numeral, and optimizes operations based on that.
1837
    /// Note: Nemeth has some rules about roman numerals (capitalization and punctuation after)
1838
3.35k
    fn is_roman_numeral_number_context(mathml: Element) -> bool {
1839
3.35k
      assert!(name(mathml)=="mtext" || 
name(mathml)=="mi"3.32k
);
1840
3.35k
      let mut parent = mathml;
1841
      loop {
1842
5.41k
        parent = get_parent(parent);
1843
5.41k
        let current_name = name(parent);
1844
5.41k
        if current_name == "math" {
1845
1.57k
          break;
1846
3.84k
        } else if current_name == "msup" || 
current_name == "mmultiscripts"3.42k
{
1847
          // could be a oxidation state in a Chemical formula
1848
559
          let children = parent.children();
1849
          // make sure that there is only one script and that 'mathml' is a superscript
1850
559
          if current_name == "mmultiscripts" && (
children.len() > 3139
||
!mathml.following_siblings().is_empty()27
) {
1851
122
            return false;
1852
437
          }
1853
437
          let base = as_element(children[0]);
1854
437
          if is_chemical_element(base) {
1855
21
            break;
1856
          } else {
1857
416
            return false;
1858
          }
1859
3.28k
        } else if current_name != "mrow" {
1860
1.22k
          return false;
1861
2.06k
        }
1862
      }
1863
1864
1.59k
      let text = as_text(mathml).as_bytes(); // note: we know it is all ASCII chars
1865
      // if roman numeral is in superscript and we get here, then it had a chemical element base, so we accept it
1866
      // note: you never has a state = I; if two letters, it must be 'II'.
1867
1.59k
      if text.len() > 2  || 
1868
1.57k
         ((name(parent) =="msup" || 
name(parent) == "mmultiscripts"1.57k
) &&
text.len()==212
&&
text==[b'I',b'I']8
) {
1869
28
        return true;
1870
      } else {
1871
1.56k
        let is_upper_case = text[0].is_ascii_uppercase(); // safe since we know it is a roman numeral
1872
1.56k
        let preceding = mathml.preceding_siblings();
1873
1.56k
        let following = mathml.following_siblings();
1874
1.56k
        if preceding.is_empty() && 
following356
.
is_empty356
() {
1875
81
          return false;   // no context and too short to confirm it is a roman numeral
1876
1.48k
        }
1877
1.48k
        if preceding.is_empty() {
1878
275
          return is_roman_numeral_adjacent(following.iter(), is_upper_case);
1879
1.21k
        }
1880
1.21k
        if following.is_empty() {
1881
399
          return is_roman_numeral_adjacent(preceding.iter().rev(), is_upper_case);
1882
813
        }
1883
813
        return is_roman_numeral_adjacent(preceding.iter().rev(), is_upper_case) &&
1884
3
             is_roman_numeral_adjacent(following.iter(), is_upper_case);
1885
      }
1886
1887
      /// make sure all the non-mo leaf siblings are roman numerals
1888
      /// 'mo' should only be '+', '-', '=', ',', '.'  -- unlikely someone is doing anything sophisticated
1889
1.49k
      fn is_roman_numeral_adjacent<'a, I>(siblings: I, must_be_upper_case: bool) -> bool
1890
1.49k
          where I: Iterator<Item = &'a ChildOfElement<'a>> {    
1891
        static ROMAN_NUMERAL_OPERATORS: phf::Set<&str> = phf_set! {
1892
          "+", "-'", "=", "<", "≤", ">", "≥", 
1893
          // ",", ".",   // [c,d] triggers this if "," is present, so omitting it
1894
        };
1895
1.49k
        let mut found_match = false;       // guard against no siblings
1896
1.49k
        let mut last_was_roman_numeral = true; // started at roman numeral
1897
        // debug!("start is_roman_numeral_adjacent");
1898
1.74k
        for child in 
siblings1.49k
{
1899
1.74k
          let maybe_roman_numeral = as_element(*child);
1900
          // debug!("maybe_roman_numeral: {}", mml_to_string(maybe_roman_numeral));
1901
1.74k
          match name(maybe_roman_numeral) {
1902
1.74k
            "mo" => {
1903
858
              if !last_was_roman_numeral {
1904
18
                return false;
1905
840
              }
1906
840
              let text = as_text(maybe_roman_numeral);
1907
840
              if !ROMAN_NUMERAL_OPERATORS.contains(text) {
1908
660
                return false;
1909
180
              }
1910
180
              last_was_roman_numeral = false;
1911
            },
1912
889
            "mi" | 
"mn"585
=> {
1913
562
              if last_was_roman_numeral {
1914
429
                return false;   // no implicit multiplication (or whatever)
1915
133
              }
1916
133
              let text = as_text(maybe_roman_numeral);
1917
133
              if !(( must_be_upper_case && 
UPPER_ROMAN_NUMERAL18
.is_match(text)) ||
1918
117
                 (!must_be_upper_case && 
LOWER_ROMAN_NUMERAL115
.is_match(text)) ) {
1919
109
                return false;
1920
24
              };
1921
24
              found_match = true;
1922
24
              last_was_roman_numeral = true;
1923
            },
1924
327
            "mtext" | 
"mspace"252
|
"mphantom"252
=>
{}75
,
1925
            _ => {
1926
252
              return false;
1927
            }
1928
          }
1929
        }
1930
22
        return found_match;
1931
1.49k
      }
1932
3.35k
    }
1933
1934
    /// Merge adjacent mtext by increasing the width of the first mtext
1935
    /// The resulting merged whitespace is put on the previous child, or if there is one, on the following child
1936
    /// 
1937
    /// Note: this should be called *after* the mo/mtext cleanup (i.e., after the MathML child cleanup loop).
1938
10.1k
    fn merge_whitespace(children: &mut Vec<ChildOfElement>) {
1939
10.1k
      if children.is_empty() {
1940
3
        return;
1941
10.1k
      }
1942
1943
10.1k
      let mut i = 0;
1944
10.1k
      let mut previous_mtext_with_width: Option<Element<'_>> = None;  // prefer to spacing on previous mtext
1945
10.1k
      let mut whitespace: Option<f64> = None;
1946
42.0k
      while i < children.len() {
1947
31.8k
        let child = as_element(children[i]);
1948
31.8k
        let is_child_whitespace = name(child) == "mtext" && 
as_text(child) == "\u{00A0}"555
;
1949
        // debug!("merge_whitespace: i={}, whitespace={:?}, mtext set={} {}",
1950
        //    i, whitespace, previous_mtext_with_width.is_some(), mml_to_string(child));
1951
31.8k
        if is_child_whitespace {
1952
          // update the running total of whitespace
1953
340
          let child_width = child.attribute_value("data-width").unwrap_or("0")
1954
340
                                          .parse::<f64>().unwrap_or(0.0) ;
1955
340
          whitespace = match whitespace {
1956
327
            None => Some(child_width),
1957
13
            Some(w) => Some(w + child_width),
1958
          };
1959
340
          if children.len() == 1 {
1960
15
            i += 1;             // don't remove only child
1961
325
          } else {
1962
325
            children.remove(i);   // remove the current child (don't inc 'i')
1963
325
          }
1964
31.5k
        } else if let Some(
ws305
) = whitespace {
1965
          // done with sequence of whitespaces
1966
305
          if let Some(
prev_mtext13
) = previous_mtext_with_width {
1967
13
            // prefer to set on previous mtext
1968
13
            prev_mtext.set_attribute_value("data-following-space-width", (ws).to_string().as_str());
1969
13
            previous_mtext_with_width = None;
1970
13
          } else {
1971
            // if the space is significant, set it on the current child
1972
292
            child.set_attribute_value("data-previous-space-width", ws.to_string().as_str());
1973
292
            if name(child) == "mtext" {
1974
18
              previous_mtext_with_width = Some(child);
1975
274
            }
1976
          }
1977
305
          whitespace = None;
1978
305
          i += 1;
1979
31.2k
        } else {
1980
31.2k
          i += 1;
1981
31.2k
          previous_mtext_with_width = None;
1982
31.2k
        }
1983
      }
1984
      // debug!("  after loop: whitespace={:?}, {}", whitespace, mml_to_string(as_element(children[children.len()-1])));
1985
10.1k
      if let Some(
mut ws22
) = whitespace {
1986
        // last child in mrow is white space -- mark with space *after*
1987
22
        if children.len() == 1 {
1988
          // only child -- check to see if we need to set the space-width
1989
21
          let child = as_element(children[0]);
1990
21
          let child_width = child.attribute_value("data-width").unwrap_or("0").parse::<f64>().unwrap_or(0.0);
1991
21
          if (child_width - ws).abs() > 0.001 {
1992
9
            ws += child_width;
1993
9
            child.set_attribute_value("data-following-space-width", ws.to_string().as_str());
1994
12
          }
1995
1
        } else {
1996
1
          let non_space_child = as_element(children[children.len()-1]);
1997
1
          non_space_child.set_attribute_value("data-following-space-width", ws.to_string().as_str());
1998
1
        }
1999
10.1k
      }
2000
10.1k
    }
2001
2002
    /// look for potential numbers by looking for sequences with commas, spaces, and decimal points
2003
10.1k
    fn merge_number_blocks(context: &CanonicalizeContext, parent_mrow: Element, children: &mut Vec<ChildOfElement>) {
2004
      // debug!("parent:\n{}", mml_to_string(parent_mrow));
2005
      // If we find a comma that is not part of a number, don't form a number
2006
      //   (see https://github.com/NSoiffer/MathCAT/issues/271)
2007
      // Unfortunately, we can't do this in the loop below because we might discover the "not part of a number" after a number has been formed
2008
10.1k
      let do_not_merge_comma = is_comma_not_part_of_a_number(children);
2009
10.1k
      let mut i = 0;
2010
38.2k
      while i < children.len() {    // length might change after a merge
2011
        // {
2012
        //  debug!("merge_number_blocks: top of loop");
2013
        //  for (i_child, &child) in children[i..].iter().enumerate() {
2014
        //    let child = as_element(child);
2015
        //    debug!("child #{}: {}", i+i_child, mml_to_string(child));
2016
        //  }
2017
        // }
2018
28.0k
        let child = as_element(children[i]);
2019
28.0k
        let child_name = name(child);
2020
2021
        // numbers start with an mn or a decimal separator
2022
28.0k
        if child_name == "mn" || 
child_name=="mtext"22.4k
{
2023
6.09k
          let leaf_child_text = as_text(child);
2024
          // if Roman numeral, don't merge (move on)
2025
          // or if the 'mn' has ',', '.', or space, consider it correctly parsed and move on
2026
6.09k
          if is_roman_number_match(leaf_child_text) ||
2027
5.75k
            context.patterns.block_separator.is_match(leaf_child_text) ||
2028
5.64k
            (leaf_child_text.len() > 1 && 
context.patterns.decimal_separator710
.
is_match710
(
leaf_child_text710
)) {
2029
559
            i += 1;
2030
559
            continue;
2031
5.53k
          }
2032
21.9k
        } else if child_name != "mo" ||
2033
9.20k
              (do_not_merge_comma && 
as_text(child) == ","3.08k
) ||
2034
6.44k
              !context.patterns.decimal_separator.is_match(as_text(child)) {
2035
21.9k
          i += 1;
2036
21.9k
          continue;
2037
31
        }
2038
          
2039
        // potential start of a number
2040
5.56k
        let mut end = i + 1;
2041
5.56k
        let mut has_decimal_separator = false;
2042
5.56k
        let mut not_a_number = false;
2043
5.56k
        if i < children.len() {
2044
          // look at the right siblings and pull in the longest sequence of number/separators -- then check it for validity
2045
5.56k
          for 
sibling4.00k
in children[i+1..].iter() {
2046
4.00k
            let sibling = as_element(*sibling);
2047
4.00k
            let sibling_name = name(sibling);
2048
4.00k
            if sibling_name == "mn" {
2049
245
              let leaf_text = as_text(sibling);
2050
245
              let is_block_separator = context.patterns.block_separator.is_match(leaf_text);
2051
245
              let is_decimal_separator = context.patterns.decimal_separator.is_match(leaf_text);
2052
245
              if is_roman_number_match(leaf_text) || is_block_separator || is_decimal_separator {
2053
                // consider this mn correctly parsed
2054
1
                break;
2055
244
              }
2056
3.75k
            } else if sibling_name=="mo" || 
sibling_name=="mtext"2.59k
{
2057
1.33k
              let leaf_text = as_text(sibling);
2058
1.33k
              let is_block_separator = context.patterns.block_separator.is_match(leaf_text);
2059
1.33k
              let is_decimal_separator = context.patterns.decimal_separator.is_match(leaf_text);
2060
1.33k
              if (leaf_text == "," && 
do_not_merge_comma315
) ||
2061
1.14k
                 !(is_block_separator || 
is_decimal_separator954
) ||
2062
261
                 (is_decimal_separator && 
has_decimal_separator75
) {
2063
                // not a separator or (it is decimal separator and we've already seen a decimal separator)
2064
1.09k
                not_a_number = is_decimal_separator && 
has_decimal_separator127
; // e.g., 1.2.3 or 1,2,3
2065
1.09k
                break;
2066
244
              }
2067
244
              has_decimal_separator |= is_decimal_separator;
2068
            } else {
2069
              // not mn, mo, or mtext -- end of a number
2070
2.41k
              break;
2071
            }
2072
488
            end += 1;     // increment at end so we can tell the difference between a 'break' and end of loop
2073
          }
2074
0
        }
2075
5.56k
        if not_a_number {
2076
17
          i = end + 1;
2077
17
          continue; // continue looking in the rest of the mrow
2078
5.55k
        }
2079
5.55k
        if ignore_final_punctuation(context, parent_mrow, &children[i..end]) {
2080
18
          end -= 1;
2081
5.53k
        };
2082
        // debug!("start={}, end={}", i, end);
2083
        // no need to merge if only one child (also avoids "." being considered a number)
2084
5.55k
        if end > i + 1 && 
is_likely_a_number275
(
context275
,
parent_mrow275
,
&275
children275
[i..end]) {
2085
107
          (i, end) = trim_whitespace(children, i, end);
2086
107
          merge_block(children, i, end);
2087
107
          // note: start..end has been collapsed, so restart after the collapsed part
2088
5.44k
        } else {
2089
5.44k
          i = end;  // start looking at the end of the block we just rejected
2090
5.44k
        }
2091
5.55k
        i += 1;
2092
      }
2093
10.1k
    }
2094
2095
    /// Return true if we find a comma that doesn't have an <mn> on both sides
2096
10.1k
    fn is_comma_not_part_of_a_number(children: &[ChildOfElement])-> bool {
2097
10.1k
      let n_children = children.len();
2098
10.1k
      if n_children == 0 {
2099
3
        return false;
2100
10.1k
      }
2101
10.1k
      let mut previous_child = as_element(children[0]);
2102
14.5k
      for i in 
1..n_children10.1k
{
2103
14.5k
        let child = as_element(children[i]);
2104
14.5k
        if name(child) == "mo" && 
as_text(child) == ","6.27k
&&
i+1 < n_children980
&&
2105
972
           (name(previous_child) != "mn" || 
name208
(as_element(children[i+1])) != "mn") {
2106
809
          return true;
2107
13.7k
        }
2108
13.7k
        previous_child = child;
2109
      }
2110
9.37k
      return false;
2111
10.1k
    }
2112
2113
    /// If we have something like 'shape' ABC, we split the ABC and add IMPLIED_SEPARATOR_HIGH_PRIORITY between them
2114
    /// under some specific conditions (trying to be a little cautious).
2115
    /// The returned (mrow) element reuses the arg so tree siblings links remain correct.
2116
11.8k
    fn split_points(leaf: Element) -> Option<Element> {
2117
3
      static IS_UPPERCASE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[A-Z]+$").unwrap());
2118
2119
11.8k
      if !IS_UPPERCASE.is_match(as_text(leaf)) {
2120
9.88k
        return None;
2121
1.96k
      }
2122
2123
      // check to see if there is a bar, arrow, etc over the letters (line-segment, arc, ...)
2124
1.96k
      let parent = get_parent(leaf);
2125
1.96k
      if name(parent) == "mover" {
2126
        // look for likely overscripts (basically just rule out some definite 'no's)
2127
29
        let over = as_element(parent.children()[1]);
2128
29
        if is_leaf(over) {
2129
29
          let mut over_chars = as_text(over).chars();
2130
29
          let first_char = over_chars.next();
2131
29
          if first_char.is_some() && over_chars.next().is_none() && !first_char.unwrap().is_alphanumeric(){
2132
            // only one char and it isn't alphanumeric
2133
29
            return Some( split_element(leaf) );
2134
0
          }
2135
0
        }
2136
1.93k
      }
2137
  
2138
      // check to see if it is preceded by a geometric shape (e.g, ∠ABC)
2139
1.93k
      let preceding_siblings = leaf.preceding_siblings();
2140
1.93k
      if !preceding_siblings.is_empty() {
2141
1.11k
        let preceding_sibling = as_element(preceding_siblings[preceding_siblings.len()-1]);
2142
1.11k
        let preceding_sibling_name = name(preceding_sibling);
2143
1.11k
        if preceding_sibling_name == "mi" || 
preceding_sibling_name == "mo"886
||
preceding_sibling_name == "mtext"439
{
2144
711
          let preceding_text = as_text(preceding_sibling);
2145
711
          return crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
2146
711
            let defs = definitions.borrow();
2147
711
            let prefix_ops = defs.get_hashset("GeometryPrefixOperators").unwrap();
2148
711
            let shapes = defs.get_hashset("GeometryShapes").unwrap();
2149
711
            if prefix_ops.contains(preceding_text) || 
shapes708
.contains(preceding_text) {
2150
              // split leaf
2151
9
              return Some( split_element(leaf) ); // always treated as function names
2152
            } else {
2153
702
              return None;
2154
            }
2155
711
          })
2156
407
        }
2157
817
      }
2158
1.22k
      return None;
2159
2160
38
      fn split_element(leaf: Element) -> Element {
2161
38
        let mut children = Vec::with_capacity(leaf.children().len());
2162
51
        for ch in 
as_text(leaf)38
.
chars38
() {
2163
51
          let new_leaf = create_mathml_element(&leaf.document(), "mi");
2164
51
          new_leaf.set_text(&ch.to_string());
2165
51
          children.push(new_leaf);
2166
51
        }
2167
38
        set_mathml_name(leaf, "mrow");
2168
38
        leaf.replace_children(children);
2169
38
        return leaf;
2170
38
      }
2171
11.8k
    }
2172
2173
    /// If we have something like 'V e l o c i t y', merge that into a single <mi>
2174
    /// We only do this for sequences of at least three chars, and also exclude things like consecutive letter (e.g., 'x y z')
2175
    /// The returned (mi) element reuses 'mi'
2176
11.4k
    fn merge_mi_sequence(mi: Element) -> Option<Element> {
2177
      // The best solution would be to use a dictionary of words, or maybe restricted to words in a formula,
2178
      //   but that would likely miss the words used in slope=run/rise.
2179
      // It would also be really expensive since we would need a dictionary for each language.
2180
      // We shouldn't need to worry about trig names like "cos", but people sometimes forget to use "\cos"
2181
      // Hence, we check against the "FunctionNames" that get read on startup.
2182
70
      fn is_vowel(ch: char) -> bool {
2183
70
        
matches!58
(ch,
2184
          'a' | 'e' | 'i' | 'o' | 'u' | 'y' |
2185
          'à' | 'á' | 'â' | 'ã' | 'ä' | 'è' | 'é' | 'ê' | 'ë' | 'ì' | 'í' | 'î' | 'ï' |
2186
          'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ú' | 'Ù' | 'û' | 'ü' | 'ý' | 'ÿ' |
2187
          'ả' | 'ạ' | 'ă' | 'ằ' | 'ẳ' | 'ẵ' | 'ắ' | 'ặ' | 'ầ' | 'ẩ' | 'ẫ' | 'ấ' | 'ậ' | 'ẻ' | 'ẽ' | 'ẹ' | 'ề' | 'ể' | 'ễ' | 'ế' | 'ệ' |
2188
          'ỉ' | 'ĩ' | 'ị' | 'ỏ' | 'ọ' | 'ồ' | 'ổ' | 'ỗ' | 'ố' | 'ộ' | 'ơ' | 'ờ' | 'ở' | 'ỡ' | 'ớ' | 'ợ' |
2189
          'ủ' | 'ũ' | 'ụ' | 'ư' | 'ừ' | 'ử' | 'ữ' | 'ứ' | 'ự' | 'ỳ' | 'ỷ' | 'ỹ' | 'ỵ'
2190
        )
2191
70
      }
2192
11.4k
      let parent = get_parent(mi);  // not canonicalized into mrows, so parent could be "math"
2193
11.4k
      let parent_name = name(parent);
2194
      // don't merge if more than one char, or if not in an mrow (or implied on since we haven't normalized yet)
2195
11.4k
      if as_text(mi).chars().nth(1).is_some() || !(
parent_name == "mrow"8.87k
||
parent_name == "math"5.62k
) {
2196
5.16k
        return None;
2197
6.25k
      }
2198
6.25k
      let mut text =  as_text(mi).to_string();
2199
6.25k
      let text_script = Script::from(text.chars().next().unwrap_or('a'));
2200
6.25k
      let following_siblings = mi.following_siblings();
2201
6.25k
      let mut last_char_is_scripted = None;
2202
6.25k
      let mut following_mi_siblings: Vec<Element> = following_siblings.iter()
2203
6.25k
            .map_while(|&child| 
{4.15k
2204
4.15k
              let mut child = as_element(child);
2205
4.15k
              let mut is_ok = false;
2206
4.15k
              if name(child) == "msub" || 
name(child) == "msup"4.02k
{
2207
                // check if the *last* char in the sequence is scripted
2208
                // if so, we need to stop here anyway and deal with it specially
2209
163
                last_char_is_scripted = Some(child);   // need to remember the value -- cleared later if not ok
2210
163
                child = as_element(child.children()[0]);
2211
233
                while name(child) == "mrow" && 
child.children().len() == 171
{
2212
70
                  // the base may be wrapped with mrows
2213
70
                  child = as_element(child.children()[0]);
2214
70
                }
2215
3.99k
              }
2216
4.15k
              if name(child) == "mi" {
2217
402
                let mut child_text = as_text(child).chars();
2218
402
                let first_char = child_text.next().unwrap_or('a');
2219
402
                if child_text.next().is_none() && 
Script::from(first_char) == text_script376
{
2220
365
                  text.push(first_char);
2221
365
                  is_ok = true;
2222
365
                
}37
2223
3.75k
              }
2224
4.15k
              if last_char_is_scripted.is_some() {
2225
163
                if is_ok {
2226
114
                  is_ok = false;    // don't want to continue
2227
114
                } else {
2228
49
                  last_char_is_scripted = None; // reset to None
2229
49
                }
2230
3.99k
              }
2231
4.15k
              if is_ok {
Some(child)251
} else {
None3.90k
}
2232
4.15k
            })
2233
6.25k
            .collect();
2234
6.25k
      if following_mi_siblings.is_empty() {
2235
6.03k
        return None;
2236
224
      }
2237
    
2238
224
      if let Some(
last14
) = last_char_is_scripted {
2239
14
        // add the last char to the run
2240
14
        following_mi_siblings.push(last);
2241
210
      }
2242
      // debug!("merge_mi_sequence: text={}", &text);
2243
224
      if let Some(
answer11
) = crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
2244
224
        let definitions = definitions.borrow();
2245
224
        let function_names = definitions.get_hashset("FunctionNames").unwrap();
2246
        // UEB seems to think "Sin" (etc) is used for "sin", so we move to lower case
2247
        // function name might be (wrongly) set to italic math alphanumeric chars, including bold italic
2248
224
        if let Some(
ascii_text221
) = CanonicalizeContext::math_alphanumeric_to_ascii(&text)
2249
221
          && function_names.contains(&ascii_text.to_lowercase()) {
2250
10
            return Some(merge_from_text(mi, &ascii_text, &following_mi_siblings));
2251
214
          }
2252
214
        if function_names.contains(&text) {
2253
0
          return Some(merge_from_text(mi, &text, &following_mi_siblings));
2254
214
        }
2255
        // unlike "FunctionNames", "KnownWords" might not exist
2256
214
        if let Some(
word_map131
) = definitions.get_hashset("KnownWords")
2257
131
          && word_map.contains(&text) {
2258
1
            return Some(merge_from_text(mi, &text, &following_mi_siblings));
2259
213
          }
2260
213
        return None;
2261
224
      }) {
2262
11
        return answer;
2263
213
      }
2264
2265
      // don't be too aggressive combining mi's when they are short
2266
213
      if text.chars().count() < 3 {
2267
186
        return None;
2268
27
      }
2269
      // If it is a word, it needs a vowel and it must be a letter
2270
      // FIX: this check needs to be internationalized to include accented vowels, other alphabets
2271
70
      if !
text.chars()27
.
any27
(|ch| is_vowel(ch) ||
!ch.is_ascii_alphabetic()58
) {
2272
15
        return None;
2273
12
      }
2274
    
2275
      // now for some heuristics to rule out a sequence of variables
2276
      // rule out sequences like 'abc' and also 'axy' that are in alphabetical order
2277
12
      let mut chars = text.chars();
2278
12
      let mut left = chars.next().unwrap();   // at least 3 chars
2279
12
      let mut is_in_alphabetical_order = true;
2280
23
      for ch in 
chars12
{
2281
23
        if (left as u32) >= (ch as u32) {
2282
3
          is_in_alphabetical_order = false;
2283
3
          break;                 // can't be 'abc', 'axy', etc
2284
20
        }
2285
20
        left = ch;
2286
      }
2287
12
      if is_in_alphabetical_order || 
text.len() < 43
{
2288
        // If it is in alphabetical order, it's not likely a word
2289
12
        return None;
2290
0
      }
2291
2292
      // FIX: should add more heuristics to rule out words
2293
0
      return merge_from_text(mi, &text, &following_mi_siblings);
2294
2295
11
      fn merge_from_text<'a>(mi: Element<'a>, text: &str, following_siblings: &[Element<'a>]) -> Option<Element<'a>> {
2296
        // remove trailing mi's
2297
11
        let i_last_child = following_siblings.len()-1;
2298
11
        let last_child = following_siblings[i_last_child];
2299
11
        if name(last_child) == "mi" {
2300
10
          
following_siblings5
.
iter5
().
for_each5
(|sibling| sibling.remove_from_parent());
2301
5
          mi.set_text(text);
2302
5
          return Some(mi);
2303
        } else {
2304
          // replace the base of the scripted element (the last child) with the run (e.g. 's i n^2' -> {sin}^2)
2305
6
          mi.remove_from_parent();
2306
6
          following_siblings[..i_last_child].iter().for_each(|sibling| sibling.remove_from_parent());
2307
6
          let mut base = as_element(last_child.children()[0]);
2308
9
          while name(base) == "mrow" && 
base.children().len() == 13
{
2309
3
            // the base may be wrapped with mrows
2310
3
            base = as_element(base.children()[0]);
2311
3
            base.remove_attribute(SPLIT_TOKEN);
2312
3
          }
2313
6
          base.set_text(text);
2314
6
          return Some(last_child);
2315
        }
2316
11
      }
2317
11.4k
    }
2318
2319
    // Check if start..end is a number
2320
275
    fn is_likely_a_number(context: &CanonicalizeContext, mrow: Element, children: &[ChildOfElement]) -> bool {
2321
      // Note: the children of math_or_mrow aren't valid ('children' represents the current state)
2322
275
      let end = children.len();
2323
      // {
2324
      //  let n_preceding_siblings = as_element(children[0]).preceding_siblings().len();
2325
      //  debug!("is_likely_a_number: start/end={}/{}", n_preceding_siblings, n_preceding_siblings+end);
2326
      //  for (i, &child) in children.iter().enumerate() {
2327
      //    let child = as_element(child);
2328
      //    debug!("child# {}: {}", n_preceding_siblings+i, mml_to_string(child));
2329
      //  }
2330
      //  debug!("\n");
2331
      // }
2332
2333
      // gather up the text of the children (all mn, mo, or mtext)
2334
275
      let mut previous_name_was_mn = false;
2335
275
      let mut text = "".to_string();
2336
727
      for &child in 
children275
{
2337
727
        let child = as_element(child);
2338
727
        let child_name = name(child);
2339
727
        if previous_name_was_mn && 
child_name == "mn"303
{
2340
94
          text.push('\u{FFFF}');      // FIX: this should come from the separator string
2341
633
        }
2342
727
        text.push_str(as_text(child));
2343
727
        previous_name_was_mn = child_name == "mn";
2344
      }
2345
2346
275
      let text = text.trim(); // could be space got merged into an mn (e.g., braille::UEB::iceb::expr_3_1_6)
2347
      // debug!("  text='{}', decimal num={}, 3 digit match={}, 3-5 match={}, 1 digit={}", &text,
2348
      //    context.patterns.digit_only_decimal_number.is_match(text),
2349
      //    context.patterns.block_3digit_pattern.is_match(text),
2350
      //    context.patterns.block_3_5digit_pattern.is_match(text),
2351
      //    context.patterns.block_1digit_pattern.is_match(text));
2352
275
      if !(context.patterns.digit_only_decimal_number.is_match(text) ||
2353
190
         context.patterns.block_3digit_pattern.is_match(text) ||
2354
167
         context.patterns.block_3_5digit_pattern.is_match(text) ||
2355
166
         context.patterns.block_4digit_hex_pattern.is_match(text) ||
2356
162
         ( (text.chars().count() > 5 || 
context.patterns.decimal_separator139
.
is_match139
(
text139
)) &&
2357
25
           context.patterns.block_1digit_pattern.is_match(text) )
2358
        ) {
2359
161
          return false;
2360
114
      }
2361
2362
      // ??? might want to rule out "sequences" like '100, 200, 300' and '100, 103, 106' (if constant difference, then a sequence)
2363
2364
      // If surrounded by fences, and commas are used, leave as is (e.g, "{1,234}")
2365
114
      if !text.contains(',') {
2366
84
        return true;   // not comma separated
2367
30
      }
2368
2369
      // We have already checked for whitespace as separators, so it must be a comma. Just check the fences.
2370
      // This is not yet in canonical form, so the fences may be siblings or siblings of the parent 
2371
30
      let preceding_siblings = as_element(children[0]).preceding_siblings();
2372
30
      let following_siblings = as_element(children[end-1]).following_siblings();
2373
      let first_child;
2374
      let last_child;
2375
30
      if preceding_siblings.is_empty() && 
following_siblings19
.
is_empty19
() {
2376
        // number spans all children, look to parent for fences
2377
14
        let preceding_children = mrow.preceding_siblings();
2378
14
        let following_children = mrow.following_siblings();
2379
14
        if preceding_children.is_empty() || 
following_children9
.
is_empty9
() {
2380
9
          return true; // doesn't have left or right fence
2381
5
        }
2382
5
        first_child = preceding_children[preceding_children.len()-1];
2383
5
        last_child = following_children[0];
2384
16
      } else if preceding_siblings.is_empty() || 
following_siblings11
.
is_empty11
() {
2385
13
        return true; // can't be fences around it
2386
3
      } else {
2387
3
        first_child = preceding_siblings[preceding_siblings.len()-1];
2388
3
        last_child = following_siblings[0];
2389
3
      }
2390
8
      let first_child = as_element(first_child);
2391
8
      let last_child = as_element(last_child);
2392
8
      return !(name(first_child) == "mo" && is_fence(first_child) &&
2393
7
             name(last_child) == "mo" && is_fence(last_child) );
2394
275
    }
2395
2396
    // fn count_decimal_pts(context: &CanonicalizeContext, children: &[ChildOfElement], start: usize, end: usize) -> usize {
2397
    //  let mut n_decimal_pt = 0;
2398
    //  for &child_as_element in children.iter().take(end).skip(start) {
2399
    //    let child = as_element(child_as_element);
2400
    //    if context.patterns.decimal_separator.is_match(as_text(child))  {
2401
    //      n_decimal_pt += 1;
2402
    //    }
2403
    //  }
2404
    //  return n_decimal_pt;
2405
    // }
2406
2407
    /// This is a special case heuristic so try and determine if a terminating punctuation should be a decimal separator
2408
    /// Often math expressions end with punctuations for typographic reasons, so we try to figure that out here.
2409
    /// 'children' is a subset of 'mrow'
2410
5.55k
    fn ignore_final_punctuation(context: &CanonicalizeContext, mrow: Element, children: &[ChildOfElement]) -> bool {
2411
5.55k
      let last_child = children[children.len()-1];
2412
5.55k
      if mrow.children()[mrow.children().len()-1] != last_child {
2413
3.49k
        return false;   // not at end
2414
2.05k
      }
2415
2.05k
      let parent = mrow.parent().unwrap().element();
2416
2.05k
      if let Some(
math1.71k
) = parent
2417
1.71k
        && name(math) != "math" {
2418
1.58k
          return false;     // mrow inside something else -- not at end
2419
471
        }
2420
2421
471
      let last_child = as_element(last_child);
2422
      // debug!("ignore_final_punctuation: last child={}", mml_to_string(last_child));
2423
471
      if name(last_child) != "mo" {
2424
451
        return false; // last was not "mo", so can't be a period
2425
20
      }
2426
2427
20
      if !context.patterns.decimal_separator.is_match(as_text(last_child)) {
2428
0
        return false;
2429
20
      }
2430
2431
      // debug!("ignore_final_punctuation: #preceding={}", as_element(children[0]).preceding_siblings().len());
2432
      // look to preceding siblings and see if an of the mn's have a decimal separator
2433
20
      return !as_element(children[0]).preceding_siblings().iter()
2434
101
          .
any20
(|&child| {
2435
101
            let child = as_element(child);
2436
101
            name(child) == "mn" && 
context.patterns.decimal_separator14
.
is_match14
(
as_text(child)14
)
2437
101
          });
2438
5.55k
    }
2439
2440
    /// Trim off any children that are whitespace on either side
2441
107
    fn trim_whitespace(children: &mut [ChildOfElement], start: usize, end: usize) -> (usize, usize) {
2442
107
      let mut real_start = start;
2443
      #[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
2444
107
      for i in start..end {
2445
107
        let child = as_element(children[i]);
2446
107
        if !as_text(child).trim().is_empty() {
2447
107
          real_start = i;
2448
107
          break;
2449
0
        }
2450
      }
2451
2452
107
      let mut real_end = end;
2453
157
      for i in (
start..end107
).
rev107
() {
2454
157
        let child = as_element(children[i]);
2455
157
        if !as_text(child).trim().is_empty() {
2456
107
          real_end = i+1;
2457
107
          break;
2458
50
        }
2459
      }
2460
107
      return (real_start, real_end);
2461
107
    }
2462
2463
    /// Merge the number block from start..end
2464
107
    fn merge_block(children: &mut Vec<ChildOfElement>, start: usize, end: usize) {
2465
2466
      // debug!("merge_block: merging {}..{}", start, end);
2467
107
      let mut mn_text = String::with_capacity(4*(end-start)-1);    // true size less than #3 digit blocks + separator
2468
237
      for &child_as_element in 
children.iter()107
.
take107
(
end107
).
skip107
(
start107
) {
2469
237
        let child = as_element(child_as_element);
2470
237
        mn_text.push_str(as_text(child));
2471
237
      }
2472
107
      let child = as_element(children[start]);
2473
107
      set_mathml_name(child, "mn");
2474
107
      child.set_text(&mn_text);
2475
2476
107
      children.drain(start+1..end);
2477
107
    }
2478
2479
    
2480
    /// merge  ° C or  ° F into a single <mi> with the text '℃' or '℉' -- prevents '°' from becoming a superscript
2481
    #[allow(non_snake_case)]
2482
5.90k
    fn merge_degrees_C_F<'a>(mrow: Element<'a>) -> Element<'a> {
2483
5.90k
      let mut degree_child = None;
2484
28.1k
      for child in 
mrow5.90k
.
children5.90k
() {
2485
28.1k
        let child = as_element(child);
2486
28.1k
        if is_leaf(child) {
2487
23.9k
          match as_text(child) {
2488
23.9k
            "°" => {
2489
34
              degree_child = Some(child);
2490
34
            },
2491
23.9k
            "°C" => {
2492
12
              child.set_text("℃");
2493
12
              degree_child = None;
2494
12
            },
2495
23.8k
            "°F" => {
2496
0
              child.set_text("℉");
2497
0
              degree_child = None;
2498
0
            },
2499
23.8k
            text  => {
2500
23.8k
              if let Some(
degree_child23
) = degree_child
2501
23
                && (text == "C" || 
text == "F"22
) {
2502
                  // merge the degree child with the current child
2503
3
                  degree_child.set_text(if text == "C" { 
"℃"1
} else {
"℉"2
});
2504
3
                  child.remove_from_parent();
2505
23.8k
                }
2506
                // merge the degree child with the current child
2507
23.8k
              degree_child = None; 
2508
            },
2509
          }
2510
4.25k
        }
2511
      }
2512
5.90k
      return mrow;
2513
5.90k
    }
2514
2515
2516
    /// merge consecutive leaves containing any of the 'chars' into the first leaf -- probably used for omission with('_')
2517
5.90k
    fn merge_chars<'a>(mrow: Element<'a>, pattern: &Regex) -> Element<'a> {
2518
5.90k
      let mut first_child = None;
2519
5.90k
      let mut new_text = "".to_string();
2520
28.1k
      for child in 
mrow5.90k
.
children5.90k
() {
2521
28.1k
        let child = as_element(child);
2522
28.1k
        if is_leaf(child) {
2523
23.9k
          let text = as_text(child);
2524
23.9k
          if pattern.is_match(text) {
2525
134
            if new_text.is_empty() {
2526
118
              // potential start of a string
2527
118
              first_child = Some(child);
2528
118
              new_text = as_text(child).to_string();
2529
118
            } else {
2530
16
              // merge chars
2531
16
              new_text.push_str(text);
2532
16
              child.remove_from_parent();
2533
16
            }
2534
23.8k
          } else if new_text.len() > 1 {
2535
99
            // end of a run
2536
99
            first_child.unwrap().set_text(&new_text);
2537
99
            new_text.clear();
2538
23.7k
          } else {
2539
23.7k
            new_text.clear(); // just one entry -- no need to set the text
2540
23.7k
          }
2541
4.25k
        } else if new_text.len() > 1{
2542
7
          // end of a run
2543
7
          first_child.unwrap().set_text(&new_text);
2544
7
          new_text.clear();
2545
4.24k
        } else {
2546
4.24k
          new_text.clear();     // just one entry -- no need to set the text
2547
4.24k
        }
2548
      }
2549
5.90k
      if new_text.len() > 1{
2550
9
        // end of a run
2551
9
        first_child.unwrap().set_text(&new_text);
2552
5.89k
      }
2553
5.90k
      return mrow;
2554
5.90k
    }
2555
2556
    /// curl and divergence are handled as two character operators
2557
    /// if found, merge them into their own (new) mrow that has an intent on it
2558
    /// we can have '∇' or '𝛁', or those as vectors (inside an mover)
2559
10.1k
    fn merge_cross_or_dot_product_elements(children: &mut Vec<ChildOfElement>) {
2560
10.1k
      if children.is_empty() {
2561
3
        return;
2562
10.1k
      }
2563
10.1k
      let mut i = 0;
2564
10.1k
      let mut is_previous_nabla = false;
2565
31.5k
      while i < children.len() - 1 {
2566
21.3k
        let child = as_element(children[i]);
2567
21.3k
        if is_previous_nabla {
2568
14
          if is_leaf(child) {
2569
14
            let text = as_text(child);
2570
14
            if text == "⋅" || 
text == "·"13
||
text == "×"9
{
2571
12
              let nabla_child = as_element(children[i-1]);
2572
12
              let nabla_text = as_text( get_possible_embellished_node(nabla_child) );
2573
12
              let new_mrow = create_mathml_element(&child.document(), "mrow");
2574
12
              new_mrow.set_attribute_value(ACT_AS_OPERATOR, nabla_text);
2575
12
              new_mrow.append_child(nabla_child);
2576
12
              new_mrow.append_child(child);
2577
12
              children[i-1] = ChildOfElement::Element(new_mrow);
2578
12
              children.remove(i);
2579
12
            
}2
2580
0
          }
2581
14
          is_previous_nabla = false;
2582
        } else {
2583
21.3k
          let potential_nabla = if name(child) == "mover" {
as_element136
(
child.children()[0]136
)} else {
child21.1k
};
2584
21.3k
          if is_leaf(potential_nabla) {
2585
19.0k
            let text = as_text(potential_nabla);
2586
19.0k
            if text == "∇" || 
text == "𝛁"19.0k
{
2587
22
              is_previous_nabla = true;
2588
19.0k
            }
2589
2.27k
          }
2590
        }
2591
21.3k
        i += 1;
2592
      }
2593
10.1k
    }
2594
2595
5.90k
    fn merge_dots(mrow: Element) -> Element {
2596
      // merge consecutive <mo>s containing '.' into ellipsis
2597
5.90k
      let children = mrow.children();
2598
5.90k
      let mut i = 0;
2599
5.90k
      let mut n_dots = 0;   // number of consecutive mo's containing dots
2600
34.1k
      while i < children.len() {
2601
28.2k
        let child = as_element(children[i]);
2602
28.2k
        if name(child) == "mo" {
2603
10.4k
          let text = as_text(child);
2604
10.4k
          if text == "." {
2605
71
            n_dots += 1;
2606
71
            if n_dots == 3 {
2607
3
              let first_child = as_element(children[i-2]);
2608
3
              first_child.set_text("…");
2609
3
              as_element(children[i-1]).remove_from_parent();
2610
3
              child.remove_from_parent();
2611
3
              n_dots = 0;
2612
68
            }
2613
10.3k
          } else {
2614
10.3k
            n_dots = 0;
2615
10.3k
          }
2616
17.7k
        } else {
2617
17.7k
          n_dots = 0;
2618
17.7k
        }
2619
28.2k
        i += 1;
2620
      }
2621
5.90k
      return mrow;
2622
5.90k
    }
2623
2624
5.90k
    fn merge_primes(mrow: Element) -> Element {
2625
      // merge consecutive <mo>s containing primes (in various forms)
2626
5.90k
      let mut children = mrow.children();
2627
5.90k
      let mut i = 0;
2628
5.90k
      let mut n_primes = 0;   // number of consecutive mo's containing primes
2629
34.1k
      while i < children.len() {
2630
28.1k
        let child = as_element(children[i]);
2631
28.1k
        if name(child) == "mo" {
2632
10.4k
          let text = as_text(child);
2633
          // FIX: should we be more restrictive and change (apostrophe) only in a superscript?
2634
10.4k
          if IS_PRIME.is_match(text) {
2635
21
            n_primes += 1;
2636
10.4k
          } else if n_primes > 0 {
2637
3
            merge_prime_elements(&mut children, i - n_primes, i);
2638
3
            n_primes = 0;
2639
10.4k
          }
2640
17.7k
        } else if n_primes > 0 {
2641
2
          merge_prime_elements(&mut children, i - n_primes, i);
2642
2
          n_primes = 0;
2643
17.7k
        }
2644
28.1k
        i += 1;
2645
      }
2646
5.90k
      if n_primes > 0 {
2647
12
        merge_prime_elements(&mut children, i - n_primes, i);
2648
5.89k
      }
2649
5.90k
      return mrow;
2650
5.90k
    }
2651
2652
17
    fn merge_prime_elements(children: &mut [ChildOfElement], start: usize, end: usize) {
2653
      // not very efficient since this is probably causing an array shift each time (array is probably not big though)
2654
17
      let first_child = as_element(children[start]);
2655
17
      let mut new_text = String::with_capacity(end+3-start);  // one per element plus a little extra
2656
17
      new_text.push_str(as_text(first_child));
2657
17
      for &
child_as_element4
in children.iter().take(end).skip(start+1) {
2658
4
        let child = as_element(child_as_element);
2659
4
        let text = as_text(child);    // only in this function because it is an <mo>
2660
4
        new_text.push_str(text);
2661
4
        child.remove_from_parent();
2662
4
      }
2663
17
      first_child.set_text(&merge_prime_text(&new_text));
2664
17
    }
2665
  
2666
83
    fn merge_prime_text(text: &str) -> String {
2667
      // merge together single primes into double primes, etc.
2668
83
      let mut n_primes = 0;
2669
101
      for ch in 
text83
.
chars83
() {
2670
101
        match ch {
2671
90
          '\'' | '′' => n_primes += 1,
2672
9
          '″' => n_primes += 2,
2673
0
          '‴' => n_primes += 3,
2674
2
          '⁗' => n_primes += 4,
2675
          _ => {
2676
0
            eprintln!("merge_prime_text: unexpected char '{ch}' found in prime text '{text}'");
2677
0
            return text.to_string();
2678
          }
2679
        }
2680
      }
2681
      // it would be very rare to have more than a quadruple prime, so the inefficiency in the won't likely happen
2682
83
      let mut result = String::with_capacity(n_primes);  // likely 4x too big, but string is short-lived and small
2683
83
      for _ in 0..n_primes/4 {
2684
3
        result.push('⁗');
2685
3
      }
2686
83
      match n_primes % 4 {
2687
61
        1 => result.push('′'),
2688
20
        2 => result.push('″'),
2689
1
        3 => result.push('‴'),
2690
1
        _ => ()  // can't happen
2691
      }
2692
83
      return result;
2693
83
    }
2694
2695
    // from https://www.w3.org/TR/MathML3/chapter7.html#chars.pseudo-scripts
2696
35.1k
    fn is_pseudo_script_char(ch: char) -> bool {
2697
35.1k
      
matches!35.0k
(ch,
2698
        '\"' | '\'' | '*' | '`' | 'ª' | '°' | '²' | '³' | '´' | '¹' | 'º' |
2699
        '\u{2018}' | '\u{2019}' | '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' |
2700
        '\u{2032}' | '\u{2033}' | '\u{2034}' | '\u{2035}' | '\u{2036}' | '\u{2037}' | '\u{2057}'
2701
      )
2702
35.1k
    }
2703
5.90k
    fn handle_pseudo_scripts(mrow: Element) -> Element {
2704
  
2705
5.90k
      assert!(name(mrow) == "mrow" || 
ELEMENTS_WITH_ONE_CHILD2.42k
.
contains2.42k
(
name(mrow)2.42k
), "non-mrow passed to handle_pseudo_scripts: {}",
mml_to_string0
(
mrow0
));
2706
5.90k
      let mut children = mrow.children();
2707
      // check to see if mrow of all pseudo scripts
2708
5.91k
      if 
children.iter()5.90k
.
all5.90k
(|&child| {
2709
5.91k
        is_pseudo_script(as_element(child))
2710
5.91k
      }) {
2711
2
        let parent = get_parent(mrow);  // must exist
2712
2
        let is_first_child = mrow.preceding_siblings().is_empty();
2713
2
        if  is_first_child {
2714
0
          return mrow; // FIX: what should happen
2715
2
        }
2716
2
        if crate::xpath_functions::IsNode::is_scripted(parent) {
2717
2
          return mrow;   // already in a script position
2718
0
        }
2719
0
        if name(parent) == "mrow" {
2720
0
          mrow.set_attribute_value("data-pseudo-script", "true");
2721
0
          return handle_pseudo_scripts(parent);
2722
        } else {
2723
0
          return mrow; // FIX: what should happen?
2724
        }
2725
5.90k
      }
2726
2727
5.90k
      let mut i = 1;
2728
5.90k
      let mut found = false;
2729
28.1k
      while i < children.len() {
2730
22.2k
        let child = as_element(children[i]);
2731
22.2k
        if is_pseudo_script(child) ||
2732
22.2k
           child.attribute("data-pseudo-script").is_some() {
2733
35
          let msup = create_mathml_element(&child.document(), "msup");
2734
35
          msup.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
2735
35
          msup.append_child(children[i-1]);
2736
35
          msup.append_child(child);
2737
35
          children[i-1] = ChildOfElement::Element(msup);
2738
35
          children.remove(i);
2739
35
          found = true;
2740
22.2k
        } else {
2741
22.2k
          i += 1;
2742
22.2k
        }
2743
      }
2744
5.90k
      if found {
2745
25
        mrow.replace_children(children)
2746
5.88k
      }
2747
5.90k
      return mrow;
2748
2749
28.1k
      fn is_pseudo_script(child: Element) -> bool {
2750
28.1k
        if name(child) == "mo" {
2751
10.4k
          let text = as_text(child);
2752
10.4k
          if let Some(
ch10.3k
) = single_char(text)
2753
10.3k
            && is_pseudo_script_char(ch) {
2754
              // don't script a pseudo-script
2755
55
              let preceding_siblings = child.preceding_siblings();
2756
55
              if !preceding_siblings.is_empty() {
2757
42
                let last_child = as_element(preceding_siblings[preceding_siblings.len()-1]);
2758
42
                if name(last_child) == "mo" &&
2759
10
                   let Some(ch) = single_char(as_text(last_child))
2760
10
                    && is_pseudo_script_char(ch) {
2761
6
                      return false;
2762
36
                    }
2763
13
              }
2764
49
              if text == "*" {
2765
                // could be infix "*" -- this is a weak check to see if what follows is potentially an operand
2766
5
                let following_siblings = child.following_siblings();
2767
5
                if  following_siblings.is_empty() {
2768
1
                  return true;
2769
4
                }
2770
4
                let first_child = as_element(following_siblings[0]);
2771
4
                return name(first_child) != "mo" || ["(", "[", "{"].contains(&text);
2772
              } else {
2773
44
                return true;
2774
              }
2775
10.3k
            }
2776
17.7k
        }
2777
28.1k
        return false;
2778
2779
        /// An efficient method to get the char from a string if it is just one char or fail
2780
10.4k
        fn single_char(text: &str) -> Option<char> {
2781
10.4k
          let mut chars = text.chars();
2782
10.4k
          let ch = chars.next();
2783
10.4k
          if ch.is_none() || chars.next().is_some() {
2784
39
            return None;   // not one character
2785
          } else {
2786
10.3k
            return ch;
2787
          }
2788
10.4k
        }
2789
28.1k
      }
2790
2791
5.90k
    }
2792
2793
10.1k
    fn handle_convert_to_mmultiscripts(children: &mut Vec<ChildOfElement>) {
2794
10.1k
      if children.len() == 1 {
2795
4.45k
        return;   // can't convert to mmultiscripts if there is nothing to attach an empty base to
2796
5.72k
      }
2797
5.72k
        let mut i = 0;
2798
      // convert_to_mmultiscripts changes 'children', so can't cache length
2799
32.7k
      while i < children.len() {
2800
26.9k
        let child = as_element(children[i]);
2801
26.9k
        let child_name = name(child);
2802
26.9k
        if (child_name == "msub" || 
child_name == "msup"26.3k
||
child_name == "msubsup"25.8k
) &&
CanonicalizeContext::is_empty_element1.24k
(
as_element1.24k
(
child.children()[0]1.24k
)) {
2803
115
          i = convert_to_mmultiscripts(children, i);
2804
26.8k
        } else {
2805
26.8k
          i += 1;
2806
26.8k
        }
2807
      }
2808
10.1k
    }
2809
2810
2811
    /// Converts the script element with an empty base to mmultiscripts by sucking the base from the following or preceding element.
2812
    /// The following element is preferred so that these become prescripts (common usage is from TeX), but if the preceding element
2813
    ///   has a closer mi/mtext, it is used.
2814
    /// mhchem has some ugly output (at least in MathJax) and that's where using the following element makes sense (usually)
2815
    ///   because an empty base (mpadded width=0) is used for the scripts. A hacky attribute indicates this case.
2816
115
    fn convert_to_mmultiscripts(mrow_children: &mut Vec<ChildOfElement>, i: usize) -> usize {
2817
      // this is a bit messy/confusing because we might scan forwards or backwards and this affects whether
2818
      // we are scanning for prescripts or postscripts
2819
      // the generic name "primary_scripts" means prescripts if going forward or postscripts if going backwards
2820
      // if we are going forward and hit a sub/superscript with a base, then those scripts become postscripts ("other_scripts")
2821
      // if we are going backwards, we never add prescripts
2822
2823
      // let parent = get_parent(as_element(mrow_children[i]));
2824
      // debug!("convert_to_mmultiscripts (i={}) -- PARENT:\n{}", i, mml_to_string(parent));
2825
2826
115
      let i_base = choose_base_of_mmultiscripts(mrow_children, i);
2827
115
      let mut base = as_element(mrow_children[i_base]);
2828
      // debug!("convert_to_mmultiscripts -- base\n{}", mml_to_string(base));
2829
115
      let base_name = name(base);
2830
115
      let mut prescripts = vec![];
2831
115
      let mut postscripts = vec![];
2832
115
      let mut i_postscript = i_base + 1;
2833
2834
115
      if (base_name == "msub" || 
base_name == "msup"110
||
base_name == "msubsup"110
) &&
2835
5
         !CanonicalizeContext::is_empty_element(as_element(base.children()[0])) {
2836
5
        // if the base is a script element, then we want the base of that to be the base of the mmultiscripts
2837
5
        let mut base_children = base.children();
2838
5
        let script_base = as_element(base.children()[0]);
2839
5
        base_children[0] = ChildOfElement::Element(CanonicalizeContext::create_empty_element(&base.document()));
2840
5
        base.replace_children(base_children);
2841
5
        add_to_scripts(base, &mut postscripts);
2842
5
        base = script_base;
2843
110
      }
2844
2845
115
      let mut has_chemistry_prescript = false; // chemical elements don't have both prescripts (nuclear chem) and postscripts
2846
115
      if i_base > i {
2847
        // we have prescripts -- gather them up
2848
61
        let mut i_prescript = i;
2849
122
        while i_prescript < i_base {
2850
61
          let script = as_element(mrow_children[i_prescript]);
2851
          // kind of ugly -- this duplicates the first part of add_to_scripts
2852
61
          let script_name = name(script);
2853
61
          if script_name == "msub" || 
script_name == "msup"56
||
script_name == "msubsup"48
{
2854
61
            let base = as_element(script.children()[0]);
2855
61
            has_chemistry_prescript |= base.attribute(MHCHEM_MMULTISCRIPTS_HACK).is_some();
2856
61
          
}0
2857
61
          if !add_to_scripts(script, &mut prescripts) {
2858
0
            break;
2859
61
          }
2860
61
          i_prescript += 1;
2861
        }
2862
54
      }
2863
2864
115
      if !has_chemistry_prescript {
2865
        // gather up the postscripts (if any)
2866
137
        while i_postscript < mrow_children.len() {
2867
104
          let script = as_element(mrow_children[i_postscript]);
2868
          // debug!("script: {}", mml_to_string(script));
2869
          // if name(script) == "msub" && i_postscript+1 < mrow_children.len() {
2870
          //  let superscript = as_element(mrow_children[i_postscript+1]);
2871
          //  if name(superscript) == "msup" && CanonicalizeContext::is_empty_element(as_element(superscript.children()[0])) {
2872
          //    set_mathml_name(script, "msubsup");
2873
          //    script.append_child(superscript.children()[1]);
2874
          //    i_postscript += 1;
2875
          //  }
2876
          // }
2877
          // debug!("adding postscript\n{}", mml_to_string(script));
2878
104
          if !add_to_scripts(script, &mut postscripts) {
2879
32
            break;
2880
72
          }
2881
72
          i_postscript += 1;
2882
        }
2883
50
      }
2884
2885
115
      let i_multiscript = if i_base < i {
i_base54
} else {
i61
};
2886
115
      let script = create_mathml_element(&base.document(), "mmultiscripts");
2887
115
      let mut num_children = 1 + postscripts.len();
2888
115
      if !prescripts.is_empty() {
2889
61
        num_children += 1 + prescripts.len();
2890
61
      
}54
2891
115
      let mut new_children = Vec::with_capacity(num_children);
2892
115
      new_children.push(ChildOfElement::Element(base));
2893
115
      new_children.append(&mut postscripts);
2894
115
      if !prescripts.is_empty() {
2895
61
        new_children.push( ChildOfElement::Element( create_mathml_element(&script.document(), "mprescripts") ) );
2896
61
        new_children.append(&mut prescripts);
2897
61
      
}54
2898
2899
115
      script.replace_children(new_children);
2900
115
      let lifted_base = as_element(mrow_children[i_multiscript]);
2901
115
      add_attrs(script, &lifted_base.attributes());
2902
115
      script.remove_attribute("data-split");   // doesn't make sense on mmultiscripts
2903
115
      script.remove_attribute("mathvariant");    // doesn't make sense on mmultiscripts
2904
115
      mrow_children[i_multiscript] = ChildOfElement::Element(script);
2905
115
      mrow_children.drain(i_multiscript+1..i_postscript);  // remove children after the first
2906
2907
115
      let likely_chemistry = likely_adorned_chem_formula(script);
2908
115
      if likely_chemistry >= 0 {
2909
106
        script.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
2910
106
      
}9
2911
2912
      // debug!("convert_to_mmultiscripts -- converted script:\n{}", mml_to_string(script));
2913
      // debug!("convert_to_mmultiscripts (at end) -- #children={}", mrow_children.len());
2914
115
      return i_multiscript + 1;   // child to start on next
2915
115
    }
2916
2917
170
    fn add_to_scripts<'a>(el: Element<'a>, scripts: &mut Vec<ChildOfElement<'a>>) -> bool {
2918
170
      let script_name = name(el);
2919
170
      if !(script_name == "msub" || 
script_name == "msup"111
||
script_name == "msubsup"80
) {
2920
32
        return false;
2921
138
      }
2922
138
      let base = as_element(el.children()[0]);
2923
138
      if !CanonicalizeContext::is_empty_element(base) { // prescript that really should be a postscript
2924
        // debug!("add_to_scripts: not empty base:\n{}", mml_to_string(base));
2925
0
        return false;
2926
138
      }
2927
138
      if script_name == "msub" {
2928
59
        add_pair(scripts, Some(el.children()[1]), None);
2929
79
      } else if script_name == "msup" {
2930
31
        add_pair(scripts, None, Some(el.children()[1]));
2931
48
      } else { // msubsup
2932
48
        add_pair(scripts, Some(el.children()[1]), Some(el.children()[2]));
2933
48
      };
2934
138
      return true;
2935
170
    }
2936
2937
138
    fn add_pair<'v, 'a:'v>(script_vec: &'v mut Vec<ChildOfElement<'a>>, subscript: Option<ChildOfElement<'a>>, superscript: Option<ChildOfElement<'a>>) {
2938
138
      let child_of_element = if let Some(
subscript107
) = subscript {
subscript107
} else {
superscript31
.
unwrap31
()};
2939
138
      let doc = as_element(child_of_element).document();
2940
138
      let subscript = if let Some(
subscript107
)= subscript {
2941
107
        if CanonicalizeContext::is_empty_element(as_element(subscript)) {
2942
0
          ChildOfElement::Element(create_mathml_element(&doc, "none"))
2943
        } else {
2944
107
          subscript
2945
        }
2946
      } else {
2947
31
        ChildOfElement::Element(create_mathml_element(&doc, "none"))
2948
      };
2949
138
      let superscript = if let Some(
superscript79
) = superscript {
2950
79
        if CanonicalizeContext::is_empty_element(as_element(superscript)) {
2951
0
          ChildOfElement::Element(create_mathml_element(&doc, "none"))
2952
        } else {
2953
79
          superscript
2954
        }
2955
      } else {
2956
59
        ChildOfElement::Element(create_mathml_element(&doc, "none"))
2957
      };
2958
138
      script_vec.push(subscript);
2959
138
      script_vec.push(superscript);
2960
138
    }
2961
2962
    /// Find the closest likely base to the 'i'th child, preferring the next one over the preceding one, but want the closest.
2963
    ///
2964
    /// Note: because the base might be (...), 'mrow_children might be changed so that they are grouped into an mrow.
2965
115
    fn choose_base_of_mmultiscripts(mrow_children: &mut Vec<ChildOfElement>, i: usize) -> usize {
2966
      // We already know there are no empty scripts to the left (because we find first empty base from left to right).
2967
      // However, there may be some empty bases before we get to real base on the right.
2968
115
      let script_element_base = as_element(as_element(mrow_children[i]).children()[0]);
2969
115
      let mut likely_postscript = script_element_base.attribute(MHCHEM_MMULTISCRIPTS_HACK).is_some() && 
i > 0103
;
2970
115
      if likely_postscript {
2971
86
        let base_of_postscript = as_element(mrow_children[i-1]);
2972
86
        if name(base_of_postscript) != "mi" || 
likely_chem_element(base_of_postscript) < 050
{
2973
36
          likely_postscript = false;  // base for potential postscript doesn't look reasonable -- consider it a prescript
2974
50
        }
2975
29
      }
2976
115
      if i+1 < mrow_children.len() && 
!likely_postscript107
&&
is_child_simple_base61
(
mrow_children[i+1]61
) {
2977
61
        return i+1;
2978
54
      }
2979
54
      if i > 0 {
2980
54
        if let Some(
i_start2
) = is_grouped_base(&mrow_children[..i]) {
2981
2
          assert!(i_start < i-1);  // should be at least two children (open and close)
2982
          // create a new mrow, add the grouped children to it, then drain all but the first of them from the original mrow vec.
2983
          // stick the mrow into the first of them -- this is the base
2984
2
          let new_mrow = create_mathml_element(&as_element(mrow_children[0]).document(), "mrow");
2985
2
          new_mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
2986
8
          for &child in 
&2
mrow_children2
[i_start..i] {
2987
8
            new_mrow.append_child(child);
2988
8
          }
2989
2
          mrow_children.drain(i_start+1..i);
2990
2
          mrow_children[i_start] = ChildOfElement::Element(new_mrow);
2991
2
          return i_start;
2992
52
        }
2993
52
        if is_child_simple_base(mrow_children[i-1]) {
2994
52
          return i-1;
2995
0
        }
2996
0
      }
2997
2998
      // base very likely after multiple scripts to the right
2999
0
      for (i_base, &child) in mrow_children.iter().enumerate().skip(i+1) {
3000
0
        if is_child_simple_base(child) {
3001
0
            return i_base;
3002
        } else {
3003
0
          let child = as_element(child);
3004
0
          let child_name = name(child);
3005
0
          if !(child_name == "msub" || child_name == "msup" || child_name == "msubsup") {
3006
0
            break;
3007
0
          }
3008
        }
3009
      }
3010
      // didn't find any good candidates for a base -- pick something valid
3011
0
      assert!(mrow_children.len() > i);
3012
0
      return i;
3013
      
3014
      
3015
113
      fn is_child_simple_base(child: ChildOfElement) -> bool {
3016
113
        let mut child = as_element(child);
3017
113
        let child_name = name(child);
3018
113
        if child_name == "msub" || 
child_name == "msup"108
||
child_name == "msubsup"108
{
3019
5
          child = as_element(child.children()[0]);
3020
108
        }
3021
3022
113
        return is_leaf(child) && !CanonicalizeContext::is_empty_element(child);  // a little overly general (but hopefully doesn't matter)
3023
113
      }
3024
3025
      /// Return the index of the matched open paren/bracket if the last element is a closed paren/bracket
3026
54
      fn is_grouped_base(mrow_children: &[ChildOfElement]) -> Option<usize> {
3027
        // FIX: this really belongs in canonicalization pass, not the clean pass
3028
54
        let i_last = mrow_children.len()-1;
3029
54
        let last_child = get_possible_embellished_node(as_element(mrow_children[i_last]));
3030
54
        if name(last_child) == "mo" &&
3031
3
           CanonicalizeContext::find_operator(None, last_child, None, None, None).is_right_fence() {
3032
6
          for i_child in (
0..i_last2
).
rev2
() {
3033
6
            let child = get_possible_embellished_node(as_element(mrow_children[i_child]));
3034
6
            if name(child) == "mo" &&
3035
2
               CanonicalizeContext::find_operator(None, child, None, None, None).is_left_fence() {
3036
              // FIX: should make sure left and right match. Should also count for nested parens
3037
2
              return Some(i_child);
3038
4
            }
3039
          }
3040
52
        }
3041
52
        return None;
3042
54
      }
3043
115
    }
3044
52.3k
  }
3045
3046
64.1k
  fn canonicalize_mrows<'a>(&self, mathml: Element<'a>) -> Result<Element<'a>> {
3047
64.1k
    let tag_name = name(mathml);
3048
64.1k
    set_mathml_name(mathml, tag_name);  // add namespace
3049
64.1k
    match tag_name {
3050
64.1k
      "mi" | 
"ms"48.7k
|
"mtext"48.7k
|
"mspace"48.3k
=> {
3051
15.8k
        self.canonicalize_plane1(mathml);
3052
15.8k
        return Ok( mathml ); },
3053
48.3k
      "mo" => {
3054
14.6k
        self.canonicalize_plane1(mathml);
3055
14.6k
        self.canonicalize_mo_text(mathml);
3056
14.6k
        return Ok( mathml );
3057
      },
3058
33.7k
      "mn" => {
3059
11.6k
        self.canonicalize_plane1(mathml);
3060
11.6k
        return Ok( mathml );
3061
      },
3062
22.0k
      "mrow" => {
3063
7.48k
        return self.canonicalize_mrows_in_mrow(mathml);
3064
      },
3065
      _ => {
3066
        // recursively try to make mrows in other structures (eg, num/denom in fraction)
3067
14.6k
        let mut new_children = Vec::with_capacity(mathml.children().len());
3068
21.5k
        for child in 
mathml14.6k
.
children14.6k
() {
3069
21.5k
          match child {
3070
21.5k
            ChildOfElement::Element(e) => {
3071
21.5k
              new_children.push( ChildOfElement::Element(self.canonicalize_mrows(e)
?0
));
3072
            },
3073
0
            ChildOfElement::Text(t) => {
3074
0
              if mathml.children().len() != 1 {
3075
0
                bail!("Text '{}' found with more than one child in element '{}'", t.text(), tag_name);
3076
0
              }
3077
0
              return Ok( mathml );
3078
            },
3079
0
            _ => bail!("Should have been an element or text in '{}'", tag_name),
3080
          }
3081
        }
3082
14.6k
        mathml.replace_children(new_children);
3083
14.6k
        return Ok( mathml );
3084
      },
3085
    }
3086
64.1k
  }
3087
    
3088
1.91k
  fn potentially_lift_script<'a>(&self, mrow: Element<'a>) -> Element<'a> {
3089
1.91k
    if name(mrow) != "mrow" {
3090
0
      return mrow;
3091
1.91k
    }
3092
1.91k
    let mut mrow_children = mrow.children();
3093
1.91k
    let first_child = as_element(mrow_children[0]);
3094
1.91k
    let last_child = as_element(mrow_children[mrow_children.len()-1]);
3095
1.91k
    let last_child_name = name(last_child);
3096
3097
1.91k
    if name(first_child) == "mo" && 
is_fence1.91k
(
first_child1.91k
) &&
3098
1.91k
       (last_child_name == "msub" || last_child_name == "msup" || 
last_child_name == "msubsup"1.89k
) {
3099
19
      let base = as_element(last_child.children()[0]);
3100
19
      if !(name(base) == "mo" && is_fence(base)) {
3101
0
        return mrow; // not a case we are interested in
3102
19
      }
3103
      // else drop through
3104
    } else {
3105
1.89k
      return mrow; // not a case we are interested in
3106
    }
3107
3108
19
    let script = last_child; // better name now that we know what it is
3109
19
    let mut script_children = script.children();
3110
19
    let close_fence = script_children[0];
3111
19
    let mrow_children_len = mrow_children.len();     // rust complains about a borrow after move if we don't store this first
3112
19
    mrow_children[mrow_children_len-1] = close_fence;     // make the mrow hold the fences
3113
19
    mrow.replace_children(mrow_children);
3114
    // make the mrow the child of the script
3115
19
    script_children[0] = ChildOfElement::Element(mrow);
3116
19
    script.replace_children(script_children);
3117
19
    return script;
3118
1.91k
  }
3119
3120
  /// Map names to start of Unicode alphanumeric blocks (Roman, digits, Greek)
3121
  /// Don't do this for function names -- for function names, map them back to ASCII
3122
42.1k
  fn canonicalize_plane1<'a>(&self, mi: Element<'a>) -> Element<'a> {
3123
    // if the character shouldn't be mapped, use 0 -- don't use 'A' as ASCII and Greek aren't contiguous
3124
    static MATH_VARIANTS: phf::Map<&str, [u32; 3]> = phf_map! {
3125
      // "normal" -- nothing to do
3126
      "italic" => [0, 0, 0x1D6E2],
3127
      "bold" => [0x1D400, 0x1D7CE, 0x1D6A8],
3128
      "bold-italic" => [0x1D468, 0x1D7CE, 0x1D71C],
3129
      "double-struck" => [0x1D538, 0x1D7D8, 0],
3130
      "bold-fraktur" => [0x1D56C, 0, 0x1D6A8],
3131
      "script" => [0x1D49C, 0, 0],
3132
      "bold-script" => [0x1D4D0, 0, 0x1D6A8],
3133
      "fraktur" => [0x1D504, 0, 0],
3134
      "sans-serif" => [0x1D5A0, 0x1D7E2, 0],
3135
      "bold-sans-serif" => [0x1D5D4, 0x1D7EC, 0x1D756],
3136
      "sans-serif-italic" => [0x1D608, 0x1D7E2, 0],
3137
      "sans-serif-bold-italic" => [0x1D63C, 0x1D7EC, 0x1D790],
3138
      "monospace" => [0x1D670, 0x1D7F6, 0],
3139
    };
3140
3141
42.1k
    return crate::definitions::SPEECH_DEFINITIONS.with(|defs| {
3142
      // names that are always function names (e.g, "sin" and "log")
3143
42.1k
      let defs = defs.borrow();
3144
42.1k
      let 
names42.1k
= match defs.get_hashset("FunctionNames") {
3145
42.1k
        Some(hs) => hs,
3146
3
        None => return mi,  // happens in some canonicalize tests but not in real use
3147
      };
3148
3149
3150
42.1k
      let mi_text = as_text(mi);
3151
42.1k
      let variant = mi.attribute_value("mathvariant");
3152
3153
42.1k
      if names.contains(mi_text) {
3154
791
        return mi;   // avoid mapping mathvariant for function names
3155
41.3k
      }
3156
      // function name might be (wrongly) set to italic math alphanumeric chars, including bold italic
3157
41.3k
      if let Some(
ascii_text12.6k
) = CanonicalizeContext::math_alphanumeric_to_ascii(mi_text)
3158
12.6k
        && names.contains(&ascii_text) {
3159
3
          mi.set_text(&ascii_text);
3160
3
          return mi
3161
41.3k
        }
3162
3163
41.3k
      if variant.is_none() {
3164
40.3k
        return mi;
3165
952
      }
3166
3167
952
      let new_text = match MATH_VARIANTS.get(variant.unwrap()) {
3168
755
        None => mi_text.to_string(),
3169
197
        Some(start) => shift_text(mi_text, start),
3170
      };
3171
      // mi.remove_attribute("mathvariant");  // leave attr -- for Nemeth, there are italic digits etc that don't have Unicode points
3172
952
      mi.set_text(&new_text);
3173
952
      return mi;
3174
42.1k
    });
3175
3176
197
    fn shift_text(old_text: &str, char_mapping: &[u32; 3]) -> String {
3177
      // if there is no block for something, use 'a', 'A', 0 as that will be a no-op
3178
      struct Offsets {
3179
        ch: u32,
3180
        table: usize, 
3181
      }
3182
      static SHIFT_AMOUNTS: phf::Map<char, Offsets> = phf_map! {
3183
        'A' => Offsets{ ch: 0, table: 0},
3184
        'B' => Offsets{ ch: 1, table: 0},
3185
        'C' => Offsets{ ch: 2, table: 0},
3186
        'D' => Offsets{ ch: 3, table: 0},
3187
        'E' => Offsets{ ch: 4, table: 0},
3188
        'F' => Offsets{ ch: 5, table: 0},
3189
        'G' => Offsets{ ch: 6, table: 0},
3190
        'H' => Offsets{ ch: 7, table: 0},
3191
        'I' => Offsets{ ch: 8, table: 0},
3192
        'J' => Offsets{ ch: 9, table: 0},
3193
        'K' => Offsets{ ch: 10, table: 0},
3194
        'L' => Offsets{ ch: 11, table: 0},
3195
        'M' => Offsets{ ch: 12, table: 0},
3196
        'N' => Offsets{ ch: 13, table: 0},
3197
        'O' => Offsets{ ch: 14, table: 0},
3198
        'P' => Offsets{ ch: 15, table: 0},
3199
        'Q' => Offsets{ ch: 16, table: 0},
3200
        'R' => Offsets{ ch: 17, table: 0},
3201
        'S' => Offsets{ ch: 18, table: 0},
3202
        'T' => Offsets{ ch: 19, table: 0},
3203
        'U' => Offsets{ ch: 20, table: 0},
3204
        'V' => Offsets{ ch: 21, table: 0},
3205
        'W' => Offsets{ ch: 22, table: 0},
3206
        'X' => Offsets{ ch: 23, table: 0},
3207
        'Y' => Offsets{ ch: 24, table: 0},
3208
        'Z' => Offsets{ ch: 25, table: 0},
3209
        'a' => Offsets{ ch: 26, table: 0},
3210
        'b' => Offsets{ ch: 27, table: 0},
3211
        'c' => Offsets{ ch: 28, table: 0},
3212
        'd' => Offsets{ ch: 29, table: 0},
3213
        'e' => Offsets{ ch: 30, table: 0},
3214
        'f' => Offsets{ ch: 31, table: 0},
3215
        'g' => Offsets{ ch: 32, table: 0},
3216
        'h' => Offsets{ ch: 33, table: 0},
3217
        'i' => Offsets{ ch: 34, table: 0},
3218
        'j' => Offsets{ ch: 35, table: 0},
3219
        'k' => Offsets{ ch: 36, table: 0},
3220
        'l' => Offsets{ ch: 37, table: 0},
3221
        'm' => Offsets{ ch: 38, table: 0},
3222
        'n' => Offsets{ ch: 39, table: 0},
3223
        'o' => Offsets{ ch: 40, table: 0},
3224
        'p' => Offsets{ ch: 41, table: 0},
3225
        'q' => Offsets{ ch: 42, table: 0},
3226
        'r' => Offsets{ ch: 43, table: 0},
3227
        's' => Offsets{ ch: 44, table: 0},
3228
        't' => Offsets{ ch: 45, table: 0},
3229
        'u' => Offsets{ ch: 46, table: 0},
3230
        'v' => Offsets{ ch: 47, table: 0},
3231
        'w' => Offsets{ ch: 48, table: 0},
3232
        'x' => Offsets{ ch: 49, table: 0},
3233
        'y' => Offsets{ ch: 50, table: 0},
3234
        'z' => Offsets{ ch: 51, table: 0},
3235
        '0' => Offsets{ ch: 0, table: 1},
3236
        '1' => Offsets{ ch: 1, table: 1},
3237
        '2' => Offsets{ ch: 2, table: 1},
3238
        '3' => Offsets{ ch: 3, table: 1},
3239
        '4' => Offsets{ ch: 4, table: 1},
3240
        '5' => Offsets{ ch: 5, table: 1},
3241
        '6' => Offsets{ ch: 6, table: 1},
3242
        '7' => Offsets{ ch: 7, table: 1},
3243
        '8' => Offsets{ ch: 8, table: 1},
3244
        '9' => Offsets{ ch: 9, table: 1},
3245
        'Α' => Offsets{ ch: 0, table: 2},
3246
        'Β' => Offsets{ ch: 1, table: 2},
3247
        'Γ' => Offsets{ ch: 2, table: 2},
3248
        'Δ' => Offsets{ ch: 3, table: 2},
3249
        'Ε' => Offsets{ ch: 4, table: 2},
3250
        'Ζ' => Offsets{ ch: 5, table: 2},
3251
        'Η' => Offsets{ ch: 6, table: 2},
3252
        'Θ' => Offsets{ ch: 7, table: 2},
3253
        'Ι' => Offsets{ ch: 8, table: 2},
3254
        'Κ' => Offsets{ ch: 9, table: 2},
3255
        'Λ' => Offsets{ ch: 10, table: 2},
3256
        'Μ' => Offsets{ ch: 11, table: 2},
3257
        'Ν' => Offsets{ ch: 12, table: 2},
3258
        'Ξ' => Offsets{ ch: 13, table: 2},
3259
        'Ο' => Offsets{ ch: 14, table: 2},
3260
        'Π' => Offsets{ ch: 15, table: 2},
3261
        'Ρ' => Offsets{ ch: 16, table: 2},
3262
        'ϴ' => Offsets{ ch: 17, table: 2},
3263
        'Σ' => Offsets{ ch: 18, table: 2},
3264
        'Τ' => Offsets{ ch: 19, table: 2},
3265
        'Υ' => Offsets{ ch: 20, table: 2},
3266
        'Φ' => Offsets{ ch: 21, table: 2},
3267
        'Χ' => Offsets{ ch: 22, table: 2},
3268
        'Ψ' => Offsets{ ch: 23, table: 2},
3269
        'Ω' => Offsets{ ch: 24, table: 2},
3270
        '∇' => Offsets{ ch: 25, table: 2},                
3271
        'α' => Offsets{ ch: 26, table: 2},
3272
        'β' => Offsets{ ch: 27, table: 2},
3273
        'γ' => Offsets{ ch: 28, table: 2},
3274
        'δ' => Offsets{ ch: 29, table: 2},
3275
        'ε' => Offsets{ ch: 30, table: 2},
3276
        'ζ' => Offsets{ ch: 31, table: 2},
3277
        'η' => Offsets{ ch: 32, table: 2},
3278
        'θ' => Offsets{ ch: 33, table: 2},
3279
        'ι' => Offsets{ ch: 34, table: 2},
3280
        'κ' => Offsets{ ch: 35, table: 2},
3281
        'λ' => Offsets{ ch: 36, table: 2},
3282
        'μ' => Offsets{ ch: 37, table: 2},
3283
        'ν' => Offsets{ ch: 38, table: 2},
3284
        'ξ' => Offsets{ ch: 39, table: 2},
3285
        'ο' => Offsets{ ch: 40, table: 2},
3286
        'π' => Offsets{ ch: 41, table: 2},
3287
        'ρ' => Offsets{ ch: 42, table: 2},
3288
        'ς' => Offsets{ ch: 43, table: 2},
3289
        'σ' => Offsets{ ch: 44, table: 2},
3290
        'τ' => Offsets{ ch: 45, table: 2},
3291
        'υ' => Offsets{ ch: 46, table: 2},
3292
        'φ' => Offsets{ ch: 47, table: 2},
3293
        'χ' => Offsets{ ch: 48, table: 2},
3294
        'ψ' => Offsets{ ch: 49, table: 2},
3295
        'ω' => Offsets{ ch: 50, table: 2},
3296
        '∂' => Offsets{ ch: 51, table: 2},
3297
        'ϵ' => Offsets{ ch: 52, table: 2},
3298
        'ϑ' => Offsets{ ch: 53, table: 2},
3299
        'ϰ' => Offsets{ ch: 54, table: 2},
3300
        'ϕ' => Offsets{ ch: 55, table: 2},
3301
        'ϱ' => Offsets{ ch: 56, table: 2},
3302
        'ϖ' => Offsets{ ch: 57, table: 2},
3303
      };
3304
197
      let mut new_text = String::new();
3305
321
      for ch in 
old_text197
.
chars197
() {
3306
321
        new_text.push(
3307
321
          match SHIFT_AMOUNTS.get(&ch) {
3308
            None => {
3309
              // there are two digamma chars only in the bold mapping. Handled here
3310
71
              if char_mapping[2] == 0x1D6A8 {
3311
43
                match ch {
3312
1
                  'Ϝ' => '𝟊',
3313
1
                  'ϝ' => '𝟋',
3314
41
                  _   => ch,
3315
                }
3316
              } else {
3317
28
                ch
3318
              }
3319
            },
3320
250
            Some(offsets) => {
3321
250
              let start_of_mapping = char_mapping[offsets.table];
3322
250
              if start_of_mapping == 0 {
ch37
} else {
shift_char213
(
start_of_mapping + offsets.ch213
)}
3323
            }
3324
          }
3325
        )
3326
      }
3327
197
      return new_text;
3328
3329
213
      fn shift_char(ch: u32) -> char {
3330
        // there are "holes" in the math alphanumerics due to legacy issues
3331
        // this table maps the holes to their legacy location
3332
        static EXCEPTIONS: phf::Map<u32, u32> = phf_map! {
3333
          0x1D455u32 => 0x210Eu32,
3334
          0x1D49Du32 => 0x212Cu32,
3335
          0x1D4A0u32 => 0x2130u32,
3336
          0x1D4A1u32 => 0x2131u32,
3337
          0x1D4A3u32 => 0x210Bu32,
3338
          0x1D4A4u32 => 0x2110u32,
3339
          0x1D4A7u32 => 0x2112u32,
3340
          0x1D4A8u32 => 0x2133u32,
3341
          0x1D4ADu32 => 0x211Bu32,
3342
          0x1D4BAu32 => 0x212Fu32,
3343
          0x1D4BCu32 => 0x210Au32,
3344
          0x1D4C4u32 => 0x2134u32,
3345
          0x1D506u32 => 0x212Du32,
3346
          0x1D50Bu32 => 0x210Cu32,
3347
          0x1D50Cu32 => 0x2111u32,
3348
          0x1D515u32 => 0x211Cu32,
3349
          0x1D51Du32 => 0x2128u32,
3350
          0x1D53Au32 => 0x2102u32,
3351
          0x1D53Fu32 => 0x210Du32,
3352
          0x1D545u32 => 0x2115u32,
3353
          0x1D547u32 => 0x2119u32,
3354
          0x1D548u32 => 0x211Au32,
3355
          0x1D549u32 => 0x211Du32,
3356
          0x1D551u32 => 0x2124u32,
3357
        };
3358
                
3359
213
        return unsafe { char::from_u32_unchecked(   // safe because the values are a char or from the table above
3360
213
          match EXCEPTIONS.get(&ch) {
3361
161
            None => ch,
3362
52
            Some(exception_value) => *exception_value,
3363
          }
3364
        ) }
3365
213
      }
3366
197
    }
3367
42.1k
  }
3368
3369
41.5k
  /// Converts a string of Latin letters, possibly written with Unicode mathematical
  /// alphanumeric styling, into the equivalent plain ASCII letters.
  ///
  /// Supported styles are plain ASCII, Mathematical Bold, Italic, Bold Italic and Sans-Serif.
  /// The italic small "h" has no code point in the Mathematical Italic run; Unicode
  /// encodes it as U+210E (PLANCK CONSTANT), which is accepted here as 'h'.
  ///
  /// Returns `None` as soon as any character is not a (supported) letter.
  /// An empty `input` yields `Some("")`.
  fn math_alphanumeric_to_ascii(input: &str) -> Option<String> {
    /// Maps `c`, taken from a 26-letter run beginning at `block_start`, onto the ASCII run beginning at `ascii_start`.
    fn rebase(c: char, block_start: u32, ascii_start: u8) -> char {
      // every caller passes a char from a 26-wide range, so the offset always fits in a u8
      char::from(ascii_start + (c as u32 - block_start) as u8)
    }

    // collecting into Option<String> stops at the first None
    return input.chars().map(|c| {
      let ascii = match c {
        // Standard ASCII
        'a'..='z' | 'A'..='Z' => c,

        // Mathematical Bold (A-Z: U+1D400, a-z: U+1D41A)
        '\u{1D400}'..='\u{1D419}' => rebase(c, 0x1D400, b'A'),
        '\u{1D41A}'..='\u{1D433}' => rebase(c, 0x1D41A, b'a'),

        // Mathematical Italic (A-Z: U+1D434, a-z: U+1D44E)
        '\u{1D434}'..='\u{1D44D}' => rebase(c, 0x1D434, b'A'),
        '\u{1D44E}'..='\u{1D467}' => rebase(c, 0x1D44E, b'a'),
        // italic 'h' is a "hole" in the range above -- it lives at U+210E
        '\u{210E}' => 'h',

        // Mathematical Bold Italic (A-Z: U+1D468, a-z: U+1D482)
        '\u{1D468}'..='\u{1D481}' => rebase(c, 0x1D468, b'A'),
        '\u{1D482}'..='\u{1D49B}' => rebase(c, 0x1D482, b'a'),

        // Mathematical Sans-Serif (A-Z: U+1D5A0, a-z: U+1D5BA)
        '\u{1D5A0}'..='\u{1D5B9}' => rebase(c, 0x1D5A0, b'A'),
        '\u{1D5BA}'..='\u{1D5D3}' => rebase(c, 0x1D5BA, b'a'),

        // If a character isn't a letter (or supported math letter), give up
        _ => return None,
      };
      Some(ascii)
    }).collect();
  }
3402
3403
14.6k
  fn canonicalize_mo_text(&self, mo: Element) {
3404
    // lazy_static! {    (NOTE: std::sync::LazyLock is now used instead)
3405
    //  static ref IS_LIKELY_SCALAR_VARIABLE: Regex = Regex::new("[a-eh-z]").unwrap();
3406
    // }
3407
    
3408
14.6k
    let mut mo_text = as_text(mo);
3409
14.6k
    let parent = get_parent(mo);
3410
14.6k
    let parent_name = name(parent);
3411
14.6k
    let is_base = mo.preceding_siblings().is_empty();
3412
14.6k
    if !is_base && (
parent_name == "mover"1.38k
||
parent_name == "munder"1.09k
||
parent_name == "munderover"1.07k
) {
3413
      // canonicalize various diacritics for munder, mover, munderover
3414
309
      mo_text = match mo_text {
3415
309
        "_" | 
"\u{02C9}"303
|
"\u{0304}"303
|
"\u{0305}"303
|
"\u{332}"303
|
"\u{2212}"302
|
3416
302
        "\u{2010}" | "\u{2011}" | "\u{2012}" | "\u{2013}" | "\u{2014}" | "\u{2015}" | 
"\u{203e}"293
=>
"\u{00AF}"17
,
3417
292
        "\u{02BC}" => 
"`"0
,
3418
292
        "\u{02DC}" | "\u{223C}" => 
"~"0
, // use ASCII for diacriticals
3419
292
        "\u{02C6}"| "\u{0302}" => 
"^"0
,
3420
292
        "\u{0307}" => 
"\u{02D9}"0
, // Nemeth distinguishes this from "." -- \u{02D9} is generated for over dots by most generators
3421
292
        "\u{0308}" => 
"¨"0
,
3422
292
        _ => mo_text,
3423
      }
3424
      // FIX: MathType generates the wrong version of union and intersection ops (binary instead of unary)
3425
14.3k
    } else if !is_base && (
parent_name == "msup"1.07k
||
parent_name == "msubsup"858
) {
3426
227
      mo_text = match mo_text {
3427
227
        "\u{00BA}"| "\u{2092}"| "\u{20D8}"| "\u{2218}" | 
"\u{25E6}"223
=>
"\u{00B0}"4
, // circle-like objects -> degree
3428
223
        _ => mo_text,
3429
      };
3430
    } else {
3431
14.0k
      mo_text = match mo_text {
3432
14.0k
        "\u{02C9}"| "\u{0304}"| "\u{0305}" => 
"\u{00AF}"0
,
3433
14.0k
        "\u{02DC}" | "~"  => 
"\u{223C}"5
, // for base, use version with prefix and infix
3434
14.0k
        "\u{01C1}" => 
"\u{2016}"0
, // U+2016 is "‖"
3435
3436
14.0k
        _ => mo_text,
3437
      };
3438
    };
3439
14.6k
    if mo_text == "\u{2212}" {
3440
314
      mo_text = "-";
3441
14.2k
    }
3442
14.6k
    mo.set_text(mo_text);
3443
14.6k
  }
3444
  
3445
    
3446
  // Find the operator associated with the 'mo_node'
3447
  // This is complicated by potentially needing to distinguish between the
3448
  //   prefix, infix, or postfix version of the operator.
3449
  // To figure out prefix, we need to look at the node on the left; for postfix, we need to look to the right
3450
  // If the node of the left has been parsed, then this works.
3451
  // For example, suppose we want to determine if the "+" in 'x < n!+1' is prefix or infix.
3452
  //   If we simply looked left without parsing, we'd see an operator and choose prefix unless we could figure out that
3453
  //   that "!" was postfix.  But if it had been parsed, we'd see an mrow (operand) and treat "+" as infix (as it should).
3454
  // The same problem applies on the right for postfix operators, but a problem is rare for those
3455
  //   e.g., n!!n -- ((n!)!)*n or (n!)*(!n)  -- the latter doesn't make semantic sense though
3456
  // FIX:  the above ignores mspace and other nodes that need to be skipped to determine the right node to determine arity
3457
  // FIX:  the postfix problem above should be addressed
3458
19.4k
  fn find_operator<'a>(context: Option<&CanonicalizeContext>, mo_node: Element<'a>, previous_operator: Option<&'static OperatorInfo>,
3459
19.4k
            previous_node: Option<Element<'a>>, next_node: Option<Element<'a>>) -> &'static OperatorInfo {
3460
    // get the unicode value and return the OpKeyword associated with it
3461
19.4k
    assert!( name(mo_node) == "mo");
3462
  
3463
    // if a form has been given, that takes precedence
3464
19.4k
    let form = mo_node.attribute_value("form");
3465
19.4k
    let op_type =  match form {
3466
19.4k
      None => match context {
3467
5.50k
        None => OperatorTypes::POSTFIX,   // what compute_type_from_position returns when the other args to this are all None
3468
13.9k
        Some(context) => compute_type_from_position(context, previous_operator, previous_node, next_node),
3469
      },
3470
10
      Some(form) => match form.to_lowercase().as_str() {
3471
10
        "prefix" => 
OperatorTypes::PREFIX4
,
3472
6
        "postfix" => 
OperatorTypes::POSTFIX2
,
3473
4
        _ => OperatorTypes::INFIX,
3474
      }
3475
    };  
3476
  
3477
19.4k
    let found_op_info = if mo_node.attribute_value(CHEMICAL_BOND).is_some() {
3478
112
      Some(&IMPLIED_CHEMICAL_BOND)
3479
    } else {
3480
19.3k
      OPERATORS.get(as_text(mo_node))
3481
    };
3482
19.4k
    if found_op_info.is_none() {
3483
      // no known operator -- return the unknown operator with the correct "fix" type
3484
49
      return op_not_in_operator_dictionary(op_type);
3485
19.4k
    }
3486
  
3487
19.4k
    let found_op_info = found_op_info.unwrap();
3488
19.4k
    let matching_op_info = find_operator_info(found_op_info, op_type, form.is_some());
3489
19.4k
    if ptr_eq(matching_op_info, &ILLEGAL_OPERATOR_INFO) {
3490
0
      return op_not_in_operator_dictionary(op_type);
3491
    } else {
3492
19.4k
      return matching_op_info;
3493
    }
3494
3495
  
3496
13.9k
    fn compute_type_from_position<'a>(context: &CanonicalizeContext, previous_operator: Option<&'static OperatorInfo>, previous_node: Option<Element<'a>>, next_node: Option<Element<'a>>) -> OperatorTypes {
3497
      // based on choices, pick one that fits the context
3498
      // if there isn't an obvious one, we have parsed the left, but not the right, so discount that
3499
    
3500
      // Trig functions have some special syntax
3501
      // We need to treat '-' as prefix for things like "sin -2x"
3502
      // Need to be careful because (sin - cos)(x) needs an infix '-'
3503
      // Return either the prefix or infix version of the operator
3504
13.9k
      if next_node.is_some() &&
3505
11.9k
         context.is_function_name(get_possible_embellished_node(next_node.unwrap()), None) == FunctionNameCertainty::True {
3506
260
        return OperatorTypes::INFIX;
3507
13.6k
      }
3508
13.6k
      if previous_node.is_some() &&
3509
11.1k
         context.is_function_name(get_possible_embellished_node(previous_node.unwrap()), None) == FunctionNameCertainty::True {
3510
207
        return OperatorTypes::PREFIX;
3511
13.4k
      }
3512
    
3513
      // after that special case, start with the obvious cases...
3514
13.4k
      let operand_on_left = previous_operator.is_none() || 
previous_operator.unwrap()2.94k
.
is_postfix2.94k
(); // operand or postfix operator
3515
13.4k
      let operand_on_right = next_node.is_some() && 
name11.5k
(
get_possible_embellished_node11.5k
(next_node.unwrap())) !="mo"; // FIX: could improve by checking if it is a prefix op
3516
    
3517
13.4k
      if operand_on_left && 
operand_on_right10.5k
{
3518
8.19k
        return OperatorTypes::INFIX; // infix
3519
5.29k
      } else if !operand_on_left && 
operand_on_right2.94k
{
3520
2.75k
        return OperatorTypes::PREFIX; // prefix
3521
2.54k
      } else if operand_on_left && 
!operand_on_right2.34k
{
3522
2.34k
        return OperatorTypes::POSTFIX; // postfix
3523
      } else {
3524
        // either two operators in a row or right hand side not parsed so we don't really know what is right (same is true above)
3525
        // since there is nothing good to return, assume right is an operand after parsing (thus infix case)
3526
196
        return OperatorTypes::INFIX;
3527
      }
3528
13.9k
    }
3529
3530
19.4k
    fn find_operator_info(op_info: &OperatorInfo, op_type: OperatorTypes, from_form_attr: bool) -> &OperatorInfo {
3531
19.4k
      if op_info.is_operator_type(op_type) {
3532
12.9k
        return op_info;
3533
6.45k
      } else if let Some(
next_op_info1.64k
) = op_info.next {
3534
1.64k
        if next_op_info.is_operator_type(op_type) {
3535
730
          return next_op_info;
3536
915
        } else if let Some(
last_op_info256
) = next_op_info.next
3537
256
          && last_op_info.is_operator_type(op_type) {
3538
256
            return last_op_info;
3539
659
          }
3540
4.81k
      }
3541
3542
      // didn't find op_info that matches -- if type is not forced, then return first value (any is probably ok) 
3543
5.47k
      return if from_form_attr {
&ILLEGAL_OPERATOR_INFO0
} else {op_info};
3544
19.4k
    }
3545
  
3546
49
    fn op_not_in_operator_dictionary(op_type: OperatorTypes) -> &'static OperatorInfo {
3547
49
      return match op_type {
3548
16
        OperatorTypes::PREFIX => &DEFAULT_OPERATOR_INFO_PREFIX,
3549
9
        OperatorTypes::POSTFIX => &DEFAULT_OPERATOR_INFO_POSTFIX,
3550
24
        _ => &DEFAULT_OPERATOR_INFO_INFIX, // should only be infix
3551
      };
3552
49
    }
3553
19.4k
  }
3554
  
3555
13.9k
  fn n_vertical_bars_on_right(&self, remaining_children: &[ChildOfElement], vert_bar_ch: &str) -> usize {
3556
    // return the number of children that match 'vert_bar_op' not counting the first element
3557
13.9k
    let mut n = 0;
3558
149k
    for child_of_element in 
remaining_children13.9k
{
3559
149k
      let child = as_element(*child_of_element);
3560
149k
      if name(child) == "mo" {
3561
49.9k
        let operator_str = as_text(child);
3562
49.9k
        if operator_str == vert_bar_ch {
3563
42.7k
          n += 1;
3564
42.7k
        
}7.25k
3565
99.9k
      }
3566
    }
3567
13.9k
    return n;
3568
13.9k
  }
3569
  
3570
  
3571
13.9k
  fn determine_vertical_bar_op<'a>(&self, original_op: &'static OperatorInfo, mo_node: Element<'a>, 
3572
13.9k
        next_child: Option<Element<'a>>,
3573
13.9k
        parse_stack: &'a mut Vec<StackInfo>,
3574
13.9k
        n_vertical_bars_on_right: usize) -> &'static OperatorInfo {
3575
    // if in a prefix location, it is a left fence
3576
    // note:  if there is an operator on the top of the stack, it wants an operand (otherwise it would have been reduced)
3577
13.9k
    let operator_str = as_text(mo_node);
3578
13.9k
    let found_op_info = OPERATORS.get(operator_str);
3579
13.9k
    if found_op_info.is_none() {
3580
48
      return original_op;
3581
13.8k
    }
3582
13.8k
    let op = found_op_info.unwrap();
3583
13.8k
    if !AMBIGUOUS_OPERATORS.contains(operator_str) {
3584
      // debug!("   op is not ambiguous");
3585
13.4k
      return original_op;
3586
401
    };
3587
  
3588
401
    let operator_versions = OperatorVersions::new(op);
3589
401
    if let Some(
prefix360
) = operator_versions.prefix &&
3590
360
       (top(parse_stack).last_child_in_mrow().is_none() || 
!top(parse_stack).is_operand260
) {
3591
      // debug!("   is prefix");
3592
115
      return prefix;
3593
286
    }
3594
    
3595
    // We have either a right fence or an infix operand at the top of the stack
3596
    // If this is already parsed, we'd look to the right to see if there is an operand after this child.
3597
    // But it isn't parsed and there might be a prefix operator which will eventually become an operand, so it is tricky.
3598
    // It is even trickier because we might have an implicit times, so we can't really tell
3599
    // For example:  |x|y|z| which can be '|x| y |z|' or '|x |y| z|', or even | (x|y)|z |'
3600
    // We can't really know what is intended (without @intent).
3601
    // It seems like the case where it could be paired with a matching vertical bar as what most people would choose, so we favor that.
3602
  
3603
    // If there is a matching open vertical bar, it is either at the top of the stack or the entry just below the top
3604
3605
286
    let has_left_match = if let Some(
op_prefix245
) = operator_versions.prefix {
3606
245
      if ptr_eq(top(parse_stack).op_pair.op, op_prefix) {   // match at top of stack? (empty matching bars)
3607
109
        true
3608
136
      } else if parse_stack.len() > 2 {
3609
        // matching op is below top (operand between matching bars) -- pop, peek, push
3610
36
        let old_top = parse_stack.pop().unwrap();   
3611
36
        let top_op = top(parse_stack).op_pair.op;                                 // can only access top, so we need to pop off top and push back later
3612
36
        parse_stack.push(old_top);
3613
36
        ptr_eq(top_op, op_prefix)
3614
      } else {
3615
100
        false
3616
      }
3617
    } else {
3618
41
      false
3619
    };
3620
286
    if let Some(
postfix245
) =operator_versions.postfix && (
next_child245
.
is_none245
() ||
has_left_match130
) {
3621
      // last child in row (must be a close) or we have a left match
3622
      // debug!("   is postfix");
3623
136
      return postfix;
3624
150
    } else if next_child.is_none() {
3625
      // operand on left, so prefer infix version
3626
18
      return if let Some(infix) = operator_versions.infix {infix} else {
op0
};
3627
132
    }
3628
  
3629
132
    let next_child = next_child.unwrap();
3630
132
    if let Some(
prefix109
) = operator_versions.prefix &&
(n_vertical_bars_on_right & 0x1 != 0)109
{
3631
      //  ("   is prefix");
3632
3
      return prefix;   // odd number of vertical bars remain, so consider this the start of a pair
3633
129
    }
3634
  
3635
129
    let next_child = get_possible_embellished_node(next_child);
3636
129
    let next_child_op = if name(next_child) != "mo" {
3637
128
        None
3638
      } else {
3639
1
        let next_next_children = next_child.following_siblings();
3640
1
        let next_next_child = if next_next_children.is_empty() { 
None0
} else { Some( as_element(next_next_children[0]) )};
3641
1
        Some( CanonicalizeContext::find_operator(Some(self), next_child, operator_versions.infix,
3642
1
                  top(parse_stack).last_child_in_mrow(), next_next_child) )
3643
      };
3644
                          
3645
    // If the next child is a prefix op or a left fence, it will reduce to an operand, so don't consider it an operator
3646
129
    if next_child_op.is_some() && 
!next_child_op.unwrap().is_left_fence()1
&&
!next_child_op.unwrap().is_prefix()0
{
3647
0
      if let Some(postfix) =operator_versions.postfix {
3648
        // debug!("   is postfix");
3649
0
        return postfix; 
3650
0
      }
3651
129
    } else if let Some(infix) = operator_versions.infix {
3652
      // debug!("   is infix");
3653
129
      return infix; 
3654
0
    }
3655
  
3656
    // nothing good to match
3657
0
    return op;
3658
13.9k
  }
3659
3660
3661
  // return FunctionNameCertainty::False or Maybe if 'node' is a chemical element and is followed by a state (solid, liquid, ...)
3662
  //  in other words, we are certain this can't be a function since it looks like it is or might be chemistry
3663
1.71k
  fn is_likely_chemical_state<'a>(&self, node: Element<'a>, right_sibling: Element<'a>) -> FunctionNameCertainty {
3664
1.71k
    assert_eq!(name(get_parent(node)), "mrow"); // should be here because we are parsing an mrow
3665
  
3666
    // debug!("   in is_likely_chemical_state: '{}'?",element_summary(node));
3667
1.71k
    let node_chem_likelihood= node.attribute_value(MAYBE_CHEMISTRY);
3668
1.71k
    if node.attribute(MAYBE_CHEMISTRY).is_none() {
3669
1.16k
      return FunctionNameCertainty::True;
3670
549
    }
3671
3672
549
    if name(right_sibling) == "mrow" {    // clean_chemistry_mrow made sure any state-like structure is an mrow
3673
75
      let state_likelihood = likely_chem_state(right_sibling);
3674
75
      if state_likelihood > 0 {
3675
49
        right_sibling.set_attribute_value(MAYBE_CHEMISTRY, state_likelihood.to_string().as_str());
3676
        // at this point, we know both node and right_sibling are positive, so we have at least a maybe
3677
49
        if state_likelihood + node_chem_likelihood.unwrap().parse::<i32>().unwrap() > 2 {
3678
49
          return FunctionNameCertainty::False;
3679
        } else {
3680
0
          return FunctionNameCertainty::Maybe
3681
        }
3682
26
      }
3683
474
    }
3684
3685
500
    return FunctionNameCertainty::True;
3686
1.71k
  }
3687
  
3688
  // Try to figure out whether an <mi> is a function name or not.
3689
  // There are two important cases depending upon whether parens/brackets are used or not.
3690
  // E.g, sin x and f(x)
3691
  // 1. If parens follow the name, then we use a more inclusive set of heuristics as it is more likely a function
3692
  // The heuristics used are:
3693
  //   - it is on the list of known function names (e.g., "sin" and "log")
3694
  //   - it is on the list of likely function names (e.g, f, g, h)
3695
  //   - multi-char names that begin with a capital letter (e.g, "Tr")
3696
  //   - there is a single token inside the parens (why else would someone use parens), any name (e.g, a(x))
3697
  //   - if there are multiple comma-separated args
3698
  //
3699
  // 2. If there are no parens, then only names on the known function list are used (e.g., "sin x")
3700
  //
3701
  // If the name is followed by parens but doesn't fit into the above categories, we return a "maybe"
3702
32.0k
  fn is_function_name<'a>(&self, node: Element<'a>, right_siblings: Option<&[ChildOfElement<'a>]>) -> FunctionNameCertainty {
3703
32.0k
    let base_of_name = get_possible_embellished_node(node);
3704
  
3705
    // actually only 'mi' should be legal here, but some systems used 'mtext' for multi-char variables
3706
    // FIX: need to allow for composition of function names. E.g, (f+g)(x) and (f^2/g)'(x)
3707
32.0k
    let node_name = name(base_of_name);
3708
32.0k
    if node_name != "mi" && 
node_name != "mtext"15.7k
{
3709
15.4k
      return FunctionNameCertainty::False;
3710
16.6k
    }
3711
    // whitespace is sometimes added to the mi since braille needs it, so do a trim here to get function name
3712
16.6k
    let base_name = as_text(base_of_name).trim();
3713
16.6k
    if base_name.is_empty() {
3714
2
      return FunctionNameCertainty::False;
3715
16.6k
    }
3716
    // debug!("    is_function_name({}), {} following nodes", base_name, if right_siblings.is_none() {"No".to_string()} else {right_siblings.unwrap().len().to_string()});
3717
16.6k
    return crate::definitions::SPEECH_DEFINITIONS.with(|defs| {
3718
      // names that are always function names (e.g, "sin" and "log")
3719
16.6k
      let defs = defs.borrow();
3720
16.6k
      let names = defs.get_hashset("FunctionNames").unwrap();
3721
      // UEB seems to think "Sin" (etc) is used for "sin", so we move to lower case
3722
16.6k
      if names.contains(&base_name.to_ascii_lowercase()) {
3723
        // debug!("     ...is in FunctionNames");
3724
1.02k
        return FunctionNameCertainty::True; // always treated as function names
3725
15.5k
      }
3726
3727
      // We include shapes as function names so that △ABC makes sense since △ and
3728
      //   the other shapes are not in the operator dictionary
3729
15.5k
      let shapes = defs.get_hashset("GeometryShapes").unwrap();
3730
15.5k
      if shapes.contains(base_name) {
3731
23
        return FunctionNameCertainty::True; // always treated as function names
3732
15.5k
      }
3733
  
3734
15.5k
      if right_siblings.is_none() {
3735
13.8k
        return FunctionNameCertainty::False; // only accept known names, which is tested above
3736
1.71k
      }
3737
3738
      // make sure that what follows starts and ends with parens/brackets
3739
1.71k
      assert_eq!(name(get_parent(node)), "mrow");
3740
1.71k
      let right_siblings = right_siblings.unwrap();
3741
1.71k
      let non_whitespace = right_siblings.iter().enumerate()
3742
1.71k
            .find(|&(_, child)| {
3743
1.71k
              let child = as_element(*child);
3744
1.71k
              name(child) != "mtext" || 
!as_text(child).trim().is_empty()54
3745
1.71k
            });
3746
1.71k
      let right_siblings = if let Some( (i, _) ) = non_whitespace {&right_siblings[i..]} else {
right_siblings0
};
3747
1.71k
      if right_siblings.is_empty() {
3748
        // debug!("     ...right siblings not None, but zero of them");
3749
0
        return FunctionNameCertainty::False;
3750
1.71k
      }
3751
3752
1.71k
      let first_child = as_element(right_siblings[0]);
3753
          
3754
      // clean_chemistry wrapped up a state in an mrow and this is assumed by is_likely_chemical_state()
3755
1.71k
      let chem_state_certainty = self.is_likely_chemical_state(node, first_child);
3756
1.71k
      if chem_state_certainty != FunctionNameCertainty::True {
3757
        // debug!("      ...is_likely_chemical_state says it is a function ={:?}", chem_state_certainty);
3758
49
        return chem_state_certainty;
3759
1.66k
      }
3760
3761
1.66k
      if name(first_child) == "mrow" && 
is_left_paren238
(
as_element238
(
first_child.children()[0]238
)) {
3762
        // debug!("     ...trying again after expanding mrow");
3763
235
        return self.is_function_name(node, Some(&first_child.children()));
3764
1.43k
      }
3765
3766
1.43k
      if right_siblings.len() < 2 {
3767
        // debug!("     ...not enough right siblings");
3768
542
        return FunctionNameCertainty::False; // can't be (...)
3769
892
      }
3770
3771
      // at least two siblings are this point -- check that they are parens/brackets
3772
      // we can only check the open paren/bracket because the right side is unparsed and we don't know the close location
3773
892
      let first_sibling = as_element(right_siblings[0]);
3774
892
      if name(first_sibling) != "mo"  || 
!is_left_paren(first_sibling)384
// '(' or '['
3775
      {
3776
        // debug!("     ...first sibling is not '(' or '['");
3777
522
        return FunctionNameCertainty::False;
3778
370
      }
3779
  
3780
370
      let likely_names = defs.get_hashset("LikelyFunctionNames").unwrap();
3781
370
      if likely_names.contains(base_name) {
3782
206
        return FunctionNameCertainty::True; // don't bother checking contents of parens, consider these as function names
3783
164
      }
3784
  
3785
164
      if is_single_arg(as_text(first_sibling), &right_siblings[1..]) {
3786
        // debug!("      ...is single arg");
3787
64
        return FunctionNameCertainty::True; // if there is only a single arg, why else would you use parens?
3788
100
      };
3789
3790
100
      if is_comma_arg(as_text(first_sibling), &right_siblings[1..]) {
3791
        // debug!("      ...is comma arg");
3792
2
        return FunctionNameCertainty::True; // if there is only a single arg, why else would you use parens?
3793
98
      };
3794
  
3795
      // FIX: should really make sure all the args are marked as MAYBE_CHEMISTRY, but we don't know the matching close paren/bracket
3796
98
      if node.attribute(MAYBE_CHEMISTRY).is_some() &&
3797
34
         as_element(right_siblings[1]).attribute(MAYBE_CHEMISTRY).is_some() {
3798
1
        return FunctionNameCertainty::False;
3799
97
      }
3800
  
3801
      // Names like "Tr" are likely function names, single letter names like "M" or "J" are iffy
3802
      // This needs to be after the chemical state check above to rule out Cl(g), etc
3803
      // This would be better if it were part of 'likely_names' as "[A-Za-z]+", but reg exprs don't work in HashSets.
3804
      // FIX: create our own struct and write appropriate traits for it and then it could work
3805
97
      let mut chars = base_name.chars();
3806
97
      let first_char = chars.next().unwrap();   // we know there is at least one byte in it, hence one char
3807
97
      if chars.next().is_some() && 
first_char4
.
is_uppercase4
() {
3808
        // debug!("      ...is uppercase name");
3809
4
        return FunctionNameCertainty::True;
3810
93
      }
3811
3812
      // debug!("      ...didn't match options to be a function");
3813
      // debug!("Right siblings:\n{}  ", right_siblings.iter().map(|&child| mml_to_string(as_element(child))).collect::<Vec<String>>().join("\n  "));
3814
93
      return if is_name_inside_parens(base_name, right_siblings) {
FunctionNameCertainty::False5
} else {
FunctionNameCertainty::Maybe88
};
3815
16.6k
    });
3816
  
3817
164
    fn is_single_arg(open: &str, following_nodes: &[ChildOfElement]) -> bool {
3818
      // following_nodes are nodes after "("
3819
164
      if following_nodes.is_empty() {
3820
0
        return true;   // "a(" might or might not be a function call -- treat as "is" because we can't see more 
3821
164
      }
3822
  
3823
164
      let first_child = as_element(following_nodes[0]);
3824
164
      if is_matching_right_paren(open, first_child) {
3825
0
        return true;   // no-arg case "a()"
3826
164
      }
3827
  
3828
      // could be really picky and restrict to checking for only mi/mn
3829
      // that might make more sense in stranger cases, but mfrac, msqrt, etc., probably shouldn't have parens if times 
3830
164
      return following_nodes.len() > 1 && 
3831
164
          name(first_child) != "mrow" &&
3832
127
          is_matching_right_paren(open, as_element(following_nodes[1]));
3833
164
    }
3834
  
3835
100
    fn is_comma_arg(open: &str, following_nodes: &[ChildOfElement]) -> bool {
3836
      // following_nodes are nodes after "("
3837
100
      if following_nodes.len() == 1 {
3838
0
        return false;
3839
100
      }
3840
3841
100
      let first_child = as_element(following_nodes[1]);
3842
100
      if name(first_child) == "mrow" {
3843
0
        return is_comma_arg(open, &first_child.children()[..]);
3844
100
      }
3845
3846
      // FIX: this loop is very simplistic and could be improved to count parens, etc., to make sure "," is at top-level
3847
318
      for child in 
following_nodes100
{
3848
318
        let child = as_element(*child);
3849
318
        if name(child) == "mo" {
3850
141
          if as_text(child) == "," {
3851
2
            return true;
3852
139
          }
3853
139
          if is_matching_right_paren(open, child) {
3854
96
            return false;
3855
43
          }
3856
177
        }
3857
      }
3858
      
3859
2
      return false;
3860
100
    }
3861
  
3862
622
    fn is_left_paren(node: Element) -> bool {
3863
622
      if name(node) != "mo" {
3864
1
        return false;
3865
621
      }
3866
621
      let text = as_text(node);
3867
621
      return text == "(" || 
text == "["22
;
3868
622
    }
3869
  
3870
430
    fn is_matching_right_paren(open: &str, node: Element) -> bool {
3871
430
      if name(node) != "mo" {
3872
184
        return false;
3873
246
      }
3874
246
      let text = as_text(node);
3875
      // debug!("         is_matching_right_paren: open={}, close={}", open, text);
3876
246
      return (open == "(" && 
text == ")"244
) || (
open == "["88
&&
text == "]"2
);
3877
430
    }
3878
3879
    /// Returns true if the name of the potential function is inside the parens. In that case, it is very unlikely to be a function call
3880
    /// For example, "n(n+1)"
3881
93
    fn is_name_inside_parens(function_name: &str, right_siblings: &[ChildOfElement]) -> bool {
3882
      // the first child of right_siblings is either '(' or '['
3883
      // right_siblings may extend well beyond the closing parens, so we first break this into finding the contents
3884
      // then we search the contents for the name
3885
93
      match find_contents(right_siblings) {
3886
2
        None => return false,
3887
91
        Some(contents) => return is_name_inside_contents(function_name, contents),
3888
      }
3889
      
3890
3891
93
      fn find_contents<'a>(right_siblings: &'a[ChildOfElement<'a>]) -> Option<&'a[ChildOfElement<'a>]> {
3892
93
        let open_text = as_text(as_element(right_siblings[0]));
3893
93
        let close_text = if open_text == "("  { 
")"91
} else {
"]"2
};
3894
93
        let mut nesting_level = 1;
3895
93
        let mut i = 1;
3896
296
        while i < right_siblings.len() {
3897
294
          let child = as_element(right_siblings[i]);
3898
294
          if name(child) == "mo" {
3899
133
            let op_text = as_text(child);
3900
133
            if op_text == open_text {
3901
0
              nesting_level += 1;
3902
133
            } else if op_text == close_text {
3903
91
              if nesting_level == 1 {
3904
91
                return Some(&right_siblings[1..i]);
3905
0
              } 
3906
0
              nesting_level -= 1;
3907
42
            }
3908
161
          }
3909
203
          i += 1;
3910
        }
3911
2
        return None; // didn't find matching paren
3912
93
      }
3913
3914
134
      fn is_name_inside_contents(function_name: &str, contents: &[ChildOfElement]) -> bool {
3915
304
        for &child in 
contents134
{
3916
304
          let child = as_element(child);
3917
          // debug!("is_name_inside_contents: child={}", mml_to_string(child));
3918
304
          if is_leaf(child) {
3919
261
            let text = as_text(child);
3920
261
            if (name(child) == "mi" || 
name(child) == "mtext"108
) &&
text == function_name163
{
3921
5
              return true;
3922
256
            }
3923
43
          } else if is_name_inside_contents(function_name, &child.children()) {
3924
4
            return true;
3925
39
          }
3926
        }
3927
125
        return false;
3928
134
      }
3929
93
    }
3930
32.0k
  }
3931
  
3932
5.79k
  fn is_mixed_fraction<'a>(&self, integer_part: Element<'a>, fraction_children: &[ChildOfElement<'a>]) -> Result<bool> {
3933
    // do some simple disqualifying checks on the fraction part
3934
5.79k
    if fraction_children.is_empty() {
3935
0
      return Ok( false );
3936
5.79k
    }
3937
5.79k
    let right_child = as_element(fraction_children[0]);
3938
5.79k
    let right_child_name = name(right_child);
3939
5.79k
    if ! (right_child_name == "mfrac" ||
3940
5.68k
       (right_child_name == "mrow" && 
right_child.children().len() == 3218
) ||
3941
5.48k
         (right_child_name == "mn" && 
fraction_children.len() >= 3138
) ) {
3942
5.46k
      return Ok( false );
3943
329
    };
3944
3945
329
    if !is_integer_part_ok(integer_part) {
3946
219
      return Ok( false );
3947
110
    }
3948
    
3949
110
    if right_child_name == "mfrac" {
3950
75
      return Ok( is_mfrac_ok(right_child) );
3951
35
    }
3952
3953
35
    return is_linear_fraction(self, fraction_children);
3954
3955
3956
351
    fn is_int(integer_part: Element) -> bool {
3957
351
      return name(integer_part) == "mn"  && 
!as_text(integer_part).contains(DECIMAL_SEPARATOR)185
;
3958
351
    }
3959
3960
329
    fn is_integer_part_ok(integer_part: Element) -> bool {
3961
      // integer part must be either 'n' or '-n' (in an mrow)
3962
329
      let integer_part_name = name(integer_part);
3963
329
      if integer_part_name == "mrow" {
3964
83
        let children = integer_part.children();
3965
83
        if children.len() == 2 &&
3966
16
           name(as_element(children[0])) == "mo" &&
3967
0
           as_text(as_element(children[0])) == "-" {
3968
0
          let integer_part = as_element(children[1]);
3969
0
          return is_int(integer_part);
3970
83
        }
3971
83
        return false;
3972
246
      };
3973
    
3974
246
      return is_int(integer_part);
3975
329
    }
3976
3977
75
    fn is_mfrac_ok(fraction_part: Element) -> bool {
3978
      // fraction_part needs to have integer numerator and denominator (already tested it is a frac)
3979
75
      let fraction_children = fraction_part.children();
3980
75
      if fraction_children.len() != 2 {
3981
0
        return false;
3982
75
      }
3983
75
      let numerator = as_element(fraction_children[0]);
3984
75
      if name(numerator) != "mn" || 
as_text(numerator)67
.
contains67
(DECIMAL_SEPARATOR) {
3985
8
        return false;
3986
67
      }
3987
67
      let denominator = as_element(fraction_children[1]);
3988
67
      return is_int(denominator);
3989
75
    }
3990
3991
66
    fn is_linear_fraction(canonicalize: &CanonicalizeContext, fraction_children: &[ChildOfElement]) -> Result<bool> {
3992
      // two possibilities
3993
      // 1. '3 / 4' is in an mrow
3994
      // 2. '3 / 4' are three separate elements
3995
66
      let first_child = as_element(fraction_children[0]);
3996
66
      if name(first_child) == "mrow" {
3997
31
        if first_child.children().len() != 3 {
3998
0
          return Ok( false );
3999
31
        }
4000
31
        return is_linear_fraction(canonicalize, &first_child.children())
4001
35
      }
4002
      
4003
      
4004
      // the length has been checked
4005
35
      assert!(fraction_children.len() >= 3);
4006
      
4007
35
      if !is_int(first_child) {
4008
30
        return Ok( false );
4009
5
      }
4010
5
      let slash_part = canonicalize.canonicalize_mrows(as_element(fraction_children[1]))
?0
;
4011
5
      if name(slash_part) == "mo" && as_text(slash_part) == "/" {
4012
3
        let denom = canonicalize.canonicalize_mrows(as_element(fraction_children[2]))
?0
;
4013
3
        return Ok( is_int(denom) );
4014
2
      }
4015
2
      return Ok( false );
4016
66
    }
4017
5.79k
  }
4018
4019
  /// implied comma when two numbers are adjacent and are in a script position
4020
5.72k
  fn is_implied_comma<'a>(&self, prev: Element<'a>, current: Element<'a>, mrow: Element<'a>) -> bool {
4021
5.72k
    if name(prev) != "mn" || 
name(current) != "mn"4.06k
{
4022
5.63k
      return false;
4023
95
    }
4024
4025
95
    assert_eq!(name(mrow), "mrow");
4026
95
    let container = get_parent(mrow);
4027
95
    let name = name(container);
4028
4029
    // test for script position is that it is not the base and hence has a preceding sibling
4030
95
    return (name == "msub" || 
name == "msubsup"14
||
name == "msup"14
) &&
!mrow.preceding_siblings().is_empty()81
;
4031
5.72k
  }
4032
4033
  /// implied separator when two capital letters are adjacent or two chemical elements
4034
5.64k
  fn is_implied_chemical_bond<'a>(&self, prev: Element<'a>, current: Element<'a>) -> bool {
4035
    // debug!("is_implied_chemical_bond: previous: {:?}", prev.preceding_siblings());
4036
    // debug!("is_implied_chemical_bond: following: {:?}", prev.following_siblings());
4037
5.64k
    if prev.attribute(MAYBE_CHEMISTRY).is_none() || 
current514
.attribute(MAYBE_CHEMISTRY).
is_none514
() {
4038
5.18k
      return false;
4039
462
    }
4040
    // ABC example where B and C are chemical elements is why we need to scan further than just checking B and C
4041
    // look for an mi/mtext with @MAYBE_CHEMISTRY until we get to something that can't have it
4042
626
    for child in 
prev462
.
preceding_siblings462
() {
4043
626
      if !is_valid_chemistry(as_element(child)) {
4044
11
        return false;
4045
615
      }
4046
    }
4047
851
    for child in 
current451
.
following_siblings451
() {
4048
851
      if !is_valid_chemistry(as_element(child)) {
4049
32
        return false;
4050
819
      }
4051
    }
4052
419
    return true;   // sequence of all MAYBE_CHEMISTRY
4053
4054
1.47k
    fn is_valid_chemistry(child: Element) -> bool {
4055
1.47k
      let child = get_possible_embellished_node(child);
4056
1.47k
      return child.attribute(MAYBE_CHEMISTRY).is_some() || (
name(child) != "mi"654
&&
name(child) != "mtext"614
);
4057
1.47k
    }
4058
5.64k
  }
4059
4060
  /// implied separator when two capital letters are adjacent or two chemical elements
4061
  /// also for adjacent omission chars
4062
5.22k
  fn is_implied_separator<'a>(&self, prev: Element<'a>, current: Element<'a>) -> bool {
4063
5.22k
    if name(prev) != "mi" || 
name(current) != "mi"516
{
4064
4.83k
      return false;
4065
390
    }
4066
4067
    // trim because whitespace might have gotten stuffed into the <mi>s
4068
390
    let prev_text = as_text(prev).trim();
4069
390
    let current_text = as_text(current).trim();
4070
390
    return prev_text.len() == 1 && 
current_text.len() == 1352
&&
4071
317
         ((is_cap(prev_text) && 
is_cap174
(
current_text174
)) ||
4072
151
          (prev_text=="_" && 
current_text=="_"0
));
4073
4074
4075
491
    fn is_cap(str: &str) -> bool {
4076
491
      assert_eq!(str.len(), 1);
4077
491
      return str.chars().next().unwrap().is_ascii_uppercase();
4078
491
    }
4079
5.22k
  }
4080
  
4081
42
  fn is_invisible_char_element(mathml: Element) -> bool {
4082
42
    if !is_leaf(mathml) {
4083
8
      return false
4084
34
    }
4085
34
    let text = as_text(mathml);
4086
34
    if text.len() != 3 {   // speed hack: invisible chars are three UTF-8 chars
4087
28
      return false;
4088
6
    } 
4089
6
    let ch = text.chars().next().unwrap();
4090
6
    return ('\u{2061}'..='\u{2064}').contains(&ch);
4091
42
  }
4092
4093
  // Add the current operator if it's not n-ary to the stack
4094
  // 'current_child' and it the operator to the stack.
4095
17.7k
  fn shift_stack<'s, 'a:'s, 'op:'a>(
4096
17.7k
        &self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>,
4097
17.7k
        current_child: Element<'a>, 
4098
17.7k
        current_op: OperatorPair<'op>) -> (Element<'a>, OperatorPair<'op>) {
4099
17.7k
    let mut new_current_child = current_child;
4100
17.7k
    let mut new_current_op = current_op.clone();
4101
17.7k
    let previous_op = top(parse_stack).op_pair.clone();
4102
    // debug!(" shift_stack: mrow len={}", top(parse_stack).mrow.children().len().to_string());
4103
    // debug!(" shift_stack: shift on '{}'; ops: prev '{}/{}', cur '{}/{}'",
4104
    //    element_summary(current_child),show_invisible_op_char(previous_op.ch), previous_op.op.priority,
4105
    //    show_invisible_op_char(current_op.ch), current_op.op.priority);
4106
17.7k
    if !current_op.op.is_nary(previous_op.op) {
4107
      // grab operand on top of stack (if there is one) and make it part of the new mrow since current op has higher precedence
4108
      // if operators are the same and are binary, then this push makes them act as left associative
4109
13.0k
      let mut top_of_stack = parse_stack.pop().unwrap();
4110
13.0k
      if top_of_stack.mrow.children().is_empty() || (
!top_of_stack.is_operand12.9k
&&
!current_op.op.is_right_fence()72
) {
4111
138
        // "bad" syntax - no operand on left -- don't grab operand (there is none)
4112
138
        //   just start a new mrow beginning with operator
4113
138
        // FIX -- check this shouldn't happen:  parse_stack.push(top_of_stack);
4114
138
        parse_stack.push( top_of_stack );   // put top back on
4115
138
        parse_stack.push( StackInfo::new(current_child.document()) );
4116
12.8k
      } else if current_op.op.is_right_fence() {
4117
        // likely, but not necessarily, there is a left fence to start the mrow
4118
        // this is like the postfix case except we grab the entire mrow, push on the close, and make that the mrow
4119
        // note:  the code does these operations on the stack for consistency, but it could be optimized without push/popping the stack
4120
1.96k
        let mrow = top_of_stack.mrow;
4121
1.96k
        top_of_stack.add_child_to_mrow(current_child, current_op);
4122
        // debug!("shift_stack: after adding right fence to mrow:\n{}", mml_to_string(mrow));
4123
1.96k
        new_current_op = OperatorPair::new();             // treat matched brackets as operand
4124
1.96k
        new_current_child = mrow;
4125
1.96k
        let children = mrow.children();
4126
1.96k
        let base_of_first_child = get_possible_embellished_node(as_element(children[0]));
4127
        // debug!("looking for left fence: len={}, {:#?}", children.len(), CanonicalizeContext::find_operator(Some(self), base_of_first_child, None, Some(as_element(children[0])), Some(mrow)));
4128
1.96k
        if children.len() == 2 &&
4129
64
           (name(base_of_first_child) != "mo" ||
4130
13
            !CanonicalizeContext::find_operator(Some(self), base_of_first_child, None,
4131
51
                            Some(
as_element13
(children[0])), Some(mrow)).is_left_fence()) {
4132
51
          // the mrow did *not* start with an open (hence no push)
4133
51
          // since parser really wants balanced parens to keep stack state right, we do a push here
4134
51
          parse_stack.push( StackInfo::new(mrow.document()) );
4135
51
        } else {
4136
          // the mrow started with some open fence (which caused a push) -- add the close, pop, and push on the "operand"
4137
1.91k
          new_current_child = self.potentially_lift_script(mrow)
4138
        }
4139
10.9k
      } else if current_op.op.is_postfix() {
4140
81
        // grab the left operand and start a new mrow with it and the operator -- put those back on the stack
4141
81
        // note:  the code does these operations on the stack for consistency, but it could be optimized without push/popping the stack
4142
81
        let previous_child = top_of_stack.remove_last_operand_from_mrow();         // remove operand from mrow
4143
81
        parse_stack.push(top_of_stack);
4144
81
        let mut new_top_of_stack = StackInfo::with_op(&current_child.document(), previous_child, current_op.clone()); // begin new mrow with operand
4145
81
        new_top_of_stack.add_child_to_mrow(current_child, current_op);  // add on operator
4146
81
        new_current_child = new_top_of_stack.mrow;                // grab for pushing on old mrow
4147
81
        new_current_op = OperatorPair::new();               // treat "reduced" postfix operator & operand as an operand
4148
81
        // debug!("shift_stack: after adding postfix to mrow has len: {}", new_current_child.children().len().to_string());
4149
10.8k
      } else {
4150
10.8k
        // normal infix op case -- grab the left operand and start a new mrow with it and the operator
4151
10.8k
        let previous_child = top_of_stack.remove_last_operand_from_mrow();
4152
10.8k
        parse_stack.push(top_of_stack);
4153
10.8k
        parse_stack.push( StackInfo::with_op(&current_child.document(),previous_child, current_op) );
4154
10.8k
      }
4155
4.73k
    }
4156
17.7k
    return (new_current_child, new_current_op);
4157
17.7k
  }
4158
  
4159
  
4160
25.2k
  fn reduce_stack<'s, 'a:'s, 'op:'a>(&self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>, current_priority: usize) {
4161
25.2k
    let mut prev_priority = top(parse_stack).priority();
4162
    // debug!(" reduce_stack: stack len={}, priority: prev={}, cur={}", parse_stack.len(), prev_priority, current_priority);
4163
37.2k
    while current_priority < prev_priority {          // pop off operators until we are back to the right level
4164
12.0k
      if parse_stack.len() == 1 {
4165
0
        break;     // something went wrong -- break before popping too much
4166
12.0k
      }
4167
12.0k
      prev_priority = self.reduce_stack_one_time(parse_stack);
4168
    };
4169
25.2k
  }
4170
4171
12.0k
  fn reduce_stack_one_time<'s, 'a:'s, 'op:'a>(&self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>) -> usize {
4172
12.0k
    let mut top_of_stack = parse_stack.pop().unwrap();
4173
    // debug!(" ..popped len={} op:'{}/{}', operand: {}",
4174
    //    top_of_stack.mrow.children().len(),
4175
    //    show_invisible_op_char(top_of_stack.op_pair.ch), top_of_stack.op_pair.op.priority,
4176
    //    top_of_stack.is_operand);
4177
12.0k
    let mut mrow = top_of_stack.mrow;
4178
12.0k
    if mrow.children().len() == 1 && 
CanonicalizeContext::is_ok_to_merge_mrow_child63
(
mrow63
) {
4179
63
      // should have added at least operator and operand, but input might not be well-formed
4180
63
      // in this case, unwrap the mrow and expose the single child for pushing onto stack
4181
63
      let single_child = top_of_stack.remove_last_operand_from_mrow();
4182
63
      mrow = single_child;
4183
11.9k
    }
4184
4185
12.0k
    let mut top_of_stack = parse_stack.pop().unwrap();
4186
12.0k
    top_of_stack.add_child_to_mrow(mrow, OperatorPair::new());  // mrow on top is "parsed" -- now add it to previous
4187
12.0k
    let prev_priority = top_of_stack.priority();
4188
12.0k
    parse_stack.push(top_of_stack);
4189
12.0k
    return prev_priority;
4190
12.0k
  }
4191
  
4192
5.06k
  fn is_trig_arg<'a, 'op:'a>(&self, previous_child: Element<'a>, current_child: Element<'a>, parse_stack: &mut Vec<StackInfo<'a, 'op>>) -> bool {
4193
    // We have operand-operand and know we want multiplication at this point. 
4194
    // Check for special case where we want multiplication to bind more tightly than function app (e.g, sin 2x, sin -2xy)
4195
    // We only want to do this for simple args
4196
    // debug!("  is_trig_arg: prev {}, current {}, Stack:", element_summary(previous_child), element_summary(current_child));
4197
    // parse_stack.iter().for_each(|stack_info| debug!("    {}", stack_info));
4198
5.06k
    if !IsNode::is_simple(current_child) {
4199
2.98k
      return false;
4200
2.07k
    }
4201
    // This only matters if we are not inside of parens
4202
2.07k
    if IsBracketed::is_bracketed(previous_child, "(", ")", false, false) ||
4203
2.01k
       IsBracketed::is_bracketed(previous_child, "[", "]", false, false) {
4204
63
      return false;
4205
2.01k
    }
4206
  
4207
    // Use lower priority multiplication if current_child is a function (e.g. "cos" in "sin x cos 3y")
4208
    // if !is_trig(current_child) {
4209
2.01k
    if self.is_function_name(current_child, None) == FunctionNameCertainty::True {
4210
1
      return false;
4211
2.01k
    }
4212
    // Three cases:
4213
    // 1. First operand-operand (e.g, sin 2x, where 'current_child' is 'x') -- top of stack is mrow('sin' f_apply '2')
4214
    // 2. Another First operand-operand (e.g, sin -2x, where 'current_child' is 'x') -- top of stack is mrow('-' '2'), next is mrow('sin', f_apply)
4215
    // 3. Subsequent operand-operand (e.g, sin 2xy, where 'current_child' is 'y') -- top of stack is mrow('2' 'times' 'x')
4216
    //    Note: IMPLIED_TIMES_HIGH_PRIORITY is only present if we have a trig function
4217
2.01k
    let op_on_top = &top(parse_stack).op_pair;
4218
2.01k
    if ptr_eq(op_on_top.op, *INVISIBLE_FUNCTION_APPLICATION) {
4219
8
      let function_element = as_element(top(parse_stack).mrow.children()[0]);
4220
8
      return is_trig(function_element);
4221
2.00k
    }
4222
2.00k
    if ptr_eq(op_on_top.op, *PREFIX_MINUS) {
4223
74
      if parse_stack.len() < 2 {
4224
0
        return false;
4225
74
      }
4226
74
      let next_stack_info = &parse_stack[parse_stack.len()-2];
4227
74
      if !ptr_eq(next_stack_info.op_pair.op, *INVISIBLE_FUNCTION_APPLICATION) {
4228
72
        return false;
4229
2
      }
4230
2
      let function_element = as_element(next_stack_info.mrow.children()[0]);
4231
2
      if is_trig(function_element) {
4232
        // want '- 2' to be an mrow; don't want '- 2 x ...' to be the mrow (IMPLIED_TIMES_HIGH_PRIORITY is an internal hack)
4233
1
        self.reduce_stack_one_time(parse_stack);
4234
1
        return true;
4235
1
      }
4236
1
      return false;
4237
1.92k
    }
4238
1.92k
    return ptr_eq(op_on_top.op, &IMPLIED_TIMES_HIGH_PRIORITY);
4239
4240
10
    fn is_trig(node: Element) -> bool {
4241
10
      let base_of_name = get_possible_embellished_node(node);
4242
  
4243
      // actually only 'mi' should be legal here, but some systems used 'mtext' for multi-char variables
4244
10
      let node_name = name(base_of_name);
4245
10
      if node_name != "mi" && 
node_name != "mtext"0
{
4246
0
        return false;
4247
10
      }
4248
      // whitespace is sometimes added to the mi since braille needs it, so do a trim here to get function name
4249
10
      let base_name = as_text(base_of_name).trim();
4250
10
      if base_name.is_empty() {
4251
0
        return false;
4252
10
      }
4253
10
      return crate::definitions::SPEECH_DEFINITIONS.with(|defs| {
4254
        // names that are always function names (e.g, "sin" and "log")
4255
10
        let defs = defs.borrow();
4256
10
        let names = defs.get_hashset("TrigFunctionNames").unwrap();
4257
        // UEB seems to think "Sin" (etc) is used for "sin", so we move to lower case
4258
10
        return names.contains(&base_name.to_ascii_lowercase());
4259
10
      });
4260
10
    }
4261
5.06k
  }
4262
  
4263
  
4264
  /*
4265
    canonicalize_mrows_in_mrow is a simple(ish) operator precedence parser.
4266
    It works by keeping a stack of 'StackInfo':
4267
    'StackInfo' has three parts:
4268
    1. the mrow being build
4269
    2. info about the operator in the mrow being build
4270
    3. bool to say whether the last thing is an operator or an operand
4271
  
4272
    When the op priority increases (eg, have "=" and get "+"), we push on
4273
    1. a new mrow -- if the operator has a left operand, we remove the last node in the mrow and it becomes
4274
       the first (only so far) child of the new mrow
4275
    2. the operator info
4276
  
4277
    When the op priority decreases, we do the following loop until the this new priority > priority on top of stack
4278
    1. pop the StackInfo
4279
    2. add the StackInfo's mrow  as the last child to the new top of the stack
4280
    We also do this when we hit the end of the mrow (we can treat this case as if we have a negative precedence)
4281
  
4282
    +/- are treated as nary operators and don't push/pop in those cases.
4283
    consecutive operands such as nary times are also considered n-ary operators and don't push/pop in those cases.
4284
  */
4285
7.48k
  fn canonicalize_mrows_in_mrow<'a>(&self, mrow: Element<'a>) -> Result<Element<'a>> {
4286
7.48k
    let is_ok_to_merge_child = mrow.children().len() != 1 || 
CanonicalizeContext::is_ok_to_merge_mrow_child56
(
mrow56
);
4287
7.48k
    let saved_mrow_attrs = mrow.attributes(); 
4288
7.48k
    assert_eq!(name(mrow), "mrow");
4289
  
4290
    // FIX: don't touch/canonicalize
4291
    // 1. if intent is given -- anything intent references
4292
    // 2. if the mrow starts or ends with a fence, don't merge into parent (parse children only) -- allows for "]a,b["
4293
7.48k
    let mut parse_stack = vec![StackInfo::new(mrow.document())];
4294
7.48k
    let mut children = mrow.children();
4295
7.48k
    let num_children = children.len();
4296
  
4297
36.7k
    for i_child in 
0..num_children7.48k
{
4298
      // debug!("\nDealing with child #{}: {}", i_child, mml_to_string(as_element(children[i_child])));
4299
36.7k
      let mut current_child = self.canonicalize_mrows(as_element(children[i_child]))
?0
;
4300
36.7k
      children[i_child] = ChildOfElement::Element( current_child );
4301
36.7k
      let base_of_child = get_possible_embellished_node(current_child);
4302
36.7k
      let acts_as_ch = current_child.attribute_value(ACT_AS_OPERATOR);
4303
36.7k
      let mut current_op = OperatorPair::new();
4304
      // figure what the current operator is -- it either comes from the 'mo' (if we have an 'mo') or it is implied
4305
36.7k
      if (name(base_of_child) == "mo" &&
4306
13.9k
          !( base_of_child.children().is_empty() || as_text(base_of_child) == "\u{00A0}" )) || // shouldn't have empty mo node, but...
4307
22.8k
         acts_as_ch.is_some() {
4308
13.9k
        let previous_op = if top(&parse_stack).is_operand {
None10.9k
} else {
Some( top(&parse_stack).op_pair.op )2.95k
};
4309
13.9k
        let next_node = if i_child + 1 < num_children {
Some(11.9k
as_element11.9k
(children[i_child+1]))} else {
None1.99k
};
4310
13.9k
        if let Some(
acts_as_ch20
) = acts_as_ch {
4311
20
          // ∇× (etc) hack, including ∇ being a vector (maybe eventually others)
4312
20
          let temp_mo = create_mathml_element(&current_child.document(), "mo");
4313
20
          temp_mo.set_text(acts_as_ch);
4314
20
          current_op = OperatorPair{
4315
20
            ch: acts_as_ch,
4316
20
            op: CanonicalizeContext::find_operator(Some(self), temp_mo, previous_op,
4317
20
                top(&parse_stack).last_child_in_mrow(), next_node)
4318
20
          };
4319
13.9k
        } else {
4320
13.9k
          current_op = OperatorPair{
4321
13.9k
            ch: as_text(base_of_child),
4322
13.9k
            op: CanonicalizeContext::find_operator(Some(self), base_of_child, previous_op,
4323
13.9k
                top(&parse_stack).last_child_in_mrow(), next_node)
4324
13.9k
          };
4325
13.9k
    
4326
13.9k
          // deal with vertical bars which might be infix, open, or close fences
4327
13.9k
          // note: mrow shrinks as we iterate through it (removing children from it)
4328
13.9k
          current_op.op = self.determine_vertical_bar_op(
4329
13.9k
            current_op.op,
4330
13.9k
            base_of_child,
4331
13.9k
            next_node,
4332
13.9k
            &mut parse_stack,
4333
13.9k
            self.n_vertical_bars_on_right(&children[i_child+1..], current_op.ch)
4334
13.9k
          );
4335
13.9k
        }
4336
      } else {
4337
22.8k
        let previous_child = top(&parse_stack).last_child_in_mrow();
4338
22.8k
        if let Some(
previous_child17.9k
) = previous_child {
4339
17.9k
          let base_of_previous_child = get_possible_embellished_node(previous_child);
4340
17.9k
          let acts_as_ch = previous_child.attribute_value(ACT_AS_OPERATOR);
4341
17.9k
          if name(base_of_previous_child) != "mo" && 
acts_as_ch6.57k
.
is_none6.57k
() {
4342
6.55k
            let likely_function_name = self.is_function_name(previous_child, Some(&children[i_child..]));
4343
6.55k
            if name(base_of_child) == "mtext" && 
as_text(base_of_child) == "\u{00A0}"184
{
4344
1
              base_of_child.set_attribute_value("data-function-likelihood", &(likely_function_name == FunctionNameCertainty::True).to_string());
4345
1
              base_of_child.remove_attribute("data-was-mo");
4346
1
              set_mathml_name(base_of_child, "mo");
4347
1
              let mut top_of_stack = parse_stack.pop().unwrap();
4348
1
              top_of_stack.add_child_to_mrow(current_child, OperatorPair{ ch: "\u{00A0}", op: *INVISIBLE_FUNCTION_APPLICATION});    // whitespace -- make part of mrow to keep out of parse
4349
1
              parse_stack.push(top_of_stack);
4350
1
              continue;
4351
6.55k
            }
4352
            // consecutive operands -- add an invisible operator as appropriate
4353
6.55k
            current_op = if likely_function_name == FunctionNameCertainty::True {
4354
753
                  OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION }
4355
5.79k
                } else if self.is_mixed_fraction(previous_child, &children[i_child..])
?0
{
4356
70
                  OperatorPair{ ch: "\u{2064}", op: *IMPLIED_INVISIBLE_PLUS }
4357
5.72k
                } else if self.is_implied_comma(previous_child, current_child, mrow) {
4358
81
                  OperatorPair{ch: "\u{2063}", op: *IMPLIED_INVISIBLE_COMMA }          
4359
5.64k
                } else if self.is_implied_chemical_bond(previous_child, current_child) {
4360
419
                  OperatorPair{ch: "\u{2063}", op: &IMPLIED_CHEMICAL_BOND }          
4361
5.22k
                } else if self.is_implied_separator(previous_child, current_child) {
4362
166
                  OperatorPair{ch: "\u{2063}", op: &IMPLIED_SEPARATOR_HIGH_PRIORITY }          
4363
5.06k
                } else if self.is_trig_arg(base_of_previous_child, base_of_child, &mut parse_stack) {
4364
9
                  OperatorPair{ch: "\u{2062}", op: &IMPLIED_TIMES_HIGH_PRIORITY }          
4365
                } else {
4366
5.05k
                  OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES }
4367
                };
4368
6.55k
            if let Some(
attr_val262
) = base_of_child.attribute_value(CHANGED_ATTR)
4369
262
              && attr_val == "data-was-mo" {
4370
0
                // it really should be an operator
4371
0
                base_of_child.remove_attribute(CHANGED_ATTR);
4372
0
                set_mathml_name(base_of_child, "mo");
4373
6.55k
              }
4374
6.55k
            if name(base_of_child) == "mo" {
4375
1
              current_op.ch = as_text(base_of_child);
4376
1
              // debug!("  Found whitespace op '{}'/{}", show_invisible_op_char(current_op.ch), current_op.op.priority);
4377
1
            } else {
4378
6.54k
              let implied_mo = create_mo(current_child.document(), current_op.ch, ADDED_ATTR_VALUE);
4379
6.54k
              if likely_function_name == FunctionNameCertainty::Maybe {
4380
33
                implied_mo.set_attribute_value("data-function-guess", "true");
4381
6.51k
              }
4382
              // debug!("  Found implicit op {}/{} [{:?}]", show_invisible_op_char(current_op.ch), current_op.op.priority, likely_function_name);
4383
6.54k
              self.reduce_stack(&mut parse_stack, current_op.op.priority);    
4384
6.54k
              let shift_result = self.shift_stack(&mut parse_stack, implied_mo, current_op.clone());
4385
              // ignore shift_result.0 which is just 'implied_mo'
4386
6.54k
              assert_eq!(implied_mo, shift_result.0);
4387
6.54k
              assert!( ptr_eq(current_op.op, shift_result.1.op) );
4388
6.54k
              let mut top_of_stack = parse_stack.pop().unwrap();
4389
6.54k
              top_of_stack.add_child_to_mrow(implied_mo, current_op);
4390
6.54k
              parse_stack.push(top_of_stack);
4391
6.54k
              current_op = OperatorPair::new(); 
4392
            }
4393
11.3k
          }
4394
4.88k
        }
4395
      }
4396
  
4397
36.7k
      if !ptr_eq(current_op.op, &ILLEGAL_OPERATOR_INFO) {
4398
13.9k
        if current_op.op.is_left_fence() || 
current_op.op12.0k
.
is_prefix12.0k
() {
4399
2.95k
          if top(&parse_stack).is_operand {
4400
            // will end up with duplicate operands -- need to choose operator associated with prev child
4401
            // we use the original input here because in this case, we need to look to the right of the ()s to deal with chemical states
4402
232
            let likely_function_name = self.is_function_name(as_element(children[i_child-1]), Some(&children[i_child..]));
4403
232
            let implied_operator = if likely_function_name== FunctionNameCertainty::True {
4404
98
                OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION }
4405
              } else {
4406
134
                OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES }
4407
              };
4408
            // debug!("  adding implied {}", if ptr_eq(implied_operator.op,*IMPLIED_TIMES) {"times"} else {"function apply"});
4409
  
4410
232
            let implied_mo = create_mo(current_child.document(), implied_operator.ch, ADDED_ATTR_VALUE);
4411
232
            if likely_function_name == FunctionNameCertainty::Maybe {
4412
55
              implied_mo.set_attribute_value("data-function-guess", "true");
4413
177
            }
4414
232
            self.reduce_stack(&mut parse_stack, implied_operator.op.priority);            let shift_result = self.shift_stack(&mut parse_stack, implied_mo, implied_operator.clone());
4415
            // ignore shift_result.0 which is just 'implied_mo'
4416
232
            assert_eq!(implied_mo, shift_result.0);
4417
232
            assert!( ptr_eq(implied_operator.op, shift_result.1.op) );
4418
232
            let mut top_of_stack = parse_stack.pop().unwrap();
4419
232
            top_of_stack.add_child_to_mrow(implied_mo, implied_operator);
4420
232
            parse_stack.push(top_of_stack);
4421
2.72k
          }
4422
          // starting a new mrow
4423
2.95k
          parse_stack.push( StackInfo::new(current_child.document()) );
4424
        } else {
4425
          // One of infix, postfix, or right fence -- all should have a left operand
4426
          // pop the stack if it is lower precedence (it forms an mrow)
4427
          
4428
          // hack to get linear mixed fractions to parse correctly
4429
10.9k
          if current_op.ch == "/" && 
top(&parse_stack).op_pair.ch == "\u{2064}"41
{
4430
2
              current_op.op = &IMPLIED_PLUS_SLASH_HIGH_PRIORITY;
4431
10.9k
          }
4432
10.9k
          self.reduce_stack(&mut parse_stack, current_op.op.priority);
4433
          // push new operator on stack (already handled n-ary case)
4434
10.9k
          let shift_result = self.shift_stack(&mut parse_stack, current_child, current_op);
4435
10.9k
          current_child = shift_result.0;
4436
10.9k
          current_op = shift_result.1;
4437
        }
4438
22.7k
      }
4439
36.7k
      let mut top_of_stack = parse_stack.pop().unwrap();
4440
36.7k
      top_of_stack.add_child_to_mrow(current_child, current_op);
4441
36.7k
      parse_stack.push(top_of_stack);
4442
    }
4443
  
4444
    // Reached the end -- force reduction of what's left on the stack
4445
7.48k
    self.reduce_stack(&mut parse_stack, LEFT_FENCEPOST.priority);
4446
  
4447
    // We essentially have 'terminator( mrow terminator)'
4448
    //   in other words, we have an extra mrow with one child due to the initial start -- remove it
4449
7.48k
    let mut top_of_stack = parse_stack.pop().unwrap();
4450
7.48k
    assert_eq!(parse_stack.len(), 0);
4451
  
4452
7.48k
    let mut parsed_mrow = top_of_stack.mrow;
4453
7.48k
    assert_eq!( name(top_of_stack.mrow), "mrow");
4454
7.48k
    if parsed_mrow.children().len() == 1 && is_ok_to_merge_child {
4455
7.46k
      parsed_mrow = top_of_stack.remove_last_operand_from_mrow();
4456
7.46k
      // was synthesized, but is really the original top level mrow
4457
7.46k
    
}15
4458
  
4459
7.48k
    parsed_mrow.remove_attribute(CHANGED_ATTR);
4460
7.48k
    return Ok( add_attrs(parsed_mrow, &saved_mrow_attrs) );
4461
7.48k
  }  
4462
}
4463
4464
// ---------------- useful utility functions --------------------
4465
102k
fn top<'s, 'a:'s, 'op:'a>(vec: &'s[StackInfo<'a, 'op>]) -> &'s StackInfo<'a, 'op> {
4466
102k
  return &vec[vec.len()-1];
4467
102k
}
4468
// Replace the attrs of 'mathml' with 'attrs' and keep the global attrs of 'mathml' (i.e, lift 'attrs' to 'mathml' for replacing children)
4469
10.0k
pub fn add_attrs<'a>(mathml: Element<'a>, attrs: &[Attribute]) -> Element<'a> {
4470
  static GLOBAL_ATTRS: phf::Set<&str> = phf_set! {
4471
    "class", "dir", "displaystyle", "id", "mathbackground", "mathcolor", "mathsize",
4472
    "mathvariant", "nonce", "scriptlevel", "style", "tabindex",
4473
    "intent", "arg",
4474
  };
4475
  
4476
  // debug!(   "Adding back {} attr(s) to {}", attrs.len(), name(mathml));
4477
  // remove non-global attrs
4478
10.0k
  for 
attr740
in mathml.attributes() {
4479
740
    let attr_name = attr.name().local_part();
4480
740
    if !( attr_name.starts_with("data-") || 
GLOBAL_ATTRS534
.
contains534
(
attr_name534
) ||
4481
278
          attr_name.starts_with("on") ) {     // allows too much - cheapo way to allow event handlers like "onchange"
4482
278
      mathml.remove_attribute(attr.name());
4483
462
    }
4484
  }
4485
4486
  // add in 'attrs'
4487
10.0k
  for 
attr5.22k
in attrs {
4488
5.22k
    mathml.set_attribute_value(attr.name(), attr.value());
4489
5.22k
  }
4490
10.0k
  return mathml;
4491
10.0k
}
4492
4493
4494
2.91M
pub fn name(node: Element<'_>) -> &str {
4495
2.91M
  return node.name().local_part();
4496
2.91M
}
4497
4498
/// The child of a non-leaf element must be an element
4499
// Note: can't use references as that results in 'returning use of local variable'
4500
1.14M
pub fn as_element(child: ChildOfElement) -> Element {
4501
1.14M
  return match child {
4502
1.14M
    ChildOfElement::Element(e) => e,
4503
    _ => {
4504
0
      panic!("as_element: internal error -- found non-element child (text? '{:?}')", child.text());
4505
    },
4506
  };
4507
1.14M
}
4508
4509
/// The child of a leaf element must be text (previously trimmed)
4510
/// Note: trim() combines all the Text children into a single string
4511
603k
pub fn as_text(leaf_child: Element<'_>) -> &str {
4512
603k
  assert!(is_leaf(leaf_child));
4513
603k
  let children = leaf_child.children();
4514
603k
  if children.is_empty() {
4515
401
    return "";
4516
602k
  }
4517
602k
  assert!(children.len() == 1);
4518
602k
  return match children[0] {
4519
602k
    ChildOfElement::Text(t) => t.text(),
4520
0
    _ => panic!("as_text: internal error -- found non-text child of leaf element"),
4521
  }
4522
603k
}
4523
4524
/// Returns the parent of the argument.
4525
/// Warning: this assumes the parent exists
4526
239k
pub fn get_parent(mathml: Element) -> Element {
4527
239k
  return mathml.parent().unwrap().element().unwrap();
4528
239k
}
4529
4530
#[allow(dead_code)] // for debugging
4531
0
pub fn element_summary(mathml: Element) -> String {
4532
0
  return format!("{}<{}>", name(mathml),
4533
0
                if is_leaf(mathml) {show_invisible_op_char(as_text(mathml)).to_string()}
4534
          else 
4535
0
                     {mathml.children().len().to_string()});
4536
0
}
4537
4538
6.86k
fn create_mo<'a, 'd:'a>(doc: Document<'d>, ch: &'a str, attr_value: &str) -> Element<'d> {
4539
6.86k
  let implied_mo = create_mathml_element(&doc, "mo");
4540
6.86k
  implied_mo.set_attribute_value(CHANGED_ATTR, attr_value);
4541
6.86k
  let mo_text = doc.create_text(ch);
4542
6.86k
  implied_mo.append_child(mo_text);
4543
6.86k
  return implied_mo;
4544
6.86k
}
4545
4546
/// return 'node' or if it is adorned, return its base (recursive)
4547
130k
pub fn get_possible_embellished_node(node: Element) -> Element {
4548
130k
  let mut node = node;
4549
138k
  while IsNode::is_modified(node) {
4550
8.33k
    node = as_element(node.children()[0]);
4551
8.33k
  }
4552
130k
  return node;
4553
130k
}    
4554
4555
#[allow(dead_code)] // for debugging with println
4556
0
fn show_invisible_op_char(ch: &str) -> &str {
4557
0
  return match ch.chars().next().unwrap() {
4558
0
    '\u{2061}' => "&#x2061;",
4559
0
    '\u{2062}' => "&#x2062;",
4560
0
    '\u{2063}' => "&#x2063;",
4561
0
    '\u{2064}' => "&#x2064;",
4562
0
    '\u{E000}' => "&#xE000;",
4563
0
    _        => ch
4564
  };
4565
0
}
4566
4567
4568
#[cfg(test)]
4569
mod canonicalize_tests {
4570
  use crate::errors::Result;
4571
  use crate::{are_strs_canonically_equal_result, are_strs_canonically_equal_with_locale};
4572
4573
#[allow(unused_imports)]
4574
  use super::super::init_logger;
4575
  use super::super::abs_rules_dir_path;
4576
    use super::*;
4577
    use sxd_document::parser;
4578
4579
4580
    #[test]
4581
1
    fn canonical_same() -> Result<()> {
4582
1
        let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
4583
1
        are_strs_canonically_equal_result(target_str, target_str, &[])
4584
1
    }
4585
4586
  #[test]
4587
1
    fn plane1_common() -> Result<()> {
4588
1
        let test_str = "<math>
4589
1
        <mi mathvariant='normal'>sin</mi> <mo>,</mo>    <!-- shouldn't change -->
4590
1
        <mi mathvariant='italic'>bB4</mi> <mo>,</mo>    <!-- shouldn't change -->
4591
1
        <mi mathvariant='bold'>a</mi> <mo>,</mo>      <!-- single char id tests -->
4592
1
        <mi mathvariant='bold'>Z</mi> <mo>,</mo>
4593
1
        <mn mathvariant='bold'>19=&#x1D7D7;</mn> <mo>,</mo> <!-- '=' and plane1 shouldn't change -->
4594
1
        <mn mathvariant='double-struck'>024689</mn> <mo>,</mo>  <!-- '=' and plane1 shouldn't change -->
4595
1
        <mi mathvariant='double-struck'>yzCHNPQRZ</mi> <mo>,</mo>
4596
1
        <mi mathvariant='fraktur'>0yACHIRZ</mi> <mo>,</mo>  <!-- 0 stays as ASCII -->
4597
1
        <mi mathvariant='bold-fraktur'>nC</mi> <mo>,</mo>
4598
1
        <mi mathvariant='script'>ABEFHILMRegow</mi> <mo>,</mo>
4599
1
        <msup>
4600
1
          <mi mathvariant='bold-script'>fG</mi>
4601
1
          <mo mathvariant='bold-script'>*</mo>        <!-- '*' shouldn't change -->
4602
1
        </msup>
4603
1
      </math>";
4604
1
        let target_str = "<math>
4605
1
      <mrow data-changed='added'>
4606
1
        <mi mathvariant='normal'>sin</mi>
4607
1
        <mo >,</mo>
4608
1
        <mi mathvariant='italic'>bB4</mi>
4609
1
        <mo>,</mo>
4610
1
        <mi mathvariant='bold'>𝐚</mi>
4611
1
        <mo>,</mo>
4612
1
        <mi mathvariant='bold'>𝐙</mi>
4613
1
        <mo>,</mo>
4614
1
        <mn mathvariant='bold'>𝟏𝟗=𝟗</mn>
4615
1
        <mo>,</mo>
4616
1
        <mn mathvariant='double-struck'>𝟘𝟚𝟜𝟞𝟠𝟡</mn>
4617
1
        <mo>,</mo>
4618
1
        <mi mathvariant='double-struck'>𝕪𝕫ℂℍℕℙℚℝℤ</mi>
4619
1
        <mo>,</mo>
4620
1
        <mi mathvariant='fraktur'>0𝔶𝔄ℭℌℑℜℨ</mi>
4621
1
        <mo>,</mo>
4622
1
        <mi mathvariant='bold-fraktur'>𝖓𝕮</mi>
4623
1
        <mo>,</mo>
4624
1
        <mi mathvariant='script'>𝒜ℬℰℱℋℐℒℳℛℯℊℴ𝓌</mi>
4625
1
        <mo>,</mo>
4626
1
        <msup>
4627
1
          <mi mathvariant='bold-script'>𝓯𝓖</mi>
4628
1
          <mo mathvariant='bold-script'>*</mo>        <!-- '*' shouldn't change -->
4629
1
        </msup>
4630
1
      </mrow>
4631
1
    </math>";
4632
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4633
1
  }
4634
  
4635
  #[test]
4636
1
    fn plane1_font_styles() -> Result<()> {
4637
1
        let test_str = "<math>
4638
1
        <mi mathvariant='sans-serif'>aA09=</mi> <mo>,</mo>      <!-- '=' shouldn't change -->
4639
1
        <mi mathvariant='bold-sans-serif'>zZ09</mi> <mo>,</mo>  
4640
1
        <mi mathvariant='sans-serif-italic'>azAZ09</mi> <mo>,</mo>  <!-- italic digits don't exist: revert to sans-serif -->
4641
1
        <mi mathvariant='sans-serif-bold-italic'>AZaz09</mi> <mo>,</mo> <!--  italic digits don't exist: revert to just bold -->
4642
1
        <mi mathvariant='monospace'>aA09</mi>
4643
1
      </math>";
4644
1
        let target_str = "<math>
4645
1
        <mrow data-changed='added'>
4646
1
          <mi mathvariant='sans-serif'>𝖺𝖠𝟢𝟫=</mi>
4647
1
          <mo>,</mo>
4648
1
          <mi mathvariant='bold-sans-serif'>𝘇𝗭𝟬𝟵</mi>
4649
1
          <mo>,</mo>
4650
1
          <mi mathvariant='sans-serif-italic'>𝘢𝘻𝘈𝘡𝟢𝟫</mi>
4651
1
          <mo>,</mo>
4652
1
          <mi mathvariant='sans-serif-bold-italic'>𝘼𝙕𝙖𝙯𝟬𝟵</mi>
4653
1
          <mo>,</mo>
4654
1
          <mi mathvariant='monospace'>𝚊𝙰𝟶𝟿</mi>
4655
1
        </mrow>
4656
1
      </math>";
4657
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4658
1
  }
4659
  
4660
  #[test]
4661
1
    fn plane1_greek() -> Result<()> {
4662
1
        let test_str = "<math>
4663
1
        <mi mathvariant='normal'>ΑΩαω∇∂ϵ=</mi> <mo>,</mo>    <!-- shouldn't change -->
4664
1
        <mi mathvariant='italic'>ϴΑΩαω∇∂ϵ</mi> <mo>,</mo>
4665
1
        <mi mathvariant='bold'>ΑΩαωϝϜ</mi> <mo>,</mo> 
4666
1
        <mi mathvariant='double-struck'>Σβ∇</mi> <mo>,</mo>   <!-- shouldn't change -->
4667
1
        <mi mathvariant='fraktur'>ΞΦλϱ</mi> <mo>,</mo>      <!-- shouldn't change -->
4668
1
        <mi mathvariant='bold-fraktur'>ψΓ</mi> <mo>,</mo>   <!-- map to bold -->
4669
1
        <mi mathvariant='script'>μΨ</mi> <mo>,</mo>       <!-- shouldn't change -->
4670
1
        <mi mathvariant='bold-script'>Σπ</mi>         <!-- map to bold -->
4671
1
      </math>";
4672
1
        let target_str = "<math>
4673
1
        <mrow data-changed='added'>
4674
1
          <mi mathvariant='normal'>ΑΩαω∇∂ϵ=</mi>
4675
1
          <mo>,</mo>
4676
1
          <mi mathvariant='italic'>𝛳𝛢𝛺𝛼𝜔𝛻𝜕𝜖</mi>
4677
1
          <mo>,</mo>
4678
1
          <mi mathvariant='bold'>𝚨𝛀𝛂𝛚𝟋𝟊</mi>
4679
1
          <mo>,</mo>
4680
1
          <mi mathvariant='double-struck'>Σβ∇</mi>
4681
1
          <mo>,</mo>
4682
1
          <mi mathvariant='fraktur'>ΞΦλϱ</mi>
4683
1
          <mo>,</mo>
4684
1
          <mi mathvariant='bold-fraktur'>𝛙𝚪</mi>
4685
1
          <mo>,</mo>
4686
1
          <mi mathvariant='script'>μΨ</mi>
4687
1
          <mo>,</mo>
4688
1
          <mi mathvariant='bold-script'>𝚺𝛑</mi>
4689
1
        </mrow>
4690
1
      </math>";
4691
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4692
1
  }
4693
  
4694
  #[test]
4695
1
    fn plane1_greek_font_styles() -> Result<()> {
4696
1
        let test_str = "<math>
4697
1
        <mi mathvariant='sans-serif'>ΑΩαω∇∂ϵ=</mi> <mo>,</mo>      <!-- '=' shouldn't change -->
4698
1
        <mi mathvariant='bold-sans-serif'>ϴ0ΑΩαω∇∂ϵ</mi> <mo>,</mo> 
4699
1
        <mi mathvariant='sans-serif-italic'>aΑΩαω∇∂ϵ</mi> <mo>,</mo> <!-- italic digits don't exist: revert to sans-serif -->
4700
1
        <mi mathvariant='sans-serif-bold-italic'>ZΑΩαωϰϕϱϖ</mi> <mo>,</mo>  <!--  italic digits don't exist: revert to just bold -->
4701
1
        <mi mathvariant='monospace'>zΑΩαω∇∂</mi>
4702
1
      </math>";
4703
1
        let target_str = "<math>
4704
1
        <mrow data-changed='added'>
4705
1
          <mi mathvariant='sans-serif'>ΑΩαω∇∂ϵ=</mi>
4706
1
          <mo>,</mo>
4707
1
          <mi mathvariant='bold-sans-serif'>𝝧𝟬𝝖𝝮𝝰𝞈𝝯𝞉𝞊</mi>
4708
1
          <mo>,</mo>
4709
1
          <mi mathvariant='sans-serif-italic'>𝘢ΑΩαω∇∂ϵ</mi>
4710
1
          <mo>,</mo>
4711
1
          <mi mathvariant='sans-serif-bold-italic'>𝙕𝞐𝞨𝞪𝟂𝟆𝟇𝟈𝟉</mi>
4712
1
          <mo>,</mo>
4713
1
          <mi mathvariant='monospace'>𝚣ΑΩαω∇∂</mi>
4714
1
        </mrow>
4715
1
      </math>";
4716
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4717
1
  }
4718
4719
    #[test]
4720
1
    fn short_and_long_dash() -> Result<()> {
4721
1
        let test_str = "<math><mi>x</mi> <mo>=</mo> <mi>--</mi><mo>+</mo><mtext>----</mtext></math>";
4722
1
        let target_str = "<math>
4723
1
      <mrow data-changed='added'>
4724
1
      <mi>x</mi>
4725
1
      <mo>=</mo>
4726
1
      <mrow data-changed='added'>
4727
1
        <mi>—</mi>
4728
1
        <mo>+</mo>
4729
1
        <mtext>―</mtext>
4730
1
      </mrow>
4731
1
      </mrow>
4732
1
    </math>";
4733
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4734
1
    }
4735
4736
    #[test]
4737
1
    fn illegal_mathml_element() {
4738
    use crate::interface::*;
4739
1
        let test_str = "<math><foo><mi>f</mi></foo></math>";
4740
1
        let package1 = &parser::parse(test_str).expect("Failed to parse test input");
4741
1
    let mathml = get_element(package1);
4742
1
    trim_element(mathml, false);
4743
1
    assert!(canonicalize(mathml).is_err());
4744
1
    }
4745
4746
    #[test]
4747
1
    fn illegal_mtd_element() {
4748
    use crate::interface::*;
4749
1
        let test_str = "<math>
4750
1
      <mtable>
4751
1
        <mtr>
4752
1
          <mtd>
4753
1
          <mtext></mtext>
4754
1
          </mtd>
4755
1
          <mrow>
4756
1
          <mi>E</mi>
4757
1
          <mo>=</mo>
4758
1
          <mrow>
4759
1
          <mtd>
4760
1
            <mi>m</mi>
4761
1
            <mo>⁢<!--INVISIBLE TIMES--></mo>
4762
1
            <msup>
4763
1
            <mi>c</mi>
4764
1
            <mn>2</mn>
4765
1
            </msup>
4766
1
            </mtd></mrow>
4767
1
          </mrow>
4768
1
          
4769
1
        </mtr>
4770
1
      </mtable>
4771
1
    </math>";
4772
1
        let package1 = &parser::parse(test_str).expect("Failed to parse test input");
4773
1
    let mathml = get_element(package1);
4774
1
    trim_element(mathml, false);
4775
1
    assert!(canonicalize(mathml).is_err());
4776
1
    }
4777
4778
4779
    #[test]
4780
1
    fn a_to_mrow() -> Result<()> {
4781
1
        let test_str = "<math>
4782
1
      <a href='https://www.example.com'>
4783
1
        <mo>(</mo>
4784
1
        <a href='#its_relative'>
4785
1
          <mi>x</mi>
4786
1
          <mo>,</mo>
4787
1
          <mi>y</mi>
4788
1
        </a>
4789
1
        <mo>)</mo>
4790
1
      </a>
4791
1
      </math>
4792
1
";
4793
1
        let target_str = " <math>
4794
1
      <mrow href='https://www.example.com'>
4795
1
        <mo>(</mo>
4796
1
        <mrow href='#its_relative'>
4797
1
        <mi>x</mi>
4798
1
        <mo>,</mo>
4799
1
        <mi>y</mi>
4800
1
        </mrow>
4801
1
        <mo>)</mo>
4802
1
      </mrow>
4803
1
    </math>";
4804
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4805
1
    }
4806
4807
    #[test]
4808
1
    fn mfenced_no_children() -> Result<()> {
4809
1
        let test_str = "<math><mi>f</mi><mfenced><mrow/></mfenced></math>";
4810
1
        let target_str = "<math>
4811
1
      <mrow data-changed='added'>
4812
1
        <mi>f</mi>
4813
1
        <mo data-changed='added'>&#x2061;</mo>
4814
1
        <mrow>
4815
1
          <mo data-changed='from_mfenced'>(</mo>
4816
1
          <mo data-changed='from_mfenced'>)</mo>
4817
1
        </mrow>
4818
1
      </mrow>
4819
1
    </math>";
4820
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4821
1
    }
4822
4823
    #[test]
4824
1
    fn mfenced_one_child() -> Result<()> {
4825
1
        let test_str = "<math><mi>f</mi><mfenced open='[' close=']'><mi>x</mi></mfenced></math>";
4826
1
        let target_str = " <math>
4827
1
      <mrow data-changed='added'>
4828
1
      <mi>f</mi>
4829
1
      <mo data-changed='added'>&#x2061;</mo>
4830
1
      <mrow>
4831
1
        <mo data-changed='from_mfenced'>[</mo>
4832
1
        <mi>x</mi>
4833
1
        <mo data-changed='from_mfenced'>]</mo>
4834
1
      </mrow>
4835
1
      </mrow>
4836
1
    </math>";
4837
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4838
1
    }
4839
4840
    #[test]
4841
1
    fn mfenced_no_attrs() -> Result<()> {
4842
1
        let test_str = "<math><mi>f</mi><mfenced><mrow><mi>x</mi><mo>,</mo><mi>y</mi><mo>,</mo><mi>z</mi></mrow></mfenced></math>";
4843
1
        let target_str = " <math>
4844
1
      <mrow data-changed='added'>
4845
1
      <mi>f</mi>
4846
1
      <mo data-changed='added'>&#x2061;</mo>
4847
1
      <mrow>
4848
1
        <mo data-changed='from_mfenced'>(</mo>
4849
1
        <mrow>
4850
1
        <mi>x</mi>
4851
1
        <mo>,</mo>
4852
1
        <mi>y</mi>
4853
1
        <mo>,</mo>
4854
1
        <mi>z</mi>
4855
1
        </mrow>
4856
1
        <mo data-changed='from_mfenced'>)</mo>
4857
1
      </mrow>
4858
1
      </mrow>
4859
1
    </math>";
4860
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4861
1
    }
4862
4863
    #[test]
4864
1
    fn mfenced_with_separators() -> Result<()> {
4865
1
        let test_str = "<math><mi>f</mi><mfenced separators=',;'><mi>x</mi><mi>y</mi><mi>z</mi><mi>a</mi></mfenced></math>";
4866
1
        let target_str = "<math>
4867
1
      <mrow data-changed='added'>
4868
1
      <mi>f</mi>
4869
1
      <mo data-changed='added'>&#x2061;</mo>
4870
1
      <mrow>
4871
1
        <mo data-changed='from_mfenced'>(</mo>
4872
1
        <mrow data-changed='added'>
4873
1
        <mrow data-changed='added'>
4874
1
          <mi>x</mi>
4875
1
          <mo data-changed='from_mfenced'>,</mo>
4876
1
          <mi>y</mi>
4877
1
        </mrow>
4878
1
        <mo data-changed='from_mfenced'>;</mo>
4879
1
        <mrow data-changed='added'>
4880
1
          <mi>z</mi>
4881
1
          <mo data-changed='from_mfenced'>,</mo>
4882
1
          <mi>a</mi>
4883
1
        </mrow>
4884
1
        </mrow>
4885
1
        <mo data-changed='from_mfenced'>)</mo>
4886
1
      </mrow>
4887
1
      </mrow>
4888
1
    </math>";
4889
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4890
1
    }
4891
4892
    #[test]
4893
1
    fn canonical_one_element_mrow_around_mrow() -> Result<()> {
4894
1
        let test_str = "<math><mrow><mrow><mo>-</mo><mi>a</mi></mrow></mrow></math>";
4895
1
        let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
4896
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4897
1
    }
4898
4899
    #[test]
4900
1
    fn canonical_mtext_in_mtd_477() -> Result<()> {
4901
    // make sure mtext doesn't go away
4902
1
        let test_str = r#"<math>
4903
1
      <mtable>
4904
1
        <mtr>
4905
1
          <mtd>
4906
1
            <mstyle scriptlevel="0">
4907
1
              <mspace width="2em"/>
4908
1
            </mstyle>
4909
1
            <mstyle scriptlevel="0">
4910
1
              <mspace width="1em"/>
4911
1
            </mstyle>
4912
1
          </mtd>
4913
1
        </mtr>
4914
1
      </mtable>
4915
1
    </math>"#;
4916
1
        let target_str = r#"   <math>
4917
1
      <mtable>
4918
1
        <mtr>
4919
1
        <mtd>
4920
1
          <mtext data-width='1' data-following-space-width='4' scriptlevel='0' data-changed='added'> </mtext>
4921
1
        </mtd>
4922
1
        </mtr>
4923
1
      </mtable>
4924
1
    </math>"#;
4925
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4926
1
    }
4927
4928
    #[test]
4929
1
    fn canonical_mtext_in_mtr() -> Result<()> {
4930
    // make sure mtext doesn't go away
4931
1
        let test_str = "<math> <mtable> <mtr> <mtext> </mtext> </mtr> <mtr> <mtext> </mtext> </mtr> </mtable> </math>";
4932
1
        let target_str = "   <math>
4933
1
      <mtable>
4934
1
        <mtr>
4935
1
          <mtext data-changed='empty_content' data-width='0' data-empty-in-2D='true'> </mtext>
4936
1
        </mtr>
4937
1
        <mtr>
4938
1
          <mtext data-changed='empty_content' data-width='0' data-empty-in-2D='true'> </mtext>
4939
1
        </mtr>
4940
1
      </mtable>
4941
1
    </math>";
4942
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4943
1
    }
4944
4945
    #[test]
4946
1
    fn canonical_mtext_in_mtable() -> Result<()> {
4947
    // make sure mtext doesn't go away
4948
1
        let test_str = r"<math> <mtable> <mtr> <mtd> <mi>L</mi> </mtd> <mtd> <mrow> <mi>&lt;mi/&gt;</mi> <mo>=</mo> 
4949
1
            <mrow> <mo>[</mo> <mtable> <mtext> </mtext> </mtable> <mo>]</mo> </mrow> </mrow> </mtd> </mtr> </mtable> </math>";
4950
1
        let target_str = r"<math>
4951
1
      <mtable>
4952
1
      <mtr>
4953
1
        <mtd>
4954
1
        <mi>L</mi>
4955
1
        </mtd>
4956
1
        <mtd>
4957
1
        <mrow>
4958
1
          <mi>&lt;mi/&gt;</mi>
4959
1
          <mo>=</mo>
4960
1
          <mrow>
4961
1
          <mo>[</mo>
4962
1
          <mtable>
4963
1
            <mtext data-changed='empty_content' data-width='0' data-empty-in-2D='true'> </mtext>
4964
1
          </mtable>
4965
1
          <mo>]</mo>
4966
1
          </mrow>
4967
1
        </mrow>
4968
1
        </mtd>
4969
1
      </mtr>
4970
1
      </mtable>
4971
1
    </math>";
4972
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4973
1
    }
4974
4975
    #[test]
4976
1
    fn mrow_with_intent_and_single_child() -> Result<()> {
4977
    use crate::interface::*;
4978
    use sxd_document::parser;
4979
    use crate::canonicalize::canonicalize;
4980
    // this forces initialization
4981
1
    crate::interface::set_rules_dir(abs_rules_dir_path()).unwrap();
4982
1
    crate::speech::SPEECH_RULES.with(|_| true);
4983
4984
    // we don't want to remove the mrow because the intent on the mi would reference itself
4985
1
        let test = "<math><mrow intent='log($x)'><mi arg='x'>X</mi></mrow></math>"; 
4986
4987
1
    let package1 = &parser::parse(test).expect("Failed to parse test input");
4988
1
    let mathml = get_element(package1);
4989
1
    trim_element(mathml, false);
4990
1
    let mathml_test = canonicalize(mathml).unwrap();
4991
1
    let first_child = as_element( mathml_test.children()[0] );
4992
1
    assert_eq!(name(first_child), "mrow");
4993
1
    assert_eq!(first_child.children().len(), 1);
4994
1
    let mi = as_element(first_child.children()[0]);
4995
1
    assert_eq!(name(mi), "mi");
4996
1
    Ok(())
4997
1
    }
4998
4999
    #[test]
5000
1
    fn empty_mrow_with_intent() -> Result<()> {
5001
    // we don't want to remove the mrow because the intent on the mi would reference itself
5002
    use crate::interface::*;
5003
    use sxd_document::parser;
5004
    use crate::canonicalize::canonicalize;
5005
    // this forces initialization
5006
1
    crate::interface::set_rules_dir(abs_rules_dir_path()).unwrap();
5007
1
    crate::speech::SPEECH_RULES.with(|_| true);
5008
5009
    // we don't want to remove the mrow because the intent needs to stick around
5010
1
        let test = "<math><mrow intent='log(x)'/></math>";
5011
5012
1
    let package1 = &parser::parse(test).expect("Failed to parse test input");
5013
1
    let mathml = get_element(package1);
5014
1
    trim_element(mathml, false);
5015
1
    let mathml_test = canonicalize(mathml).unwrap();
5016
1
    let first_child = as_element( mathml_test.children()[0] );
5017
1
    assert_eq!(name(first_child), "mrow");
5018
1
    assert_eq!(first_child.children().len(), 1);
5019
1
    let mtext = as_element(first_child.children()[0]);
5020
1
    assert_eq!(name(mtext), "mtext");
5021
1
    Ok(())
5022
1
    }
5023
5024
    #[test]
5025
1
    fn mn_with_negative_sign() -> Result<()> {
5026
1
        let test_str = "<math><mfrac>
5027
1
        <mrow><mn>-1</mn></mrow>
5028
1
        <mn>−987</mn>
5029
1
        </mfrac></math>";
5030
1
        let target_str = "<math><mfrac>
5031
1
      <mrow data-changed='added'><mo>-</mo><mn>1</mn></mrow>
5032
1
      <mrow data-changed='added'><mo>-</mo><mn>987</mn></mrow>
5033
1
      </mfrac></math>";
5034
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5035
1
    }
5036
5037
    #[test]
5038
1
    fn mn_with_degree_sign() -> Result<()> {
5039
1
        let test_str = "<math> <mrow> <mi>cos</mi> <mo>⁡</mo> <mrow> <mo>(</mo> <mn>150°</mn> <mo>)</mo> </mrow> </mrow> </math>";
5040
1
        let target_str = "<math>
5041
1
      <mrow>
5042
1
        <mi>cos</mi> <mo>&#x2061;</mo>
5043
1
        <mrow>
5044
1
          <mo>(</mo>
5045
1
          <msup data-changed='added'> <mn>150</mn> <mo>°</mo> </msup>
5046
1
          <mo>)</mo>
5047
1
        </mrow>
5048
1
      </mrow>
5049
1
    </math>";
5050
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5051
1
    }
5052
5053
    #[test]
5054
1
    fn canonical_one_element_mrow_around_mo() -> Result<()> {
5055
1
        let test_str = "<math><mrow><mrow><mo>-</mo></mrow><mi>a</mi></mrow></math>";
5056
1
        let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
5057
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5058
1
    }
5059
5060
    #[test]
5061
1
    fn canonical_flat_to_times_and_plus() -> Result<()> {
5062
1
        let test_str = "<math><mi>c</mi><mo>+</mo><mi>x</mi><mi>y</mi></math>";
5063
1
        let target_str = "<math>
5064
1
    <mrow data-changed='added'><mi>c</mi><mo>+</mo>
5065
1
      <mrow data-changed='added'><mi>x</mi><mo data-changed='added'>&#x2062;</mo><mi>y</mi></mrow>
5066
1
    </mrow></math>";
5067
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5068
1
    }
5069
5070
    #[test]
5071
1
    fn canonical_prefix_and_infix() -> Result<()> {
5072
1
        let test_str = "<math><mrow><mo>-</mo><mi>a</mi><mo>-</mo><mi>b</mi></mrow></math>";
5073
1
        let target_str = "<math>
5074
1
    <mrow>
5075
1
      <mrow data-changed='added'>
5076
1
      <mo>-</mo>
5077
1
      <mi>a</mi>
5078
1
      </mrow>
5079
1
      <mo>-</mo>
5080
1
      <mi>b</mi>
5081
1
    </mrow>
5082
1
     </math>";
5083
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5084
1
    }
5085
5086
5087
    #[test]
5088
1
    fn canonical_prefix_implied_times_prefix() -> Result<()> {
5089
1
        let test_str = "<math><mrow><mo>∂</mo><mi>x</mi><mo>∂</mo><mi>y</mi></mrow></math>";
5090
1
        let target_str = "<math>
5091
1
      <mrow>
5092
1
      <mrow data-changed='added'><mo>∂</mo><mi>x</mi></mrow>
5093
1
      <mo data-changed='added'>&#x2062;</mo>
5094
1
      <mrow data-changed='added'><mo>∂</mo><mi>y</mi></mrow>
5095
1
      </mrow>
5096
1
    </math>";
5097
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5098
1
    }
5099
5100
    #[test]
5101
1
    fn function_with_single_arg() -> Result<()> {
5102
1
        let test_str = "<math><mrow>
5103
1
      <mi>sin</mi><mo>(</mo><mi>x</mi><mo>)</mo>
5104
1
      <mo>+</mo>
5105
1
      <mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo>
5106
1
      <mo>+</mo>
5107
1
      <mi>t</mi><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow>
5108
1
    </mrow></math>";
5109
1
        let target_str = "<math>
5110
1
    <mrow>
5111
1
      <mrow data-changed='added'>
5112
1
      <mi>sin</mi>
5113
1
      <mo data-changed='added'>&#x2061;</mo>
5114
1
      <mrow data-changed='added'>
5115
1
        <mo>(</mo>
5116
1
        <mi>x</mi>
5117
1
        <mo>)</mo>
5118
1
      </mrow>
5119
1
      </mrow>
5120
1
      <mo>+</mo>
5121
1
      <mrow data-changed='added'>
5122
1
      <mi>f</mi>
5123
1
      <mo data-changed='added'>&#x2061;</mo>
5124
1
      <mrow data-changed='added'>
5125
1
        <mo>(</mo>
5126
1
        <mi>x</mi>
5127
1
        <mo>)</mo>
5128
1
      </mrow>
5129
1
      </mrow>
5130
1
      <mo>+</mo>
5131
1
      <mrow data-changed='added'>
5132
1
      <mi>t</mi>
5133
1
      <mo data-changed='added'>&#x2061;</mo>
5134
1
      <mrow>
5135
1
        <mo>(</mo>
5136
1
        <mi>x</mi>
5137
1
        <mo>)</mo>
5138
1
      </mrow>
5139
1
      </mrow>
5140
1
    </mrow>
5141
1
     </math>";
5142
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5143
1
    }
5144
5145
  #[test]
5146
1
  fn maybe_function() -> Result<()> {
5147
1
    let test_str = "<math>
5148
1
        <mrow>
5149
1
          <mi>P</mi>
5150
1
          <mo>(</mo>
5151
1
          <mi>A</mi>
5152
1
          <mo>∩</mo>
5153
1
          <mi>B</mi>
5154
1
          <mo>)</mo>
5155
1
        </mrow>
5156
1
      </math>";
5157
1
    let target_str = "<math>
5158
1
        <mrow>
5159
1
        <mi>P</mi>
5160
1
        <mo data-function-guess='true' data-changed='added'>&#x2062;</mo>
5161
1
        <mrow data-changed='added'>
5162
1
          <mo>(</mo>
5163
1
          <mrow data-changed='added'>
5164
1
          <mi>A</mi>
5165
1
          <mo>∩</mo>
5166
1
          <mi>B</mi>
5167
1
          </mrow>
5168
1
          <mo>)</mo>
5169
1
        </mrow>
5170
1
        </mrow>
5171
1
      </math>";
5172
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5173
1
  }
5174
5175
    #[test]
5176
1
    fn function_with_multiple_args() -> Result<()> {
5177
1
        let test_str = "<math>
5178
1
    <mi>sin</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
5179
1
      <mo>+</mo>
5180
1
     <mi>f</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
5181
1
      <mo>+</mo>
5182
1
     <mi>t</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
5183
1
      <mo>+</mo>
5184
1
     <mi>w</mi><mo>(</mo><mi>x</mi><mo>,</mo><mi>y</mi><mo>)</mo>
5185
1
    </math>";
5186
1
        let target_str = " <math>
5187
1
    <mrow data-changed='added'>
5188
1
    <mrow data-changed='added'>
5189
1
      <mi>sin</mi>
5190
1
      <mo data-changed='added'>&#x2061;</mo>
5191
1
      <mrow data-changed='added'>
5192
1
      <mo>(</mo>
5193
1
      <mrow data-changed='added'>
5194
1
        <mi>x</mi>
5195
1
        <mo>+</mo>
5196
1
        <mi>y</mi>
5197
1
      </mrow>
5198
1
      <mo>)</mo>
5199
1
      </mrow>
5200
1
    </mrow>
5201
1
    <mo>+</mo>
5202
1
    <mrow data-changed='added'>
5203
1
      <mi>f</mi>
5204
1
      <mo data-changed='added'>&#x2061;</mo>
5205
1
      <mrow data-changed='added'>
5206
1
      <mo>(</mo>
5207
1
      <mrow data-changed='added'>
5208
1
        <mi>x</mi>
5209
1
        <mo>+</mo>
5210
1
        <mi>y</mi>
5211
1
      </mrow>
5212
1
      <mo>)</mo>
5213
1
      </mrow>
5214
1
    </mrow>
5215
1
    <mo>+</mo>
5216
1
    <mrow data-changed='added'>
5217
1
      <mi>t</mi>
5218
1
      <mo data-changed='added' data-function-guess='true'>&#x2062;</mo>
5219
1
      <mrow data-changed='added'>
5220
1
      <mo>(</mo>
5221
1
      <mrow data-changed='added'>
5222
1
        <mi>x</mi>
5223
1
        <mo>+</mo>
5224
1
        <mi>y</mi>
5225
1
      </mrow>
5226
1
      <mo>)</mo>
5227
1
      </mrow>
5228
1
    </mrow>
5229
1
    <mo>+</mo>
5230
1
    <mrow data-changed='added'>
5231
1
      <mi>w</mi>
5232
1
      <mo data-changed='added'>&#x2061;</mo>
5233
1
      <mrow data-changed='added'>
5234
1
      <mo>(</mo>
5235
1
      <mrow data-changed='added'>
5236
1
        <mi>x</mi>
5237
1
        <mo>,</mo>
5238
1
        <mi>y</mi>
5239
1
      </mrow>
5240
1
      <mo>)</mo>
5241
1
      </mrow>
5242
1
    </mrow>
5243
1
    </mrow>
5244
1
      </math>";
5245
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5246
1
    }
5247
5248
    #[test]
5249
1
    fn function_with_no_args() -> Result<()> {
5250
1
        let test_str = "<math><mrow>
5251
1
    <mi>sin</mi><mi>x</mi>
5252
1
      <mo>+</mo>
5253
1
     <mi>f</mi><mi>x</mi>
5254
1
      <mo>+</mo>
5255
1
     <mi>t</mi><mi>x</mi>
5256
1
    </mrow></math>";
5257
1
        let target_str = " <math>
5258
1
    <mrow>
5259
1
      <mrow data-changed='added'>
5260
1
      <mi>sin</mi>
5261
1
      <mo data-changed='added'>&#x2061;</mo>
5262
1
      <mi>x</mi>
5263
1
      </mrow>
5264
1
      <mo>+</mo>
5265
1
      <mrow data-changed='added'>
5266
1
      <mi>f</mi>
5267
1
      <mo data-changed='added'>&#x2062;</mo>
5268
1
      <mi>x</mi>
5269
1
      </mrow>
5270
1
      <mo>+</mo>
5271
1
      <mrow data-changed='added'>
5272
1
      <mi>t</mi>
5273
1
      <mo data-changed='added'>&#x2062;</mo>
5274
1
      <mi>x</mi>
5275
1
      </mrow>
5276
1
    </mrow>
5277
1
     </math>";
5278
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5279
5280
1
  }
5281
5282
5283
    #[test]
5284
1
    fn function_call_vs_implied_times() -> Result<()> {
5285
1
        let test_str = "<math><mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo><mi>y</mi></math>";
5286
1
        let target_str = "<math>
5287
1
      <mrow data-changed='added'>
5288
1
        <mrow data-changed='added'>
5289
1
          <mi>f</mi>
5290
1
          <mo data-changed='added'>&#x2061;</mo>
5291
1
          <mrow data-changed='added'> <mo>(</mo> <mi>x</mi> <mo>)</mo> </mrow>
5292
1
        </mrow>
5293
1
      <mo data-changed='added'>&#x2062;</mo>
5294
1
      <mi>y</mi>    </mrow>
5295
1
     </math>";
5296
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5297
1
    }
5298
5299
    #[test]
5300
1
    fn implied_plus() -> Result<()> {
5301
1
        let test_str = "<math><mrow>
5302
1
    <mn>2</mn><mfrac><mn>3</mn><mn>4</mn></mfrac>
5303
1
    </mrow></math>";
5304
1
        let target_str = "<math>
5305
1
      <mrow>
5306
1
        <mn>2</mn>
5307
1
        <mo data-changed='added'>&#x2064;</mo>
5308
1
        <mfrac>
5309
1
          <mn>3</mn>
5310
1
          <mn>4</mn>
5311
1
        </mfrac>
5312
1
      </mrow>
5313
1
    </math>";
5314
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5315
1
    }
5316
5317
    #[test]
5318
1
    fn implied_plus_linear() -> Result<()> {
5319
1
        let test_str = "<math><mrow>
5320
1
      <mn>2</mn><mspace width='0.278em'></mspace><mn>3</mn><mo>/</mo><mn>4</mn>
5321
1
      </mrow></math>";
5322
1
        let target_str = "<math>
5323
1
      <mrow>
5324
1
        <mn>2</mn>
5325
1
        <mo data-changed='added'>&#x2064;</mo>
5326
1
        <mrow data-changed='added'>>
5327
1
          <mn data-previous-space-width='0.278'>3</mn>
5328
1
          <mo>/</mo>
5329
1
          <mn>4</mn>
5330
1
        </mrow>
5331
1
      </mrow>
5332
1
    </math>";
5333
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5334
1
    }
5335
5336
    #[test]
5337
1
    fn implied_plus_linear2() -> Result<()> {
5338
1
        let test_str = "<math><mrow>
5339
1
      <mn>2</mn><mrow><mn>3</mn><mo>/</mo><mn>4</mn></mrow>
5340
1
      </mrow></math>";
5341
1
        let target_str = "<math>
5342
1
      <mrow>
5343
1
        <mn>2</mn>
5344
1
        <mo data-changed='added'>&#x2064;</mo>
5345
1
        <mrow>
5346
1
          <mn>3</mn>
5347
1
          <mo>/</mo>
5348
1
          <mn>4</mn>
5349
1
        </mrow>
5350
1
      </mrow>
5351
1
    </math>";
5352
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5353
1
    }
5354
5355
    #[test]
5356
1
    fn implied_comma() -> Result<()> {
5357
1
        let test_str = "<math><msub><mi>b</mi><mrow><mn>1</mn><mn>2</mn></mrow></msub></math>";
5358
1
        let target_str = "<math>
5359
1
       <msub><mi>b</mi><mrow><mn>1</mn><mo data-changed='added'>&#x2063;</mo><mn>2</mn></mrow></msub>
5360
1
    </math>";
5361
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5362
1
    }
5363
5364
    #[test]
5365
1
    fn no_implied_comma() -> Result<()> {
5366
1
        let test_str = "<math><mfrac><mi>b</mi><mrow><mn>1</mn><mn>2</mn></mrow></mfrac></math>";
5367
1
        let target_str = "<math>
5368
1
       <mfrac><mi>b</mi><mrow><mn>1</mn><mo data-changed='added'>&#x2062;</mo><mn>2</mn></mrow></mfrac>
5369
1
    </math>";
5370
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5371
1
    }
5372
5373
    #[test]
5374
1
    fn vertical_bars() -> Result<()> {
5375
1
        let test_str = "<math>
5376
1
    <mo>|</mo> <mi>x</mi> <mo>|</mo><mo>+</mo><mo>|</mo>
5377
1
     <mi>a</mi><mo>+</mo><mn>1</mn> <mo>|</mo>
5378
1
    </math>";
5379
1
    let target_str = " <math>
5380
1
    <mrow data-changed='added'>
5381
1
    <mrow data-changed='added'>
5382
1
      <mo>|</mo>
5383
1
      <mi>x</mi>
5384
1
      <mo>|</mo>
5385
1
    </mrow>
5386
1
    <mo>+</mo>
5387
1
    <mrow data-changed='added'>
5388
1
      <mo>|</mo>
5389
1
      <mrow data-changed='added'>
5390
1
      <mi>a</mi>
5391
1
      <mo>+</mo>
5392
1
      <mn>1</mn>
5393
1
      </mrow>
5394
1
      <mo>|</mo>
5395
1
    </mrow>
5396
1
    </mrow>
5397
1
   </math>";
5398
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5399
1
    }
5400
5401
5402
    #[test]
5403
1
    fn vertical_bars_nested() -> Result<()> {
5404
1
        let test_str = "<math><mo>|</mo><mi>x</mi><mo>|</mo><mi>y</mi><mo>|</mo><mi>z</mi><mo>|</mo></math>";
5405
1
    let target_str = "<math>
5406
1
    <mrow data-changed='added'>
5407
1
    <mrow data-changed='added'>
5408
1
      <mo>|</mo>
5409
1
      <mi>x</mi>
5410
1
      <mo>|</mo>
5411
1
    </mrow>
5412
1
    <mo data-changed='added'>&#x2062;</mo>
5413
1
    <mi>y</mi>
5414
1
    <mo data-changed='added'>&#x2062;</mo>
5415
1
    <mrow data-changed='added'>
5416
1
      <mo>|</mo>
5417
1
      <mi>z</mi>
5418
1
      <mo>|</mo>
5419
1
    </mrow>
5420
1
    </mrow>
5421
1
   </math>";
5422
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5423
1
    }
5424
5425
    #[test]
5426
1
    fn double_vertical_bars() -> Result<()> {
5427
1
      let test_str = "<math><mrow><mo>||</mo><mi>x</mi><mo>||</mo><mo>||</mo><mi>y</mi><mo>||</mo></mrow></math>";
5428
1
    let target_str = "<math>
5429
1
      <mrow>
5430
1
        <mrow data-changed='added'><mo>‖</mo><mi>x</mi><mo>‖</mo></mrow>
5431
1
        <mo data-changed='added'>&#x2062;</mo>
5432
1
        <mrow data-changed='added'><mo>‖</mo><mi>y</mi><mo>‖</mo></mrow>
5433
1
      </mrow>
5434
1
    </math>";
5435
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5436
1
    }
5437
5438
    #[test]
5439
1
    fn double_vertical_bars_mo() -> Result<()> {
5440
1
      let test_str = "<math><mo>|</mo><mo>|</mo><mi>a</mi><mo>|</mo><mo>|</mo></math>";
5441
1
    let target_str = "<math><mrow data-changed='added'><mo>‖</mo><mi>a</mi><mo>‖</mo></mrow></math>";
5442
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5443
1
    }
5444
5445
    #[test]
5446
1
    fn no_double_vertical_bars_mo() -> Result<()> {
5447
1
      let test_str = "<math><mo>|</mo><mi>x</mi><mo>|</mo><mo>|</mo><mi>y</mi><mo>|</mo></math>";
5448
1
        let target_str = "<math>  <mrow data-changed='added'>
5449
1
        <mrow data-changed='added'><mo>|</mo><mi>x</mi><mo>|</mo></mrow>
5450
1
        <mo data-changed='added'>&#x2062;</mo>
5451
1
        <mrow data-changed='added'><mo>|</mo><mi>y</mi><mo>|</mo></mrow>
5452
1
      </mrow> </math>";
5453
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5454
1
    }
5455
5456
    #[test]
5457
1
    fn vertical_bar_such_that() -> Result<()> {
5458
1
        let test_str = "<math>
5459
1
        <mo>{</mo><mi>x</mi><mo>|</mo><mi>x</mi><mo>&#x2208;</mo><mi>S</mi><mo>}</mo>
5460
1
            </math>";
5461
1
        let target_str = "<math>
5462
1
    <mrow data-changed='added'>
5463
1
      <mo>{</mo>
5464
1
      <mrow data-changed='added'>
5465
1
      <mi>x</mi>
5466
1
      <mo>|</mo>
5467
1
      <mrow data-changed='added'>
5468
1
        <mi>x</mi>
5469
1
        <mo>∈</mo>
5470
1
        <mi>S</mi>
5471
1
      </mrow>
5472
1
      </mrow>
5473
1
      <mo>}</mo>
5474
1
    </mrow>
5475
1
     </math>";
5476
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5477
1
    }
5478
5479
    #[test]
5480
  #[ignore]  // need to figure out a test for this ("|" should have a precedence around ":" since that is an alternative notation for "such that", but "∣" is higher precedence)
5481
0
    fn vertical_bar_divides() -> Result<()> {
5482
0
        let test_str = "<math>
5483
0
    <mi>x</mi><mo>+</mo><mi>y</mi> <mo>|</mo><mn>12</mn>
5484
0
            </math>";
5485
0
        let target_str = "<math>
5486
0
        <mrow data-changed='added'>
5487
0
        <mrow data-changed='added'>
5488
0
          <mi>x</mi>
5489
0
          <mo>+</mo>
5490
0
          <mi>y</mi>
5491
0
        </mrow>
5492
0
        <mo>∣ <!--divides--></mo>
5493
0
        <mn>12</mn>
5494
0
        </mrow>
5495
0
      </math>";
5496
0
        are_strs_canonically_equal_result(test_str, target_str, &[])
5497
0
    }
5498
5499
5500
    #[test]
5501
1
    fn trig_mo() -> Result<()> {
5502
1
        let test_str = "<math><mo>sin</mo><mi>x</mi>
5503
1
        <mo>+</mo><mo>cos</mo><mi>y</mi>
5504
1
        <mo>+</mo><munder><mo>lim</mo><mi>D</mi></munder><mi>y</mi>
5505
1
      </math>";
5506
1
        let target_str = "<math>
5507
1
    <mrow data-changed='added'>
5508
1
      <mrow data-changed='added'>
5509
1
      <mi>sin</mi>
5510
1
      <mo data-changed='added'>&#x2061;</mo>
5511
1
      <mi>x</mi>
5512
1
      </mrow>
5513
1
      <mo>+</mo>
5514
1
      <mrow data-changed='added'>
5515
1
      <mi>cos</mi>
5516
1
      <mo data-changed='added'>&#x2061;</mo>
5517
1
      <mi>y</mi>
5518
1
      </mrow>
5519
1
      <mo>+</mo>
5520
1
      <mrow data-changed='added'>
5521
1
      <munder>
5522
1
        <mi>lim</mi>
5523
1
        <mi>D</mi>
5524
1
      </munder>
5525
1
      <mo data-changed='added'>&#x2061;</mo>
5526
1
      <mi>y</mi>
5527
1
      </mrow>
5528
1
    </mrow>
5529
1
     </math>";
5530
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5531
1
    }
5532
5533
    #[test]
5534
1
    fn trig_mtext() -> Result<()> {
5535
1
        let test_str = "<math><mtext>sin</mtext><mi>x</mi>
5536
1
        <mo>+</mo><mtext>cos</mtext><mi>y</mi>
5537
1
        <mo>+</mo><munder><mtext>lim</mtext><mi>D</mi></munder><mi>y</mi>
5538
1
      </math>";
5539
1
        let target_str = "<math>
5540
1
    <mrow data-changed='added'>
5541
1
      <mrow data-changed='added'>
5542
1
      <mi>sin</mi>
5543
1
      <mo data-changed='added'>&#x2061;</mo>
5544
1
      <mi>x</mi>
5545
1
      </mrow>
5546
1
      <mo>+</mo>
5547
1
      <mrow data-changed='added'>
5548
1
      <mi>cos</mi>
5549
1
      <mo data-changed='added'>&#x2061;</mo>
5550
1
      <mi>y</mi>
5551
1
      </mrow>
5552
1
      <mo>+</mo>
5553
1
      <mrow data-changed='added'>
5554
1
      <munder>
5555
1
        <mi>lim</mi>
5556
1
        <mi>D</mi>
5557
1
      </munder>
5558
1
      <mo data-changed='added'>&#x2061;</mo>
5559
1
      <mi>y</mi>
5560
1
      </mrow>
5561
1
    </mrow>
5562
1
     </math>";
5563
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5564
1
    }
5565
  
5566
    #[test]
5567
1
    fn trig_negative_args() -> Result<()> {
5568
1
        let test_str = "<math><mi>sin</mi><mo>-</mo><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5569
1
        let target_str = "<math>
5570
1
    <mrow data-changed='added'>
5571
1
      <mi>sin</mi>
5572
1
      <mo data-changed='added'>&#x2061;</mo>
5573
1
      <mrow data-changed='added'>
5574
1
      <mrow data-changed='added'>
5575
1
        <mo>-</mo>
5576
1
        <mn>2</mn>
5577
1
      </mrow>
5578
1
      <mo data-changed='added'>&#x2062;</mo>
5579
1
      <mi>π</mi>
5580
1
      <mo data-changed='added'>&#x2062;</mo>
5581
1
      <mi>x</mi>
5582
1
      </mrow>
5583
1
    </mrow>
5584
1
     </math>";
5585
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5586
1
    }
5587
  
5588
    #[test]
5589
1
    fn not_trig_negative_args() -> Result<()> {
5590
    // this is here to make sure that only trig functions get the special treatment
5591
1
        let test_str = "<math><mi>ker</mi><mo>-</mo><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5592
1
        let target_str = "<math>
5593
1
      <mrow data-changed='added'>
5594
1
          <mrow data-changed='added'>
5595
1
          <mi>ker</mi>
5596
1
          <mo data-changed='added'>&#x2061;</mo>
5597
1
          <mrow data-changed='added'>
5598
1
            <mo>-</mo>
5599
1
            <mn>2</mn>
5600
1
          </mrow>
5601
1
          </mrow>
5602
1
        <mo data-changed='added'>&#x2062;</mo>
5603
1
        <mi>π</mi>
5604
1
        <mo data-changed='added'>&#x2062;</mo>
5605
1
        <mi>x</mi>
5606
1
      </mrow>
5607
1
    </math>";
5608
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5609
1
    }
5610
5611
    #[test]
5612
1
    fn trig_args() -> Result<()> {
5613
1
        let test_str = "<math><mi>sin</mi><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5614
1
        let target_str = "<math>
5615
1
    <mrow data-changed='added'>
5616
1
      <mi>sin</mi>
5617
1
      <mo data-changed='added'>&#x2061;</mo>
5618
1
      <mrow data-changed='added'>
5619
1
      <mn>2</mn>
5620
1
      <mo data-changed='added'>&#x2062;</mo>
5621
1
      <mi>π</mi>
5622
1
      <mo data-changed='added'>&#x2062;</mo>
5623
1
      <mi>x</mi>
5624
1
      </mrow>
5625
1
    </mrow>
5626
1
     </math>";
5627
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5628
1
    }
5629
5630
    #[test]
5631
1
    fn not_trig_args() -> Result<()> {
5632
    // this is here to make sure that only trig functions get the special treatment
5633
1
        let test_str = "<math><mi>ker</mi><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5634
1
        let target_str = "<math>
5635
1
    <mrow data-changed='added'>
5636
1
      <mrow data-changed='added'>
5637
1
        <mi>ker</mi>
5638
1
        <mo data-changed='added'>&#x2061;</mo>
5639
1
        <mn>2</mn>
5640
1
      </mrow>
5641
1
      <mo data-changed='added'>&#x2062;</mo>
5642
1
      <mi>π</mi>
5643
1
      <mo data-changed='added'>&#x2062;</mo>
5644
1
      <mi>x</mi>
5645
1
    </mrow>
5646
1
     </math>";
5647
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5648
1
    }
5649
5650
    #[test]
5651
1
    fn trig_trig() -> Result<()> {
5652
1
        let test_str = "<math><mi>sin</mi><mi>x</mi><mi>cos</mi><mi>y</mi></math>";
5653
1
        let target_str = "<math>
5654
1
    <mrow data-changed='added'>
5655
1
      <mrow data-changed='added'>
5656
1
        <mi>sin</mi>
5657
1
        <mo data-changed='added'>&#x2061;</mo>
5658
1
        <mi>x</mi>
5659
1
      </mrow>
5660
1
      <mo data-changed='added'>&#x2062;</mo>
5661
1
      <mrow data-changed='added'>
5662
1
        <mi>cos</mi>
5663
1
        <mo data-changed='added'>&#x2061;</mo>
5664
1
        <mi>y</mi>
5665
1
      </mrow>
5666
1
    </mrow>
5667
1
    </math>";
5668
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5669
1
    }
5670
5671
    #[test]
5672
1
    fn trig_function_composition() -> Result<()> {
5673
1
        let test_str = "<math><mo>(</mo><mi>sin</mi><mo>-</mo><mi>cos</mi><mo>)</mo><mi>x</mi></math>";
5674
1
        let target_str = "<math>
5675
1
    <mrow data-changed='added'>
5676
1
      <mrow data-changed='added'>
5677
1
      <mo>(</mo>
5678
1
      <mrow data-changed='added'>
5679
1
        <mi>sin</mi>
5680
1
        <mo>-</mo>
5681
1
        <mi>cos</mi>
5682
1
      </mrow>
5683
1
      <mo>)</mo>
5684
1
      </mrow>
5685
1
      <mo data-changed='added'>&#x2062;</mo>
5686
1
      <mi>x</mi>
5687
1
    </mrow>
5688
1
     </math>";
5689
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5690
1
    }
5691
5692
  
5693
  #[test]
5694
1
    fn currency_in_leaf_prefix() -> Result<()> {
5695
1
        let test_str = "<math><mn>$8.54</mn></math>";
5696
1
        let target_str = "<math>
5697
1
      <mrow data-changed='added'>
5698
1
      <mi>$</mi>
5699
1
      <mo data-changed='added'>&#x2062;</mo>
5700
1
      <mn>8.54</mn>
5701
1
      </mrow>
5702
1
    </math>";
5703
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5704
1
  }
5705
5706
  #[test]
5707
1
    fn currency_in_leaf_postfix() -> Result<()> {
5708
1
        let test_str = "<math><mn>188,23€</mn></math>";
5709
1
        let target_str = " <math>
5710
1
      <mrow data-changed='added'>
5711
1
        <mo data-changed='added'>&#x2062;</mo>
5712
1
        <mn>188,23</mn>
5713
1
        <mo data-changed='added'>&#x2062;</mo>
5714
1
        <mi>€</mi>
5715
1
      </mrow>
5716
1
    </math>";
5717
1
   are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ",")
5718
1
}
5719
5720
  #[test]
5721
1
    fn currency_in_leaf_infix() -> Result<()> {
5722
1
        let test_str = "<math><mn>1€23</mn></math>";
5723
1
        let target_str = " <math>
5724
1
      <mrow data-changed='added'>
5725
1
        <mn>1</mn>
5726
1
        <mo data-changed='added'>&#x2062;</mo>
5727
1
        <mi>€</mi>
5728
1
        <mo data-changed='added'>&#x2062;</mo>
5729
1
        <mn>23</mn>
5730
1
      </mrow>
5731
1
    </math>";
5732
1
   are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ",")
5733
1
}
5734
  
5735
  #[test]
5736
1
    fn mtext_whitespace_string() -> Result<()> {
5737
1
        let test_str = "<math><mi>t</mi><mtext>&#x00A0;&#x205F;</mtext></math>";
5738
1
        let target_str = "<math><mi data-following-space-width='0.922'>t</mi></math>";
5739
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5740
1
  }
5741
  
5742
  #[test]
5743
1
    fn mtext_whitespace_string_before() -> Result<()> {
5744
1
        let test_str = "<math><mtext>&#x00A0;&#x205F;</mtext><mi>t</mi></math>";
5745
1
        let target_str = "<math><mi data-previous-space-width='0.922'>t</mi></math>";
5746
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5747
1
  }
5748
  
5749
  #[test]
5750
1
    fn mtext_whitespace_1() -> Result<()> {
5751
1
        let test_str = "<math><mi>t</mi><mtext>&#x00A0;&#x205F;</mtext>
5752
1
        <mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
5753
1
        let target_str = " <math>
5754
1
    <mrow data-changed='added'>
5755
1
      <mi>t</mi>
5756
1
      <mo data-changed='added' data-function-guess='true'>&#x2062;</mo>
5757
1
      <mrow data-previous-space-width='0.922'>
5758
1
      <mo>(</mo>
5759
1
      <mrow data-changed='added'>
5760
1
        <mi>x</mi>
5761
1
        <mo>+</mo>
5762
1
        <mi>y</mi>
5763
1
      </mrow>
5764
1
      <mo>)</mo>
5765
1
      </mrow>
5766
1
    </mrow>
5767
1
     </math>";
5768
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5769
1
  }
5770
  
5771
  #[test]
5772
1
    fn mtext_whitespace_2() -> Result<()> {
5773
1
        let test_str = "<math><mi>f</mi><mtext>&#x00A0;&#x205F;</mtext>
5774
1
        <mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
5775
1
        let target_str = " <math>
5776
1
    <mrow data-changed='added'>
5777
1
      <mi>f</mi>
5778
1
      <mo data-changed='added'>&#x2061;</mo>
5779
1
      <mrow  data-previous-space-width='0.922'>
5780
1
      <mo>(</mo>
5781
1
      <mrow data-changed='added'>
5782
1
        <mi>x</mi>
5783
1
        <mo>+</mo>
5784
1
        <mi>y</mi>
5785
1
      </mrow>
5786
1
      <mo>)</mo>
5787
1
      </mrow>
5788
1
    </mrow>
5789
1
     </math>";
5790
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5791
1
  }
5792
5793
  #[test]
5794
1
    fn remove_mtext_whitespace_3() -> Result<()> {
5795
1
        let test_str = "<math><mi>t</mi>
5796
1
        <mrow><mtext>&#x2009;</mtext><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
5797
1
        let target_str = "<math>
5798
1
    <mrow data-changed='added'>
5799
1
      <mi>t</mi>
5800
1
      <mo data-changed='added' data-function-guess='true'>&#x2062;</mo>
5801
1
      <mrow>
5802
1
      <mo data-previous-space-width='0.167'>(</mo>
5803
1
      <mrow data-changed='added'>
5804
1
        <mi>x</mi>
5805
1
        <mo>+</mo>
5806
1
        <mi>y</mi>
5807
1
      </mrow>
5808
1
      <mo>)</mo>
5809
1
      </mrow>
5810
1
    </mrow>
5811
1
     </math>";
5812
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5813
1
  }
5814
5815
  #[test]
5816
1
    fn do_not_remove_any_whitespace() -> Result<()> {
5817
1
        let test_str = "<math><mfrac>
5818
1
          <mrow><mspace width='3em'/></mrow>
5819
1
          <mtext>&#x2009;</mtext>
5820
1
        </mfrac></math>";
5821
1
        let target_str = " <math>
5822
1
      <mfrac>
5823
1
        <mtext width='3em' data-changed='was-mspace' data-width='3' data-empty-in-2D='true'> </mtext>
5824
1
        <mtext data-width='0.167' data-empty-in-2D='true'> </mtext>
5825
1
      </mfrac>
5826
1
     </math>";
5827
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5828
1
  }
5829
5830
  #[test]
5831
1
    fn remove_mo_whitespace() -> Result<()> {
5832
1
        let test_str = "<math><mi>cos</mi><mo>&#xA0;</mo><mi>x</mi></math>";
5833
1
        let target_str = "<math>
5834
1
        <mrow data-changed='added'>
5835
1
          <mi>cos</mi>
5836
1
          <mo data-changed='added'>&#x2061;</mo>
5837
1
          <mi data-previous-space-width='0.7'>x</mi>
5838
1
        </mrow>
5839
1
        </math>";
5840
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5841
1
  }
5842
5843
  #[test]
5844
1
    fn do_not_remove_some_whitespace() -> Result<()> {
5845
1
        let test_str = "<math><mroot>
5846
1
          <mrow><mi>b</mi><mphantom><mi>y</mi></mphantom></mrow>
5847
1
          <mtext>&#x2009;</mtext>
5848
1
        </mroot></math>";
5849
1
        let target_str = "<math><mroot>
5850
1
        <mi>b</mi>
5851
1
        <mtext data-empty-in-2D='true' data-width='0.167'>&#xA0;</mtext>
5852
1
      </mroot></math>";
5853
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5854
1
  }
5855
5856
  #[test]
5857
1
    fn remove_all_extra_elements() -> Result<()> {
5858
1
        let test_str = "<math><msqrt>
5859
1
          <mstyle> <mi>b</mi> </mstyle>
5860
1
          <mphantom><mi>y</mi></mphantom>
5861
1
          <mtext>&#x2009;</mtext>
5862
1
          <mspace width='3em'/>
5863
1
        </msqrt></math>";
5864
1
        let target_str = "<math><msqrt>
5865
1
        <mi data-following-space-width='3.167'>b</mi>
5866
1
      </msqrt></math>";
5867
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5868
1
  }
5869
5870
  #[test]
5871
1
    fn empty_content() -> Result<()> {
5872
1
        let test_str = "<math></math>";
5873
1
        let target_str = " <math><mtext data-added='missing-content' data-width='0.700'> </mtext></math>";
5874
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5875
1
  }
5876
5877
  #[test]
5878
1
    fn empty_content_after_cleanup() -> Result<()> {
5879
1
        let test_str = "<math><mrow><mphantom><mn>1</mn></mphantom></mrow></math>";
5880
1
        let target_str = " <math><mtext data-added='missing-content' data-width='0'> </mtext></math>";
5881
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5882
1
  }
5883
5884
  #[test]
5885
1
    fn empty_content_fix_num_children() -> Result<()> {
5886
1
        let test_str = "  <math><mfrac><menclose notation='box'><mrow/></menclose><mrow/></mfrac></math>";
5887
1
        let target_str = "<math>
5888
1
    <mfrac>
5889
1
      <menclose notation='box'>
5890
1
      <mtext data-added='missing-content' data-empty-in-2D='true' data-width='0'> </mtext>
5891
1
      </menclose>
5892
1
      <mtext data-changed='empty_content' data-empty-in-2D='true' data-width='0'> </mtext>
5893
1
    </mfrac>
5894
1
     </math>";
5895
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5896
1
  }
5897
5898
5899
  #[test]
5900
1
    fn clean_semantics() -> Result<()> {
5901
    // this comes from LateXML
5902
1
        let test_str = "<math>
5903
1
        <semantics>
5904
1
          <mrow><mi>z</mi></mrow>
5905
1
          <annotation-xml encoding='MathML-Content'>
5906
1
            <ci>𝑧</ci>
5907
1
          </annotation-xml>
5908
1
          <annotation encoding='application/x-tex'>z</annotation>
5909
1
          <annotation encoding='application/x-llamapun'>italic_z</annotation>
5910
1
        </semantics>
5911
1
      </math>";
5912
    // the annotation-xml value is very touchy and must exactly match what mml-to-string() generates for the test to pass
5913
1
    let target_str = " <math>
5914
1
    <mi data-annotation-xml-MathML-Content=' &lt;annotation-xml encoding=&apos;MathML-Content&apos;&gt;
5915
1
  &lt;ci&gt;𝑧&lt;/ci&gt;
5916
1
 &lt;/annotation-xml&gt;
5917
1
' data-annotation-application_slash_x-tex='z' data-annotation-application_slash_x-llamapun='italic_z'>z</mi>
5918
1
     </math>";
5919
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5920
1
  }
5921
5922
  #[test]
5923
1
    fn clean_up_mi_operator() -> Result<()> {
5924
1
        let test_str = "<math><mrow><mi>∠</mi><mi>A</mi><mi>B</mi><mi>C</mi></mrow></math>";
5925
1
        let target_str = " <math>
5926
1
        <mrow>
5927
1
        <mo>∠</mo>
5928
1
        <mrow data-changed='added'>
5929
1
          <mi>A</mi>
5930
1
          <mo data-changed='added'>&#x2063;</mo>
5931
1
          <mi>B</mi>
5932
1
          <mo data-changed='added'>&#x2063;</mo>
5933
1
          <mi>C</mi>
5934
1
        </mrow>
5935
1
        </mrow>
5936
1
      </math>";
5937
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5938
1
  }
5939
5940
5941
  #[test]
5942
1
    fn clean_up_arc() -> Result<()> {
5943
1
        let test_str = "<math><mtext>arc&#xA0;</mtext><mi>cos</mi><mi>x</mi></math>";
5944
1
        let target_str = "<math>
5945
1
      <mrow data-changed='added'>
5946
1
      <mi>arccos</mi>
5947
1
      <mo data-changed='added'>&#x2061;</mo>
5948
1
      <mi>x</mi>
5949
1
      </mrow>
5950
1
    </math>";
5951
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5952
1
  }
5953
5954
  #[test]
5955
1
    fn clean_up_arc_nospace() -> Result<()> {
5956
1
        let test_str = "<math><mtext>arc</mtext><mi>cos</mi><mi>x</mi></math>";
5957
1
        let target_str = "<math>
5958
1
      <mrow data-changed='added'>
5959
1
      <mi>arccos</mi>
5960
1
      <mo data-changed='added'>&#x2061;</mo>
5961
1
      <mi>x</mi>
5962
1
      </mrow>
5963
1
    </math>";
5964
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5965
1
  }
5966
5967
  #[test]
5968
1
    fn roman_numeral() -> Result<()> {
5969
1
        let test_str = "<math><mrow><mtext>XLVIII</mtext> <mo>+</mo><mn>mmxxvi</mn></mrow></math>";
5970
    // turns out there is no need to mark them as Roman Numerals -- thought that was need for braille
5971
1
        let target_str = "<math><mrow>
5972
1
      <mn data-roman-numeral='true' data-number='48'>XLVIII</mn> <mo>+</mo><mn data-roman-numeral='true' data-number='2026'>mmxxvi</mn>
5973
1
      </mrow></math>";
5974
        // let target_str = "<math><mrow><mtext>XLVIII</mtext> <mo>+</mo><mn>mmxxvi</mn></mrow></math>";
5975
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5976
1
  }
5977
5978
  // #[test]
5979
    // fn roman_numeral_context() {
5980
    //     let test_str = "<math><mi>vi</mi><mo>-</mo><mi mathvariant='normal'>i</mi><mo>=</mo><mtext>v</mtext></math>";
5981
    //     let target_str = "<math> <mrow data-changed='added'>
5982
  //    <mrow data-changed='added'><mn data-roman-numeral='true'>vi</mn><mo>-</mo><mn mathvariant='normal' data-roman-numeral='true'>i</mn></mrow> 
5983
  //    <mo>=</mo> <mn data-roman-numeral='true'>v</mn>
5984
  //  </mrow> </math>";
5985
    //     are_strs_canonically_equal_result(test_str, target_str, &[])
5986
  // }
5987
5988
  #[test]
5989
1
    fn not_roman_numeral() -> Result<()> {
5990
1
        let test_str = "<math><mtext>cm</mtext></math>";
5991
    // shouldn't change
5992
1
        let target_str = "<math><mtext>cm</mtext></math>";
5993
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5994
1
  }
5995
5996
  #[test]
5997
1
    fn digit_block_binary() -> Result<()> {
5998
1
        let test_str = "<math><mo>(</mo><mn>0110</mn><mspace width=\"thickmathspace\"></mspace><mn>1110</mn><mspace width=\"thickmathspace\"></mspace><mn>0110</mn><mo>)</mo></math>";
5999
1
        let target_str = " <math>
6000
1
        <mrow data-changed='added'>
6001
1
        <mo>(</mo>
6002
1
        <mn>0110\u{00A0}1110\u{00A0}0110</mn>
6003
1
        <mo>)</mo>
6004
1
        </mrow>
6005
1
      </math>";
6006
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6007
1
  }
6008
6009
  #[test]
6010
1
    fn digit_block_decimal() -> Result<()> {
6011
1
        let test_str = "<math><mn>8</mn><mo>,</mo><mn>123</mn><mo>,</mo><mn>456</mn><mo>+</mo>
6012
1
                    <mn>4</mn><mo>.</mo><mn>32</mn></math>";
6013
1
        let target_str = " <math>
6014
1
        <mrow data-changed='added'>
6015
1
        <mn>8,123,456</mn>
6016
1
        <mo>+</mo>
6017
1
        <mn>4.32</mn>
6018
1
        </mrow>
6019
1
      </math>";
6020
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6021
1
  }
6022
  #[test]
6023
1
    fn digit_block_comma() -> Result<()> {
6024
1
        let test_str = "<math><mn>8</mn><mo>.</mo><mn>123</mn><mo>.</mo><mn>456</mn><mo>+</mo>
6025
1
                    <mn>4</mn><mo>,</mo><mn>32</mn></math>";
6026
1
        let target_str = " <math>
6027
1
        <mrow data-changed='added'>
6028
1
        <mn>8.123.456</mn>
6029
1
        <mo>+</mo>
6030
1
        <mn>4,32</mn>
6031
1
        </mrow>
6032
1
      </math>";
6033
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6034
1
  }
6035
6036
  #[test]
6037
1
  fn digit_block_int() -> Result<()> {
6038
1
        let test_str = "<math><mn>12</mn><mo>,</mo><mn>345</mn><mo>+</mo>
6039
1
                    <mn>1</mn><mo>,</mo><mn>000</mn></math>";
6040
1
        let target_str = " <math>
6041
1
        <mrow data-changed='added'>
6042
1
        <mn>12,345</mn>
6043
1
        <mo>+</mo>
6044
1
        <mn>1,000</mn>
6045
1
        </mrow>
6046
1
      </math>";
6047
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6048
1
  }
6049
6050
  #[test]
6051
1
  fn digit_block_non_ascii_int() -> Result<()> {
6052
1
        let test_str = "<math><mn>𝟏𝟐</mn><mo>,</mo><mn>3𝟰𝟻</mn><mo>+</mo>
6053
1
                    <mn>𝟙</mn><mo>,</mo><mn>𝟬𝟬𝟬</mn></math>";
6054
1
        let target_str = " <math>
6055
1
        <mrow data-changed='added'>
6056
1
        <mn>𝟏𝟐,3𝟰𝟻</mn>
6057
1
        <mo>+</mo>
6058
1
        <mn>𝟙,𝟬𝟬𝟬</mn>
6059
1
        </mrow>
6060
1
      </math>";
6061
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6062
1
  }
6063
6064
  #[test]
6065
1
  fn digit_block_int_dots() -> Result<()> {
6066
1
        let test_str = "<math><mn>12</mn><mo>.</mo><mn>345</mn><mo>+</mo>
6067
1
                    <mn>1</mn><mo>.</mo><mn>000</mn></math>";
6068
1
        let target_str = " <math>
6069
1
        <mrow data-changed='added'>
6070
1
        <mn>12.345</mn>
6071
1
        <mo>+</mo>
6072
1
        <mn>1.000</mn>
6073
1
        </mrow>
6074
1
      </math>";
6075
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6076
1
  }
6077
6078
  #[test]
6079
1
    fn digit_block_decimal_pt() -> Result<()> {
6080
1
        let test_str = "<math><mn>8</mn><mo>,</mo><mn>123</mn><mo>.</mo>
6081
1
                <mo>+</mo><mn>4</mn><mo>.</mo>
6082
1
                <mo>+</mo><mo>.</mo><mn>01</mn></math>";
6083
1
        let target_str = " <math>
6084
1
        <mrow data-changed='added'>
6085
1
        <mn>8,123.</mn>
6086
1
        <mo>+</mo>
6087
1
        <mn>4.</mn>
6088
1
        <mo>+</mo>
6089
1
        <mn>.01</mn>
6090
1
        </mrow>
6091
1
      </math>";
6092
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6093
1
  }
6094
6095
  #[test]
6096
1
    fn number_with_decimal_pt() -> Result<()> {
6097
    // this is output from WIRIS for "12.3"
6098
1
        let test_str = "<math><mn>12</mn><mo>.</mo><mn>3</mn></math>";
6099
1
        let target_str = "<math><mn>12.3</mn></math>";
6100
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6101
1
  }
6102
6103
  #[test]
6104
1
    fn number_with_comma_decimal_pt() -> Result<()> {
6105
    // this is output from WIRIS for "12.3"
6106
1
        let test_str = "<math><mn>12</mn><mo>,</mo><mn>3</mn></math>";
6107
1
        let target_str = "<math><mn>12,3</mn></math>";
6108
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6109
1
  }
6110
6111
  #[test]
6112
1
    fn addition_with_decimal_point_at_end() -> Result<()> {
6113
    // in this case, the trailing "." is probably a decimal point" -- testing special case combine the "."
6114
    // this comes from WIRIS
6115
1
        let test_str = "<math><mn>1</mn><mo>.</mo><mn>3</mn><mo>+</mo><mn>2</mn><mo>.</mo></math>";
6116
1
        let target_str = "<math><mrow data-changed='added'><mn>1.3</mn><mo>+</mo><mn>2.</mn></mrow></math>";
6117
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6118
1
  }
6119
6120
  #[test]
6121
1
    fn addition_with_decimal_point_at_end_and_comma_decimal_separator() -> Result<()> {
6122
    // in this case, the trailing "." is probably a decimal point" -- testing special case combine the "."
6123
    // this comes from WIRIS
6124
1
        let test_str = "<math><mn>1</mn><mo>,</mo><mn>3</mn><mo>+</mo><mn>2</mn><mo>,</mo></math>";
6125
1
        let target_str = "<math><mrow data-changed='added'><mn>1,3</mn><mo>+</mo><mn>2,</mn></mrow></math>";
6126
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6127
1
  }
6128
6129
  #[test]
6130
1
    fn sequence_with_period() -> Result<()> {
6131
    // in this case, we don't want "5." -- testing special case to avoid combining the period.
6132
1
        let test_str = "<math><mn>1</mn><mo>,</mo><mn>3</mn><mo>,</mo><mn>5</mn><mo>.</mo></math>";
6133
1
        let target_str = "<math><mrow data-changed='added'>
6134
1
        <mrow data-changed='added'><mn>1</mn><mo>,</mo><mn>3</mn><mo>,</mo><mn>5</mn></mrow><mo>.</mo>
6135
1
      </mrow></math>";
6136
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6137
1
  }
6138
6139
  #[test]
6140
1
    fn addition_decimal_pt() -> Result<()> {
6141
1
        let test_str = "<math><mo>.</mo><mn>4</mn><mo>=</mo><mn>0</mn><mo>.</mo><mn>4</mn></math>";
6142
1
        let target_str = "<math><mrow data-changed='added'><mn>.4</mn><mo>=</mo><mn>0.4</mn></mrow></math>";
6143
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6144
1
  }
6145
6146
  #[test]
6147
1
    fn fraction_decimal_pt() -> Result<()> {
6148
1
        let test_str = "<math><mfrac><mrow><mn>1</mn><mo>.</mo></mrow><mrow><mn>2</mn><mo>.</mo></mrow></mfrac></math>";
6149
1
        let target_str = "<math><mfrac><mn>1.</mn><mn>2.</mn></mfrac></math>";
6150
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6151
1
  }
6152
6153
  #[test]
6154
1
    fn fraction_decimal_pt_no_split() -> Result<()> {
6155
    // don't split off the '.'
6156
1
        let test_str = "<math><mfrac><mn>1.</mn><mn>2.</mn></mfrac></math>";
6157
1
        let target_str = "<math><mfrac><mn>1.</mn><mn>2.</mn></mfrac></math>";
6158
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6159
1
  }
6160
6161
  #[test]
6162
1
    fn not_digit_block_parens() -> Result<()> {
6163
1
        let test_str = "<math><mo>(</mo><mn>451</mn><mo>,</mo><mn>231</mn><mo>)</mo></math>";
6164
1
        let target_str = " <math> <mrow data-changed='added'>
6165
1
        <mo>(</mo>
6166
1
        <mrow data-changed='added'>
6167
1
        <mn>451</mn> <mo>,</mo> <mn>231</mn>
6168
1
        </mrow>
6169
1
        <mo>)</mo>
6170
1
      </mrow></math>";
6171
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6172
1
  }
6173
6174
  #[test]
6175
1
    fn not_digit_block_parens_mrow() -> Result<()> {
6176
1
        let test_str = "<math><mo>(</mo><mrow><mn>451</mn><mo>,</mo><mn>231</mn></mrow><mo>)</mo></math>";
6177
1
        let target_str = " <math> <mrow data-changed='added'>
6178
1
        <mo>(</mo>
6179
1
        <mrow>
6180
1
        <mn>451</mn> <mo>,</mo> <mn>231</mn>
6181
1
        </mrow>
6182
1
        <mo>)</mo>
6183
1
      </mrow></math>";
6184
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6185
1
  }
6186
6187
  #[test]
6188
1
    fn not_digit_block_decimal() -> Result<()> {
6189
1
    let test_str = "<math><mn>8</mn><mo>,</mo><mn>49</mn><mo>,</mo><mn>456</mn><mo>+</mo>
6190
1
                    <mn>4</mn><mtext> </mtext><mn>32</mn><mo>+</mo>
6191
1
                  <mn>1</mn><mo>,</mo><mn>234</mn><mo>,</mo><mn>56</mn></math>";
6192
1
        let target_str = "<math>
6193
1
        <mrow data-changed='added'>
6194
1
        <mn>8</mn>
6195
1
        <mo>,</mo>
6196
1
        <mn>49</mn>
6197
1
        <mo>,</mo>
6198
1
        <mrow data-changed='added'>
6199
1
          <mn>456</mn>
6200
1
          <mo>+</mo>
6201
1
          <mrow data-changed='added'>
6202
1
          <mn>4</mn>
6203
1
          <mo data-changed='added'>&#x2062;</mo>
6204
1
          <mn>32</mn>
6205
1
          </mrow>
6206
1
          <mo>+</mo>
6207
1
          <mn>1</mn>
6208
1
        </mrow>
6209
1
        <mo>,</mo>
6210
1
        <mn>234</mn>
6211
1
        <mo>,</mo>
6212
1
        <mn>56</mn>
6213
1
        </mrow>
6214
1
      </math>";
6215
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6216
1
  }
6217
6218
  #[test]
6219
1
    fn not_digit_block_ellipsis() -> Result<()> {
6220
1
        let test_str = "<math><mrow><mn>8</mn><mo>,</mo><mn>123</mn><mo>,</mo><mn>456</mn><mo>,</mo>
6221
1
                    <mi>…</mi></mrow></math>";
6222
1
        let target_str = "<math>
6223
1
    <mrow>
6224
1
      <mn>8</mn>
6225
1
      <mo>,</mo>
6226
1
      <mn>123</mn>
6227
1
      <mo>,</mo>
6228
1
      <mn>456</mn>
6229
1
      <mo>,</mo>
6230
1
      <mi>…</mi>
6231
1
    </mrow>
6232
1
     </math>";
6233
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6234
1
  }
6235
6236
  #[test]
6237
1
    fn not_digit_block_negative_numbers_euro() -> Result<()> {
6238
1
        let test_str = "<math><mrow>
6239
1
      <mo>-</mo><mn>1</mn><mo>,</mo>
6240
1
      <mo>-</mo><mn>2</mn><mo>,</mo>
6241
1
      <mo>-</mo><mn>3</mn><mo>,</mo>
6242
1
      <mo>&#x2026;</mo>
6243
1
    </mrow></math>";
6244
1
        let target_str = "<math><mrow>
6245
1
        <mrow data-changed='added'>
6246
1
          <mo>-</mo>
6247
1
          <mn>1</mn>
6248
1
        </mrow>
6249
1
        <mo>,</mo>
6250
1
        <mrow data-changed='added'>
6251
1
          <mo>-</mo>
6252
1
          <mn>2</mn>
6253
1
        </mrow>
6254
1
        <mo>,</mo>
6255
1
        <mrow data-changed='added'>
6256
1
          <mo>-</mo>
6257
1
          <mn>3</mn>
6258
1
        </mrow>
6259
1
        <mo>,</mo>
6260
1
        <mi>…</mi>
6261
1
      </mrow></math>";
6262
1
      are_strs_canonically_equal_with_locale(test_str, target_str, &[], " .", ",")
6263
1
  }
6264
6265
  #[test]
6266
1
    fn ellipsis() -> Result<()> {
6267
1
        let test_str = "<math><mn>5</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>8</mn><mo>,</mo>
6268
1
        <mn>9</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>11</mn><mo>,</mo>
6269
1
        <mn>5</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>8</mn>
6270
1
      </math>";
6271
1
        let target_str = "<math><mrow data-changed='added'>
6272
1
      <mn>5</mn><mo>,</mo><mi>…</mi><mo>,</mo><mn>8</mn><mo>,</mo>
6273
1
      <mn>9</mn><mo>,</mo><mi>…</mi><mo>,</mo><mn>11</mn><mo>,</mo>
6274
1
      <mn>5</mn><mo>,</mo><mrow data-changed='added'><mo>.</mo><mo>.</mo></mrow>
6275
1
      <mo>,</mo><mn>8</mn></mrow></math>";
6276
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6277
1
  }
6278
6279
6280
  #[test]
6281
1
    fn no_merge_271() -> Result<()> {
6282
1
        let test_str = "<math><mrow><mo>{</mo>
6283
1
        <mrow><mn>2</mn><mo>,</mo><mn>4</mn><mo>,</mo><mn>6</mn></mrow>
6284
1
      <mo>}</mo></mrow></math>";
6285
1
        let target_str = "<math><mrow><mo>{</mo>
6286
1
        <mrow><mn>2</mn><mo>,</mo><mn>4</mn><mo>,</mo><mn>6</mn></mrow>
6287
1
      <mo>}</mo></mrow></math>";
6288
1
      are_strs_canonically_equal_with_locale(test_str, target_str, &[], " .", ",")
6289
1
  }
6290
6291
  #[test]
6292
1
    fn not_digit_block_271() -> Result<()> {
6293
1
        let test_str = "<math><mrow>
6294
1
        <mi>…</mi><mo>,</mo>
6295
1
        <mo>-</mo><mn>2</mn><mo>,</mo>
6296
1
        <mo>-</mo><mn>1</mn><mo>,</mo>
6297
1
        <mn>0</mn>
6298
1
      </mrow></math>";
6299
1
        let target_str = "<math> <mrow>
6300
1
      <mi>…</mi>
6301
1
      <mo>,</mo>
6302
1
      <mrow data-changed='added'><mo>-</mo><mn>2</mn></mrow>
6303
1
      <mo>,</mo>
6304
1
      <mrow data-changed='added'><mo>-</mo><mn>1</mn></mrow>
6305
1
      <mo>,</mo>
6306
1
      <mn>0</mn>
6307
1
      </mrow></math>";
6308
1
      are_strs_canonically_equal_with_locale(test_str, target_str, &[], " .", ",")
6309
1
  }
6310
6311
  #[test]
6312
1
    fn merge_decimal_in_list_271() -> Result<()> {
6313
1
        let test_str = "<math><mi>x</mi><mo>,</mo><mn>2</mn><mo>.</mo><mn>5</mn><mi>g</mi><mo>,</mo><mn>3</mn></math>";
6314
1
        let target_str = "<math> <mrow data-changed='added'>
6315
1
        <mi>x</mi>
6316
1
        <mo>,</mo>
6317
1
        <mrow data-changed='added'> <mn>2.5</mn> <mo data-changed='added'>&#x2062;</mo> <mi>g</mi> </mrow>
6318
1
        <mo>,</mo>
6319
1
        <mn>3</mn>
6320
1
      </mrow> </math>";
6321
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6322
1
  }
6323
6324
  #[test]
6325
1
    fn primes_common() -> Result<()> {
6326
1
        let test_str = "<math><msup><mn>5</mn><mo>'</mo></msup>
6327
1
              <msup><mn>5</mn><mo>''</mo></msup>
6328
1
              <msup><mn>8</mn><mrow><mo>'</mo><mo>'</mo></mrow></msup></math>";
6329
1
        let target_str = "<math>
6330
1
        <mrow data-changed='added'>
6331
1
        <msup>
6332
1
          <mn>5</mn>
6333
1
          <mo>′</mo>
6334
1
        </msup>
6335
1
        <mo data-changed='added'>&#x2062;</mo>
6336
1
        <msup>
6337
1
          <mn>5</mn>
6338
1
          <mo>″</mo>
6339
1
        </msup>
6340
1
        <mo data-changed='added'>&#x2062;</mo>
6341
1
        <msup>
6342
1
          <mn>8</mn>
6343
1
          <mo>″</mo>
6344
1
        </msup>
6345
1
        </mrow>
6346
1
      </math>";
6347
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6348
1
  }
6349
6350
  #[test]
6351
1
    fn primes_uncommon() -> Result<()> {
6352
1
        let test_str = "<math><msup><mn>5</mn><mo>''′</mo></msup>
6353
1
              <msup><mn>5</mn><mo>''''</mo></msup>
6354
1
              <msup><mn>8</mn><mrow><mo>′</mo><mo>⁗</mo></mrow></msup></math>";
6355
1
        let target_str = " <math>
6356
1
        <mrow data-changed='added'>
6357
1
        <msup>
6358
1
          <mn>5</mn>
6359
1
          <mo>‴</mo>
6360
1
        </msup>
6361
1
        <mo data-changed='added'>&#x2062;</mo>
6362
1
        <msup>
6363
1
          <mn>5</mn>
6364
1
          <mo>⁗</mo>
6365
1
        </msup>
6366
1
        <mo data-changed='added'>&#x2062;</mo>
6367
1
        <msup>
6368
1
          <mn>8</mn>
6369
1
          <mo>⁗′</mo>
6370
1
        </msup>
6371
1
        </mrow>
6372
1
      </math>";
6373
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6374
1
  }
6375
6376
  #[test]
6377
1
    fn merge_mi_test() -> Result<()> {
6378
1
        let test_str = "<math>
6379
1
      <mi>c</mi><mi>o</mi><mi>s</mi><mo>=</mo>
6380
1
      <mi>w</mi><mi>x</mi><mi>y</mi><mi>z</mi><mo>+</mo>
6381
1
      <mi>n</mi><mi>a</mi><mi>x</mi><mo>+</mo>
6382
1
        <mi>i</mi><mi>ω</mi><mi>t</mi><mo>+</mo>
6383
1
      <mi>f</mi><mi>l</mi><mi>o</mi><mi>w</mi><mo>+</mo>
6384
1
      <mi>m</mi><mi>a</mi><mi>x</mi>
6385
1
    </math> 
6386
1
  ";
6387
1
        let target_str = "<math>
6388
1
    <mrow data-changed='added'>
6389
1
      <mi>cos</mi>
6390
1
      <mo>=</mo>
6391
1
      <mrow data-changed='added'>
6392
1
        <mrow data-changed='added'>
6393
1
          <mi>w</mi>
6394
1
          <mo data-changed='added'>&#x2062;</mo>
6395
1
          <mi>x</mi>
6396
1
          <mo data-changed='added'>&#x2062;</mo>
6397
1
          <mi>y</mi>
6398
1
          <mo data-changed='added'>&#x2062;</mo>
6399
1
          <mi>z</mi>
6400
1
        </mrow>
6401
1
        <mo>+</mo>
6402
1
        <mrow data-changed='added'>
6403
1
          <mi>n</mi>
6404
1
          <mo data-changed='added'>&#x2062;</mo>
6405
1
          <mi>a</mi>
6406
1
          <mo data-changed='added'>&#x2062;</mo>
6407
1
          <mi>x</mi>
6408
1
        </mrow>
6409
1
        <mo>+</mo>
6410
1
        <mrow data-changed='added'>
6411
1
          <mi>i</mi>
6412
1
          <mo data-changed='added'>&#x2062;</mo>
6413
1
          <mi>ω</mi>
6414
1
          <mo data-changed='added'>&#x2062;</mo>
6415
1
          <mi>t</mi>
6416
1
        </mrow>
6417
1
        <mo>+</mo>
6418
1
        <mi>flow</mi>
6419
1
        <mo>+</mo>
6420
1
        <mi>max</mi>
6421
1
      </mrow>
6422
1
      </mrow>
6423
1
    </math>";
6424
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6425
1
  }
6426
6427
  #[test]
6428
1
    fn merge_mi_with_script_test() -> Result<()> {
6429
1
        let test_str = "<math>
6430
1
      <mi>c</mi><mi>o</mi><msup><mi>s</mi><mn>2</mn></msup><mi>y</mi><mo>=</mo>
6431
1
      <mi>l</mi><mi>o</mi><msup><mi>g</mi><mn>2</mn></msup><mi>y</mi><mo>+</mo>
6432
1
      <mi>d</mi><mi>a</mi><msup><mi>g</mi><mn>2</mn></msup>
6433
1
    </math>";
6434
1
        let target_str = "<math>
6435
1
        <mrow data-changed='added'>
6436
1
          <mrow data-changed='added'>
6437
1
            <msup>
6438
1
              <mi>cos</mi>
6439
1
              <mn>2</mn>
6440
1
            </msup>
6441
1
            <mo data-changed='added'>&#x2061;</mo>
6442
1
            <mi>y</mi>
6443
1
          </mrow>
6444
1
          <mo>=</mo>
6445
1
          <mrow data-changed='added'>
6446
1
            <mrow data-changed='added'>
6447
1
              <msup>
6448
1
                <mi>log</mi>
6449
1
                <mn>2</mn>
6450
1
              </msup>
6451
1
              <mo data-changed='added'>&#x2061;</mo>
6452
1
              <mi>y</mi>
6453
1
            </mrow>
6454
1
            <mo>+</mo>
6455
1
            <mrow data-changed='added'>
6456
1
              <mi>d</mi>
6457
1
              <mo data-changed='added'>&#x2062;</mo>
6458
1
              <mi>a</mi>
6459
1
              <mo data-changed='added'>&#x2062;</mo>
6460
1
              <msup>
6461
1
                <mi>g</mi>
6462
1
                <mn>2</mn>
6463
1
              </msup>
6464
1
            </mrow>
6465
1
          </mrow>
6466
1
        </mrow>
6467
1
      </math>";
6468
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6469
1
  }
6470
6471
  #[test]
6472
1
    fn merge_mi_with_script_bug_333_test() -> Result<()> {
6473
1
        let test_str = "<math>
6474
1
      <mi>l</mi><mi>o</mi><msub><mrow><mi>g</mi></mrow><mrow><mn>2</mn></mrow></msub><mo>=</mo>
6475
1
      <mi>l</mi><mi>i</mi><msub><mrow><mi>m</mi></mrow><mrow><mi>n</mi><mo>→</mo><mi>∞</mi></mrow></msub>
6476
1
    </math> 
6477
1
  ";
6478
1
        let target_str = " <math>
6479
1
        <mrow data-changed='added'>
6480
1
        <msub>
6481
1
          <mi>log</mi>
6482
1
          <mn>2</mn>
6483
1
        </msub>
6484
1
        <mo>=</mo>
6485
1
        <msub>
6486
1
          <mi>lim</mi>
6487
1
          <mrow>
6488
1
          <mi>n</mi>
6489
1
          <mo>→</mo>
6490
1
          <mi>∞</mi>
6491
1
          </mrow>
6492
1
        </msub>
6493
1
        </mrow>
6494
1
      </math>";
6495
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6496
1
  }
6497
6498
  #[test]
6499
1
    fn merge_mi_bug_545() -> Result<()> {
6500
1
        let test_str = "<math><mi>S</mi><mi>I</mi><msup><mi>N</mi><mrow><mo>-</mo><mn>1</mn></mrow></msup></math>";
6501
1
        let target_str = "<math><msup><mi mathvariant='normal'>SIN</mi><mrow><mo>-</mo><mn>1</mn></mrow></msup></math>";
6502
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6503
1
  }
6504
6505
  #[test]
6506
1
    fn parent_bug_94() -> Result<()> {
6507
    // This is a test to make sure the crash in the bug report doesn't happen.
6508
    // Note: in the bug, they behavior they would like is a single mn with content "0.02"
6509
    // However, TeX input "1 2 3" will produce three consecutive <mn>s, so merging <mn>s isn't good in general
6510
    // This test 
6511
1
        let test_str = " <math>
6512
1
      <mrow>
6513
1
        <msqrt>
6514
1
          <mrow>
6515
1
            <mstyle mathvariant='bold' mathsize='normal'><mn>0</mn></mstyle>
6516
1
            <mstyle mathvariant='bold' mathsize='normal'><mo>.</mo><mn>0</mn><mn>2</mn></mstyle>
6517
1
          </mrow>
6518
1
        </msqrt>
6519
1
      </mrow>
6520
1
    </math>
6521
1
    ";
6522
1
      let target_str = "<math>
6523
1
      <msqrt>
6524
1
        <mn mathsize='normal' mathvariant='bold' data-changed='added'>0.02</mn>
6525
1
      </msqrt>
6526
1
    </math>";
6527
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6528
1
  }
6529
6530
  #[test]
6531
1
  fn mstyle_merge_bug_272() -> Result<()> {
6532
1
        let test_str = r#"<math>
6533
1
      <msup>
6534
1
        <mstyle mathvariant="bold" mathsize="normal">
6535
1
          <mn>6</mn>
6536
1
        </mstyle>
6537
1
        <mstyle mathvariant="bold" mathsize="normal">
6538
1
          <mn>9</mn>
6539
1
        </mstyle>
6540
1
      </msup>
6541
1
    </math>"#;
6542
1
      let target_str = "<math>
6543
1
      <msup>
6544
1
      <mn mathsize='normal' mathvariant='bold'>𝟔</mn>
6545
1
      <mn mathsize='normal' mathvariant='bold'>𝟗</mn>
6546
1
      </msup>
6547
1
    </math>";
6548
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
6549
1
  }
6550
6551
6552
  #[test]
6553
1
  fn munder_mspace_bug_296() -> Result<()> {
6554
    // this was a "typo" bug that should have looking embellished base
6555
1
        let test_str = r#"<math>
6556
1
      <mrow><mn>5</mn><mfrac><mn>9</mn><mrow><mn>10</mn></mrow></mfrac>
6557
1
        <munder accentunder="true"><mspace width="2.7em" /><mo stretchy="true">_</mo></munder>
6558
1
        </mrow></math>"#;
6559
1
      let target_str = "<math><mrow>
6560
1
        <mrow data-changed='added'>
6561
1
          <mn>5</mn>
6562
1
          <mo data-changed='added'>&#x2064;</mo>
6563
1
          <mfrac> <mn>9</mn><mn>10</mn> </mfrac>
6564
1
        </mrow>
6565
1
        <munder accentunder='true'>
6566
1
          <mo width='2.7em' data-changed='was-mspace' data-width='2.7' data-empty-in-2D='true' data-function-likelihood='false'> </mo>
6567
1
          <mo stretchy='true'>¯</mo>
6568
1
        </munder>
6569
1
      </mrow></math>";
6570
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
6571
1
  }
6572
6573
  #[test]
6574
1
  fn parse_scripted_open_paren_439() -> Result<()> {
6575
    // this was a "typo" bug that should have looking embellished base
6576
1
        let test_str = r#"<math><mrow><msub><mo>(</mo><mn>2</mn></msub><mo>)</mo></mrow></math>"#;
6577
1
      let target_str = "<math><mrow><msub><mo>(</mo><mn>2</mn></msub><mo>)</mo></mrow></math>";
6578
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
6579
1
  }
6580
6581
  #[test]
6582
1
    fn lift_script() -> Result<()> {
6583
1
        let test_str = "<math xmlns='http://www.w3.org/1998/Math/MathML' >
6584
1
    <mrow>
6585
1
      <mstyle scriptlevel='0' displaystyle='true'>
6586
1
      <mrow>
6587
1
        <msqrt>
6588
1
        <munder>
6589
1
          <mo>∑<!-- ∑ --></mo>
6590
1
          <mrow>
6591
1
          <mn>0</mn>
6592
1
          <mo>≤<!-- ≤ --></mo>
6593
1
          <mi>k</mi>
6594
1
          <mo>≤<!-- ≤ --></mo>
6595
1
          <mi>n</mi>
6596
1
          </mrow>
6597
1
        </munder>
6598
1
        <mrow>
6599
1
          <mo stretchy='false'>|</mo>
6600
1
        </mrow>
6601
1
        <msub>
6602
1
          <mi>a</mi>
6603
1
          <mrow>
6604
1
          <mi>k</mi>
6605
1
          </mrow>
6606
1
        </msub>
6607
1
        <msup>
6608
1
          <mrow>
6609
1
          <mo stretchy='false'>|</mo>
6610
1
          </mrow>
6611
1
          <mrow>
6612
1
          <mn>2</mn>
6613
1
          </mrow>
6614
1
        </msup>
6615
1
        </msqrt>
6616
1
      </mrow>
6617
1
      </mstyle>
6618
1
    </mrow>
6619
1
    </math>";
6620
1
        let target_str = "<math>
6621
1
    <msqrt scriptlevel='0' displaystyle='true'>
6622
1
      <mrow data-changed='added'>
6623
1
      <munder>
6624
1
        <mo>∑</mo>
6625
1
        <mrow>
6626
1
        <mn>0</mn>
6627
1
        <mo>≤</mo>
6628
1
        <mi>k</mi>
6629
1
        <mo>≤</mo>
6630
1
        <mi>n</mi>
6631
1
        </mrow>
6632
1
      </munder>
6633
1
      <msup>
6634
1
        <mrow data-changed='added'>
6635
1
        <mo stretchy='false'>|</mo>
6636
1
        <msub>
6637
1
          <mi>a</mi>
6638
1
          <mi>k</mi>
6639
1
        </msub>
6640
1
        <mo stretchy='false'>|</mo>
6641
1
        </mrow>
6642
1
        <mn>2</mn>
6643
1
      </msup>
6644
1
      </mrow>
6645
1
    </msqrt>
6646
1
     </math>";
6647
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6648
1
  }
6649
6650
  #[test]
6651
1
    fn pseudo_scripts() -> Result<()> {
6652
1
        let test_str = "<math><mrow>
6653
1
        <mi>cos</mi><mn>30</mn><mo>°</mo>
6654
1
        <mi>sin</mi><mn>60</mn><mo>′</mo>
6655
1
        </mrow></math>";
6656
1
        let target_str = "<math>
6657
1
    <mrow>
6658
1
      <mrow data-changed='added'>
6659
1
      <mi>cos</mi>
6660
1
      <mo data-changed='added'>&#x2061;</mo>
6661
1
      <msup data-changed='added'><mn>30</mn><mo>°</mo></msup>
6662
1
      </mrow>
6663
1
      <mo data-changed='added'>&#x2062;</mo>
6664
1
      <mrow data-changed='added'>
6665
1
      <mi>sin</mi>
6666
1
      <mo data-changed='added'>&#x2061;</mo>
6667
1
      <msup data-changed='added'><mn>60</mn><mo>′</mo></msup>
6668
1
      </mrow>
6669
1
    </mrow>
6670
1
     </math>";
6671
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6672
1
  }
6673
6674
  #[test]
6675
1
    fn pseudo_scripts_in_mi() -> Result<()> {
6676
1
        let test_str = "<math><mrow><mi>p'</mi><mo>=</mo><mi>µ°C</mi></mrow></math>";
6677
1
        let target_str = "<math><mrow><msup><mi>p</mi><mo>′</mo></msup><mo>=</mo><mi>µ°C</mi></mrow></math>";
6678
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6679
1
  }
6680
6681
  #[test]
6682
1
    fn prescript_only() -> Result<()> {
6683
1
        let test_str = "<math><msub><mtext/><mn>92</mn></msub><mi>U</mi></math>";
6684
1
        let target_str = "<math><mmultiscripts><mi>U</mi><mprescripts/> <mn>92</mn><none/> </mmultiscripts></math>";
6685
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6686
1
  }
6687
6688
  #[test]
6689
1
    fn pre_and_postscript_only() -> Result<()> {
6690
1
        let test_str = "<math>
6691
1
      <msub><mrow/><mn>0</mn></msub>
6692
1
      <msub><mi>F</mi><mn>1</mn></msub>
6693
1
      <mo stretchy='false'>(</mo>
6694
1
      <mi>a</mi><mo>,</mo><mi>b</mi><mo>;</mo><mi>c</mi><mo>;</mo><mi>z</mi>
6695
1
      <mo stretchy='false'>)</mo>
6696
1
    </math>";
6697
1
      let target_str = " <math>
6698
1
      <mrow data-changed='added'>
6699
1
      <mmultiscripts>
6700
1
        <mi>F</mi>
6701
1
        <mn>1</mn>
6702
1
        <none></none>
6703
1
        <mprescripts></mprescripts>
6704
1
        <mn>0</mn>
6705
1
        <none></none>
6706
1
      </mmultiscripts>
6707
1
      <mo data-changed='added'>&#x2061;</mo>
6708
1
      <mrow data-changed='added'>
6709
1
        <mo stretchy='false'>(</mo>
6710
1
        <mrow data-changed='added'>
6711
1
        <mrow data-changed='added'>
6712
1
          <mi>a</mi>
6713
1
          <mo>,</mo>
6714
1
          <mi>b</mi>
6715
1
        </mrow>
6716
1
        <mo>;</mo>
6717
1
        <mi>c</mi>
6718
1
        <mo>;</mo>
6719
1
        <mi>z</mi>
6720
1
        </mrow>
6721
1
        <mo stretchy='false'>)</mo>
6722
1
      </mrow>
6723
1
      </mrow>
6724
1
    </math>";
6725
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6726
1
  }
6727
6728
  #[test]
6729
1
    fn pointless_nones_in_mmultiscripts() -> Result<()> {
6730
1
        let test_str = "<math><mmultiscripts>
6731
1
        <mtext>C</mtext>
6732
1
        <none />
6733
1
        <none />
6734
1
        <mprescripts />
6735
1
        <mn>6</mn>
6736
1
        <mn>14</mn>
6737
1
      </mmultiscripts></math>";
6738
1
        let target_str = "<math>
6739
1
    <mmultiscripts data-chem-formula='6'>
6740
1
    <mtext data-chem-element='1'>C</mtext>
6741
1
    <mprescripts></mprescripts>
6742
1
    <mn>6</mn>
6743
1
    <mn>14</mn>
6744
1
    </mmultiscripts>
6745
1
    </math>";
6746
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6747
1
  }
6748
6749
  #[test]
6750
1
    fn empty_mmultiscripts_485() -> Result<()> {
6751
1
        let test_str = "<math><mmultiscripts>   </mmultiscripts></math>";
6752
1
        let target_str = ""; // shouldn't get to the point of comparing because the input is illegal.
6753
1
        let err = are_strs_canonically_equal_result(test_str, target_str, &[])
6754
1
            .expect_err("empty mmultiscripts should be rejected");
6755
1
        assert!(
6756
1
            err.to_string().contains("mmultiscripts has the wrong number of children:\n <mmultiscripts></mmultiscripts>"),
6757
            "unexpected error message: {err}"
6758
        );
6759
1
        Ok(())
6760
1
  }
6761
6762
  #[test]
6763
1
    fn empty_mmultiscripts_544() -> Result<()> {
6764
1
        let test_str = "<math><mmultiscripts><mrow/><mprescripts></mprescripts><mrow/><mrow/></mmultiscripts></math>";
6765
1
        let target_str = "<math> <mtext data-changed='empty_content' data-width='0'> </mtext></math>";
6766
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6767
1
  }
6768
6769
  #[test]
6770
1
    fn empty_mrows_in_mmultiscripts_306() -> Result<()> {
6771
1
        let test_str = "<math display='block'>
6772
1
      <mmultiscripts intent='_permutation:prefix(_of,$k,_from,$n)'>
6773
1
        <mi>P</mi>
6774
1
        <mi arg='k'>k</mi>
6775
1
        <mrow/>
6776
1
        <mprescripts/>
6777
1
        <mrow/>
6778
1
        <mi arg='n'>n</mi>
6779
1
      </mmultiscripts>
6780
1
    </math>";
6781
1
        let target_str = "<math display='block'>
6782
1
      <mmultiscripts intent='_permutation:prefix(_of,$k,_from,$n)'>
6783
1
        <mi>P</mi>
6784
1
        <mi arg='k'>k</mi>
6785
1
        <none></none>
6786
1
        <mprescripts></mprescripts>
6787
1
        <none></none>
6788
1
        <mi arg='n'>n</mi>
6789
1
      </mmultiscripts>
6790
1
    </math>";
6791
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6792
1
  }
6793
6794
6795
  #[test]
6796
  #[ignore] // this fails -- need to figure out grabbing base from previous or next child
6797
0
    fn tensor() -> Result<()> {
6798
0
        let test_str = "<math>
6799
0
        <msub><mi>R</mi><mi>i</mi></msub>
6800
0
        <msup><mrow/><mi>j</mi></msup>
6801
0
        <msub><mrow/><mi>k</mi></msub>
6802
0
        <msub><mrow/><mi>l</mi></msub>
6803
0
      </math>";
6804
0
    let target_str = "<math>
6805
0
      <mmultiscripts>
6806
0
        <mi> R </mi>
6807
0
        <mi> i </mi>
6808
0
        <none/>
6809
0
        <none/>
6810
0
        <mi> j </mi>
6811
0
        <mi> k </mi>
6812
0
        <none/>
6813
0
        <mi> l </mi>
6814
0
        <none/>
6815
0
      </mmultiscripts>
6816
0
    </math>";
6817
0
        are_strs_canonically_equal_result(test_str, target_str, &[])
6818
0
  }
6819
6820
6821
  #[test]
6822
1
    fn test_nonascii_function_name() -> Result<()> {
6823
1
        let test_str = r#"<math>
6824
1
        <mi mathvariant="bold-italic">x</mi>
6825
1
        <mo>=</mo>
6826
1
        <mn>2</mn>
6827
1
        <mrow>
6828
1
        <mi>𝒔𝒊𝒏</mi>
6829
1
        <mo>&#x2061;</mo>
6830
1
        <mrow><mi mathvariant="bold-italic">t</mi></mrow>
6831
1
        </mrow>
6832
1
        <mo>-</mo>
6833
1
        <mn>1</mn>
6834
1
      </math>"#;
6835
1
    let target_str = r#"<math>
6836
1
      <mrow data-changed='added'>
6837
1
      <mi mathvariant='bold-italic'>𝒙</mi>
6838
1
      <mo>=</mo>
6839
1
      <mrow data-changed='added'>
6840
1
        <mrow data-changed='added'>
6841
1
        <mn>2</mn>
6842
1
        <mo data-changed='added'>&#x2062;</mo>
6843
1
        <mrow>
6844
1
          <mi>sin</mi>
6845
1
          <mo>&#x2061;</mo>
6846
1
          <mi mathvariant='bold-italic'>𝒕</mi>
6847
1
        </mrow>
6848
1
        </mrow>
6849
1
        <mo>-</mo>
6850
1
        <mn>1</mn>
6851
1
      </mrow>
6852
1
      </mrow>
6853
1
    </math>"#;
6854
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6855
1
  }
6856
6857
  #[test]
6858
1
    fn test_nonascii_function_name_as_chars() -> Result<()> {
6859
1
        let test_str = r#"<math display="block">
6860
1
      <mi>&#x1D499;</mi>
6861
1
      <mo>=</mo>
6862
1
      <mrow>
6863
1
        <mrow>
6864
1
          <mi>&#x1D484;</mi>
6865
1
          <mi>&#x1D490;</mi>
6866
1
          <mi>&#x1D494;</mi>
6867
1
        </mrow>
6868
1
        <mo>&#x2061;</mo>
6869
1
        <mrow>
6870
1
          <mi>&#x1D495;</mi>
6871
1
        </mrow>
6872
1
      </mrow>
6873
1
      <mo>+</mo>
6874
1
      <mn>&#x1D7D0;</mn>
6875
1
    </math>"#;
6876
1
    let target_str = r#"<math display='block'>
6877
1
      <mrow data-changed='added'>
6878
1
        <mi>𝒙</mi>
6879
1
        <mo>=</mo>
6880
1
        <mrow data-changed='added'>
6881
1
          <mrow>
6882
1
          <mi>cos</mi>
6883
1
          <mo>&#x2061;</mo>
6884
1
          <mi>𝒕</mi>
6885
1
          </mrow>
6886
1
          <mo>+</mo>
6887
1
          <mn>𝟐</mn>
6888
1
        </mrow>
6889
1
      </mrow>
6890
1
    </math>"#;
6891
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6892
1
  }
6893
6894
6895
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/canonicalize.rs
Line
Count
Source
1
//! Converts the MathML to some sort of canonical MathML.
2
//!
3
//! Some changes made:
4
//! * &extra whitespace at the start/end of tokens is trimmed.
5
//! * "equivalent" characters are converted to a chosen character.
6
//! * known "bad" MathML is cleaned up (this will likely be an ongoing effort)
7
//! * mrows are added based on operator priorities from the MathML Operator Dictionary
8
#![allow(clippy::needless_return)]
9
use crate::errors::*;
10
use std::rc::Rc;
11
use std::cell::RefCell;
12
use sxd_document::dom::{Element, Document, ChildOfElement, Attribute};
13
use sxd_document::QName;
14
use phf::{phf_map, phf_set};
15
use crate::xpath_functions::{IsBracketed, is_leaf, IsNode};
16
use std::ptr::eq as ptr_eq;
17
use crate::pretty_print::*;
18
use regex::Regex;
19
use std::fmt;
20
use crate::chemistry::*;
21
use unicode_script::Script;
22
use roman_numerals_rs::RomanNumeral;
23
use std::sync::LazyLock;
24
use log::{debug};
25
use bitflags::bitflags;
26
27
// FIX: DECIMAL_SEPARATOR should be set by env, or maybe language
28
const DECIMAL_SEPARATOR: &str = ".";
29
pub const CHANGED_ATTR: &str = "data-changed";
30
pub const ADDED_ATTR_VALUE: &str = "added";
31
pub const INTENT_ATTR: &str = "intent";
32
pub const MATHML_FROM_NAME_ATTR: &str = "data-from-mathml";
33
const MFENCED_ATTR_VALUE: &str = "from_mfenced";
34
const EMPTY_IN_2D: &str = "data-empty-in-2D";
35
const SPACE_AFTER: &str = "data-space-after";
36
const ACT_AS_OPERATOR: &str = "data-acts_as_operator";
37
// character to use instead of the text content for priority, etc.
38
pub const CHEMICAL_BOND: &str ="data-chemical-bond";
39
40
41
/// Used when mhchem is detected and we should favor postscripts rather than prescripts in constructing an mmultiscripts
42
const MHCHEM_MMULTISCRIPTS_HACK: &str = "MHCHEM_SCRIPT_HACK";
43
44
// (perfect) hash of operators built from MathML's operator dictionary
45
static OPERATORS: phf::Map<&str, OperatorInfo> = include!("operator-info.in");
46
47
48
// The set of fence operators that can being either a left or right fence (or infix). For example: "|".
49
static AMBIGUOUS_OPERATORS: phf::Set<&str> = phf_set! {
50
  "|", "∥", "\u{2016}"
51
};
52
53
// static vars used when canonicalizing
54
// lowest priority operator so it is never popped off the stack
55
static LEFT_FENCEPOST: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::LEFT_FENCE, priority: 0, next: &None };
56
57
3
static INVISIBLE_FUNCTION_APPLICATION: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2061}").unwrap());
58
3
static IMPLIED_TIMES: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2062}").unwrap());
59
2
static IMPLIED_INVISIBLE_COMMA: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2063}").unwrap());
60
3
static IMPLIED_INVISIBLE_PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("\u{2064}").unwrap());
61
62
// FIX: any other operators that should act the same (e.g, plus-minus and minus-plus)?
63
3
static PLUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("+").unwrap());
64
3
static MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("-").unwrap());
65
3
static PREFIX_MINUS: LazyLock<&'static OperatorInfo> = LazyLock::new(|| MINUS.next.as_ref().unwrap());
66
67
3
static TIMES_SIGN: LazyLock<&'static OperatorInfo> = LazyLock::new(|| OPERATORS.get("×").unwrap());
68
69
// IMPLIED_TIMES_HIGH_PRIORITY -- used in trig functions for things like sin 2x cos 2x where want > function app priority
70
static IMPLIED_TIMES_HIGH_PRIORITY: OperatorInfo = OperatorInfo{
71
  op_type: OperatorTypes::INFIX, priority: 851, next: &None
72
};
73
// IMPLIED_SEPARATOR_HIGH_PRIORITY -- used for Geometry points like ABC
74
static IMPLIED_SEPARATOR_HIGH_PRIORITY: OperatorInfo = OperatorInfo{
75
  op_type: OperatorTypes::INFIX, priority: 901, next: &None
76
};
77
// IMPLIED_CHEMICAL_BOND -- used for implicit and explicit bonds
78
static IMPLIED_CHEMICAL_BOND: OperatorInfo = OperatorInfo{
79
  op_type: OperatorTypes::INFIX, priority: 905, next: &None
80
};
81
static IMPLIED_PLUS_SLASH_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ // (linear) mixed fraction 2 3/4
82
  op_type: OperatorTypes::INFIX, priority: 881, next: &None
83
};
84
85
// Useful static defaults to have available if there is no character match
86
static DEFAULT_OPERATOR_INFO_PREFIX: OperatorInfo = OperatorInfo{
87
  op_type: OperatorTypes::PREFIX, priority: 260, next: &None
88
};
89
static DEFAULT_OPERATOR_INFO_INFIX: OperatorInfo = OperatorInfo{
90
  op_type: OperatorTypes::INFIX, priority: 260, next:& None
91
};
92
static DEFAULT_OPERATOR_INFO_POSTFIX: OperatorInfo = OperatorInfo{
93
  op_type: OperatorTypes::POSTFIX, priority: 260, next: &None
94
};
95
96
// avoids having to use Option<OperatorInfo> in some cases
97
static ILLEGAL_OPERATOR_INFO: OperatorInfo = OperatorInfo{
98
  op_type: OperatorTypes::INFIX, priority: 999, next: &None
99
};
100
101
// used to tell if an operator is a relational operator
102
1
static EQUAL_PRIORITY: LazyLock<usize> = LazyLock::new(|| OPERATORS.get("=").unwrap().priority);
103
104
// useful for detecting whitespace
105
3
static IS_WHITESPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s+$").unwrap());    // only Unicode whitespace
106
107
// Operators are either PREFIX, INFIX, or POSTFIX, but can also have other properties such as LEFT_FENCE
108
bitflags! {
109
  #[derive(Clone, Debug, Copy, PartialEq, Eq)]
110
  struct OperatorTypes: u32 {
111
    const NONE    = 0x0;
112
    const PREFIX  = 0x1;
113
    const INFIX   = 0x2;
114
    const POSTFIX = 0x4;
115
    const FENCE   = 0x8;
116
    const LEFT_FENCE= 0x9;
117
    const RIGHT_FENCE=0xc;
118
    const UNSPECIFIED=0xf;    // 'and-ing will match anything
119
  }
120
}
121
// OperatorInfo is a key structure for parsing.
122
// They OperatorInfo is this program's representation of MathML's Operator Dictionary.
123
// The OperatorTypes say how the operator can group (can be overridden with @form="..." on an element).
124
//   Basically, it says the operator can be at the start, middle, or end of an mrow.
125
// The priority field gives the relationships between operators so that lower priority operators are towards the root of the tree.
126
//   E.g.,  '=' is lower priority than (infix) '+', which in turn is lower priority than multiplication.
127
// The operator info is a linked list because some operators (not many) have alternatives (e.g, '+' is both prefix and infix)
128
// All OperatorInfo is static info, with some special static defaults to capture when it is not listed in the operator dictionary.
129
#[derive(Clone, Debug)]
130
struct OperatorInfo {
131
  op_type: OperatorTypes,   // can be set on <mo>
132
  priority: usize,      // not settable on an element
133
  next: &'static Option<OperatorInfo>,  // can be both prefix & infix (etc) -- chain of options
134
}
135
136
// The character is separated out from the OperatorInfo as this allows the OperatorInfo to be static (can use default values)
137
#[derive(Clone, Debug)]
138
struct OperatorPair<'op> {
139
  ch: &'op str,
140
  op: &'static OperatorInfo
141
}
142
143
impl<'op> OperatorPair<'op> {
144
57.3k
  fn new() -> OperatorPair<'op> {
145
57.3k
    return OperatorPair{
146
57.3k
      ch: "illegal",          // value 'illegal' used only in debugging, if then
147
57.3k
      op: &ILLEGAL_OPERATOR_INFO,   // ILLEGAL_OPERATOR_INFO avoids using <Option>
148
57.3k
    };
149
57.3k
  }
150
}
151
152
// OperatorVersions is a convenient data structure when looking to see whether the operator should be prefix, infix, or postfix.
153
// It is only used in one place in the code, so this could maybe be eliminated and the code localized to where it is used.
154
#[derive(Debug)]
155
struct OperatorVersions {
156
  prefix: Option<&'static OperatorInfo>,
157
  infix: Option<&'static OperatorInfo>,
158
  postfix: Option<&'static OperatorInfo>,
159
}
160
161
impl OperatorVersions {
162
401
  fn new(op: &'static OperatorInfo) -> OperatorVersions {
163
401
    let mut op = op;
164
401
    let mut prefix = None;
165
401
    let mut infix = None;
166
401
    let mut postfix = None;
167
    loop {
168
1.10k
      if op.is_prefix() {
169
360
        prefix = Some( op );
170
745
      } else if op.is_infix() {
171
385
        infix = Some( op )
172
360
      } else if op.is_postfix() {
173
360
        postfix = Some( op );
174
360
      } else {
175
0
        panic!("OperatorVersions::new: operator is not prefix, infix, or postfix")
176
      }
177
      //let another_op = op.next;
178
1.10k
      match &op.next {
179
401
        None => break,
180
704
        Some(alt_op) => op = alt_op,
181
      }
182
    }
183
401
    return OperatorVersions{prefix, infix, postfix};
184
401
  }
185
}
186
187
188
impl OperatorInfo {
189
13.1k
  fn is_prefix(&self) -> bool {
190
13.1k
    return (self.op_type & OperatorTypes::PREFIX) != OperatorTypes::NONE;
191
13.1k
  }
192
193
805
  fn is_infix(&self) -> bool {
194
805
    return (self.op_type & OperatorTypes::INFIX) != OperatorTypes::NONE;
195
805
  }
196
197
14.2k
  fn is_postfix(&self) -> bool {
198
14.2k
    return (self.op_type & OperatorTypes::POSTFIX) != OperatorTypes::NONE;
199
14.2k
  }
200
201
13.9k
  fn is_left_fence(&self) -> bool {
202
13.9k
    return self.op_type & OperatorTypes::LEFT_FENCE == OperatorTypes::LEFT_FENCE;
203
13.9k
  }
204
205
12.9k
  fn is_right_fence(&self) -> bool {
206
12.9k
    return self.op_type & OperatorTypes::RIGHT_FENCE ==OperatorTypes::RIGHT_FENCE;
207
12.9k
  }
208
209
4.84k
  fn is_fence(&self) -> bool {
210
4.84k
    return (self.op_type & (OperatorTypes::LEFT_FENCE | OperatorTypes::RIGHT_FENCE)) != OperatorTypes::NONE;
211
4.84k
  }
212
213
21.3k
  fn is_operator_type(&self, op_type: OperatorTypes) -> bool {
214
21.3k
    return self.op_type & op_type != OperatorTypes::NONE;
215
21.3k
  }
216
217
13.5k
  fn is_plus_or_minus(&self) -> bool {
218
13.5k
    return ptr_eq(self, *PLUS) || 
ptr_eq13.0k
(
self13.0k
,
*MINUS13.0k
);
219
13.5k
  }
220
221
13.2k
  fn is_times(&self) -> bool {
222
13.2k
    return ptr_eq(self, *IMPLIED_TIMES) || 
ptr_eq13.0k
(
self13.0k
,
*TIMES_SIGN13.0k
);
223
13.2k
  }
224
225
17.7k
  fn is_nary(&self, previous_op: &OperatorInfo) -> bool {
226
17.7k
    return  ptr_eq(previous_op,self) ||
227
13.0k
        (previous_op.is_plus_or_minus() && 
self506
.
is_plus_or_minus506
()) ||
228
13.0k
        (previous_op.is_times() && 
self163
.
is_times163
());
229
17.7k
  }
230
}
231
232
// StackInfo contains all the needed information for deciding shift/reduce during parsing.
233
// The stack itself is just a Vec of StackInfo (since we only push, pop, and look at the top)
234
// There are a number of useful functions defined on StackInfo. 
235
struct StackInfo<'a, 'op>{
236
  mrow: Element<'a>,      // mrow being built
237
  op_pair: OperatorPair<'op>, // last operator placed on stack
238
  is_operand: bool,     // true if child at end of mrow is an operand (as opposed to an operator)
239
}
240
241
impl fmt::Display for StackInfo<'_, '_> {
242
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
243
0
        write!(f, "StackInfo(op={}/{}, is_operand={}, mrow({}",
244
0
        show_invisible_op_char(self.op_pair.ch), self.op_pair.op.priority, self.is_operand,
245
0
        if self.mrow.children().is_empty() {")"} else {""})?;
246
0
    for child in self.mrow.children() {
247
0
      let child = as_element(child);
248
0
      write!(f, "{}{}", name(child), if child.following_siblings().is_empty() {")"} else {","})?;
249
    }
250
0
        return Ok( () );
251
0
    }
252
}
253
254
impl<'a, 'op:'a> StackInfo<'a, 'op> {
255
10.6k
  fn new(doc: Document<'a>) -> StackInfo<'a, 'op> {
256
    // debug!("  new empty StackInfo");
257
10.6k
    let mrow = create_mathml_element(&doc, "mrow") ;
258
10.6k
    mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
259
10.6k
    return StackInfo{
260
10.6k
      mrow,
261
10.6k
      op_pair: OperatorPair{ ch: "\u{E000}", op: &LEFT_FENCEPOST },
262
10.6k
      is_operand: false,
263
10.6k
    }
264
10.6k
  }
265
266
10.9k
  fn with_op<'d>(doc: &'d Document<'a>, node: Element<'a>, op_pair: OperatorPair<'op>) -> StackInfo<'a, 'op> {
267
    // debug!("  new StackInfo with '{}' and operator {}/{}", name(node), show_invisible_op_char(op_pair.ch), op_pair.op.priority);
268
10.9k
    let mrow = create_mathml_element(doc, "mrow");
269
10.9k
    mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
270
10.9k
    mrow.append_child(node);
271
10.9k
    return StackInfo {
272
10.9k
      mrow,
273
10.9k
      op_pair,
274
10.9k
      is_operand: false,
275
10.9k
    }
276
10.9k
  }
277
278
37.2k
  fn priority(&self) -> usize {
279
37.2k
    return self.op_pair.op.priority;
280
37.2k
  }
281
282
37.1k
  fn last_child_in_mrow(&self) -> Option<Element<'a>> {
283
37.1k
    let children = self.mrow.children();
284
37.1k
    for &
child29.5k
in children.iter().rev() {
285
29.5k
      let child = as_element(child);
286
29.5k
      if let Some(
value687
) = child.attribute_value(CHANGED_ATTR)
287
687
        && value == "empty_content" {
288
0
          continue;
289
29.5k
        }
290
29.5k
      return Some(child);
291
    }
292
7.58k
    return None;
293
37.1k
  }
294
295
57.6k
  fn add_child_to_mrow(&mut self, child: Element<'a>, child_op: OperatorPair<'op>) {
296
    // debug!("  adding '{}' to mrow[{}], operator '{}/{}'",
297
    //    element_summary(child), self.mrow.children().len(), show_invisible_op_char(child_op.ch), child_op.op.priority);
298
57.6k
    self.mrow.append_child(child);
299
57.6k
    if ptr_eq(child_op.op, &ILLEGAL_OPERATOR_INFO) {
300
36.8k
      assert!(!self.is_operand);  // should not have two operands in a row (ok to add whitespace)
301
36.8k
      self.is_operand = true;
302
20.7k
    } else {
303
20.7k
      self.op_pair = child_op;
304
20.7k
      self.is_operand = false;
305
20.7k
    }
306
57.6k
  }
307
308
18.4k
  fn remove_last_operand_from_mrow(&mut self) -> Element<'a> {
309
18.4k
    let children = self.mrow.children();
310
18.4k
    assert!( !children.is_empty() );
311
18.4k
    assert!( self.is_operand || 
children.len()==163
); // could be operator that is forced to be interpreted as operand -- eg, bad input like "x+("
312
18.4k
    self.is_operand = false;
313
18.4k
    let last_operand = as_element(children[children.len()-1]);
314
    // debug!("  Removing last element '{}' from mrow[{}]",element_summary(last_operand), children.len());
315
18.4k
    last_operand.remove_from_parent();
316
18.4k
    return last_operand;
317
18.4k
  }
318
319
}
320
321
322
117k
pub fn create_mathml_element<'a>(doc: &Document<'a>, name: &str) -> Element<'a> {
323
117k
  return doc.create_element(sxd_document::QName::with_namespace_uri(
324
117k
    Some("http://www.w3.org/1998/Math/MathML"),
325
117k
    name));
326
117k
}
327
328
4.84k
pub fn is_fence(mo: Element) -> bool {
329
4.84k
  return CanonicalizeContext::find_operator(None, mo, None, None, None).is_fence();
330
4.84k
}
331
332
664
pub fn is_relational_op(mo: Element) -> bool {
333
664
  return CanonicalizeContext::find_operator(None, mo, None, None, None).priority == *EQUAL_PRIORITY;
334
664
}
335
336
113k
pub fn set_mathml_name(element: Element, new_name: &str) {
337
113k
  element.set_name(QName::with_namespace_uri(Some("http://www.w3.org/1998/Math/MathML"), new_name));
338
113k
}
339
340
/// Replace 'mathml' in the parent (must exist since this only happens for leaves) with the 'replacements' (new children).
341
/// This handles adding mrows if needed.
342
/// 
343
/// Returns first replacement
344
2.47k
pub fn replace_children<'a>(mathml: Element<'a>, replacements: Vec<Element<'a>>) -> Element<'a> {
345
2.47k
  let parent = get_parent(mathml);
346
2.47k
  let parent_name = name(parent);
347
  // debug!("\nreplace_children: mathml\n{}", mml_to_string(mathml));
348
  // debug!("replace_children: parent before replace\n{}", mml_to_string(parent));
349
  // debug!("{} replacements:\n{}", replacements.len(), replacements.iter().map(|e| mml_to_string(e)).collect::<Vec<String>>().join("\n"));
350
2.47k
  if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(parent_name) ||
351
2.44k
     parent_name == "mmultiscripts" {     // each child acts like the parent has a fixed number of children
352
    // gather up the preceding/following siblings before mucking with the tree structure (mrow.append_children below)
353
32
    let mut new_children = mathml.preceding_siblings();
354
32
    let mut following_siblings = mathml.following_siblings();
355
356
    // debug!("\nreplace_children: mathml\n{}", mml_to_string(mathml));
357
    // debug!("replace_children: parent before replace\n{}", mml_to_string(parent));
358
    // wrap an mrow around the replacements and then replace 'mathml' with that
359
32
    let mrow = create_mathml_element(&mathml.document(), "mrow");
360
32
    add_attrs(mrow, &replacements[0].attributes());
361
32
    mrow.append_children(replacements);
362
32
    new_children.push(ChildOfElement::Element(mrow));
363
32
    new_children.append(&mut following_siblings);
364
32
    parent.replace_children(new_children);
365
    // debug!("replace_children parent after: parent\n{}", mml_to_string(parent));
366
    // debug!("replace_children: returned mrow\n{}", mml_to_string(mrow));
367
32
    return mrow;
368
  } else {
369
    // replace the children of the parent with 'replacements' inserted in place of 'mathml'
370
2.44k
    let mut new_children = mathml.preceding_siblings();
371
2.44k
    let i_first_new_child = new_children.len();
372
6.54k
    let 
mut replacements2.44k
=
replacements.iter()2.44k
.
map2.44k
(|&el| ChildOfElement::Element(el)).
collect2.44k
::<Vec<ChildOfElement>>();
373
2.44k
    new_children.append(&mut replacements);
374
2.44k
    new_children.append(&mut mathml.following_siblings());
375
2.44k
    parent.replace_children(new_children);
376
    // debug!("replace_children: (will return child[{}]) parent after replace\n{}", i_first_new_child, mml_to_string(parent));
377
2.44k
    return as_element(parent.children()[i_first_new_child]);
378
  }
379
2.47k
}
380
381
// returns the presentation element of a "semantics" element
382
22
pub fn get_presentation_element(element: Element) -> (usize, Element) {
383
22
  assert_eq!(name(element), "semantics");
384
22
  let children = element.children();
385
22
  if let Some( (
i20
,
child20
) ) = children.iter().enumerate().find(|&(_, &child)|
386
48
      if let Some(
encoding46
) = as_element(child).attribute_value("encoding") {
387
46
        encoding == "MathML-Presentation"
388
      } else {
389
2
        false
390
48
      })
391
  {
392
20
    let presentation_annotation = as_element(*child);
393
    // debug!("get_presentation_element:\n{}", mml_to_string(presentation_annotation));
394
20
    assert_eq!(presentation_annotation.children().len(), 1);
395
20
    return (i, as_element(presentation_annotation.children()[0]));
396
  } else {
397
2
    return (0, as_element(children[0]));
398
  }
399
22
}
400
401
/// Canonicalize does several things:
402
/// 1. cleans up the tree so all extra white space is removed (should only have element and text nodes)
403
/// 2. normalize the characters
404
/// 3. clean up "bad" MathML based on known output from some converters (TODO: still a work in progress)
405
/// 4. the tree is "parsed" based on the mo (priority)/mi/mn's in an mrow
406
///    *  this adds mrows and some invisible operators (implied times, function app, ...)
407
///    * extra mrows are removed
408
///    * implicit mrows are turned into explicit mrows (e.g, there will be a single child of 'math')
409
///
410
/// Canonicalize is pretty conservative in adding new mrows and won't do it if:
411
/// * there is an intent attr
412
/// * if the mrow starts and ends with a fence (e.g, French open interval "]0,1[")
413
///
414
/// An mrow is never deleted unless it is redundant.
415
/// 
416
/// Whitespace handling:
417
/// Whitespace complicates parsing and also pattern matching (e.g., is it a mixed number which tests for a number preceding a fraction)
418
/// The first attempt which mostly worked was to shove whitespace into adjacent mi/mn/mtext. That has a problem with distinguish different uses for whitespace
419
/// The second attempt was to leave it in the parse and make it an mo when appropriate, but there were some cases where it should be prefix and wasn't caught
420
/// The third attempt (and the current one) is to make it an attribute on adjacent elements.
421
///   This preserves the data-width attr (with new name) added in the second attempt that helps resolve whether something is tweaking, a real space, or an omission.
422
///   It adds data-previous-space-width/data-following-space-width with values to indicate with the space was on the left or right (typically it placed on the previous token because that's easier)
423
5.06k
pub fn canonicalize(mathml: Element) -> Result<Element> {
424
5.06k
  let context = CanonicalizeContext::new();
425
5.06k
  return context.canonicalize(mathml);
426
5.06k
}
427
428
#[derive(Debug, PartialEq)]
429
enum FunctionNameCertainty {
430
  True,
431
  Maybe,
432
  False
433
}
434
435
436
static ELEMENTS_WITH_ONE_CHILD: phf::Set<&str> = phf_set! {
437
  "math", "msqrt", "merror", "mpadded", "mphantom", "menclose", "mtd", "mscarry"
438
};
439
440
static ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN: phf::Set<&str> = phf_set! {
441
  "mfrac", "mroot", "msub", "msup", "msubsup","munder", "mover", "munderover"
442
};
443
444
static EMPTY_ELEMENTS: phf::Set<&str> = phf_set! {
445
  "mspace", "none", "mprescripts", "mglyph", "malignmark", "maligngroup", "msline",
446
};
447
448
// turns out Roman Numerals tests aren't needed, but we do want to block VII from being a chemical match
449
// two cases because we don't want to have a match for 'Cl', etc.
450
3
static UPPER_ROMAN_NUMERAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s*$").unwrap());
451
3
static LOWER_ROMAN_NUMERAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*^m{0,3}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s*$").unwrap());
452
453
454
struct CanonicalizeContextPatterns {
455
  decimal_separator: Regex,
456
  block_separator: Regex,
457
  digit_only_decimal_number: Regex,
458
  block_3digit_pattern: Regex,
459
  block_3_5digit_pattern: Regex,
460
  block_4digit_hex_pattern: Regex,
461
  block_1digit_pattern: Regex,    // used when generator puts each digit into a single mn
462
}
463
464
impl CanonicalizeContextPatterns {
465
4.10k
  fn new(block_separator_pref: &str, decimal_separator_pref: &str) -> CanonicalizeContextPatterns {
466
4.10k
    let block_separator = Regex::new(&format!("[{}]", regex::escape(block_separator_pref))).unwrap();
467
4.10k
    let decimal_separator = Regex::new(&format!("[{}]", regex::escape(decimal_separator_pref))).unwrap();
468
    // allows just "." and also matches an empty string, but those are ruled out elsewhere
469
4.10k
    let digit_only_decimal_number = Regex::new(&format!(r"^\d*{}?\d*$", regex::escape(decimal_separator_pref))).unwrap();
470
4.10k
    let block_3digit_pattern = get_number_pattern_regex(block_separator_pref, decimal_separator_pref, 3, 3);
471
4.10k
    let block_3_5digit_pattern = get_number_pattern_regex(block_separator_pref, decimal_separator_pref, 3, 5);
472
    // Note: on en.wikipedia.org/wiki/Decimal_separator, show '3.14159 26535 89793 23846'
473
4.10k
    let block_4digit_hex_pattern =  Regex::new(r"^[0-9a-fA-F]{4}([ \u00A0\u202F][0-9a-fA-F]{4})*$").unwrap();
474
4.10k
    let block_1digit_pattern =  Regex::new(r"^((\d(\uFFFF\d)?)(\d([, \u00A0\u202F]\d){2})*)?([\.](\d(\uFFFF\d)*)?)?$").unwrap();
475
476
4.10k
    return CanonicalizeContextPatterns {
477
4.10k
      block_separator,
478
4.10k
      decimal_separator,
479
4.10k
      digit_only_decimal_number,
480
4.10k
      block_3digit_pattern,
481
4.10k
      block_3_5digit_pattern,
482
4.10k
      block_4digit_hex_pattern,
483
4.10k
      block_1digit_pattern
484
4.10k
    };
485
486
    
487
8.21k
    fn get_number_pattern_regex(block_separator: &str, decimal_separator: &str, n_sep_before: usize, n_sep_after: usize) -> Regex {
488
      // the following is a generalization of a regex like ^(\d*|\d{1,3}([, ]?\d{3})*)(\.(\d*|(\d{3}[, ])*\d{1,3}))?$
489
      // that matches something like '1 234.567 8' and '1,234.', but not '1,234.12,34
490
8.21k
      return Regex::new(&format!(r"^(\d*|\d{{1,{}}}([{}]?\d{{{}}})*)([{}](\d*|(\d{{{}}}[{}])*\d{{1,{}}}))?$",
491
8.21k
              n_sep_before, regex::escape(block_separator), n_sep_before, regex::escape(decimal_separator),
492
8.21k
              n_sep_after, regex::escape(block_separator), n_sep_after) ).unwrap();
493
8.21k
    }
494
4.10k
  }
495
}
496
497
/// Profiling showed that creating new contexts was very time consuming because creating the RegExs is very expensive
498
/// Profiling set_mathml (which does the canonicalization) spends 65% of the time in Regex::new, of which half of it is spent in this initialization.
499
struct CanonicalizeContextPatternsCache {
500
  block_separator_pref: String,
501
  decimal_separator_pref: String,
502
  patterns: Rc<CanonicalizeContextPatterns>,
503
}
504
505
thread_local!{
506
    static PATTERN_CACHE: RefCell<CanonicalizeContextPatternsCache> = RefCell::new(CanonicalizeContextPatternsCache::new());
507
}
508
509
impl CanonicalizeContextPatternsCache {
510
4.10k
  fn new() -> CanonicalizeContextPatternsCache {
511
4.10k
    let pref_manager = crate::prefs::PreferenceManager::get();
512
4.10k
    let pref_manager = pref_manager.borrow();
513
4.10k
    let block_separator_pref = pref_manager.pref_to_string("BlockSeparators");
514
4.10k
    let decimal_separator_pref = pref_manager.pref_to_string("DecimalSeparators");
515
4.10k
    return CanonicalizeContextPatternsCache {
516
4.10k
      patterns: Rc::new( CanonicalizeContextPatterns::new(&block_separator_pref, &decimal_separator_pref) ),
517
4.10k
      block_separator_pref,
518
4.10k
      decimal_separator_pref
519
4.10k
    }
520
4.10k
  }
521
522
5.06k
  fn get() -> Rc<CanonicalizeContextPatterns> {
523
5.06k
    return PATTERN_CACHE.with( |cache| {
524
5.06k
      let pref_manager_rc = crate::prefs::PreferenceManager::get();
525
5.06k
      let pref_manager = pref_manager_rc.borrow();
526
5.06k
      let block_separator_pref = pref_manager.pref_to_string("BlockSeparators");
527
5.06k
      let decimal_separator_pref = pref_manager.pref_to_string("DecimalSeparators");
528
529
5.06k
      let mut cache = cache.borrow_mut();
530
5.06k
      if block_separator_pref != cache.block_separator_pref || decimal_separator_pref != cache.decimal_separator_pref {
531
0
        // update the cache
532
0
        cache.patterns = Rc::new( CanonicalizeContextPatterns::new(&block_separator_pref, &decimal_separator_pref) );
533
0
        cache.block_separator_pref = block_separator_pref;
534
0
        cache.decimal_separator_pref = decimal_separator_pref;
535
5.06k
      }
536
5.06k
      return cache.patterns.clone();
537
5.06k
    })
538
5.06k
  }
539
}
540
541
struct CanonicalizeContext {
542
  patterns: Rc<CanonicalizeContextPatterns>,
543
}
544
545
546
impl CanonicalizeContext {
547
5.06k
  fn new() -> CanonicalizeContext {
548
5.06k
    return CanonicalizeContext {
549
5.06k
      patterns: CanonicalizeContextPatternsCache::get(),
550
5.06k
    };
551
5.06k
  }
552
553
5.06k
  fn canonicalize<'a>(&self, mut mathml: Element<'a>) -> Result<Element<'a>> {
554
    // debug!("MathML before canonicalize:\n{}", mml_to_string(mathml));
555
  
556
5.06k
    if name(mathml) != "math" {
557
0
      // debug!("Didn't start with <math> element -- attempting repair");
558
0
      let math_element = create_mathml_element(&mathml.document(), "math");
559
0
      math_element.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
560
0
      math_element.append_child(mathml);
561
0
      let root = math_element.document().root();
562
0
      root.clear_children();
563
0
      root.append_child(math_element);
564
0
      mathml = root.children()[0].element().unwrap();
565
5.06k
    }
566
5.06k
    CanonicalizeContext::assure_mathml(mathml, 0)
?4
;
567
5.05k
    let mathml = self.clean_mathml(mathml).unwrap(); // 'math' is never removed
568
5.05k
    self.assure_nary_tag_has_one_child(mathml);
569
    // debug!("Not chemistry -- retry:\n{}", mml_to_string(mathml));
570
5.05k
    let mut converted_mathml = self.canonicalize_mrows(mathml)
571
5.05k
        .with_context(|| 
format!0
("while processing\n{}",
mml_to_string0
(
mathml0
)))
?0
;
572
    // debug!("canonicalize before canonicalize_mrows:\n{}", mml_to_string(converted_mathml));
573
5.05k
    if !crate::chemistry::scan_and_mark_chemistry(converted_mathml) {
574
869
      self.assure_nary_tag_has_one_child(converted_mathml);
575
869
      converted_mathml = self.canonicalize_mrows(mathml)
576
869
        .with_context(|| 
format!0
("while processing\n{}",
mml_to_string0
(
mathml0
)))
?0
;
577
4.18k
    }
578
5.05k
    debug!("\nMathML after canonicalize:\n{}", 
mml_to_string0
(
converted_mathml0
));
579
5.05k
    return Ok(converted_mathml);
580
5.06k
  }
581
    
582
  /// Make sure there is exactly one child
583
19.1k
  fn assure_nary_tag_has_one_child(&self, mathml: Element) {
584
19.1k
    let children = mathml.children();
585
19.1k
    if !ELEMENTS_WITH_ONE_CHILD.contains(name(mathml)) {
586
6.43k
      return;
587
12.7k
    }
588
589
12.7k
    if children.is_empty() {
590
3
      // make sure there is content
591
3
      let child = CanonicalizeContext::create_empty_element(&mathml.document());
592
3
      mathml.append_child(child);
593
12.7k
    } else if children.len() > 1 {
594
2.34k
      // wrap the children in an mrow
595
2.34k
      let mrow = create_mathml_element(&mathml.document(), "mrow");
596
2.34k
      mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
597
2.34k
      mrow.append_children(children);
598
2.34k
      mathml.replace_children(vec![ChildOfElement::Element(mrow)]);
599
10.3k
    }
600
19.1k
  }
601
602
  /// Return an error if some element is not MathML (only look at first child of <semantics>) or if it has the wrong number of children
603
52.9k
  fn assure_mathml(mathml: Element, depth: usize) -> Result<()> {
604
52.9k
    if depth > crate::interface::MAX_DEPTH {
605
1
      bail!("MathML is too deeply nested to process");
606
52.9k
    }
607
52.9k
    let n_children = mathml.children().len();
608
52.9k
    let element_name = name(mathml);
609
52.9k
    if is_leaf(mathml) {
610
33.1k
      if EMPTY_ELEMENTS.contains(element_name) {
611
464
        if n_children != 0 {
612
0
          bail!("{} should only have one child:\n{}", element_name, mml_to_string(mathml));
613
464
        }
614
32.7k
      } else if element_name == "annotation" {
615
0
        bail!("'annotation' element is not child of 'semantics' element");
616
32.7k
      } else if (n_children == 1 && 
mathml.children()[0].text()32.6k
.
is_some32.6k
()) ||
n_children == 018
{ // allow empty children such as mtext
617
32.7k
        return Ok( () );
618
      } else {
619
0
        bail!("Not a valid MathML leaf element:\n{}", mml_to_string(mathml));
620
      };
621
19.7k
    }
622
623
20.2k
    if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(element_name) {
624
3.90k
      match element_name {
625
3.90k
        "munderover" | 
"msubsup"3.84k
=> if
n_children != 3277
{
626
0
          bail!("{} should have 3 children:\n{}", element_name, mml_to_string(mathml));
627
277
        },
628
3.62k
        _ => if n_children != 2 {
629
0
          bail!("{} should have 2 children:\n{}", element_name, mml_to_string(mathml));
630
3.62k
        },
631
      }
632
16.3k
    } else if 
matches!2.18k
(element_name, "mtd" |
"mtr"14.8k
|
"mlabeledtr"14.1k
) {
633
2.18k
      let parent_name = name(get_parent(mathml));
634
2.18k
      if (element_name == "mtr" || 
element_name == "mlabeledtr"1.47k
) &&
parent_name != "mtable"722
{
635
0
        bail!("Illegal MathML: {} is not a child of mtable. Parent is {}", element_name, mml_to_string(get_parent(mathml)));
636
2.18k
      } else if element_name == "mtd" && !(
parent_name == "mtr"1.45k
||
parent_name == "mlabeledtr"57
) {
637
1
        bail!("Illegal MathML: mtd is not a child of {}. Parent is {}", parent_name, mml_to_string(get_parent(mathml)));
638
2.17k
      }
639
    }
640
14.1k
    else if element_name == "mmultiscripts" {
641
182
      let has_prescripts = mathml.children().iter()
642
649
          .
any182
(|&child| name(as_element(child)) == "mprescripts");
643
182
      if has_prescripts ^ (n_children.is_multiple_of(2)) {
644
1
        bail!("{} has the wrong number of children:\n{}", element_name, mml_to_string(mathml));
645
181
      }
646
13.9k
    } else if element_name == "mlongdiv" {
647
0
      if n_children < 3 {
648
0
        bail!("{} should have at least 3 children:\n{}", element_name, mml_to_string(mathml));
649
0
      }
650
13.9k
    } else if element_name == "semantics" {
651
11
      let children = mathml.children();
652
11
      if children.is_empty() {
653
0
        return Ok( () );
654
      } else {
655
11
        let (i_presentation, presentation_element) = get_presentation_element(mathml);
656
        // make sure only 'annotation' and 'annotation-xml' elements are children of the non-presentation element
657
24
        for (i, child) in 
children.iter()11
.
enumerate11
() {
658
24
          if i != i_presentation {
659
13
            let child = as_element(*child);
660
13
            if name(child)!="annotation" && 
name(child)!="annotation-xml"1
{
661
0
              bail!("Illegal MathML: {} is child of 'semantic'", name(child));
662
13
            }
663
11
          }
664
        }
665
11
        return CanonicalizeContext::assure_mathml(presentation_element, depth + 1);
666
      }
667
13.9k
    } else if !IsNode::is_mathml(mathml) {
668
1
      if element_name == "annotation-xml" {
669
0
        bail!("'annotation-xml' element is not child of 'semantics' element");
670
      } else {
671
1
        bail!("'{}' is not a valid MathML element", element_name);
672
      }
673
13.9k
    }
674
675
    // valid MathML element and not a leaf -- check the children
676
47.8k
    for child in 
mathml20.2k
.
children20.2k
() {
677
47.8k
      CanonicalizeContext::assure_mathml( as_element(child), depth + 1)
?520
;
678
    }
679
19.6k
    return Ok( () );
680
52.9k
  }
681
682
283
  fn make_empty_element(mathml: Element) -> Element {
683
283
    set_mathml_name(mathml, "mtext");
684
283
    mathml.clear_children();
685
283
    mathml.set_text("\u{00A0}");
686
283
    mathml.set_attribute_value("data-changed", "empty_content");
687
283
    mathml.set_attribute_value("data-width", "0");
688
283
    return mathml;
689
283
  }
690
  
691
24
  fn create_empty_element<'a>(doc: &Document<'a>) -> Element<'a> {
692
24
    let mtext = create_mathml_element(doc, "mtext");
693
24
    mtext.set_text("\u{00A0}");
694
24
    mtext.set_attribute_value("data-added", "missing-content");
695
24
    mtext.set_attribute_value("data-width", "0");
696
24
    return mtext;
697
24
  }
698
  
699
11.5k
  fn is_empty_element(el: Element) -> bool {
700
11.5k
    return (is_leaf(el) && 
as_text(el).trim()7.55k
.
is_empty7.55k
()) ||
701
11.0k
         (name(el) == "mrow" && 
el.children()1.33k
.
is_empty1.33k
() &&
el.attribute(INTENT_ATTR)0
.
is_none0
());
702
11.5k
  }
703
704
705
  // this should only be called for 2D elements
706
4.48k
  fn mark_empty_content(two_d_element: Element) {
707
7.32k
    for child in 
two_d_element4.48k
.
children4.48k
() {
708
7.32k
      let child = as_element(child);
709
7.32k
      if CanonicalizeContext::is_empty_element(child) {
710
20
        child.set_attribute_value(EMPTY_IN_2D, "true");
711
7.30k
      }
712
    }
713
4.48k
  }
714
715
  /// Turn leaf into an 'mn' and set attributes appropriately
716
34
  fn make_roman_numeral(leaf: Element) {
717
34
    assert!(is_leaf(leaf));
718
34
    set_mathml_name(leaf, "mn");
719
34
    leaf.set_attribute_value("data-roman-numeral", "true");  // mark for easy detection
720
34
    let as_number = match as_text(leaf).parse::<RomanNumeral>() {
721
34
      Ok(roman) => roman.as_u16().to_string(),
722
0
      Err(_) => as_text(leaf).to_string(),
723
    };
724
34
    leaf.set_attribute_value("data-number", &as_number);
725
34
  }
726
727
  /// most of the time it is ok to merge the mrow with its singleton child, but there are some exceptions:
728
  ///   mrow has 'intent' -- this might reference the child and you aren't allowed to self reference
729
2.82k
  fn is_ok_to_merge_mrow_child(mrow: Element) -> bool {
730
2.82k
    assert_eq!(name(mrow), "mrow");
731
2.82k
    assert!(mrow.children().len() == 1);
732
2.82k
    return mrow.attribute(INTENT_ATTR).is_none();   // could check if child is referenced, but that's a chunk of code
733
2.82k
  }
734
735
  /// This function does some cleanup of MathML (mostly fixing bad MathML)
736
  /// Unlike the main canonicalization routine, significant tree changes happen here
737
  /// Changes to "good" MathML:
738
  /// 1. mfenced -> mrow, a => mrow
739
  /// 2. mspace and mtext with only whitespace are canonicalized to a non-breaking space and merged in with 
740
  ///    an adjacent non-mo element unless in a required element position (need to keep for braille)
741
  /// 
742
  /// Note: mspace that is potentially part of a number that was split apart is merged into a number as a single space char
743
  /// 
744
  /// mstyle, mpadded, and mphantom, malignmark, maligngroup are removed (but children might be kept)
745
  /// 
746
  /// Significant changes are made cleaning up empty bases of scripts, looking for chemistry, merging numbers with commas,
747
  ///   "arg trig" functions, pseudo scripts, and others
748
  /// 
749
  /// Returns 'None' if the element should not be in the tree.
750
52.3k
  fn clean_mathml<'a>(&self, mathml: Element<'a>) -> Option<Element<'a>> {
751
    // Note: this works bottom-up (clean the children first, then this element)
752
3
    static IS_PRIME: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"['′″‴⁗]").unwrap());
753
754
    // Note: including intervening spaces in what is likely a symbol of omission preserves any notion of separate digits (e.g., "_ _ _")
755
3
    static IS_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[_\u{00A0}]+$").unwrap());
756
757
      
758
23.5k
    fn is_currency_symbol(ch: char) -> bool {
759
23.5k
      
matches!23.5k
(ch, '$' | '¢' | '€' | '£' | '₡' | '₤' | '₨' | '₩' | '₪' | '₱' | '₹' | '₺' | '₿')
760
23.5k
    }
761
762
20.0k
    fn contains_currency(s: &str) -> bool {
763
20.0k
      s.chars().any(is_currency_symbol)
764
20.0k
    }    
765
    
766
    // begin by cleaning up empty elements
767
    // debug!("clean_mathml\n{}", mml_to_string(mathml));
768
52.3k
    let element_name = name(mathml);
769
52.3k
    let parent_name = if element_name == "math" {
770
5.09k
      "math".to_string()
771
    } else {
772
47.2k
      let parent = get_parent(mathml);
773
47.2k
      name(parent).to_string()
774
    };
775
52.3k
    let parent_requires_child = ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(&parent_name) ||
776
44.0k
                      
matches!2.18k
(parent_name.as_ref(), "mtr" |
"mlabeledtr"42.6k
|
"mtable"42.5k
) ||
777
41.8k
                      parent_name == "mmultiscripts";
778
779
    // handle empty leaves -- leaving it empty causes problems with the speech rules
780
52.3k
    if is_leaf(mathml) && 
!32.8k
EMPTY_ELEMENTS32.8k
.contains(element_name) &&
as_text(mathml)32.3k
.
is_empty32.3k
() {
781
32
      return if parent_requires_child {
Some( CanonicalizeContext::make_empty_element(mathml) )4
} else {
None28
};
782
52.3k
    };
783
    
784
52.3k
    if mathml.children().is_empty() && 
!734
EMPTY_ELEMENTS734
.contains(element_name) {
785
158
      if element_name == "mrow" && 
mathml.attribute(INTENT_ATTR)143
.
is_none143
() {
786
        // if it is an empty mrow that doesn't need to be there, get rid of it. Otherwise, replace it with an mtext
787
142
        if parent_name == "mmultiscripts" && 
!mathml.preceding_siblings().is_empty()5
{
788
          // MathML Core dropped "none" in favor of <mrow/>, but MathCAT is written with <none/>
789
          // Do substitutions for the scripts, not the base
790
4
          set_mathml_name(mathml, "none");
791
4
          return Some(mathml);
792
138
        }
793
138
        if parent_requires_child {
794
14
          return Some( CanonicalizeContext::make_empty_element(mathml) );
795
        } else {
796
124
          return None;
797
        }
798
16
      } else {
799
16
        // create some content so that speech rules don't require special cases
800
16
        let mtext = CanonicalizeContext::create_empty_element(&mathml.document());
801
16
        mathml.append_child(mtext);
802
16
        // return Some(mathml);
803
16
      }
804
52.1k
    };
805
806
52.1k
    match element_name {
807
52.1k
      "mn" => {
808
9.08k
        let text = as_text(mathml);
809
9.08k
        let mut chars = text.chars();
810
9.08k
        let first_char = chars.next().unwrap();   // we have already made sure it is non-empty
811
9.08k
        if !text.trim().is_empty() && is_roman_number_match(text) {
812
2
          // people tend to set them in a non-italic font and software makes that 'mtext'
813
2
          CanonicalizeContext::make_roman_numeral(mathml);
814
9.08k
        } else if 
matches!9.08k
(first_char, '-' | '\u{2212}') {
815
5
          let doc = mathml.document();
816
5
          let mo = create_mathml_element(&doc, "mo");
817
5
          let mn = create_mathml_element(&doc, "mn");
818
5
          mo.set_text("-");
819
5
          mn.set_text(&text[first_char.len_utf8()..]);
820
5
          set_mathml_name(mathml, "mrow");
821
5
          mathml.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
822
5
          mathml.replace_children([mo,mn]);
823
9.08k
        } else if contains_currency(text) && let Some(
result3
) =
split_currency_symbol(mathml)3
{
824
3
          return Some(result);
825
9.07k
        }
826
9.08k
        if let Some((idx, last_char)) = text.char_indices().next_back() {
827
          // look for something like 12°
828
9.08k
          if is_pseudo_script_char(last_char) {
829
1
            let doc = mathml.document();
830
1
            let mn = create_mathml_element(&doc, "mn");
831
1
            let mo = create_mathml_element(&doc, "mo");
832
1
            mn.set_text(&text[..idx]);
833
1
            mo.set_text(last_char.to_string().as_str());
834
1
            set_mathml_name(mathml, "msup");
835
1
            mathml.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
836
1
            mathml.replace_children([mn, mo]);
837
9.08k
          }
838
0
        }
839
9.08k
        return Some(mathml);
840
      },
841
43.0k
      "ms" | 
"mglyph"43.0k
=> {
842
3
        return Some(mathml);
843
      },
844
43.0k
      "mi" => {
845
11.6k
        let text = as_text(mathml);
846
11.6k
        if !text.trim().is_empty() && is_roman_number_match(text) && 
is_roman_numeral_number_context3.32k
(
mathml3.32k
) {
847
          // people tend to set them in a non-italic font and software makes that 'mtext'
848
28
          CanonicalizeContext::make_roman_numeral(mathml);
849
28
          return Some(mathml);
850
11.5k
        }
851
11.5k
        if let Some(
dash1
) = canonicalize_dash(text) { // needs to be before OPERATORS.get due to "--"
852
1
          mathml.set_text(dash);
853
1
          return Some(mathml);
854
11.5k
        } else if text.contains('_') {
855
          // if left or right are an mo, leave as is. Otherwise convert to an mo.
856
6
          let preceding_siblings = mathml.preceding_siblings();
857
6
          let following_siblings = mathml.following_siblings();
858
6
          if preceding_siblings.is_empty() || following_siblings.is_empty() {
859
4
            return Some(mathml);
860
2
          }
861
2
          if name(as_element(preceding_siblings[preceding_siblings.len()-1])) != "mo" &&
862
2
             name(as_element(following_siblings[0])) != "mo" {
863
2
            set_mathml_name(mathml, "mo");
864
2
          
}0
865
2
          return Some(mathml);
866
11.5k
        } else if OPERATORS.get(text).is_some() {
867
118
          if  let Some(
intent_value88
) = mathml.attribute_value(INTENT_ATTR) {
868
            // if it is a unit, it might be seconds, minutes, feet, ... not an operator
869
88
            if intent_value.contains(":unit") {
870
88
              return Some(mathml);
871
0
            }
872
30
          }
873
30
          set_mathml_name(mathml, "mo");
874
875
          // For at least pandoc, ∇ is an 'mi' and it sometimes adds an invisible times -- remove them
876
30
          let op = OPERATORS.get(text).unwrap();
877
30
          let preceding_siblings = mathml.preceding_siblings();
878
30
          if (op.is_infix() || 
op17
.
is_postfix17
()) &&
879
16
             !preceding_siblings.is_empty() && 
CanonicalizeContext::is_invisible_char_element15
(
as_element15
(
preceding_siblings[0]15
)) {
880
0
            as_element(preceding_siblings[0]).remove_from_parent();
881
30
          }
882
30
          let following_siblings = mathml.following_siblings();
883
30
          if (op.is_infix() || 
op17
.
is_prefix17
()) &&
884
27
             !following_siblings.is_empty() && CanonicalizeContext::is_invisible_char_element(as_element(following_siblings[0])) {
885
0
            as_element(following_siblings[0]).remove_from_parent();
886
30
          }
887
30
          return Some(mathml);
888
11.4k
        } else if let Some(
result1
) = split_apart_pseudo_scripts(mathml) {
889
1
            return Some(result);
890
11.4k
        } else if let Some(
result0
) = merge_arc_trig(mathml) {
891
0
            return Some(result);
892
11.4k
        } else if IS_PRIME.is_match(text) {
893
0
          let new_text = merge_prime_text(text);
894
0
          mathml.set_text(&new_text);
895
0
          return Some(mathml);
896
11.4k
        } else if text == "..." {
897
1
          mathml.set_text("…");
898
1
          return Some(mathml);
899
11.4k
        } else if let Some(
result27
) = split_points(mathml) {
900
27
          return Some(result);
901
11.4k
        } else if let Some(
result11
) = merge_mi_sequence(mathml) {
902
11
          return Some(result);
903
        } else {
904
11.4k
          return Some(mathml);
905
        };
906
      },
907
31.4k
      "mtext" => {
908
        // debug!("before merge_arc_trig: {}", mml_to_string(mathml));
909
910
401
        if let Some(
result2
) = merge_arc_trig(mathml) {
911
2
          return Some(result);
912
399
        } else if let Some(
result11
) = split_points(mathml) {
913
11
          return Some(result);
914
388
        }
915
916
388
        let text = as_text(mathml);
917
388
        if !text.trim().is_empty() && 
is_roman_number_match317
(
text317
) &&
is_roman_numeral_number_context33
(
mathml33
) {
918
          // people tend to set them in a non-italic font and software makes that 'mtext'
919
4
          CanonicalizeContext::make_roman_numeral(mathml);
920
4
          return Some(mathml);
921
449
        } else if 
text.chars()384
.
all384
(|c| c.is_ascii_digit() ||
matches!332
(
c445
, '.' | ',' | ' ' | '\u{00A0}')) &&
922
58
                  
text.chars()52
.
any52
(|c| c.is_ascii_digit()){ // does it look like a number?
923
1
          mathml.set_name("mn");
924
1
          return Some(mathml);
925
383
        } else if contains_currency(text) && let Some(
result0
) =
split_currency_symbol(mathml)0
{
926
0
          return Some(result);
927
383
        }
928
        // common bug: trig functions, lim, etc., should be mi
929
383
        if ["…", "⋯", "∞"].contains(&text) ||
930
383
           crate::definitions::SPEECH_DEFINITIONS.with(|definitions| 
931
383
          if let Some(
hashset382
) = definitions.borrow().get_hashset("FunctionNames") {
932
382
            hashset.contains(text)
933
          } else {
934
1
            false
935
383
          }
936
        ) {
937
6
          set_mathml_name(mathml, "mi");
938
6
          return Some(mathml);
939
377
        }
940
941
        // allow non-breaking whitespace to stay -- needed by braille
942
377
        if IS_WHITESPACE.is_match(text) {
943
          // normalize to just a single non-breaking space
944
71
          mathml.set_attribute_value("data-width", &format!("{:.3}", white_space_em_width(text)));
945
71
          mathml.set_text("\u{00A0}");
946
71
          return Some(mathml);
947
306
        } else if let Some(
dash2
) = canonicalize_dash(text) {
948
2
          mathml.set_text(dash);
949
304
        } else if OPERATORS.get(text).is_some() {
950
11
          set_mathml_name(mathml, "mo");
951
11
          return Some(mathml);
952
293
        }
953
295
        return if parent_requires_child || 
!text.is_empty()220
{Some(mathml)} else {
None0
};
954
      },
955
31.0k
      "mo" => {
956
        // WIRIS editor puts non-breaking whitespace as standalone in 'mo'
957
11.2k
        let text = as_text(mathml);
958
11.2k
        if !text.is_empty() && IS_WHITESPACE.is_match(text) {
959
          // can't throw it out because it is needed by braille -- change to what it really is
960
78
          set_mathml_name(mathml, "mtext");
961
78
          mathml.set_attribute_value("data-width", &format!("{:.3}", white_space_em_width(text)));
962
78
          mathml.set_text("\u{00A0}");
963
78
          mathml.set_attribute_value(CHANGED_ATTR, "data-was-mo");
964
78
          return Some(mathml);
965
        } else {
966
11.1k
          match text {
967
11.1k
            "arc" | "arc " | "arc " /* non-breaking space */ => {
968
0
              if let Some(result) = merge_arc_trig(mathml) {
969
0
                return Some(result);
970
0
              }
971
            },
972
11.1k
            "..." => 
{0
mathml0
.set_text("…");}, // name might need to change -- checked below
973
11.1k
            ":" => {
974
94
              if is_ratio(mathml) {
975
8
                mathml.set_text("∶"); // ratio U+2236
976
86
              }
977
94
              return Some(mathml);
978
            },
979
11.0k
            "::" =>
{9
mathml9
.set_text("∷");},
980
11.0k
            "│" => 
{0
mathml0
.set_text("|");}, // ASCII vertical bar
981
11.0k
            "|" | 
"||"10.7k
=> if let Some(
result6
) =
merge_vertical_bars(mathml)305
{
982
6
              return Some(result);
983
            } else {
984
299
              return Some(mathml);
985
            },
986
10.7k
            _ => (),
987
          }
988
        }
989
990
        // common bug: trig functions, lim, etc., should be mi
991
        // same for ellipsis ("…")
992
10.7k
        return crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
993
10.7k
          if ["…", "⋯", "∞"].contains(&text) ||
994
10.7k
             definitions.borrow().get_hashset("FunctionNames").unwrap().contains(text) ||
995
10.6k
             definitions.borrow().get_hashset("GeometryShapes").unwrap().contains(text) {
996
83
            set_mathml_name(mathml, "mi");
997
83
            return Some(mathml);
998
10.6k
          }
999
10.6k
          if IS_PRIME.is_match(text) {
1000
66
            let new_text = merge_prime_text(text);
1001
66
            mathml.set_text(&new_text);
1002
66
            return Some(mathml);
1003
10.5k
          }
1004
10.5k
          if contains_currency(text) && let Some(
result9
) =
split_currency_symbol(mathml)9
{
1005
9
            return Some(result);
1006
10.5k
          }
1007
10.5k
          return Some(mathml);
1008
10.7k
        });
1009
        // note: chemistry test is done later as part of another phase of chemistry cleanup
1010
      },
1011
19.8k
      "mfenced" => {return 
self40
.
clean_mathml40
(
convert_mfenced_to_mrow40
(
mathml40
) )},
1012
19.8k
      "a" => {
1013
        // convert 'a' into 'mrow'
1014
2
        set_mathml_name(mathml, "mrow");
1015
2
        return self.clean_mathml(mathml);
1016
      }
1017
19.8k
      "mstyle" | 
"mpadded"19.7k
=> {
1018
        // Throw out mstyle and mpadded -- to do this, we need to avoid mstyle being the arg of clean_mathml
1019
        // FIX: should probably push the attrs down to the children (set in 'self')
1020
714
        merge_adjacent_similar_mstyles(mathml);
1021
714
        let children = mathml.children();
1022
714
        if children.is_empty() {
1023
0
          return if parent_requires_child {Some( CanonicalizeContext::make_empty_element(mathml) )} else {None};
1024
714
        } else if children.len() == 1 {
1025
678
          let is_from_mhchem = element_name == "mpadded" && 
is_from_mhchem_hack588
(
mathml588
);
1026
678
          if let Some(
new_mathml269
) = self.clean_mathml( as_element(children[0]) ) {
1027
            // "lift" the child up so all the links (e.g., siblings) are correct
1028
269
            mathml.replace_children(new_mathml.children());
1029
269
            set_mathml_name(mathml, name(new_mathml));
1030
269
            add_attrs(mathml, &new_mathml.attributes());
1031
269
            return Some(mathml);
1032
409
          } else if parent_requires_child {
1033
            // need a placeholder -- make it empty mtext
1034
31
            let empty = CanonicalizeContext::make_empty_element(mathml);
1035
31
            if is_from_mhchem {
1036
27
              empty.set_attribute_value(MHCHEM_MMULTISCRIPTS_HACK, "true");
1037
27
            
}4
1038
31
            return Some(empty);
1039
          } else {
1040
378
            return None;
1041
          }
1042
        } else {
1043
          // wrap the children in an mrow, but maintain tree siblings by changing mpadded/mstyle to mrow
1044
36
          set_mathml_name(mathml, "mrow");
1045
36
          mathml.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1046
36
          return self.clean_mathml(mathml);  // now it's an mrow so a different path next time
1047
        }
1048
      },
1049
19.0k
      "mphantom" | 
"malignmark"18.7k
|
"maligngroup"18.7k
=> {
1050
364
        return if parent_requires_child {
Some( CanonicalizeContext::make_empty_element(mathml) )0
} else {None};
1051
      },
1052
18.7k
      "mspace" => {
1053
        // need to hold onto space for braille
1054
224
        set_mathml_name(mathml, "mtext");
1055
224
        mathml.set_text("\u{00A0}");
1056
224
        mathml.set_attribute_value(CHANGED_ATTR, "was-mspace");
1057
1058
        // normalize width ems
1059
224
        let width = mathml.attribute_value("width").unwrap_or("0em");
1060
224
        let normalized_width = crate::xpath_functions::FontSizeGuess::em_from_value(width);
1061
224
        mathml.set_attribute_value("data-width", &normalized_width.to_string());
1062
224
        return Some(mathml);
1063
      },
1064
18.5k
      "semantics" => {
1065
        // The semantics tag, like the style tag, can mess with pattern matching.
1066
        // However, it may be the case that having the annotations could aid in determining intent, so we want to keep them.
1067
        // The compromise is to move the annotations into an attr named data-annotation[-xml]-<encoding-name>
1068
        // The attribute is put on presentation element root
1069
11
        let presentation = get_presentation_element(mathml).1;
1070
11
        let new_presentation = if let Some(presentation) = self.clean_mathml(presentation) {
1071
11
          presentation
1072
        } else {
1073
          // probably shouldn't happen, but just in case
1074
0
          CanonicalizeContext::create_empty_element(&mathml.document())
1075
        };
1076
11
        set_annotation_attrs(new_presentation, mathml);
1077
11
        return Some(new_presentation);
1078
      },
1079
      _  => {
1080
18.5k
        let children = mathml.children();
1081
18.5k
        if element_name == "mrow" {
1082
          // handle special cases of empty mrows and mrows which just one element
1083
6.04k
          if children.is_empty() && 
mathml.attribute(INTENT_ATTR)0
.
is_none0
() {
1084
0
            return if parent_requires_child {Some(mathml)} else {None};
1085
6.04k
          } else if children.len() == 1 && 
CanonicalizeContext::is_ok_to_merge_mrow_child2.58k
(
mathml2.58k
) {
1086
2.56k
            let is_from_mhchem = is_from_mhchem_hack(mathml);
1087
2.56k
            if let Some(
new_mathml1.95k
) = self.clean_mathml(as_element(children[0])) {
1088
              // "lift" the child up so all the links (e.g., siblings) are correct
1089
1.95k
              mathml.replace_children(new_mathml.children());
1090
1.95k
              set_mathml_name(mathml, name(new_mathml));
1091
1.95k
              add_attrs(mathml, &new_mathml.attributes());
1092
1.95k
              return Some(mathml);
1093
607
            } else if parent_requires_child {
1094
234
              let empty = CanonicalizeContext::make_empty_element(mathml);
1095
234
              if is_from_mhchem {
1096
142
                empty.set_attribute_value(MHCHEM_MMULTISCRIPTS_HACK, "true");
1097
142
              
}92
1098
234
              return Some(empty);
1099
            } else {
1100
373
              return None;
1101
            }
1102
3.48k
          }
1103
12.4k
        }
1104
1105
        // FIX: this should be setting children, not mathml
1106
15.9k
        let mathml =  if element_name == "mrow" ||
1107
12.4k
              (children.len() > 1 && 
ELEMENTS_WITH_ONE_CHILD7.31k
.
contains7.31k
(
element_name7.31k
)) {
1108
5.90k
          let merged = merge_dots(mathml);  // FIX -- switch to passing in children
1109
5.90k
          let merged = merge_primes(merged);
1110
5.90k
          let merged = merge_degrees_C_F(merged);
1111
5.90k
          let merged = merge_chars(merged, &IS_UNDERSCORE);
1112
5.90k
          handle_pseudo_scripts(merged)
1113
        } else {
1114
10.0k
          mathml
1115
        };
1116
1117
        // cleaning children can add or delete subsequent children, so we need to constantly update the children (and mathml)
1118
15.9k
        let mut children = mathml.children();
1119
15.9k
        let mut i = 0;
1120
1121
59.7k
        while i < children.len() {
1122
43.9k
          if let Some(child) = children[i].element() {
1123
43.9k
            match self.clean_mathml(child) {
1124
299
              None => {
1125
299
                mathml.remove_child(child);
1126
299
                // don't increment 'i' because there is one less child now and so everything shifted left
1127
299
              },
1128
43.6k
              Some(new_child) => {
1129
                // debug!("new_child (i={})\n{}", i, mml_to_string(new_child));
1130
43.6k
                let new_child_name = name(new_child);
1131
43.6k
                children = mathml.children();       // clean_mathml(child) may have changed following siblings
1132
43.6k
                children[i] = ChildOfElement::Element(new_child);
1133
43.6k
                mathml.replace_children(children);
1134
43.6k
                if new_child_name == "mi" || 
new_child_name == "mtext"31.9k
{
1135
12.5k
                  // can't do this above in 'match' because this changes the tree and
1136
12.5k
                  // lifting single element mrows messes with structure in a conflicting way
1137
12.5k
                  // Note: if clean_chemistry_leaf() made changes, they don't need cleaning because they will be "ok" mi's
1138
12.5k
                  clean_chemistry_leaf(as_element(mathml.children()[i]));
1139
12.5k
                } else {
1140
                  // If the attach call does something, children are inserted *before* child (i.e., into parent)
1141
                  // We return the new start at the expense of re-cleaning the script
1142
                  // This is needed because anything before the returned element will be lost
1143
31.0k
                  let start_of_change = attach_scripts_to_split_element(new_child);
1144
31.0k
                  if name(start_of_change) == "mrow" {
1145
3.43k
                    start_of_change.remove_attribute(MAYBE_CHEMISTRY);   // was lifted, and not set -- remove and it will be computed later
1146
27.6k
                  }
1147
                  // crate::canonicalize::assure_mathml(get_parent(start_of_change)).unwrap();    // FIX: find a recovery -- we're in deep trouble if this isn't true
1148
31.0k
                  if start_of_change != child {
1149
                    // debug!("clean_mathml: start_of_change != mathml -- mathml={}", mml_to_string(mathml));
1150
49
                    return self.clean_mathml(mathml);  // restart cleaning
1151
30.9k
                  }
1152
                }                   
1153
43.5k
                i += 1;
1154
              }
1155
            }
1156
43.8k
            children = mathml.children();           // 'children' moved above, so need new values
1157
0
          } else {
1158
0
            // bad mathml such as '<annotation-xml> </annotation-xml>' -- don't add to new_children
1159
0
            i += 1;
1160
0
          }
1161
        }
1162
1163
        // could have deleted children so only one child remains -- need to lift it
1164
15.8k
        if element_name == "mrow" && 
children.len() == 13.47k
&&
CanonicalizeContext::is_ok_to_merge_mrow_child122
(
mathml122
) {
1165
          // "lift" the child up so all the links (e.g., siblings) are correct
1166
108
          let child = as_element(children[0]);
1167
108
          mathml.replace_children(child.children());
1168
108
          set_mathml_name(mathml, name(child));
1169
108
          add_attrs(mathml, &child.attributes());
1170
108
          return Some(mathml);   // child has already been cleaned, so we can return
1171
15.7k
        }
1172
1173
15.7k
        if element_name == "mrow" || 
ELEMENTS_WITH_ONE_CHILD12.4k
.
contains12.4k
(
element_name12.4k
) {
1174
10.1k
          merge_number_blocks(self, mathml, &mut children);
1175
10.1k
          merge_whitespace(&mut children);
1176
10.1k
          merge_cross_or_dot_product_elements(&mut children);
1177
10.1k
          handle_convert_to_mmultiscripts(&mut children);
1178
10.1k
        } else if 
element_name == "msub"5.59k
||
element_name == "msup"4.81k
||
1179
3.48k
              element_name == "msubsup" || 
element_name == "mmultiscripts"3.25k
{
1180
2.52k
          if element_name != "mmultiscripts" {
1181
            // mhchem emits some cases that boil down to a completely empty script -- see test mhchem_beta_decay
1182
2.33k
            let mut is_empty_script = CanonicalizeContext::is_empty_element(as_element(children[0])) &&
1183
181
                              CanonicalizeContext::is_empty_element(as_element(children[1]));
1184
2.33k
            if element_name == "msubsup" && 
is_empty_script228
{
1185
51
              is_empty_script = CanonicalizeContext::is_empty_element(as_element(children[2]));
1186
2.28k
            }
1187
2.33k
            if is_empty_script {
1188
48
              if parent_requires_child {
1189
                // need a placeholder -- make it empty mtext
1190
0
                return Some( as_element(children[0]) ); // pick one of the empty elements
1191
              } else {
1192
48
                return None;
1193
              }
1194
2.29k
            }
1195
185
          }
1196
2.47k
          let mathml = if element_name == "mmultiscripts" {
clean_mmultiscripts185
(
mathml185
).
unwrap185
()} else {
mathml2.29k
};
1197
2.47k
          if !is_chemistry_off(mathml) {
1198
2.47k
            let likely_chemistry = likely_adorned_chem_formula(mathml);
1199
            // debug!("likely_chemistry={}, {}", likely_chemistry, mml_to_string(mathml));
1200
2.47k
            if likely_chemistry >= 0 {
1201
553
              mathml.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
1202
1.92k
            }
1203
0
          }
1204
1205
2.47k
          if element_name == "msubsup" {
1206
180
            return Some( clean_msubsup(mathml) );
1207
          } else {
1208
2.29k
            return Some(mathml);
1209
          }
1210
3.06k
        }
1211
1212
13.2k
        mathml.replace_children(children);
1213
        // debug!("clean_mathml: after loop\n{}", mml_to_string(mathml));
1214
13.2k
        if element_name == "mrow" || 
ELEMENTS_WITH_ONE_CHILD9.88k
.
contains9.88k
(
element_name9.88k
) {
1215
10.1k
          clean_chemistry_mrow(mathml);
1216
10.1k
        
}3.06k
1217
13.2k
        self.assure_nary_tag_has_one_child(mathml);
1218
13.2k
        if crate::xpath_functions::IsNode::is_2D(mathml) {
1219
4.48k
          CanonicalizeContext::mark_empty_content(mathml);
1220
8.77k
        }
1221
1222
13.2k
        return Some(mathml);       
1223
      }
1224
    }
1225
1226
    /// Returns substitute text if hyphen sequence should be a short or long dash
1227
11.8k
    fn canonicalize_dash(text: &str)  -> Option<&str> {
1228
11.8k
      if text == "--"  {
1229
1
        return Some("—"); // U+2014 (em dash)
1230
11.8k
      } else if text == "---" || 
text == "----"11.8k
{ // use a regexp to catch a longer sequence?
1231
2
        return Some("―"); // U+2015 (Horizontal bar)
1232
      } else {
1233
11.8k
        return None;
1234
      }
1235
11.8k
    }
1236
1237
11
    fn  set_annotation_attrs(new_presentation: Element, semantics: Element) {
1238
24
      for child in 
semantics11
.
children11
() {
1239
24
        let child = as_element(child);
1240
24
        let child_name = name(child);
1241
24
        if child == new_presentation {
1242
1
          continue;
1243
23
        }
1244
23
        let attr_name = match child.attribute_value("encoding") {
1245
23
          Some(encoding_name) => format!("data-{}-{}", child_name, encoding_name.replace('/', "_slash_")),
1246
0
          None => format!("data-{child_name}"),    // probably shouldn't happen
1247
        };
1248
23
        let attr_name = attr_name.as_str();
1249
23
        if child_name == "annotation" {
1250
12
          new_presentation.set_attribute_value(attr_name, as_text(child));
1251
12
        } else {
1252
11
          new_presentation.set_attribute_value(attr_name, &mml_to_string(child));
1253
11
        }
1254
      }
1255
1256
11
    }
1257
1258
    /// Hack to try and guess if a colon should be a ratio -- this affects parsing because of different precedences
1259
    /// It also guesses on the spacing after the colon and adds a space attr if it looks like set building or function mapping notation.
1260
    /// These conditions are really not well thought out and are just a first cut -- they do cause the braille tests to pass
1261
    /// If 'intent' is given, it must be intent='ratio'
1262
    /// 2. It must be infix and there is a proportion (∷) mo as a sibling, or
1263
    /// 3. It is the only mo and has numbers on each side
1264
    /// 
1265
    /// Need to rule out field extensions "[K:F]" and trilinear coordinates "a:b:c" (Nemeth doesn't consider these to be ratios)
1266
94
    fn is_ratio(mathml: Element) -> bool {
1267
94
      assert_eq!(name(mathml), "mo");
1268
94
      let parent = get_parent(mathml);  // must exist
1269
94
      if name(parent) != "mrow" && 
name(parent) != "math"81
{
1270
0
        return false;
1271
94
      }
1272
1273
94
      if let Some(
intent_value1
) = mathml.attribute_value(INTENT_ATTR)
1274
1
        && (intent_value != "ratio" || 
!intent_value.starts_with('_')0
) {
1275
1
          return false;
1276
93
        }
1277
1278
93
      if let Some(
value0
) = mathml.attribute_value("data-mjx-texclass")
1279
0
        && value ==  "PUNCT" {
1280
0
          mathml.remove_attribute("data-mjx-texclass");
1281
0
          mathml.set_attribute_value(SPACE_AFTER, "true");  // signal to at least Nemeth rules that this is punctuation
1282
93
        }
1283
1284
93
      let preceding = mathml.preceding_siblings();
1285
93
      let following = mathml.following_siblings();
1286
93
      if preceding.is_empty() || 
following92
.
is_empty92
() {
1287
2
        return false;
1288
91
      }
1289
91
      let preceding_child = as_element( preceding[preceding.len()-1] );
1290
91
      let following_child = as_element(following[0]);
1291
91
      if preceding.len() == 1 && 
name(preceding_child) == "mn"34
&&
1292
8
         following.len() == 1 && 
name(following_child) == "mn"2
{
1293
2
        return true;
1294
89
      }
1295
      // only want one "∷"
1296
89
      let is_before = is_proportional_before_colon(preceding.iter().rev());
1297
89
      if let Some(
is_before3
) = is_before
1298
3
        && !is_before {
1299
0
          return false;
1300
89
        }
1301
89
      let is_before = is_before.is_some();   // move this to true/false (found/not found)
1302
89
      let is_after = is_proportional_before_colon(following.iter());
1303
89
      if let Some(
is_after3
) = is_after
1304
3
        && !is_after {
1305
0
          return false;
1306
89
        }
1307
89
      let is_after = is_after.is_some();   // move this to true/false (found/not found)
1308
89
      return is_before ^ is_after;
1309
1310
178
      fn is_proportional_before_colon<'a>(siblings: impl Iterator<Item = &'a ChildOfElement<'a>>) -> Option<bool> {
1311
        // unparsed, so we look at relative priorities to make sure the proportional operator is really the next operator
1312
3
        static PROPORTIONAL_PRIORITY: LazyLock<usize> = LazyLock::new(|| OPERATORS.get("∷").unwrap().priority);
1313
461
        for sibling in 
siblings178
{
1314
461
          let child = as_element(*sibling);
1315
461
          if name(child) == "mo" {
1316
203
            let text = as_text(child);
1317
203
            match text {
1318
203
              "∷" | 
"::"198
=> return
Some(true)6
, // "::" might not be canonicalized yet
1319
197
              "∶" => return 
Some(false)0
,
1320
              _ => {
1321
197
                if let Some(
op191
) = OPERATORS.get(text)
1322
191
                  && op.priority < *PROPORTIONAL_PRIORITY {
1323
109
                    return None;   // no "∷"
1324
88
                  }
1325
              },
1326
            }
1327
258
          }
1328
        }
1329
63
        return None;
1330
178
      }
1331
94
    }
1332
1333
1334
    /// Returns true if it detects that this is likely coming from mhchem:
1335
    /// v3: msub/msup/msubsup with mpadded width=0/mphantom/mi=A)
1336
    /// v4: msub/msup/msubsup with mrow/mrow/mpadded width=0/mphantom/mi=A)
1337
    /// This should be called with 'mrow' being the outer mrow
1338
3.15k
    fn is_from_mhchem_hack(mathml: Element) -> bool {
1339
3.15k
      assert!(name(mathml) == "mrow" || 
name(mathml) == "mpadded"588
);
1340
3.15k
      assert_eq!(mathml.children().len(), 1);
1341
3.15k
      let parent = get_parent(mathml);
1342
3.15k
      let parent_name = name(parent);
1343
3.15k
      if !(parent_name == "msub" || 
parent_name == "msup"2.99k
||
parent_name == "msubsup"2.80k
) {
1344
2.56k
        return false;
1345
594
      }
1346
1347
594
      let 
mpadded315
= if name(mathml) == "mrow" {
1348
545
        let mrow = as_element(mathml.children()[0]);
1349
545
        if !(name(mrow) == "mrow" && 
mrow.children().len() == 1347
) {
1350
255
          return false;
1351
290
        }
1352
290
        let child = as_element(mrow.children()[0]);
1353
290
        if name(child) != "mpadded" {
1354
24
          return false;
1355
266
        }
1356
266
        child
1357
      } else {
1358
49
        mathml
1359
      };
1360
315
      if let Some(
width169
) = mpadded.attribute_value("width") {
1361
169
        if width != "0" {
1362
0
          return false;
1363
169
        }
1364
      } else {
1365
146
        return false;
1366
      }
1367
1368
169
      let mphantom = as_element(mpadded.children()[0]);
1369
169
      if !(name(mphantom) == "mphantom" && mphantom.children().len() == 1) {
1370
0
        return false;
1371
169
      }
1372
1373
169
      let child = as_element(mphantom.children()[0]);
1374
169
      return name(child) == "mi" && as_text(child) == "A";
1375
3.15k
    }
1376
1377
    /// 'text' is potentially one of the many Unicode whitespace chars. Estimate the width in ems
1378
149
    fn white_space_em_width(text: &str) -> f64 {
1379
149
      assert!(IS_WHITESPACE.is_match(text));
1380
149
      let mut width = 0.0;
1381
163
      for ch in 
text149
.
chars149
() {
1382
163
        width += match ch {
1383
137
          ' ' | '\u{00A0}' | '\u{1680}' | ' ' => 0.7, // space, non-breaking space, Ogham space mark, figure space
1384
0
          ' ' | ' ' => 0.5,           // en quad, en space
1385
0
          ' ' | ' ' => 1.0,           // em quad, em space
1386
0
          ' ' => 1.0/3.0,             // three per em space
1387
0
          ' ' | ' ' => 0.25,           // four per em space, punctuation space (wild guess)
1388
22
          ' ' | ' ' => 3.0/18.0,         // six per em space, thin space
1389
0
          ' ' => 1.0/18.0,           // hair space
1390
0
          ' ' => 0.3,               // narrow no-break space (half a regular space?)
1391
4
          ' ' => 4.0/18.0,           // medium math space
1392
0
          ' ' => 1.5,             // Ideographic Space
1393
0
          _ => 0.7,               // shouldn't happen
1394
        }
1395
      }
1396
149
      return width;
1397
149
    }
1398
1399
    /// Splits the leaf element into chemical elements if needed
1400
12.5k
    fn clean_chemistry_leaf(mathml: Element) -> Element {
1401
12.5k
      if !(is_chemistry_off(mathml) || mathml.attribute(MAYBE_CHEMISTRY).is_some()) {
1402
12.3k
        assert!(name(mathml)=="mi" || 
name(mathml)=="mtext"942
);
1403
        // this is a hack -- VII is more likely to be roman numeral than the molecule V I I so prevent that from happening
1404
        // FIX: come up with a less hacky way to prevent chem element misinterpretation
1405
12.3k
        let text = as_text(mathml);
1406
12.3k
        if text.len() > 2 && 
is_roman_number_match3.09k
(
text3.09k
) {
1407
0
          return mathml;
1408
12.3k
        }
1409
12.3k
        if let Some(
elements135
) = convert_leaves_to_chem_elements(mathml) {
1410
          // children are already marked as chemical elements         
1411
135
          let answer = replace_children(mathml, elements);
1412
135
          if name(answer) == "mrow" {
1413
29
            answer.remove_attribute(MAYBE_CHEMISTRY);   // was lifted, and not set -- remove and it will be computed later
1414
106
          }
1415
135
          return answer;
1416
        } else {
1417
12.1k
          let likely_chemistry = likely_chem_element(mathml);
1418
12.1k
          if likely_chemistry >= 0 {
1419
2.59k
            mathml.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
1420
9.57k
          }
1421
        };
1422
259
      }
1423
12.4k
      return mathml;
1424
12.5k
    }
1425
1426
1427
    /// looks for pairs of (letter, pseudo-script) such as x' or p'q' all inside of a single token element
1428
11.4k
    fn split_apart_pseudo_scripts<'a>(mi: Element<'a>) -> Option<Element<'a>> {
1429
2
      static IS_DEGREES_C_OR_F: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[°º][CF]").unwrap());
1430
1431
11.4k
      let text = as_text(mi);
1432
      // debug!("split_apart_pseudo_scripts: start text=\"{text}\"");
1433
11.4k
      if !text.chars().any(is_pseudo_script_char) || 
IS_DEGREES_C_OR_F14
.is_match(text) {
1434
11.4k
        return None;
1435
1
      }
1436
1437
1
      let document = mi.document();
1438
      // create pairs of text
1439
1
      let chars = text.chars();
1440
1
        let next_chars = text.chars().skip(1);
1441
1
      let result = chars.zip(next_chars).map(|(a, b)|
1442
1
            if a.is_alphabetic() && is_pseudo_script_char(b) {
1443
              // create msup
1444
1
              let base = create_mathml_element(&document, "mi");
1445
1
              base.set_text(&a.to_string());
1446
1
              let script = create_mathml_element(&document, "mo");
1447
1
              script.set_text(&b.to_string());
1448
1
              let msup = create_mathml_element(&document, "msup");
1449
1
              msup.append_child(base);
1450
1
              msup.append_child(script);
1451
1
              msup
1452
            } else {
1453
              // create an mi "ab"
1454
0
              let new_mi = create_mathml_element(&document, "mi");
1455
0
              let mut new_mi_text = String::with_capacity(6);    // likely will fit almost all cases
1456
0
              new_mi_text.push(a);
1457
0
              new_mi_text.push(b);
1458
0
              new_mi.set_text(&new_mi_text);
1459
0
              new_mi
1460
1
            } )
1461
1
        .collect::<Vec<Element>>();
1462
1
      if result.len() == 1 {
1463
1
        return Some( result[0] );
1464
      } else {
1465
0
        return Some( replace_children(mi, result) );
1466
      }
1467
11.4k
    }
1468
1469
1470
    /// If 'mathml' is a scripted element and has an mrow for a base,
1471
    ///   attach any prescripts to the first element in mrow
1472
    ///   attach any postscript to the last element in mrow
1473
    /// Return the modified element (which might now be an mrow)
1474
31.0k
    fn attach_scripts_to_split_element(mathml: Element) -> Element {
1475
31.0k
      if !IsNode::is_scripted(mathml) {
1476
28.5k
        return mathml;
1477
2.48k
      }
1478
2.48k
      let base = as_element(mathml.children()[0]);
1479
2.48k
      if name(base) != "mrow" {
1480
2.30k
        return mathml;
1481
185
      }
1482
185
      let base_children = base.children();
1483
185
      let i_last_base = base_children.len()-1;
1484
185
      let last_child = as_element(base_children[i_last_base]);
1485
185
      if last_child.attribute(SPLIT_TOKEN).is_none() {
1486
156
        return mathml;
1487
29
      }
1488
      // debug!("attach_scripts_to_split_element -- start: \n{}", mml_to_string(mathml));
1489
29
      let mut mathml_replacement = Vec::with_capacity(base_children.len());
1490
29
      if name(mathml) == "mmultiscripts" {
1491
        // pull any prescript (should be at most one prefix pair) into the first child
1492
1
        let multiscripts_children = mathml.children();
1493
1
        let n_multiscripts_children = multiscripts_children.len();
1494
1
        let potential_mprescripts_element = as_element(multiscripts_children[n_multiscripts_children-3]);
1495
1
        if name(potential_mprescripts_element) == "mprescripts" {    // we have potential chem prescripts
1496
          // create a new mmultiscripts elements with first child as its base mathml's prescripts as the new element's prescripts
1497
1
          let mut new_mmultiscripts_children = Vec::with_capacity(4);
1498
1
          new_mmultiscripts_children.push(base_children[0]);
1499
1
          base.remove_child(as_element(base_children[0]));
1500
1
          new_mmultiscripts_children.push(multiscripts_children[n_multiscripts_children-3]);
1501
1
          new_mmultiscripts_children.push(multiscripts_children[n_multiscripts_children-2]);
1502
1
          new_mmultiscripts_children.push(multiscripts_children[n_multiscripts_children-1]);
1503
1504
1
          let new_mmultiscripts = create_mathml_element(&base.document(), "mmultiscripts");
1505
1
          new_mmultiscripts.append_children(new_mmultiscripts_children);
1506
1
          let likely = likely_adorned_chem_formula(new_mmultiscripts);
1507
1
          new_mmultiscripts.set_attribute_value(MAYBE_CHEMISTRY, &likely.to_string());
1508
          // debug!("attach_scripts_to_split_element -- new_mmultiscripts: \n{}", mml_to_string(new_mmultiscripts));
1509
1
          if n_multiscripts_children == 4 {
1510
            // we stripped all the children so only the (modified) base exists
1511
            // create mrow(new_mmultiscripts, mathml[0])
1512
0
            let children = vec![new_mmultiscripts, base];
1513
0
            return replace_children(mathml, children);
1514
1
          }
1515
1
          mathml_replacement.push(new_mmultiscripts);
1516
0
        }
1517
28
      }
1518
1519
      // Add all the middle children of the base to the mrow
1520
34
      
base.children().iter()29
.
take29
(
base.children().len()-129
).
for_each29
(|&child| mathml_replacement.push(as_element(child)));
1521
1522
      // create a new script element with last child as its base
1523
29
      let mut new_mathml_children = mathml.children();
1524
29
      new_mathml_children[0] = ChildOfElement::Element(base);
1525
29
      mathml.replace_children(new_mathml_children);
1526
29
      mathml_replacement.push(mathml);
1527
      // debug!("attach_scripts_to_split_element -- after adjusting ({} replacement children): \n{}", mathml_replacement.len(), mml_to_string(mathml));
1528
29
      return replace_children(mathml, mathml_replacement);
1529
31.0k
    }
1530
1531
    /// makes sure the structure is correct and also eliminates <none/> pairs
1532
    /// MathML core changed <none/> to <mrow/>. For now (since MathCAT has lots of "none" tests), <mrow/> => <mtext> => <none/>
1533
    /// (used https://chem.libretexts.org/Courses/Saint_Francis_University/CHEM_113%3A_Human_Chemistry_I_(Muino)/13%3A_Nuclear_Chemistry12/13.04%3A_Nuclear_Decay)
1534
    ///
1535
    /// This does some dubious repairs when the structure is bad, but not sure what else to do
1536
185
    fn clean_mmultiscripts(mathml: Element) -> Option<Element> {
1537
185
      let mut mathml = mathml;
1538
185
      let children = mathml.children();
1539
185
      let n = children.len();
1540
185
      let i_mprescripts =
1541
185
        if let Some((
i108
,_)) = children.iter().enumerate()
1542
659
          .
find185
(|&(_,&el)| name(as_element(el)) == "mprescripts") {
i108
} else {
n77
};
1543
185
      let has_misplaced_mprescripts = i_mprescripts & 1 == 0;  // should be first, third, ... child
1544
185
      let mut has_proper_number_of_children = if i_mprescripts == n { 
n & 1 == 077
} else {
n & 1 != 0108
}; // should be odd else even #
1545
185
      if has_misplaced_mprescripts || !has_proper_number_of_children || 
has_none_none_script_pair0
(
&children0
) {
1546
        // need to reset the children
1547
185
        let mut new_children = Vec::with_capacity(n+2); // adjusting position of mprescripts might add two children
1548
185
        new_children.push(children[0]);
1549
        // drop none, none script pairs
1550
185
        let mut i = 1;
1551
604
        while i < n {
1552
419
          let child = as_element(children[i]);
1553
419
          let child_name = name(child);
1554
419
          if child_name == "mprescripts" {
1555
108
            if has_misplaced_mprescripts {
1556
0
              let mtext = CanonicalizeContext::create_empty_element(&mathml.document());
1557
0
              new_children.push(ChildOfElement::Element(mtext));
1558
0
              has_proper_number_of_children = !has_proper_number_of_children;
1559
108
            }
1560
108
            new_children.push(children[i]);
1561
108
            i += 1;
1562
311
          } else if i+1 < n && child_name == "none" && 
name85
(
as_element85
(children[i+1])) == "none" {
1563
2
            i += 2;   // found none, none pair
1564
309
          } else {
1565
309
            // copy pair
1566
309
            new_children.push(children[i]);
1567
309
            new_children.push(children[i+1]);
1568
309
            i += 2;
1569
309
          }
1570
        }
1571
185
        if new_children.len() <= 2 {  // base only, or base and </mprescripts>
1572
1
          mathml = as_element(new_children[0]);
1573
184
        } else {
1574
184
          mathml.replace_children(new_children);
1575
184
        }
1576
0
      }
1577
1578
185
      return Some(mathml);
1579
1580
0
      fn has_none_none_script_pair(children: &[ChildOfElement]) -> bool {
1581
0
        let mut i = 1;
1582
0
        let n = children.len();
1583
0
        while i < n {
1584
0
          let child = as_element(children[i]);
1585
0
          let child_name = name(child);
1586
0
          if child_name == "mprescripts" {
1587
0
            i += 1;
1588
0
          } else if i+1 < n && child_name == "none" && name(as_element(children[i+1])) == "none" {
1589
0
            return true;   // found none, none pair
1590
0
          } else {
1591
0
            i += 2;
1592
0
          }
1593
        }
1594
0
        return false;
1595
0
      }
1596
185
    }
1597
1598
    /// converts element if there is an empty subscript or superscript
1599
180
    fn clean_msubsup(mathml: Element) -> Element {
1600
180
      let children = mathml.children();
1601
180
      let subscript = as_element(children[1]);
1602
180
      let has_subscript = !(name(subscript) == "mtext" && 
as_text(subscript).trim()3
.
is_empty3
());
1603
180
      let superscript = as_element(children[2]);
1604
180
      let has_superscript = !(name(superscript) == "mtext" && 
as_text(superscript).trim()6
.
is_empty6
());
1605
180
      if has_subscript && 
has_superscript177
{
1606
171
        return mathml;
1607
9
      } else if has_subscript {
1608
6
        set_mathml_name(mathml, "msub");
1609
6
        let children = vec!(children[0], children[1]);
1610
6
        mathml.replace_children(children);
1611
6
        return mathml;
1612
3
      } else if has_superscript {
1613
3
        set_mathml_name(mathml, "msup");
1614
3
        let children = vec!(children[0], children[2]);
1615
3
        mathml.replace_children(children);
1616
3
        return mathml;
1617
      } else {
1618
0
        return as_element(children[0]);  // no scripts
1619
      }
1620
180
    }
1621
1622
    /// Split off the currency symbol from the rest of the text and return an mrow with the result
1623
    /// Assumes it has already checked and that we have a leaf
1624
12
    fn split_currency_symbol(leaf: Element) -> Option<Element> {
1625
12
      assert!(is_leaf(leaf));
1626
12
      let text = as_text(leaf);
1627
12
      assert!(contains_currency(text));
1628
12
      let mut iter = text.chars();
1629
12
      match (iter.next(), iter.next()) {
1630
0
        (None, _) => return None,
1631
        (Some(_), None) => {  // 1 char
1632
9
          leaf.set_name("mi");
1633
9
          return Some(leaf);       }
1634
        (Some(_), Some(_)) => { // 2 or more chars
1635
          // WARNING: don't use 'leaf' in the mrow -- that detaches it from its parent and could shrink the number of children causing problems
1636
4
          if 
text.chars()3
.
any3
(|c| c.is_ascii_digit()) { // might be a number with a currency symbol
1637
3
            leaf.set_name("mn");  // make sure we create an mn (might be one already)
1638
3
          
}0
1639
3
          let first_ch = text.char_indices().next().map(|(i, ch)| &text[i..i + ch.len_utf8()]).unwrap();
1640
3
          if is_currency_symbol(first_ch.chars().next().unwrap()) {
1641
1
            let mrow = create_mathml_element(&leaf.document(), "mrow");
1642
1
            mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1643
1
            let currency_symbol = create_mathml_element(&leaf.document(), "mi");
1644
1
            currency_symbol.set_text(first_ch);
1645
1
            mrow.append_child(currency_symbol);
1646
1
            let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1647
1
            mrow.append_child(implied_times);
1648
1
            let currency_amount = create_mathml_element(&leaf.document(), name(leaf));
1649
1
            currency_amount.set_text(&text[first_ch.len()..]);
1650
1
            mrow.append_child(currency_amount);
1651
1
            return Some(mrow);
1652
2
          }
1653
2
          let last_ch = text.char_indices().last().map(|(i, _)| &text[i..]).unwrap();
1654
2
          if is_currency_symbol(last_ch.chars().next().unwrap()) {
1655
1
            let mrow = create_mathml_element(&leaf.document(), "mrow");
1656
1
            mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1657
1
            let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1658
1
            mrow.append_child(implied_times);
1659
1
            let currency_amount = create_mathml_element(&leaf.document(), name(leaf));
1660
1
            currency_amount.set_text(&text[..text.len()-last_ch.len()]);
1661
1
            mrow.append_child(currency_amount);
1662
1
            let currency_symbol = create_mathml_element(&leaf.document(), "mi");
1663
1
            currency_symbol.set_text(last_ch);
1664
1
            mrow.append_child(currency_symbol);
1665
1
            return Some(mrow);
1666
1
          }
1667
          // try to find it in the middle
1668
2
          for (byte_idx, ch) in 
text1
.
char_indices1
() {
1669
2
            if contains_currency(&text[byte_idx .. byte_idx + ch.len_utf8()]) {
1670
              // get all the substrings
1671
1
              let first_part = &text[..byte_idx];
1672
1
              let currency_symbol = &text[byte_idx .. byte_idx + ch.len_utf8()];
1673
1
              let second_part = &text[byte_idx + ch.len_utf8() ..];
1674
1
              let mrow = create_mathml_element(&leaf.document(), "mrow");
1675
1
              mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1676
1
              let first_part_element = create_mathml_element(&leaf.document(), name(leaf));
1677
1
              first_part_element.set_text(first_part);
1678
1
              mrow.append_child(first_part_element);
1679
1
              let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1680
1
              mrow.append_child(implied_times);
1681
1
              let currency_symbol_element = create_mathml_element(&leaf.document(), "mi");
1682
1
              currency_symbol_element.set_text(currency_symbol);
1683
1
              mrow.append_child(currency_symbol_element);
1684
1
              let implied_times = create_mo(leaf.document(), "\u{2062}", ADDED_ATTR_VALUE);
1685
1
              mrow.append_child(implied_times);
1686
1
              let second_part_element = create_mathml_element(&leaf.document(), name(leaf));
1687
1
              second_part_element.set_text(second_part);
1688
1
              mrow.append_child(second_part_element);
1689
1
              return Some(mrow);
1690
1
            }
1691
          }
1692
0
          return None
1693
        }
1694
      }
1695
12
    }
1696
1697
    /// If arg is "arc" (with optional space), merge the following element in if a trig function (sibling is deleted)
1698
11.8k
    fn merge_arc_trig(leaf: Element) -> Option<Element> {
1699
11.8k
      assert!(is_leaf(leaf));
1700
11.8k
      let leaf_text = as_text(leaf);
1701
11.8k
      if !(leaf_text == "arc" || 
leaf_text == "arc "11.8k
||
leaf_text == "arc "11.8k
/* non-breaking space */ ) {
1702
11.8k
        return None;
1703
2
      }
1704
1705
2
      let following_siblings = leaf.following_siblings();
1706
2
      if following_siblings.is_empty() {
1707
0
        return None;
1708
2
      }
1709
1710
2
      let following_sibling = as_element(following_siblings[0]);
1711
2
      let following_sibling_name = name(following_sibling);
1712
2
      if !(following_sibling_name == "mi" || 
following_sibling_name == "mo"0
||
following_sibling_name == "mtext"0
) {
1713
0
        return None;
1714
2
      }
1715
1716
2
      return crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
1717
        // change "arc" "cos" to "arccos" -- we look forward because calling loop stores previous node
1718
2
        let following_text = as_text(following_sibling);
1719
2
        if definitions.borrow().get_hashset("TrigFunctionNames").unwrap().contains(following_text) {
1720
2
          let new_text = "arc".to_string() + following_text;
1721
2
          set_mathml_name(leaf, "mi");
1722
2
          leaf.set_text(&new_text);
1723
2
          following_sibling.remove_from_parent();
1724
2
          return Some(leaf);
1725
0
        }
1726
0
        return None;
1727
2
      })
1728
11.8k
    }
1729
1730
    /// Convert "||" to "‖", if in single element or in repeated 'mo's (but not "|x||y|" or "{x ||x|>0}")
1731
305
    fn merge_vertical_bars(leaf: Element) -> Option<Element> {
1732
305
      assert!(is_leaf(leaf));
1733
305
      let leaf_text = as_text(leaf);
1734
305
      if leaf_text == "||" {
1735
4
        leaf.set_text("‖");    // U+2016
1736
4
        return Some(leaf);
1737
301
      } else if leaf_text != "|" {
1738
0
        return None;
1739
301
      }
1740
301
      let following_siblings = leaf.following_siblings();
1741
301
      if following_siblings.is_empty() {
1742
96
        return None;
1743
205
      }
1744
1745
205
      let following_sibling = as_element(following_siblings[0]);
1746
205
      if name(following_sibling) != "mo" || 
as_text(following_sibling) != "|"18
{
1747
201
        return None
1748
4
      }
1749
1750
      // have "||" -- if there a single "|" on left, rule out merge
1751
4
      let preceding_siblings = leaf.preceding_siblings();
1752
5
      if 
preceding_siblings.iter()4
.
any4
(|&child| {
1753
5
        let child = as_element(child);
1754
5
        return name(child) == "mo" && 
as_text(child) == "|"3
;
1755
5
      }) {
1756
1
        return None;   // found "|" on left
1757
3
      }
1758
1759
3
      if following_siblings.len() > 1 {
1760
2
        let following_siblings = &following_siblings[1..];
1761
        // if there are an odd number of "|"s to the right, rule out the merge
1762
8
        if !(
following_siblings2
.
iter2
().
filter2
(|&&child| {
1763
8
          let child = as_element(child);
1764
8
          return name(child) == "mo" && 
as_text(child) == "|"5
;
1765
8
        }).
count2
()).
is_multiple_of2
(2) {
1766
1
          return None;
1767
1
        }
1768
1
      }
1769
1770
      // didn't find any
1771
2
      leaf.set_text("‖");    // U+2016
1772
2
      following_sibling.remove_from_parent();
1773
2
      return Some(leaf);
1774
305
    }
1775
1776
    /// merge a following mstyle that has the same attrs
1777
714
    fn merge_adjacent_similar_mstyles(mathml: Element) {
1778
714
      if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(name(get_parent(mathml))) {
1779
        // FIX: look to see if all of the children (might be more than just the adjacent one) have the same attr and then pull them up to the parent
1780
65
        return;   // can't remove subsequent child 
1781
649
      }
1782
649
      let following_siblings = mathml.following_siblings();
1783
649
      if following_siblings.is_empty() {
1784
579
        return;
1785
70
      }
1786
70
      let following_element = as_element(following_siblings[0]);
1787
70
      if name(following_element) != "mstyle" {
1788
66
        return;
1789
4
      }
1790
4
      let are_same = mathml.attributes().iter()
1791
4
              .zip( following_element.attributes() )
1792
5
              .
all4
(|(first, second)| first.name()==second.name() && first.value()==second.value());
1793
4
      if are_same {
1794
4
        mathml.append_children(following_element.children());
1795
4
        following_element.remove_from_parent();
1796
4
      
}0
1797
714
    }
1798
1799
40
    fn convert_mfenced_to_mrow(mfenced: Element) -> Element {
1800
      // The '<'/'>' replacements are because WIRIS uses them out instead of the correct chars in its template
1801
40
      let open = mfenced.attribute_value("open").unwrap_or("(").replace('<', "⟨");
1802
40
      let close = mfenced.attribute_value("close").unwrap_or(")").replace('>', "⟩");
1803
      // debug!("open={}, close={}", open, close);
1804
40
      let mut separators= mfenced.attribute_value("separators").unwrap_or(",").chars();
1805
40
      set_mathml_name(mfenced, "mrow");
1806
40
      mfenced.remove_attribute("open");
1807
40
      mfenced.remove_attribute("close");
1808
40
      mfenced.remove_attribute("separators");
1809
40
      let children = mfenced.children();
1810
40
      let mut new_children = Vec::with_capacity(2*children.len() + 1);
1811
40
      if !open.is_empty() {
1812
40
        new_children.push(ChildOfElement::Element( create_mo(mfenced.document(), &open, MFENCED_ATTR_VALUE)) );
1813
40
      
}0
1814
40
      if !children.is_empty() {
1815
40
        new_children.push(children[0]);
1816
40
        for 
child3
in &children[1..] {
1817
3
          let sep = separators.next().unwrap_or(',').to_string();
1818
3
          new_children.push( ChildOfElement::Element( create_mo(mfenced.document(), &sep, MFENCED_ATTR_VALUE)) );
1819
3
          new_children.push(*child);
1820
3
        }
1821
0
      }
1822
40
      if !close.is_empty() {
1823
38
        new_children.push(ChildOfElement::Element( create_mo(mfenced.document(), &close, MFENCED_ATTR_VALUE)) );
1824
38
      
}2
1825
40
      mfenced.replace_children(new_children);
1826
40
      return mfenced;
1827
40
    }
1828
1829
30.4k
    fn is_roman_number_match(text: &str) -> bool {
1830
30.4k
      return UPPER_ROMAN_NUMERAL.is_match(text) || 
LOWER_ROMAN_NUMERAL29.6k
.is_match(text);
1831
30.4k
    }
1832
1833
    /// Return true if 'element' (which is syntactically a roman numeral) is only inside mrows and
1834
    ///  if its length is < 3 chars, then there is another roman numeral near it (separated by an operator).
1835
    /// We want to rule out something like 'm' or 'cm' being a roman numeral.
1836
    /// Note: this function assumes 'mathml' is a Roman Numeral, and optimizes operations based on that.
1837
    /// Note: Nemeth has some rules about roman numerals (capitalization and punctuation after)
1838
3.35k
    fn is_roman_numeral_number_context(mathml: Element) -> bool {
1839
3.35k
      assert!(name(mathml)=="mtext" || 
name(mathml)=="mi"3.32k
);
1840
3.35k
      let mut parent = mathml;
1841
      loop {
1842
5.41k
        parent = get_parent(parent);
1843
5.41k
        let current_name = name(parent);
1844
5.41k
        if current_name == "math" {
1845
1.57k
          break;
1846
3.84k
        } else if current_name == "msup" || 
current_name == "mmultiscripts"3.42k
{
1847
          // could be a oxidation state in a Chemical formula
1848
559
          let children = parent.children();
1849
          // make sure that there is only one script and that 'mathml' is a superscript
1850
559
          if current_name == "mmultiscripts" && (
children.len() > 3139
||
!mathml.following_siblings().is_empty()27
) {
1851
122
            return false;
1852
437
          }
1853
437
          let base = as_element(children[0]);
1854
437
          if is_chemical_element(base) {
1855
21
            break;
1856
          } else {
1857
416
            return false;
1858
          }
1859
3.28k
        } else if current_name != "mrow" {
1860
1.22k
          return false;
1861
2.06k
        }
1862
      }
1863
1864
1.59k
      let text = as_text(mathml).as_bytes(); // note: we know it is all ASCII chars
1865
      // if roman numeral is in superscript and we get here, then it had a chemical element base, so we accept it
1866
      // note: you never has a state = I; if two letters, it must be 'II'.
1867
1.59k
      if text.len() > 2  || 
1868
1.57k
         ((name(parent) =="msup" || 
name(parent) == "mmultiscripts"1.57k
) &&
text.len()==212
&&
text==[b'I',b'I']8
) {
1869
28
        return true;
1870
      } else {
1871
1.56k
        let is_upper_case = text[0].is_ascii_uppercase(); // safe since we know it is a roman numeral
1872
1.56k
        let preceding = mathml.preceding_siblings();
1873
1.56k
        let following = mathml.following_siblings();
1874
1.56k
        if preceding.is_empty() && 
following356
.
is_empty356
() {
1875
81
          return false;   // no context and too short to confirm it is a roman numeral
1876
1.48k
        }
1877
1.48k
        if preceding.is_empty() {
1878
275
          return is_roman_numeral_adjacent(following.iter(), is_upper_case);
1879
1.21k
        }
1880
1.21k
        if following.is_empty() {
1881
399
          return is_roman_numeral_adjacent(preceding.iter().rev(), is_upper_case);
1882
813
        }
1883
813
        return is_roman_numeral_adjacent(preceding.iter().rev(), is_upper_case) &&
1884
3
             is_roman_numeral_adjacent(following.iter(), is_upper_case);
1885
      }
1886
1887
      /// make sure all the non-mo leaf siblings are roman numerals
1888
      /// 'mo' should only be '+', '-', '=', ',', '.'  -- unlikely someone is doing anything sophisticated
1889
1.49k
      fn is_roman_numeral_adjacent<'a, I>(siblings: I, must_be_upper_case: bool) -> bool
1890
1.49k
          where I: Iterator<Item = &'a ChildOfElement<'a>> {    
1891
        static ROMAN_NUMERAL_OPERATORS: phf::Set<&str> = phf_set! {
1892
          "+", "-'", "=", "<", "≤", ">", "≥", 
1893
          // ",", ".",   // [c,d] triggers this if "," is present, so omitting it
1894
        };
1895
1.49k
        let mut found_match = false;       // guard against no siblings
1896
1.49k
        let mut last_was_roman_numeral = true; // started at roman numeral
1897
        // debug!("start is_roman_numeral_adjacent");
1898
1.74k
        for child in 
siblings1.49k
{
1899
1.74k
          let maybe_roman_numeral = as_element(*child);
1900
          // debug!("maybe_roman_numeral: {}", mml_to_string(maybe_roman_numeral));
1901
1.74k
          match name(maybe_roman_numeral) {
1902
1.74k
            "mo" => {
1903
858
              if !last_was_roman_numeral {
1904
18
                return false;
1905
840
              }
1906
840
              let text = as_text(maybe_roman_numeral);
1907
840
              if !ROMAN_NUMERAL_OPERATORS.contains(text) {
1908
660
                return false;
1909
180
              }
1910
180
              last_was_roman_numeral = false;
1911
            },
1912
889
            "mi" | 
"mn"585
=> {
1913
562
              if last_was_roman_numeral {
1914
429
                return false;   // no implicit multiplication (or whatever)
1915
133
              }
1916
133
              let text = as_text(maybe_roman_numeral);
1917
133
              if !(( must_be_upper_case && 
UPPER_ROMAN_NUMERAL18
.is_match(text)) ||
1918
117
                 (!must_be_upper_case && 
LOWER_ROMAN_NUMERAL115
.is_match(text)) ) {
1919
109
                return false;
1920
24
              };
1921
24
              found_match = true;
1922
24
              last_was_roman_numeral = true;
1923
            },
1924
327
            "mtext" | 
"mspace"252
|
"mphantom"252
=>
{}75
,
1925
            _ => {
1926
252
              return false;
1927
            }
1928
          }
1929
        }
1930
22
        return found_match;
1931
1.49k
      }
1932
3.35k
    }
1933
1934
    /// Merge adjacent mtext by increasing the width of the first mtext
1935
    /// The resulting merged whitespace is put on the previous child, or if there is one, on the following child
1936
    /// 
1937
    /// Note: this should be called *after* the mo/mtext cleanup (i.e., after the MathML child cleanup loop).
1938
10.1k
    fn merge_whitespace(children: &mut Vec<ChildOfElement>) {
1939
10.1k
      if children.is_empty() {
1940
3
        return;
1941
10.1k
      }
1942
1943
10.1k
      let mut i = 0;
1944
10.1k
      let mut previous_mtext_with_width: Option<Element<'_>> = None;  // prefer to spacing on previous mtext
1945
10.1k
      let mut whitespace: Option<f64> = None;
1946
42.0k
      while i < children.len() {
1947
31.8k
        let child = as_element(children[i]);
1948
31.8k
        let is_child_whitespace = name(child) == "mtext" && 
as_text(child) == "\u{00A0}"555
;
1949
        // debug!("merge_whitespace: i={}, whitespace={:?}, mtext set={} {}",
1950
        //    i, whitespace, previous_mtext_with_width.is_some(), mml_to_string(child));
1951
31.8k
        if is_child_whitespace {
1952
          // update the running total of whitespace
1953
340
          let child_width = child.attribute_value("data-width").unwrap_or("0")
1954
340
                                          .parse::<f64>().unwrap_or(0.0) ;
1955
340
          whitespace = match whitespace {
1956
327
            None => Some(child_width),
1957
13
            Some(w) => Some(w + child_width),
1958
          };
1959
340
          if children.len() == 1 {
1960
15
            i += 1;             // don't remove only child
1961
325
          } else {
1962
325
            children.remove(i);   // remove the current child (don't inc 'i')
1963
325
          }
1964
31.5k
        } else if let Some(
ws305
) = whitespace {
1965
          // done with sequence of whitespaces
1966
305
          if let Some(
prev_mtext13
) = previous_mtext_with_width {
1967
13
            // prefer to set on previous mtext
1968
13
            prev_mtext.set_attribute_value("data-following-space-width", (ws).to_string().as_str());
1969
13
            previous_mtext_with_width = None;
1970
13
          } else {
1971
            // if the space is significant, set it on the current child
1972
292
            child.set_attribute_value("data-previous-space-width", ws.to_string().as_str());
1973
292
            if name(child) == "mtext" {
1974
18
              previous_mtext_with_width = Some(child);
1975
274
            }
1976
          }
1977
305
          whitespace = None;
1978
305
          i += 1;
1979
31.2k
        } else {
1980
31.2k
          i += 1;
1981
31.2k
          previous_mtext_with_width = None;
1982
31.2k
        }
1983
      }
1984
      // debug!("  after loop: whitespace={:?}, {}", whitespace, mml_to_string(as_element(children[children.len()-1])));
1985
10.1k
      if let Some(
mut ws22
) = whitespace {
1986
        // last child in mrow is white space -- mark with space *after*
1987
22
        if children.len() == 1 {
1988
          // only child -- check to see if we need to set the space-width
1989
21
          let child = as_element(children[0]);
1990
21
          let child_width = child.attribute_value("data-width").unwrap_or("0").parse::<f64>().unwrap_or(0.0);
1991
21
          if (child_width - ws).abs() > 0.001 {
1992
9
            ws += child_width;
1993
9
            child.set_attribute_value("data-following-space-width", ws.to_string().as_str());
1994
12
          }
1995
1
        } else {
1996
1
          let non_space_child = as_element(children[children.len()-1]);
1997
1
          non_space_child.set_attribute_value("data-following-space-width", ws.to_string().as_str());
1998
1
        }
1999
10.1k
      }
2000
10.1k
    }
2001
2002
    /// look for potential numbers by looking for sequences with commas, spaces, and decimal points
2003
10.1k
    fn merge_number_blocks(context: &CanonicalizeContext, parent_mrow: Element, children: &mut Vec<ChildOfElement>) {
2004
      // debug!("parent:\n{}", mml_to_string(parent_mrow));
2005
      // If we find a comma that is not part of a number, don't form a number
2006
      //   (see https://github.com/NSoiffer/MathCAT/issues/271)
2007
      // Unfortunately, we can't do this in the loop below because we might discover the "not part of a number" after a number has been formed
2008
10.1k
      let do_not_merge_comma = is_comma_not_part_of_a_number(children);
2009
10.1k
      let mut i = 0;
2010
38.2k
      while i < children.len() {    // length might change after a merge
2011
        // {
2012
        //  debug!("merge_number_blocks: top of loop");
2013
        //  for (i_child, &child) in children[i..].iter().enumerate() {
2014
        //    let child = as_element(child);
2015
        //    debug!("child #{}: {}", i+i_child, mml_to_string(child));
2016
        //  }
2017
        // }
2018
28.0k
        let child = as_element(children[i]);
2019
28.0k
        let child_name = name(child);
2020
2021
        // numbers start with an mn or a decimal separator
2022
28.0k
        if child_name == "mn" || 
child_name=="mtext"22.4k
{
2023
6.09k
          let leaf_child_text = as_text(child);
2024
          // if Roman numeral, don't merge (move on)
2025
          // or if the 'mn' has ',', '.', or space, consider it correctly parsed and move on
2026
6.09k
          if is_roman_number_match(leaf_child_text) ||
2027
5.75k
            context.patterns.block_separator.is_match(leaf_child_text) ||
2028
5.64k
            (leaf_child_text.len() > 1 && 
context.patterns.decimal_separator710
.
is_match710
(
leaf_child_text710
)) {
2029
559
            i += 1;
2030
559
            continue;
2031
5.53k
          }
2032
21.9k
        } else if child_name != "mo" ||
2033
9.20k
              (do_not_merge_comma && 
as_text(child) == ","3.08k
) ||
2034
6.44k
              !context.patterns.decimal_separator.is_match(as_text(child)) {
2035
21.9k
          i += 1;
2036
21.9k
          continue;
2037
31
        }
2038
          
2039
        // potential start of a number
2040
5.56k
        let mut end = i + 1;
2041
5.56k
        let mut has_decimal_separator = false;
2042
5.56k
        let mut not_a_number = false;
2043
5.56k
        if i < children.len() {
2044
          // look at the right siblings and pull in the longest sequence of number/separators -- then check it for validity
2045
5.56k
          for 
sibling4.00k
in children[i+1..].iter() {
2046
4.00k
            let sibling = as_element(*sibling);
2047
4.00k
            let sibling_name = name(sibling);
2048
4.00k
            if sibling_name == "mn" {
2049
245
              let leaf_text = as_text(sibling);
2050
245
              let is_block_separator = context.patterns.block_separator.is_match(leaf_text);
2051
245
              let is_decimal_separator = context.patterns.decimal_separator.is_match(leaf_text);
2052
245
              if is_roman_number_match(leaf_text) || is_block_separator || is_decimal_separator {
2053
                // consider this mn correctly parsed
2054
1
                break;
2055
244
              }
2056
3.75k
            } else if sibling_name=="mo" || 
sibling_name=="mtext"2.59k
{
2057
1.33k
              let leaf_text = as_text(sibling);
2058
1.33k
              let is_block_separator = context.patterns.block_separator.is_match(leaf_text);
2059
1.33k
              let is_decimal_separator = context.patterns.decimal_separator.is_match(leaf_text);
2060
1.33k
              if (leaf_text == "," && 
do_not_merge_comma315
) ||
2061
1.14k
                 !(is_block_separator || 
is_decimal_separator954
) ||
2062
261
                 (is_decimal_separator && 
has_decimal_separator75
) {
2063
                // not a separator or (it is decimal separator and we've already seen a decimal separator)
2064
1.09k
                not_a_number = is_decimal_separator && 
has_decimal_separator127
; // e.g., 1.2.3 or 1,2,3
2065
1.09k
                break;
2066
244
              }
2067
244
              has_decimal_separator |= is_decimal_separator;
2068
            } else {
2069
              // not mn, mo, or mtext -- end of a number
2070
2.41k
              break;
2071
            }
2072
488
            end += 1;     // increment at end so we can tell the difference between a 'break' and end of loop
2073
          }
2074
0
        }
2075
5.56k
        if not_a_number {
2076
17
          i = end + 1;
2077
17
          continue; // continue looking in the rest of the mrow
2078
5.55k
        }
2079
5.55k
        if ignore_final_punctuation(context, parent_mrow, &children[i..end]) {
2080
18
          end -= 1;
2081
5.53k
        };
2082
        // debug!("start={}, end={}", i, end);
2083
        // no need to merge if only one child (also avoids "." being considered a number)
2084
5.55k
        if end > i + 1 && 
is_likely_a_number275
(
context275
,
parent_mrow275
,
&275
children275
[i..end]) {
2085
107
          (i, end) = trim_whitespace(children, i, end);
2086
107
          merge_block(children, i, end);
2087
107
          // note: start..end has been collapsed, so restart after the collapsed part
2088
5.44k
        } else {
2089
5.44k
          i = end;  // start looking at the end of the block we just rejected
2090
5.44k
        }
2091
5.55k
        i += 1;
2092
      }
2093
10.1k
    }
2094
2095
    /// Return true if we find a comma that doesn't have an <mn> on both sides
2096
10.1k
    fn is_comma_not_part_of_a_number(children: &[ChildOfElement])-> bool {
2097
10.1k
      let n_children = children.len();
2098
10.1k
      if n_children == 0 {
2099
3
        return false;
2100
10.1k
      }
2101
10.1k
      let mut previous_child = as_element(children[0]);
2102
14.5k
      for i in 
1..n_children10.1k
{
2103
14.5k
        let child = as_element(children[i]);
2104
14.5k
        if name(child) == "mo" && 
as_text(child) == ","6.27k
&&
i+1 < n_children980
&&
2105
972
           (name(previous_child) != "mn" || 
name208
(as_element(children[i+1])) != "mn") {
2106
809
          return true;
2107
13.7k
        }
2108
13.7k
        previous_child = child;
2109
      }
2110
9.37k
      return false;
2111
10.1k
    }
2112
2113
    /// If we have something like 'shape' ABC, we split the ABC and add IMPLIED_SEPARATOR_HIGH_PRIORITY between them
2114
    /// under some specific conditions (trying to be a little cautious).
2115
    /// The returned (mrow) element reuses the arg so tree siblings links remain correct.
2116
11.8k
    fn split_points(leaf: Element) -> Option<Element> {
2117
3
      static IS_UPPERCASE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[A-Z]+$").unwrap());
2118
2119
11.8k
      if !IS_UPPERCASE.is_match(as_text(leaf)) {
2120
9.88k
        return None;
2121
1.96k
      }
2122
2123
      // check to see if there is a bar, arrow, etc over the letters (line-segment, arc, ...)
2124
1.96k
      let parent = get_parent(leaf);
2125
1.96k
      if name(parent) == "mover" {
2126
        // look for likely overscripts (basically just rule out some definite 'no's)
2127
29
        let over = as_element(parent.children()[1]);
2128
29
        if is_leaf(over) {
2129
29
          let mut over_chars = as_text(over).chars();
2130
29
          let first_char = over_chars.next();
2131
29
          if first_char.is_some() && over_chars.next().is_none() && !first_char.unwrap().is_alphanumeric(){
2132
            // only one char and it isn't alphanumeric
2133
29
            return Some( split_element(leaf) );
2134
0
          }
2135
0
        }
2136
1.93k
      }
2137
  
2138
      // check to see if it is preceded by a geometric shape (e.g, ∠ABC)
2139
1.93k
      let preceding_siblings = leaf.preceding_siblings();
2140
1.93k
      if !preceding_siblings.is_empty() {
2141
1.11k
        let preceding_sibling = as_element(preceding_siblings[preceding_siblings.len()-1]);
2142
1.11k
        let preceding_sibling_name = name(preceding_sibling);
2143
1.11k
        if preceding_sibling_name == "mi" || 
preceding_sibling_name == "mo"886
||
preceding_sibling_name == "mtext"439
{
2144
711
          let preceding_text = as_text(preceding_sibling);
2145
711
          return crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
2146
711
            let defs = definitions.borrow();
2147
711
            let prefix_ops = defs.get_hashset("GeometryPrefixOperators").unwrap();
2148
711
            let shapes = defs.get_hashset("GeometryShapes").unwrap();
2149
711
            if prefix_ops.contains(preceding_text) || 
shapes708
.contains(preceding_text) {
2150
              // split leaf
2151
9
              return Some( split_element(leaf) ); // always treated as function names
2152
            } else {
2153
702
              return None;
2154
            }
2155
711
          })
2156
407
        }
2157
817
      }
2158
1.22k
      return None;
2159
2160
38
      fn split_element(leaf: Element) -> Element {
2161
38
        let mut children = Vec::with_capacity(leaf.children().len());
2162
51
        for ch in 
as_text(leaf)38
.
chars38
() {
2163
51
          let new_leaf = create_mathml_element(&leaf.document(), "mi");
2164
51
          new_leaf.set_text(&ch.to_string());
2165
51
          children.push(new_leaf);
2166
51
        }
2167
38
        set_mathml_name(leaf, "mrow");
2168
38
        leaf.replace_children(children);
2169
38
        return leaf;
2170
38
      }
2171
11.8k
    }
2172
2173
    /// If we have something like 'V e l o c i t y', merge that into a single <mi>
2174
    /// We only do this for sequences of at least three chars, and also exclude things like consecutive letter (e.g., 'x y z')
2175
    /// The returned (mi) element reuses 'mi'
2176
11.4k
    fn merge_mi_sequence(mi: Element) -> Option<Element> {
2177
      // The best solution would be to use a dictionary of words, or maybe restricted to words in a formula,
2178
      //   but that would likely miss the words used in slope=run/rise.
2179
      // It would also be really expensive since we would need a dictionary for each language.
2180
      // We shouldn't need to worry about trig names like "cos", but people sometimes forget to use "\cos"
2181
      // Hence, we check against the "FunctionNames" that get read on startup.
2182
70
      fn is_vowel(ch: char) -> bool {
2183
70
        
matches!58
(ch,
2184
          'a' | 'e' | 'i' | 'o' | 'u' | 'y' |
2185
          'à' | 'á' | 'â' | 'ã' | 'ä' | 'è' | 'é' | 'ê' | 'ë' | 'ì' | 'í' | 'î' | 'ï' |
2186
          'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ú' | 'Ù' | 'û' | 'ü' | 'ý' | 'ÿ' |
2187
          'ả' | 'ạ' | 'ă' | 'ằ' | 'ẳ' | 'ẵ' | 'ắ' | 'ặ' | 'ầ' | 'ẩ' | 'ẫ' | 'ấ' | 'ậ' | 'ẻ' | 'ẽ' | 'ẹ' | 'ề' | 'ể' | 'ễ' | 'ế' | 'ệ' |
2188
          'ỉ' | 'ĩ' | 'ị' | 'ỏ' | 'ọ' | 'ồ' | 'ổ' | 'ỗ' | 'ố' | 'ộ' | 'ơ' | 'ờ' | 'ở' | 'ỡ' | 'ớ' | 'ợ' |
2189
          'ủ' | 'ũ' | 'ụ' | 'ư' | 'ừ' | 'ử' | 'ữ' | 'ứ' | 'ự' | 'ỳ' | 'ỷ' | 'ỹ' | 'ỵ'
2190
        )
2191
70
      }
2192
11.4k
      let parent = get_parent(mi);  // not canonicalized into mrows, so parent could be "math"
2193
11.4k
      let parent_name = name(parent);
2194
      // don't merge if more than one char, or if not in an mrow (or implied on since we haven't normalized yet)
2195
11.4k
      if as_text(mi).chars().nth(1).is_some() || !(
parent_name == "mrow"8.87k
||
parent_name == "math"5.62k
) {
2196
5.16k
        return None;
2197
6.25k
      }
2198
6.25k
      let mut text =  as_text(mi).to_string();
2199
6.25k
      let text_script = Script::from(text.chars().next().unwrap_or('a'));
2200
6.25k
      let following_siblings = mi.following_siblings();
2201
6.25k
      let mut last_char_is_scripted = None;
2202
6.25k
      let mut following_mi_siblings: Vec<Element> = following_siblings.iter()
2203
6.25k
            .map_while(|&child| 
{4.15k
2204
4.15k
              let mut child = as_element(child);
2205
4.15k
              let mut is_ok = false;
2206
4.15k
              if name(child) == "msub" || 
name(child) == "msup"4.02k
{
2207
                // check if the *last* char in the sequence is scripted
2208
                // if so, we need to stop here anyway and deal with it specially
2209
163
                last_char_is_scripted = Some(child);   // need to remember the value -- cleared later if not ok
2210
163
                child = as_element(child.children()[0]);
2211
233
                while name(child) == "mrow" && 
child.children().len() == 171
{
2212
70
                  // the base may be wrapped with mrows
2213
70
                  child = as_element(child.children()[0]);
2214
70
                }
2215
3.99k
              }
2216
4.15k
              if name(child) == "mi" {
2217
402
                let mut child_text = as_text(child).chars();
2218
402
                let first_char = child_text.next().unwrap_or('a');
2219
402
                if child_text.next().is_none() && 
Script::from(first_char) == text_script376
{
2220
365
                  text.push(first_char);
2221
365
                  is_ok = true;
2222
365
                
}37
2223
3.75k
              }
2224
4.15k
              if last_char_is_scripted.is_some() {
2225
163
                if is_ok {
2226
114
                  is_ok = false;    // don't want to continue
2227
114
                } else {
2228
49
                  last_char_is_scripted = None; // reset to None
2229
49
                }
2230
3.99k
              }
2231
4.15k
              if is_ok {
Some(child)251
} else {
None3.90k
}
2232
4.15k
            })
2233
6.25k
            .collect();
2234
6.25k
      if following_mi_siblings.is_empty() {
2235
6.03k
        return None;
2236
224
      }
2237
    
2238
224
      if let Some(
last14
) = last_char_is_scripted {
2239
14
        // add the last char to the run
2240
14
        following_mi_siblings.push(last);
2241
210
      }
2242
      // debug!("merge_mi_sequence: text={}", &text);
2243
224
      if let Some(
answer11
) = crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
2244
224
        let definitions = definitions.borrow();
2245
224
        let function_names = definitions.get_hashset("FunctionNames").unwrap();
2246
        // UEB seems to think "Sin" (etc) is used for "sin", so we move to lower case
2247
        // function name might be (wrongly) set to italic math alphanumeric chars, including bold italic
2248
224
        if let Some(
ascii_text221
) = CanonicalizeContext::math_alphanumeric_to_ascii(&text)
2249
221
          && function_names.contains(&ascii_text.to_lowercase()) {
2250
10
            return Some(merge_from_text(mi, &ascii_text, &following_mi_siblings));
2251
214
          }
2252
214
        if function_names.contains(&text) {
2253
0
          return Some(merge_from_text(mi, &text, &following_mi_siblings));
2254
214
        }
2255
        // unlike "FunctionNames", "KnownWords" might not exist
2256
214
        if let Some(
word_map131
) = definitions.get_hashset("KnownWords")
2257
131
          && word_map.contains(&text) {
2258
1
            return Some(merge_from_text(mi, &text, &following_mi_siblings));
2259
213
          }
2260
213
        return None;
2261
224
      }) {
2262
11
        return answer;
2263
213
      }
2264
2265
      // don't be too aggressive combining mi's when they are short
2266
213
      if text.chars().count() < 3 {
2267
186
        return None;
2268
27
      }
2269
      // If it is a word, it needs a vowel and it must be a letter
2270
      // FIX: this check needs to be internationalized to include accented vowels, other alphabets
2271
70
      if !
text.chars()27
.
any27
(|ch| is_vowel(ch) ||
!ch.is_ascii_alphabetic()58
) {
2272
15
        return None;
2273
12
      }
2274
    
2275
      // now for some heuristics to rule out a sequence of variables
2276
      // rule out sequences like 'abc' and also 'axy' that are in alphabetical order
2277
12
      let mut chars = text.chars();
2278
12
      let mut left = chars.next().unwrap();   // at least 3 chars
2279
12
      let mut is_in_alphabetical_order = true;
2280
23
      for ch in 
chars12
{
2281
23
        if (left as u32) >= (ch as u32) {
2282
3
          is_in_alphabetical_order = false;
2283
3
          break;                 // can't be 'abc', 'axy', etc
2284
20
        }
2285
20
        left = ch;
2286
      }
2287
12
      if is_in_alphabetical_order || 
text.len() < 43
{
2288
        // If it is in alphabetical order, it's not likely a word
2289
12
        return None;
2290
0
      }
2291
2292
      // FIX: should add more heuristics to rule out words
2293
0
      return merge_from_text(mi, &text, &following_mi_siblings);
2294
2295
11
      fn merge_from_text<'a>(mi: Element<'a>, text: &str, following_siblings: &[Element<'a>]) -> Option<Element<'a>> {
2296
        // remove trailing mi's
2297
11
        let i_last_child = following_siblings.len()-1;
2298
11
        let last_child = following_siblings[i_last_child];
2299
11
        if name(last_child) == "mi" {
2300
10
          
following_siblings5
.
iter5
().
for_each5
(|sibling| sibling.remove_from_parent());
2301
5
          mi.set_text(text);
2302
5
          return Some(mi);
2303
        } else {
2304
          // replace the base of the scripted element (the last child) with the run (e.g. 's i n^2' -> {sin}^2)
2305
6
          mi.remove_from_parent();
2306
6
          following_siblings[..i_last_child].iter().for_each(|sibling| sibling.remove_from_parent());
2307
6
          let mut base = as_element(last_child.children()[0]);
2308
9
          while name(base) == "mrow" && 
base.children().len() == 13
{
2309
3
            // the base may be wrapped with mrows
2310
3
            base = as_element(base.children()[0]);
2311
3
            base.remove_attribute(SPLIT_TOKEN);
2312
3
          }
2313
6
          base.set_text(text);
2314
6
          return Some(last_child);
2315
        }
2316
11
      }
2317
11.4k
    }
2318
2319
    // Check if start..end is a number
2320
275
    fn is_likely_a_number(context: &CanonicalizeContext, mrow: Element, children: &[ChildOfElement]) -> bool {
2321
      // Note: the children of math_or_mrow aren't valid ('children' represents the current state)
2322
275
      let end = children.len();
2323
      // {
2324
      //  let n_preceding_siblings = as_element(children[0]).preceding_siblings().len();
2325
      //  debug!("is_likely_a_number: start/end={}/{}", n_preceding_siblings, n_preceding_siblings+end);
2326
      //  for (i, &child) in children.iter().enumerate() {
2327
      //    let child = as_element(child);
2328
      //    debug!("child# {}: {}", n_preceding_siblings+i, mml_to_string(child));
2329
      //  }
2330
      //  debug!("\n");
2331
      // }
2332
2333
      // gather up the text of the children (all mn, mo, or mtext)
2334
275
      let mut previous_name_was_mn = false;
2335
275
      let mut text = "".to_string();
2336
727
      for &child in 
children275
{
2337
727
        let child = as_element(child);
2338
727
        let child_name = name(child);
2339
727
        if previous_name_was_mn && 
child_name == "mn"303
{
2340
94
          text.push('\u{FFFF}');      // FIX: this should come from the separator string
2341
633
        }
2342
727
        text.push_str(as_text(child));
2343
727
        previous_name_was_mn = child_name == "mn";
2344
      }
2345
2346
275
      let text = text.trim(); // could be space got merged into an mn (e.g., braille::UEB::iceb::expr_3_1_6)
2347
      // debug!("  text='{}', decimal num={}, 3 digit match={}, 3-5 match={}, 1 digit={}", &text,
2348
      //    context.patterns.digit_only_decimal_number.is_match(text),
2349
      //    context.patterns.block_3digit_pattern.is_match(text),
2350
      //    context.patterns.block_3_5digit_pattern.is_match(text),
2351
      //    context.patterns.block_1digit_pattern.is_match(text));
2352
275
      if !(context.patterns.digit_only_decimal_number.is_match(text) ||
2353
190
         context.patterns.block_3digit_pattern.is_match(text) ||
2354
167
         context.patterns.block_3_5digit_pattern.is_match(text) ||
2355
166
         context.patterns.block_4digit_hex_pattern.is_match(text) ||
2356
162
         ( (text.chars().count() > 5 || 
context.patterns.decimal_separator139
.
is_match139
(
text139
)) &&
2357
25
           context.patterns.block_1digit_pattern.is_match(text) )
2358
        ) {
2359
161
          return false;
2360
114
      }
2361
2362
      // ??? might want to rule out "sequences" like '100, 200, 300' and '100, 103, 106' (if constant difference, then a sequence)
2363
2364
      // If surrounded by fences, and commas are used, leave as is (e.g, "{1,234}")
2365
114
      if !text.contains(',') {
2366
84
        return true;   // not comma separated
2367
30
      }
2368
2369
      // We have already checked for whitespace as separators, so it must be a comma. Just check the fences.
2370
      // This is not yet in canonical form, so the fences may be siblings or siblings of the parent 
2371
30
      let preceding_siblings = as_element(children[0]).preceding_siblings();
2372
30
      let following_siblings = as_element(children[end-1]).following_siblings();
2373
      let first_child;
2374
      let last_child;
2375
30
      if preceding_siblings.is_empty() && 
following_siblings19
.
is_empty19
() {
2376
        // number spans all children, look to parent for fences
2377
14
        let preceding_children = mrow.preceding_siblings();
2378
14
        let following_children = mrow.following_siblings();
2379
14
        if preceding_children.is_empty() || 
following_children9
.
is_empty9
() {
2380
9
          return true; // doesn't have left or right fence
2381
5
        }
2382
5
        first_child = preceding_children[preceding_children.len()-1];
2383
5
        last_child = following_children[0];
2384
16
      } else if preceding_siblings.is_empty() || 
following_siblings11
.
is_empty11
() {
2385
13
        return true; // can't be fences around it
2386
3
      } else {
2387
3
        first_child = preceding_siblings[preceding_siblings.len()-1];
2388
3
        last_child = following_siblings[0];
2389
3
      }
2390
8
      let first_child = as_element(first_child);
2391
8
      let last_child = as_element(last_child);
2392
8
      return !(name(first_child) == "mo" && is_fence(first_child) &&
2393
7
             name(last_child) == "mo" && is_fence(last_child) );
2394
275
    }
2395
2396
    // fn count_decimal_pts(context: &CanonicalizeContext, children: &[ChildOfElement], start: usize, end: usize) -> usize {
2397
    //  let mut n_decimal_pt = 0;
2398
    //  for &child_as_element in children.iter().take(end).skip(start) {
2399
    //    let child = as_element(child_as_element);
2400
    //    if context.patterns.decimal_separator.is_match(as_text(child))  {
2401
    //      n_decimal_pt += 1;
2402
    //    }
2403
    //  }
2404
    //  return n_decimal_pt;
2405
    // }
2406
2407
    /// This is a special case heuristic so try and determine if a terminating punctuation should be a decimal separator
2408
    /// Often math expressions end with punctuations for typographic reasons, so we try to figure that out here.
2409
    /// 'children' is a subset of 'mrow'
2410
5.55k
    fn ignore_final_punctuation(context: &CanonicalizeContext, mrow: Element, children: &[ChildOfElement]) -> bool {
2411
5.55k
      let last_child = children[children.len()-1];
2412
5.55k
      if mrow.children()[mrow.children().len()-1] != last_child {
2413
3.49k
        return false;   // not at end
2414
2.05k
      }
2415
2.05k
      let parent = mrow.parent().unwrap().element();
2416
2.05k
      if let Some(
math1.71k
) = parent
2417
1.71k
        && name(math) != "math" {
2418
1.58k
          return false;     // mrow inside something else -- not at end
2419
471
        }
2420
2421
471
      let last_child = as_element(last_child);
2422
      // debug!("ignore_final_punctuation: last child={}", mml_to_string(last_child));
2423
471
      if name(last_child) != "mo" {
2424
451
        return false; // last was not "mo", so can't be a period
2425
20
      }
2426
2427
20
      if !context.patterns.decimal_separator.is_match(as_text(last_child)) {
2428
0
        return false;
2429
20
      }
2430
2431
      // debug!("ignore_final_punctuation: #preceding={}", as_element(children[0]).preceding_siblings().len());
2432
      // look to preceding siblings and see if an of the mn's have a decimal separator
2433
20
      return !as_element(children[0]).preceding_siblings().iter()
2434
101
          .
any20
(|&child| {
2435
101
            let child = as_element(child);
2436
101
            name(child) == "mn" && 
context.patterns.decimal_separator14
.
is_match14
(
as_text(child)14
)
2437
101
          });
2438
5.55k
    }
2439
2440
    /// Trim off any children that are whitespace on either side
2441
107
    fn trim_whitespace(children: &mut [ChildOfElement], start: usize, end: usize) -> (usize, usize) {
2442
107
      let mut real_start = start;
2443
      #[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
2444
107
      for i in start..end {
2445
107
        let child = as_element(children[i]);
2446
107
        if !as_text(child).trim().is_empty() {
2447
107
          real_start = i;
2448
107
          break;
2449
0
        }
2450
      }
2451
2452
107
      let mut real_end = end;
2453
157
      for i in (
start..end107
).
rev107
() {
2454
157
        let child = as_element(children[i]);
2455
157
        if !as_text(child).trim().is_empty() {
2456
107
          real_end = i+1;
2457
107
          break;
2458
50
        }
2459
      }
2460
107
      return (real_start, real_end);
2461
107
    }
2462
2463
    /// Merge the number block from start..end
2464
107
    fn merge_block(children: &mut Vec<ChildOfElement>, start: usize, end: usize) {
2465
2466
      // debug!("merge_block: merging {}..{}", start, end);
2467
107
      let mut mn_text = String::with_capacity(4*(end-start)-1);    // true size less than #3 digit blocks + separator
2468
237
      for &child_as_element in 
children.iter()107
.
take107
(
end107
).
skip107
(
start107
) {
2469
237
        let child = as_element(child_as_element);
2470
237
        mn_text.push_str(as_text(child));
2471
237
      }
2472
107
      let child = as_element(children[start]);
2473
107
      set_mathml_name(child, "mn");
2474
107
      child.set_text(&mn_text);
2475
2476
107
      children.drain(start+1..end);
2477
107
    }
2478
2479
    
2480
    /// merge  ° C or  ° F into a single <mi> with the text '℃' or '℉' -- prevents '°' from becoming a superscript
2481
    #[allow(non_snake_case)]
2482
5.90k
    fn merge_degrees_C_F<'a>(mrow: Element<'a>) -> Element<'a> {
2483
5.90k
      let mut degree_child = None;
2484
28.1k
      for child in 
mrow5.90k
.
children5.90k
() {
2485
28.1k
        let child = as_element(child);
2486
28.1k
        if is_leaf(child) {
2487
23.9k
          match as_text(child) {
2488
23.9k
            "°" => {
2489
34
              degree_child = Some(child);
2490
34
            },
2491
23.9k
            "°C" => {
2492
12
              child.set_text("℃");
2493
12
              degree_child = None;
2494
12
            },
2495
23.8k
            "°F" => {
2496
0
              child.set_text("℉");
2497
0
              degree_child = None;
2498
0
            },
2499
23.8k
            text  => {
2500
23.8k
              if let Some(
degree_child23
) = degree_child
2501
23
                && (text == "C" || 
text == "F"22
) {
2502
                  // merge the degree child with the current child
2503
3
                  degree_child.set_text(if text == "C" { 
"℃"1
} else {
"℉"2
});
2504
3
                  child.remove_from_parent();
2505
23.8k
                }
2506
                // merge the degree child with the current child
2507
23.8k
              degree_child = None; 
2508
            },
2509
          }
2510
4.25k
        }
2511
      }
2512
5.90k
      return mrow;
2513
5.90k
    }
2514
2515
2516
    /// merge consecutive leaves containing any of the 'chars' into the first leaf -- probably used for omission with('_')
2517
5.90k
    fn merge_chars<'a>(mrow: Element<'a>, pattern: &Regex) -> Element<'a> {
2518
5.90k
      let mut first_child = None;
2519
5.90k
      let mut new_text = "".to_string();
2520
28.1k
      for child in 
mrow5.90k
.
children5.90k
() {
2521
28.1k
        let child = as_element(child);
2522
28.1k
        if is_leaf(child) {
2523
23.9k
          let text = as_text(child);
2524
23.9k
          if pattern.is_match(text) {
2525
134
            if new_text.is_empty() {
2526
118
              // potential start of a string
2527
118
              first_child = Some(child);
2528
118
              new_text = as_text(child).to_string();
2529
118
            } else {
2530
16
              // merge chars
2531
16
              new_text.push_str(text);
2532
16
              child.remove_from_parent();
2533
16
            }
2534
23.8k
          } else if new_text.len() > 1 {
2535
99
            // end of a run
2536
99
            first_child.unwrap().set_text(&new_text);
2537
99
            new_text.clear();
2538
23.7k
          } else {
2539
23.7k
            new_text.clear(); // just one entry -- no need to set the text
2540
23.7k
          }
2541
4.25k
        } else if new_text.len() > 1{
2542
7
          // end of a run
2543
7
          first_child.unwrap().set_text(&new_text);
2544
7
          new_text.clear();
2545
4.24k
        } else {
2546
4.24k
          new_text.clear();     // just one entry -- no need to set the text
2547
4.24k
        }
2548
      }
2549
5.90k
      if new_text.len() > 1{
2550
9
        // end of a run
2551
9
        first_child.unwrap().set_text(&new_text);
2552
5.89k
      }
2553
5.90k
      return mrow;
2554
5.90k
    }
2555
2556
    /// curl and divergence are handled as two character operators
2557
    /// if found, merge them into their own (new) mrow that has an intent on it
2558
    /// we can have '∇' or '𝛁', or those as vectors (inside an mover)
2559
10.1k
    fn merge_cross_or_dot_product_elements(children: &mut Vec<ChildOfElement>) {
2560
10.1k
      if children.is_empty() {
2561
3
        return;
2562
10.1k
      }
2563
10.1k
      let mut i = 0;
2564
10.1k
      let mut is_previous_nabla = false;
2565
31.5k
      while i < children.len() - 1 {
2566
21.3k
        let child = as_element(children[i]);
2567
21.3k
        if is_previous_nabla {
2568
14
          if is_leaf(child) {
2569
14
            let text = as_text(child);
2570
14
            if text == "⋅" || 
text == "·"13
||
text == "×"9
{
2571
12
              let nabla_child = as_element(children[i-1]);
2572
12
              let nabla_text = as_text( get_possible_embellished_node(nabla_child) );
2573
12
              let new_mrow = create_mathml_element(&child.document(), "mrow");
2574
12
              new_mrow.set_attribute_value(ACT_AS_OPERATOR, nabla_text);
2575
12
              new_mrow.append_child(nabla_child);
2576
12
              new_mrow.append_child(child);
2577
12
              children[i-1] = ChildOfElement::Element(new_mrow);
2578
12
              children.remove(i);
2579
12
            
}2
2580
0
          }
2581
14
          is_previous_nabla = false;
2582
        } else {
2583
21.3k
          let potential_nabla = if name(child) == "mover" {
as_element136
(
child.children()[0]136
)} else {
child21.1k
};
2584
21.3k
          if is_leaf(potential_nabla) {
2585
19.0k
            let text = as_text(potential_nabla);
2586
19.0k
            if text == "∇" || 
text == "𝛁"19.0k
{
2587
22
              is_previous_nabla = true;
2588
19.0k
            }
2589
2.27k
          }
2590
        }
2591
21.3k
        i += 1;
2592
      }
2593
10.1k
    }
2594
2595
5.90k
    fn merge_dots(mrow: Element) -> Element {
2596
      // merge consecutive <mo>s containing '.' into ellipsis
2597
5.90k
      let children = mrow.children();
2598
5.90k
      let mut i = 0;
2599
5.90k
      let mut n_dots = 0;   // number of consecutive mo's containing dots
2600
34.1k
      while i < children.len() {
2601
28.2k
        let child = as_element(children[i]);
2602
28.2k
        if name(child) == "mo" {
2603
10.4k
          let text = as_text(child);
2604
10.4k
          if text == "." {
2605
71
            n_dots += 1;
2606
71
            if n_dots == 3 {
2607
3
              let first_child = as_element(children[i-2]);
2608
3
              first_child.set_text("…");
2609
3
              as_element(children[i-1]).remove_from_parent();
2610
3
              child.remove_from_parent();
2611
3
              n_dots = 0;
2612
68
            }
2613
10.3k
          } else {
2614
10.3k
            n_dots = 0;
2615
10.3k
          }
2616
17.7k
        } else {
2617
17.7k
          n_dots = 0;
2618
17.7k
        }
2619
28.2k
        i += 1;
2620
      }
2621
5.90k
      return mrow;
2622
5.90k
    }
2623
2624
5.90k
    fn merge_primes(mrow: Element) -> Element {
2625
      // merge consecutive <mo>s containing primes (in various forms)
2626
5.90k
      let mut children = mrow.children();
2627
5.90k
      let mut i = 0;
2628
5.90k
      let mut n_primes = 0;   // number of consecutive mo's containing primes
2629
34.1k
      while i < children.len() {
2630
28.1k
        let child = as_element(children[i]);
2631
28.1k
        if name(child) == "mo" {
2632
10.4k
          let text = as_text(child);
2633
          // FIX: should we be more restrictive and change (apostrophe) only in a superscript?
2634
10.4k
          if IS_PRIME.is_match(text) {
2635
21
            n_primes += 1;
2636
10.4k
          } else if n_primes > 0 {
2637
3
            merge_prime_elements(&mut children, i - n_primes, i);
2638
3
            n_primes = 0;
2639
10.4k
          }
2640
17.7k
        } else if n_primes > 0 {
2641
2
          merge_prime_elements(&mut children, i - n_primes, i);
2642
2
          n_primes = 0;
2643
17.7k
        }
2644
28.1k
        i += 1;
2645
      }
2646
5.90k
      if n_primes > 0 {
2647
12
        merge_prime_elements(&mut children, i - n_primes, i);
2648
5.89k
      }
2649
5.90k
      return mrow;
2650
5.90k
    }
2651
2652
17
    fn merge_prime_elements(children: &mut [ChildOfElement], start: usize, end: usize) {
2653
      // not very efficient since this is probably causing an array shift each time (array is probably not big though)
2654
17
      let first_child = as_element(children[start]);
2655
17
      let mut new_text = String::with_capacity(end+3-start);  // one per element plus a little extra
2656
17
      new_text.push_str(as_text(first_child));
2657
17
      for &
child_as_element4
in children.iter().take(end).skip(start+1) {
2658
4
        let child = as_element(child_as_element);
2659
4
        let text = as_text(child);    // only in this function because it is an <mo>
2660
4
        new_text.push_str(text);
2661
4
        child.remove_from_parent();
2662
4
      }
2663
17
      first_child.set_text(&merge_prime_text(&new_text));
2664
17
    }
2665
  
2666
83
    /// Merge together single primes into double primes, etc.
    ///
    /// Each apostrophe or '′' counts as one prime, '″' as two, '‴' as three, and '⁗' as four.
    /// The result spells the total using as many '⁗' as possible followed by at most one of
    /// '′', '″', or '‴' for the remainder.
    ///
    /// If 'text' contains any other character, a message is written to stderr and 'text' is
    /// returned unchanged.
    fn merge_prime_text(text: &str) -> String {
      let mut n_primes: usize = 0;
      for ch in text.chars() {
        n_primes += match ch {
          '\'' | '′' => 1,
          '″' => 2,
          '‴' => 3,
          '⁗' => 4,
          _ => {
            eprintln!("merge_prime_text: unexpected char '{ch}' found in prime text '{text}'");
            return text.to_string();
          }
        };
      }

      // it would be very rare to have more than a quadruple prime
      let mut result = "⁗".repeat(n_primes / 4);
      let remainder = match n_primes % 4 {
        1 => Some('′'),
        2 => Some('″'),
        3 => Some('‴'),
        _ => None,    // a multiple of four -- nothing left over
      };
      if let Some(prime) = remainder {
        result.push(prime);
      }
      return result;
    }
2694
2695
    /// Returns true if 'ch' is one of the pseudo-script characters
    /// (from https://www.w3.org/TR/MathML3/chapter7.html#chars.pseudo-scripts).
    fn is_pseudo_script_char(ch: char) -> bool {
      matches!(ch,
        // ASCII
        '\"' | '\'' | '*' | '`' |
        // Latin-1: ª ° ² ³ ´ ¹ º
        '\u{AA}' | '\u{B0}' | '\u{B2}'..='\u{B4}' | '\u{B9}' | '\u{BA}' |
        // typographic quotation marks
        '\u{2018}'..='\u{2019}' | '\u{201C}'..='\u{201F}' |
        // primes, reversed primes, and the quadruple prime
        '\u{2032}'..='\u{2037}' | '\u{2057}'
      )
    }
2703
5.90k
    fn handle_pseudo_scripts(mrow: Element) -> Element {
2704
  
2705
5.90k
      assert!(name(mrow) == "mrow" || 
ELEMENTS_WITH_ONE_CHILD2.42k
.
contains2.42k
(
name(mrow)2.42k
), "non-mrow passed to handle_pseudo_scripts: {}",
mml_to_string0
(
mrow0
));
2706
5.90k
      let mut children = mrow.children();
2707
      // check to see if mrow of all pseudo scripts
2708
5.91k
      if 
children.iter()5.90k
.
all5.90k
(|&child| {
2709
5.91k
        is_pseudo_script(as_element(child))
2710
5.91k
      }) {
2711
2
        let parent = get_parent(mrow);  // must exist
2712
2
        let is_first_child = mrow.preceding_siblings().is_empty();
2713
2
        if  is_first_child {
2714
0
          return mrow; // FIX: what should happen
2715
2
        }
2716
2
        if crate::xpath_functions::IsNode::is_scripted(parent) {
2717
2
          return mrow;   // already in a script position
2718
0
        }
2719
0
        if name(parent) == "mrow" {
2720
0
          mrow.set_attribute_value("data-pseudo-script", "true");
2721
0
          return handle_pseudo_scripts(parent);
2722
        } else {
2723
0
          return mrow; // FIX: what should happen?
2724
        }
2725
5.90k
      }
2726
2727
5.90k
      let mut i = 1;
2728
5.90k
      let mut found = false;
2729
28.1k
      while i < children.len() {
2730
22.2k
        let child = as_element(children[i]);
2731
22.2k
        if is_pseudo_script(child) ||
2732
22.2k
           child.attribute("data-pseudo-script").is_some() {
2733
35
          let msup = create_mathml_element(&child.document(), "msup");
2734
35
          msup.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
2735
35
          msup.append_child(children[i-1]);
2736
35
          msup.append_child(child);
2737
35
          children[i-1] = ChildOfElement::Element(msup);
2738
35
          children.remove(i);
2739
35
          found = true;
2740
22.2k
        } else {
2741
22.2k
          i += 1;
2742
22.2k
        }
2743
      }
2744
5.90k
      if found {
2745
25
        mrow.replace_children(children)
2746
5.88k
      }
2747
5.90k
      return mrow;
2748
2749
28.1k
      fn is_pseudo_script(child: Element) -> bool {
2750
28.1k
        if name(child) == "mo" {
2751
10.4k
          let text = as_text(child);
2752
10.4k
          if let Some(
ch10.3k
) = single_char(text)
2753
10.3k
            && is_pseudo_script_char(ch) {
2754
              // don't script a pseudo-script
2755
55
              let preceding_siblings = child.preceding_siblings();
2756
55
              if !preceding_siblings.is_empty() {
2757
42
                let last_child = as_element(preceding_siblings[preceding_siblings.len()-1]);
2758
42
                if name(last_child) == "mo" &&
2759
10
                   let Some(ch) = single_char(as_text(last_child))
2760
10
                    && is_pseudo_script_char(ch) {
2761
6
                      return false;
2762
36
                    }
2763
13
              }
2764
49
              if text == "*" {
2765
                // could be infix "*" -- this is a weak check to see if what follows is potentially an operand
2766
5
                let following_siblings = child.following_siblings();
2767
5
                if  following_siblings.is_empty() {
2768
1
                  return true;
2769
4
                }
2770
4
                let first_child = as_element(following_siblings[0]);
2771
4
                return name(first_child) != "mo" || ["(", "[", "{"].contains(&text);
2772
              } else {
2773
44
                return true;
2774
              }
2775
10.3k
            }
2776
17.7k
        }
2777
28.1k
        return false;
2778
2779
        /// An efficient method to get the char from a string if it is just one char or fail
2780
10.4k
        fn single_char(text: &str) -> Option<char> {
2781
10.4k
          let mut chars = text.chars();
2782
10.4k
          let ch = chars.next();
2783
10.4k
          if ch.is_none() || chars.next().is_some() {
2784
39
            return None;   // not one character
2785
          } else {
2786
10.3k
            return ch;
2787
          }
2788
10.4k
        }
2789
28.1k
      }
2790
2791
5.90k
    }
2792
2793
10.1k
    fn handle_convert_to_mmultiscripts(children: &mut Vec<ChildOfElement>) {
2794
10.1k
      if children.len() == 1 {
2795
4.45k
        return;   // can't convert to mmultiscripts if there is nothing to attach an empty base to
2796
5.72k
      }
2797
5.72k
        let mut i = 0;
2798
      // convert_to_mmultiscripts changes 'children', so can't cache length
2799
32.7k
      while i < children.len() {
2800
26.9k
        let child = as_element(children[i]);
2801
26.9k
        let child_name = name(child);
2802
26.9k
        if (child_name == "msub" || 
child_name == "msup"26.3k
||
child_name == "msubsup"25.8k
) &&
CanonicalizeContext::is_empty_element1.24k
(
as_element1.24k
(
child.children()[0]1.24k
)) {
2803
115
          i = convert_to_mmultiscripts(children, i);
2804
26.8k
        } else {
2805
26.8k
          i += 1;
2806
26.8k
        }
2807
      }
2808
10.1k
    }
2809
2810
2811
    /// Converts the script element with an empty base to mmultiscripts by sucking the base from the following or preceding element.
2812
    /// The following element is preferred so that these become prescripts (common usage is from TeX), but if the preceding element
2813
    ///   has a closer mi/mtext, it is used.
2814
    /// mhchem has some ugly output (at least in MathJax) and that's where using the following element makes sense (usually)
2815
    ///   because an empty base (mpadded width=0) is used for the scripts. A hacky attribute indicates this case.
2816
115
    fn convert_to_mmultiscripts(mrow_children: &mut Vec<ChildOfElement>, i: usize) -> usize {
2817
      // this is a bit messy/confusing because we might scan forwards or backwards and this affects whether
2818
      // we are scanning for prescripts or postscripts
2819
      // the generic name "primary_scripts" means prescripts if going forward or postscripts if going backwards
2820
      // if we are going forward and hit a sub/superscript with a base, then those scripts become postscripts ("other_scripts")
2821
      // if we are going backwards, we never add prescripts
2822
2823
      // let parent = get_parent(as_element(mrow_children[i]));
2824
      // debug!("convert_to_mmultiscripts (i={}) -- PARENT:\n{}", i, mml_to_string(parent));
2825
2826
115
      let i_base = choose_base_of_mmultiscripts(mrow_children, i);
2827
115
      let mut base = as_element(mrow_children[i_base]);
2828
      // debug!("convert_to_mmultiscripts -- base\n{}", mml_to_string(base));
2829
115
      let base_name = name(base);
2830
115
      let mut prescripts = vec![];
2831
115
      let mut postscripts = vec![];
2832
115
      let mut i_postscript = i_base + 1;
2833
2834
115
      if (base_name == "msub" || 
base_name == "msup"110
||
base_name == "msubsup"110
) &&
2835
5
         !CanonicalizeContext::is_empty_element(as_element(base.children()[0])) {
2836
5
        // if the base is a script element, then we want the base of that to be the base of the mmultiscripts
2837
5
        let mut base_children = base.children();
2838
5
        let script_base = as_element(base.children()[0]);
2839
5
        base_children[0] = ChildOfElement::Element(CanonicalizeContext::create_empty_element(&base.document()));
2840
5
        base.replace_children(base_children);
2841
5
        add_to_scripts(base, &mut postscripts);
2842
5
        base = script_base;
2843
110
      }
2844
2845
115
      let mut has_chemistry_prescript = false; // chemical elements don't have both prescripts (nuclear chem) and postscripts
2846
115
      if i_base > i {
2847
        // we have prescripts -- gather them up
2848
61
        let mut i_prescript = i;
2849
122
        while i_prescript < i_base {
2850
61
          let script = as_element(mrow_children[i_prescript]);
2851
          // kind of ugly -- this duplicates the first part of add_to_scripts
2852
61
          let script_name = name(script);
2853
61
          if script_name == "msub" || 
script_name == "msup"56
||
script_name == "msubsup"48
{
2854
61
            let base = as_element(script.children()[0]);
2855
61
            has_chemistry_prescript |= base.attribute(MHCHEM_MMULTISCRIPTS_HACK).is_some();
2856
61
          
}0
2857
61
          if !add_to_scripts(script, &mut prescripts) {
2858
0
            break;
2859
61
          }
2860
61
          i_prescript += 1;
2861
        }
2862
54
      }
2863
2864
115
      if !has_chemistry_prescript {
2865
        // gather up the postscripts (if any)
2866
137
        while i_postscript < mrow_children.len() {
2867
104
          let script = as_element(mrow_children[i_postscript]);
2868
          // debug!("script: {}", mml_to_string(script));
2869
          // if name(script) == "msub" && i_postscript+1 < mrow_children.len() {
2870
          //  let superscript = as_element(mrow_children[i_postscript+1]);
2871
          //  if name(superscript) == "msup" && CanonicalizeContext::is_empty_element(as_element(superscript.children()[0])) {
2872
          //    set_mathml_name(script, "msubsup");
2873
          //    script.append_child(superscript.children()[1]);
2874
          //    i_postscript += 1;
2875
          //  }
2876
          // }
2877
          // debug!("adding postscript\n{}", mml_to_string(script));
2878
104
          if !add_to_scripts(script, &mut postscripts) {
2879
32
            break;
2880
72
          }
2881
72
          i_postscript += 1;
2882
        }
2883
50
      }
2884
2885
115
      let i_multiscript = if i_base < i {
i_base54
} else {
i61
};
2886
115
      let script = create_mathml_element(&base.document(), "mmultiscripts");
2887
115
      let mut num_children = 1 + postscripts.len();
2888
115
      if !prescripts.is_empty() {
2889
61
        num_children += 1 + prescripts.len();
2890
61
      
}54
2891
115
      let mut new_children = Vec::with_capacity(num_children);
2892
115
      new_children.push(ChildOfElement::Element(base));
2893
115
      new_children.append(&mut postscripts);
2894
115
      if !prescripts.is_empty() {
2895
61
        new_children.push( ChildOfElement::Element( create_mathml_element(&script.document(), "mprescripts") ) );
2896
61
        new_children.append(&mut prescripts);
2897
61
      
}54
2898
2899
115
      script.replace_children(new_children);
2900
115
      let lifted_base = as_element(mrow_children[i_multiscript]);
2901
115
      add_attrs(script, &lifted_base.attributes());
2902
115
      script.remove_attribute("data-split");   // doesn't make sense on mmultiscripts
2903
115
      script.remove_attribute("mathvariant");    // doesn't make sense on mmultiscripts
2904
115
      mrow_children[i_multiscript] = ChildOfElement::Element(script);
2905
115
      mrow_children.drain(i_multiscript+1..i_postscript);  // remove children after the first
2906
2907
115
      let likely_chemistry = likely_adorned_chem_formula(script);
2908
115
      if likely_chemistry >= 0 {
2909
106
        script.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
2910
106
      
}9
2911
2912
      // debug!("convert_to_mmultiscripts -- converted script:\n{}", mml_to_string(script));
2913
      // debug!("convert_to_mmultiscripts (at end) -- #children={}", mrow_children.len());
2914
115
      return i_multiscript + 1;   // child to start on next
2915
115
    }
2916
2917
170
    fn add_to_scripts<'a>(el: Element<'a>, scripts: &mut Vec<ChildOfElement<'a>>) -> bool {
2918
170
      let script_name = name(el);
2919
170
      if !(script_name == "msub" || 
script_name == "msup"111
||
script_name == "msubsup"80
) {
2920
32
        return false;
2921
138
      }
2922
138
      let base = as_element(el.children()[0]);
2923
138
      if !CanonicalizeContext::is_empty_element(base) { // prescript that really should be a postscript
2924
        // debug!("add_to_scripts: not empty base:\n{}", mml_to_string(base));
2925
0
        return false;
2926
138
      }
2927
138
      if script_name == "msub" {
2928
59
        add_pair(scripts, Some(el.children()[1]), None);
2929
79
      } else if script_name == "msup" {
2930
31
        add_pair(scripts, None, Some(el.children()[1]));
2931
48
      } else { // msubsup
2932
48
        add_pair(scripts, Some(el.children()[1]), Some(el.children()[2]));
2933
48
      };
2934
138
      return true;
2935
170
    }
2936
2937
138
    fn add_pair<'v, 'a:'v>(script_vec: &'v mut Vec<ChildOfElement<'a>>, subscript: Option<ChildOfElement<'a>>, superscript: Option<ChildOfElement<'a>>) {
2938
138
      let child_of_element = if let Some(
subscript107
) = subscript {
subscript107
} else {
superscript31
.
unwrap31
()};
2939
138
      let doc = as_element(child_of_element).document();
2940
138
      let subscript = if let Some(
subscript107
)= subscript {
2941
107
        if CanonicalizeContext::is_empty_element(as_element(subscript)) {
2942
0
          ChildOfElement::Element(create_mathml_element(&doc, "none"))
2943
        } else {
2944
107
          subscript
2945
        }
2946
      } else {
2947
31
        ChildOfElement::Element(create_mathml_element(&doc, "none"))
2948
      };
2949
138
      let superscript = if let Some(
superscript79
) = superscript {
2950
79
        if CanonicalizeContext::is_empty_element(as_element(superscript)) {
2951
0
          ChildOfElement::Element(create_mathml_element(&doc, "none"))
2952
        } else {
2953
79
          superscript
2954
        }
2955
      } else {
2956
59
        ChildOfElement::Element(create_mathml_element(&doc, "none"))
2957
      };
2958
138
      script_vec.push(subscript);
2959
138
      script_vec.push(superscript);
2960
138
    }
2961
2962
    /// Find the closest likely base to the 'i'th child, preferring the next one over the preceding one, but want the closest.
2963
    ///
2964
    /// Note: because the base might be (...), 'mrow_children might be changed so that they are grouped into an mrow.
2965
115
    fn choose_base_of_mmultiscripts(mrow_children: &mut Vec<ChildOfElement>, i: usize) -> usize {
2966
      // We already know there are no empty scripts to the left (because we find first empty base from left to right).
2967
      // However, there may be some empty bases before we get to real base on the right.
2968
115
      let script_element_base = as_element(as_element(mrow_children[i]).children()[0]);
2969
115
      let mut likely_postscript = script_element_base.attribute(MHCHEM_MMULTISCRIPTS_HACK).is_some() && 
i > 0103
;
2970
115
      if likely_postscript {
2971
86
        let base_of_postscript = as_element(mrow_children[i-1]);
2972
86
        if name(base_of_postscript) != "mi" || 
likely_chem_element(base_of_postscript) < 050
{
2973
36
          likely_postscript = false;  // base for potential postscript doesn't look reasonable -- consider it a prescript
2974
50
        }
2975
29
      }
2976
115
      if i+1 < mrow_children.len() && 
!likely_postscript107
&&
is_child_simple_base61
(
mrow_children[i+1]61
) {
2977
61
        return i+1;
2978
54
      }
2979
54
      if i > 0 {
2980
54
        if let Some(
i_start2
) = is_grouped_base(&mrow_children[..i]) {
2981
2
          assert!(i_start < i-1);  // should be at least two children (open and close)
2982
          // create a new mrow, add the grouped children to it, then drain all but the first of them from the original mrow vec.
2983
          // stick the mrow into the first of them -- this is the base
2984
2
          let new_mrow = create_mathml_element(&as_element(mrow_children[0]).document(), "mrow");
2985
2
          new_mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
2986
8
          for &child in 
&2
mrow_children2
[i_start..i] {
2987
8
            new_mrow.append_child(child);
2988
8
          }
2989
2
          mrow_children.drain(i_start+1..i);
2990
2
          mrow_children[i_start] = ChildOfElement::Element(new_mrow);
2991
2
          return i_start;
2992
52
        }
2993
52
        if is_child_simple_base(mrow_children[i-1]) {
2994
52
          return i-1;
2995
0
        }
2996
0
      }
2997
2998
      // base very likely after multiple scripts to the right
2999
0
      for (i_base, &child) in mrow_children.iter().enumerate().skip(i+1) {
3000
0
        if is_child_simple_base(child) {
3001
0
            return i_base;
3002
        } else {
3003
0
          let child = as_element(child);
3004
0
          let child_name = name(child);
3005
0
          if !(child_name == "msub" || child_name == "msup" || child_name == "msubsup") {
3006
0
            break;
3007
0
          }
3008
        }
3009
      }
3010
      // didn't find any good candidates for a base -- pick something valid
3011
0
      assert!(mrow_children.len() > i);
3012
0
      return i;
3013
      
3014
      
3015
113
      fn is_child_simple_base(child: ChildOfElement) -> bool {
3016
113
        let mut child = as_element(child);
3017
113
        let child_name = name(child);
3018
113
        if child_name == "msub" || 
child_name == "msup"108
||
child_name == "msubsup"108
{
3019
5
          child = as_element(child.children()[0]);
3020
108
        }
3021
3022
113
        return is_leaf(child) && !CanonicalizeContext::is_empty_element(child);  // a little overly general (but hopefully doesn't matter)
3023
113
      }
3024
3025
      /// Return the index of the matched open paren/bracket if the last element is a closed paren/bracket
3026
54
      fn is_grouped_base(mrow_children: &[ChildOfElement]) -> Option<usize> {
3027
        // FIX: this really belongs in canonicalization pass, not the clean pass
3028
54
        let i_last = mrow_children.len()-1;
3029
54
        let last_child = get_possible_embellished_node(as_element(mrow_children[i_last]));
3030
54
        if name(last_child) == "mo" &&
3031
3
           CanonicalizeContext::find_operator(None, last_child, None, None, None).is_right_fence() {
3032
6
          for i_child in (
0..i_last2
).
rev2
() {
3033
6
            let child = get_possible_embellished_node(as_element(mrow_children[i_child]));
3034
6
            if name(child) == "mo" &&
3035
2
               CanonicalizeContext::find_operator(None, child, None, None, None).is_left_fence() {
3036
              // FIX: should make sure left and right match. Should also count for nested parens
3037
2
              return Some(i_child);
3038
4
            }
3039
          }
3040
52
        }
3041
52
        return None;
3042
54
      }
3043
115
    }
3044
52.3k
  }
3045
3046
64.1k
  fn canonicalize_mrows<'a>(&self, mathml: Element<'a>) -> Result<Element<'a>> {
3047
64.1k
    let tag_name = name(mathml);
3048
64.1k
    set_mathml_name(mathml, tag_name);  // add namespace
3049
64.1k
    match tag_name {
3050
64.1k
      "mi" | 
"ms"48.7k
|
"mtext"48.7k
|
"mspace"48.3k
=> {
3051
15.8k
        self.canonicalize_plane1(mathml);
3052
15.8k
        return Ok( mathml ); },
3053
48.3k
      "mo" => {
3054
14.6k
        self.canonicalize_plane1(mathml);
3055
14.6k
        self.canonicalize_mo_text(mathml);
3056
14.6k
        return Ok( mathml );
3057
      },
3058
33.7k
      "mn" => {
3059
11.6k
        self.canonicalize_plane1(mathml);
3060
11.6k
        return Ok( mathml );
3061
      },
3062
22.0k
      "mrow" => {
3063
7.48k
        return self.canonicalize_mrows_in_mrow(mathml);
3064
      },
3065
      _ => {
3066
        // recursively try to make mrows in other structures (eg, num/denom in fraction)
3067
14.6k
        let mut new_children = Vec::with_capacity(mathml.children().len());
3068
21.5k
        for child in 
mathml14.6k
.
children14.6k
() {
3069
21.5k
          match child {
3070
21.5k
            ChildOfElement::Element(e) => {
3071
21.5k
              new_children.push( ChildOfElement::Element(self.canonicalize_mrows(e)
?0
));
3072
            },
3073
0
            ChildOfElement::Text(t) => {
3074
0
              if mathml.children().len() != 1 {
3075
0
                bail!("Text '{}' found with more than one child in element '{}'", t.text(), tag_name);
3076
0
              }
3077
0
              return Ok( mathml );
3078
            },
3079
0
            _ => bail!("Should have been an element or text in '{}'", tag_name),
3080
          }
3081
        }
3082
14.6k
        mathml.replace_children(new_children);
3083
14.6k
        return Ok( mathml );
3084
      },
3085
    }
3086
64.1k
  }
3087
    
3088
1.91k
  fn potentially_lift_script<'a>(&self, mrow: Element<'a>) -> Element<'a> {
3089
1.91k
    if name(mrow) != "mrow" {
3090
0
      return mrow;
3091
1.91k
    }
3092
1.91k
    let mut mrow_children = mrow.children();
3093
1.91k
    let first_child = as_element(mrow_children[0]);
3094
1.91k
    let last_child = as_element(mrow_children[mrow_children.len()-1]);
3095
1.91k
    let last_child_name = name(last_child);
3096
3097
1.91k
    if name(first_child) == "mo" && 
is_fence1.91k
(
first_child1.91k
) &&
3098
1.91k
       (last_child_name == "msub" || last_child_name == "msup" || 
last_child_name == "msubsup"1.89k
) {
3099
19
      let base = as_element(last_child.children()[0]);
3100
19
      if !(name(base) == "mo" && is_fence(base)) {
3101
0
        return mrow; // not a case we are interested in
3102
19
      }
3103
      // else drop through
3104
    } else {
3105
1.89k
      return mrow; // not a case we are interested in
3106
    }
3107
3108
19
    let script = last_child; // better name now that we know what it is
3109
19
    let mut script_children = script.children();
3110
19
    let close_fence = script_children[0];
3111
19
    let mrow_children_len = mrow_children.len();     // rust complains about a borrow after move if we don't store this first
3112
19
    mrow_children[mrow_children_len-1] = close_fence;     // make the mrow hold the fences
3113
19
    mrow.replace_children(mrow_children);
3114
    // make the mrow the child of the script
3115
19
    script_children[0] = ChildOfElement::Element(mrow);
3116
19
    script.replace_children(script_children);
3117
19
    return script;
3118
1.91k
  }
3119
3120
  /// Map names to start of Unicode alphanumeric blocks (Roman, digits, Greek)
3121
  /// Don't do this for function names -- for function names, map them back to ASCII
3122
42.1k
  fn canonicalize_plane1<'a>(&self, mi: Element<'a>) -> Element<'a> {
3123
    // if the character shouldn't be mapped, use 0 -- don't use 'A' as ASCII and Greek aren't contiguous
3124
    static MATH_VARIANTS: phf::Map<&str, [u32; 3]> = phf_map! {
3125
      // "normal" -- nothing to do
3126
      "italic" => [0, 0, 0x1D6E2],
3127
      "bold" => [0x1D400, 0x1D7CE, 0x1D6A8],
3128
      "bold-italic" => [0x1D468, 0x1D7CE, 0x1D71C],
3129
      "double-struck" => [0x1D538, 0x1D7D8, 0],
3130
      "bold-fraktur" => [0x1D56C, 0, 0x1D6A8],
3131
      "script" => [0x1D49C, 0, 0],
3132
      "bold-script" => [0x1D4D0, 0, 0x1D6A8],
3133
      "fraktur" => [0x1D504, 0, 0],
3134
      "sans-serif" => [0x1D5A0, 0x1D7E2, 0],
3135
      "bold-sans-serif" => [0x1D5D4, 0x1D7EC, 0x1D756],
3136
      "sans-serif-italic" => [0x1D608, 0x1D7E2, 0],
3137
      "sans-serif-bold-italic" => [0x1D63C, 0x1D7EC, 0x1D790],
3138
      "monospace" => [0x1D670, 0x1D7F6, 0],
3139
    };
3140
3141
42.1k
    return crate::definitions::SPEECH_DEFINITIONS.with(|defs| {
3142
      // names that are always function names (e.g, "sin" and "log")
3143
42.1k
      let defs = defs.borrow();
3144
42.1k
      let 
names42.1k
= match defs.get_hashset("FunctionNames") {
3145
42.1k
        Some(hs) => hs,
3146
3
        None => return mi,  // happens in some canonicalize tests but not in real use
3147
      };
3148
3149
3150
42.1k
      let mi_text = as_text(mi);
3151
42.1k
      let variant = mi.attribute_value("mathvariant");
3152
3153
42.1k
      if names.contains(mi_text) {
3154
791
        return mi;   // avoid mapping mathvariant for function names
3155
41.3k
      }
3156
      // function name might be (wrongly) set to italic math alphanumeric chars, including bold italic
3157
41.3k
      if let Some(
ascii_text12.6k
) = CanonicalizeContext::math_alphanumeric_to_ascii(mi_text)
3158
12.6k
        && names.contains(&ascii_text) {
3159
3
          mi.set_text(&ascii_text);
3160
3
          return mi
3161
41.3k
        }
3162
3163
41.3k
      if variant.is_none() {
3164
40.3k
        return mi;
3165
952
      }
3166
3167
952
      let new_text = match MATH_VARIANTS.get(variant.unwrap()) {
3168
755
        None => mi_text.to_string(),
3169
197
        Some(start) => shift_text(mi_text, start),
3170
      };
3171
      // mi.remove_attribute("mathvariant");  // leave attr -- for Nemeth, there are italic digits etc that don't have Unicode points
3172
952
      mi.set_text(&new_text);
3173
952
      return mi;
3174
42.1k
    });
3175
3176
197
    fn shift_text(old_text: &str, char_mapping: &[u32; 3]) -> String {
3177
      // if there is no block for something, use 'a', 'A', 0 as that will be a no-op
3178
      struct Offsets {
3179
        ch: u32,
3180
        table: usize, 
3181
      }
3182
      static SHIFT_AMOUNTS: phf::Map<char, Offsets> = phf_map! {
3183
        'A' => Offsets{ ch: 0, table: 0},
3184
        'B' => Offsets{ ch: 1, table: 0},
3185
        'C' => Offsets{ ch: 2, table: 0},
3186
        'D' => Offsets{ ch: 3, table: 0},
3187
        'E' => Offsets{ ch: 4, table: 0},
3188
        'F' => Offsets{ ch: 5, table: 0},
3189
        'G' => Offsets{ ch: 6, table: 0},
3190
        'H' => Offsets{ ch: 7, table: 0},
3191
        'I' => Offsets{ ch: 8, table: 0},
3192
        'J' => Offsets{ ch: 9, table: 0},
3193
        'K' => Offsets{ ch: 10, table: 0},
3194
        'L' => Offsets{ ch: 11, table: 0},
3195
        'M' => Offsets{ ch: 12, table: 0},
3196
        'N' => Offsets{ ch: 13, table: 0},
3197
        'O' => Offsets{ ch: 14, table: 0},
3198
        'P' => Offsets{ ch: 15, table: 0},
3199
        'Q' => Offsets{ ch: 16, table: 0},
3200
        'R' => Offsets{ ch: 17, table: 0},
3201
        'S' => Offsets{ ch: 18, table: 0},
3202
        'T' => Offsets{ ch: 19, table: 0},
3203
        'U' => Offsets{ ch: 20, table: 0},
3204
        'V' => Offsets{ ch: 21, table: 0},
3205
        'W' => Offsets{ ch: 22, table: 0},
3206
        'X' => Offsets{ ch: 23, table: 0},
3207
        'Y' => Offsets{ ch: 24, table: 0},
3208
        'Z' => Offsets{ ch: 25, table: 0},
3209
        'a' => Offsets{ ch: 26, table: 0},
3210
        'b' => Offsets{ ch: 27, table: 0},
3211
        'c' => Offsets{ ch: 28, table: 0},
3212
        'd' => Offsets{ ch: 29, table: 0},
3213
        'e' => Offsets{ ch: 30, table: 0},
3214
        'f' => Offsets{ ch: 31, table: 0},
3215
        'g' => Offsets{ ch: 32, table: 0},
3216
        'h' => Offsets{ ch: 33, table: 0},
3217
        'i' => Offsets{ ch: 34, table: 0},
3218
        'j' => Offsets{ ch: 35, table: 0},
3219
        'k' => Offsets{ ch: 36, table: 0},
3220
        'l' => Offsets{ ch: 37, table: 0},
3221
        'm' => Offsets{ ch: 38, table: 0},
3222
        'n' => Offsets{ ch: 39, table: 0},
3223
        'o' => Offsets{ ch: 40, table: 0},
3224
        'p' => Offsets{ ch: 41, table: 0},
3225
        'q' => Offsets{ ch: 42, table: 0},
3226
        'r' => Offsets{ ch: 43, table: 0},
3227
        's' => Offsets{ ch: 44, table: 0},
3228
        't' => Offsets{ ch: 45, table: 0},
3229
        'u' => Offsets{ ch: 46, table: 0},
3230
        'v' => Offsets{ ch: 47, table: 0},
3231
        'w' => Offsets{ ch: 48, table: 0},
3232
        'x' => Offsets{ ch: 49, table: 0},
3233
        'y' => Offsets{ ch: 50, table: 0},
3234
        'z' => Offsets{ ch: 51, table: 0},
3235
        '0' => Offsets{ ch: 0, table: 1},
3236
        '1' => Offsets{ ch: 1, table: 1},
3237
        '2' => Offsets{ ch: 2, table: 1},
3238
        '3' => Offsets{ ch: 3, table: 1},
3239
        '4' => Offsets{ ch: 4, table: 1},
3240
        '5' => Offsets{ ch: 5, table: 1},
3241
        '6' => Offsets{ ch: 6, table: 1},
3242
        '7' => Offsets{ ch: 7, table: 1},
3243
        '8' => Offsets{ ch: 8, table: 1},
3244
        '9' => Offsets{ ch: 9, table: 1},
3245
        'Α' => Offsets{ ch: 0, table: 2},
3246
        'Β' => Offsets{ ch: 1, table: 2},
3247
        'Γ' => Offsets{ ch: 2, table: 2},
3248
        'Δ' => Offsets{ ch: 3, table: 2},
3249
        'Ε' => Offsets{ ch: 4, table: 2},
3250
        'Ζ' => Offsets{ ch: 5, table: 2},
3251
        'Η' => Offsets{ ch: 6, table: 2},
3252
        'Θ' => Offsets{ ch: 7, table: 2},
3253
        'Ι' => Offsets{ ch: 8, table: 2},
3254
        'Κ' => Offsets{ ch: 9, table: 2},
3255
        'Λ' => Offsets{ ch: 10, table: 2},
3256
        'Μ' => Offsets{ ch: 11, table: 2},
3257
        'Ν' => Offsets{ ch: 12, table: 2},
3258
        'Ξ' => Offsets{ ch: 13, table: 2},
3259
        'Ο' => Offsets{ ch: 14, table: 2},
3260
        'Π' => Offsets{ ch: 15, table: 2},
3261
        'Ρ' => Offsets{ ch: 16, table: 2},
3262
        'ϴ' => Offsets{ ch: 17, table: 2},
3263
        'Σ' => Offsets{ ch: 18, table: 2},
3264
        'Τ' => Offsets{ ch: 19, table: 2},
3265
        'Υ' => Offsets{ ch: 20, table: 2},
3266
        'Φ' => Offsets{ ch: 21, table: 2},
3267
        'Χ' => Offsets{ ch: 22, table: 2},
3268
        'Ψ' => Offsets{ ch: 23, table: 2},
3269
        'Ω' => Offsets{ ch: 24, table: 2},
3270
        '∇' => Offsets{ ch: 25, table: 2},                
3271
        'α' => Offsets{ ch: 26, table: 2},
3272
        'β' => Offsets{ ch: 27, table: 2},
3273
        'γ' => Offsets{ ch: 28, table: 2},
3274
        'δ' => Offsets{ ch: 29, table: 2},
3275
        'ε' => Offsets{ ch: 30, table: 2},
3276
        'ζ' => Offsets{ ch: 31, table: 2},
3277
        'η' => Offsets{ ch: 32, table: 2},
3278
        'θ' => Offsets{ ch: 33, table: 2},
3279
        'ι' => Offsets{ ch: 34, table: 2},
3280
        'κ' => Offsets{ ch: 35, table: 2},
3281
        'λ' => Offsets{ ch: 36, table: 2},
3282
        'μ' => Offsets{ ch: 37, table: 2},
3283
        'ν' => Offsets{ ch: 38, table: 2},
3284
        'ξ' => Offsets{ ch: 39, table: 2},
3285
        'ο' => Offsets{ ch: 40, table: 2},
3286
        'π' => Offsets{ ch: 41, table: 2},
3287
        'ρ' => Offsets{ ch: 42, table: 2},
3288
        'ς' => Offsets{ ch: 43, table: 2},
3289
        'σ' => Offsets{ ch: 44, table: 2},
3290
        'τ' => Offsets{ ch: 45, table: 2},
3291
        'υ' => Offsets{ ch: 46, table: 2},
3292
        'φ' => Offsets{ ch: 47, table: 2},
3293
        'χ' => Offsets{ ch: 48, table: 2},
3294
        'ψ' => Offsets{ ch: 49, table: 2},
3295
        'ω' => Offsets{ ch: 50, table: 2},
3296
        '∂' => Offsets{ ch: 51, table: 2},
3297
        'ϵ' => Offsets{ ch: 52, table: 2},
3298
        'ϑ' => Offsets{ ch: 53, table: 2},
3299
        'ϰ' => Offsets{ ch: 54, table: 2},
3300
        'ϕ' => Offsets{ ch: 55, table: 2},
3301
        'ϱ' => Offsets{ ch: 56, table: 2},
3302
        'ϖ' => Offsets{ ch: 57, table: 2},
3303
      };
3304
197
      let mut new_text = String::new();
3305
321
      for ch in 
old_text197
.
chars197
() {
3306
321
        new_text.push(
3307
321
          match SHIFT_AMOUNTS.get(&ch) {
3308
            None => {
3309
              // there are two digamma chars only in the bold mapping. Handled here
3310
71
              if char_mapping[2] == 0x1D6A8 {
3311
43
                match ch {
3312
1
                  'Ϝ' => '𝟊',
3313
1
                  'ϝ' => '𝟋',
3314
41
                  _   => ch,
3315
                }
3316
              } else {
3317
28
                ch
3318
              }
3319
            },
3320
250
            Some(offsets) => {
3321
250
              let start_of_mapping = char_mapping[offsets.table];
3322
250
              if start_of_mapping == 0 {
ch37
} else {
shift_char213
(
start_of_mapping + offsets.ch213
)}
3323
            }
3324
          }
3325
        )
3326
      }
3327
197
      return new_text;
3328
3329
213
      fn shift_char(ch: u32) -> char {
3330
        // there are "holes" in the math alphanumerics due to legacy issues
3331
        // this table maps the holes to their legacy location
3332
        static EXCEPTIONS: phf::Map<u32, u32> = phf_map! {
3333
          0x1D455u32 => 0x210Eu32,
3334
          0x1D49Du32 => 0x212Cu32,
3335
          0x1D4A0u32 => 0x2130u32,
3336
          0x1D4A1u32 => 0x2131u32,
3337
          0x1D4A3u32 => 0x210Bu32,
3338
          0x1D4A4u32 => 0x2110u32,
3339
          0x1D4A7u32 => 0x2112u32,
3340
          0x1D4A8u32 => 0x2133u32,
3341
          0x1D4ADu32 => 0x211Bu32,
3342
          0x1D4BAu32 => 0x212Fu32,
3343
          0x1D4BCu32 => 0x210Au32,
3344
          0x1D4C4u32 => 0x2134u32,
3345
          0x1D506u32 => 0x212Du32,
3346
          0x1D50Bu32 => 0x210Cu32,
3347
          0x1D50Cu32 => 0x2111u32,
3348
          0x1D515u32 => 0x211Cu32,
3349
          0x1D51Du32 => 0x2128u32,
3350
          0x1D53Au32 => 0x2102u32,
3351
          0x1D53Fu32 => 0x210Du32,
3352
          0x1D545u32 => 0x2115u32,
3353
          0x1D547u32 => 0x2119u32,
3354
          0x1D548u32 => 0x211Au32,
3355
          0x1D549u32 => 0x211Du32,
3356
          0x1D551u32 => 0x2124u32,
3357
        };
3358
                
3359
213
        return unsafe { char::from_u32_unchecked(   // safe because the values are a char or from the table above
3360
213
          match EXCEPTIONS.get(&ch) {
3361
161
            None => ch,
3362
52
            Some(exception_value) => *exception_value,
3363
          }
3364
        ) }
3365
213
      }
3366
197
    }
3367
42.1k
  }
3368
3369
41.5k
  /// Converts `input` to ASCII letters if it consists only of ASCII letters and/or
  /// bold, italic, bold italic, or sans-serif math alphanumeric letters.
  ///
  /// Returns `None` if any char is something else (digit, Greek, other math styles, punctuation, ...).
  /// An empty `input` results in `Some("")`.
  fn math_alphanumeric_to_ascii(input: &str) -> Option<String> {
    // (code point of the first letter of a run of 26 letters, ASCII letter the run starts with)
    const LETTER_RUNS: [(u32, u8); 8] = [
      (0x1D400, b'A'), (0x1D41A, b'a'),   // Mathematical Bold
      (0x1D434, b'A'), (0x1D44E, b'a'),   // Mathematical Italic
      (0x1D468, b'A'), (0x1D482, b'a'),   // Mathematical Bold Italic
      (0x1D5A0, b'A'), (0x1D5BA, b'a'),   // Mathematical Sans-Serif
    ];

    return input.chars()
      .map(|c| {
        if c.is_ascii_alphabetic() {
          return Some(c);
        }
        if c == '\u{210E}' {
          // italic 'h' is a "hole" in the Mathematical Italic run -- it lives at U+210E (PLANCK CONSTANT)
          return Some('h');
        }
        let code_point = c as u32;
        LETTER_RUNS.iter()
          .find(|&&(start, _)| (start..start + 26).contains(&code_point))
          // the offset is < 26, so the cast to u8 can't truncate
          .map(|&(start, ascii_start)| char::from(ascii_start + (code_point - start) as u8))
      })
      .collect();   // None if any char isn't a letter (or supported math letter)
  }
3402
3403
14.6k
  fn canonicalize_mo_text(&self, mo: Element) {
3404
    // lazy_static! {    (NOTE: std::sync::LazyLock is now used instead)
3405
    //  static ref IS_LIKELY_SCALAR_VARIABLE: Regex = Regex::new("[a-eh-z]").unwrap();
3406
    // }
3407
    
3408
14.6k
    let mut mo_text = as_text(mo);
3409
14.6k
    let parent = get_parent(mo);
3410
14.6k
    let parent_name = name(parent);
3411
14.6k
    let is_base = mo.preceding_siblings().is_empty();
3412
14.6k
    if !is_base && (
parent_name == "mover"1.38k
||
parent_name == "munder"1.09k
||
parent_name == "munderover"1.07k
) {
3413
      // canonicalize various diacritics for munder, mover, munderover
3414
309
      mo_text = match mo_text {
3415
309
        "_" | 
"\u{02C9}"303
|
"\u{0304}"303
|
"\u{0305}"303
|
"\u{332}"303
|
"\u{2212}"302
|
3416
302
        "\u{2010}" | "\u{2011}" | "\u{2012}" | "\u{2013}" | "\u{2014}" | "\u{2015}" | 
"\u{203e}"293
=>
"\u{00AF}"17
,
3417
292
        "\u{02BC}" => 
"`"0
,
3418
292
        "\u{02DC}" | "\u{223C}" => 
"~"0
, // use ASCII for diacriticals
3419
292
        "\u{02C6}"| "\u{0302}" => 
"^"0
,
3420
292
        "\u{0307}" => 
"\u{02D9}"0
, // Nemeth distinguishes this from "." -- \u{02D9} is generated for over dots by most generators
3421
292
        "\u{0308}" => 
"¨"0
,
3422
292
        _ => mo_text,
3423
      }
3424
      // FIX: MathType generates the wrong version of union and intersection ops (binary instead of unary)
3425
14.3k
    } else if !is_base && (
parent_name == "msup"1.07k
||
parent_name == "msubsup"858
) {
3426
227
      mo_text = match mo_text {
3427
227
        "\u{00BA}"| "\u{2092}"| "\u{20D8}"| "\u{2218}" | 
"\u{25E6}"223
=>
"\u{00B0}"4
, // circle-like objects -> degree
3428
223
        _ => mo_text,
3429
      };
3430
    } else {
3431
14.0k
      mo_text = match mo_text {
3432
14.0k
        "\u{02C9}"| "\u{0304}"| "\u{0305}" => 
"\u{00AF}"0
,
3433
14.0k
        "\u{02DC}" | "~"  => 
"\u{223C}"5
, // for base, use version with prefix and infix
3434
14.0k
        "\u{01C1}" => 
"\u{2016}"0
, // U+2016 is "‖"
3435
3436
14.0k
        _ => mo_text,
3437
      };
3438
    };
3439
14.6k
    if mo_text == "\u{2212}" {
3440
314
      mo_text = "-";
3441
14.2k
    }
3442
14.6k
    mo.set_text(mo_text);
3443
14.6k
  }
3444
  
3445
    
3446
  // Find the operator associated with the 'mo_node'
3447
  // This is complicated by potentially needing to distinguish between the
3448
  //   prefix, infix, or postfix version of the operator.
3449
  // To figure out prefix, we need to look at the node on the left; for postfix, we need to look to the left
3450
  // If the node of the left has been parsed, then this works.
3451
  // For example, suppose we want to determine if the "+" in 'x < n!+1' is prefix or infix.
3452
  //   If we simply looked left without parsing, we'd see an operator and choose prefix unless we could figure out that
3453
  //   that "!" was postfix.  But if it had been parsed, we'd see an mrow (operand) and tree "+" as infix (as it should).
3454
  // The same problem applies on the right for postfix operators, but a problem is rare for those
3455
  //   e.g., n!!n -- ((n!)!)*n or (n!)*(!n)  -- the latter doesn't make semantic sense though
3456
  // FIX:  the above ignores mspace and other nodes that need to be skipped to determine the right node to determine airity
3457
  // FIX:  the postfix problem above should be addressed
3458
19.4k
  fn find_operator<'a>(context: Option<&CanonicalizeContext>, mo_node: Element<'a>, previous_operator: Option<&'static OperatorInfo>,
3459
19.4k
            previous_node: Option<Element<'a>>, next_node: Option<Element<'a>>) -> &'static OperatorInfo {
3460
    // get the unicode value and return the OpKeyword associated with it
3461
19.4k
    assert!( name(mo_node) == "mo");
3462
  
3463
    // if a form has been given, that takes precedence
3464
19.4k
    let form = mo_node.attribute_value("form");
3465
19.4k
    let op_type =  match form {
3466
19.4k
      None => match context {
3467
5.50k
        None => OperatorTypes::POSTFIX,   // what compute_type_from_position returns when the other args to this are all None
3468
13.9k
        Some(context) => compute_type_from_position(context, previous_operator, previous_node, next_node),
3469
      },
3470
10
      Some(form) => match form.to_lowercase().as_str() {
3471
10
        "prefix" => 
OperatorTypes::PREFIX4
,
3472
6
        "postfix" => 
OperatorTypes::POSTFIX2
,
3473
4
        _ => OperatorTypes::INFIX,
3474
      }
3475
    };  
3476
  
3477
19.4k
    let found_op_info = if mo_node.attribute_value(CHEMICAL_BOND).is_some() {
3478
112
      Some(&IMPLIED_CHEMICAL_BOND)
3479
    } else {
3480
19.3k
      OPERATORS.get(as_text(mo_node))
3481
    };
3482
19.4k
    if found_op_info.is_none() {
3483
      // no known operator -- return the unknown operator with the correct "fix" type
3484
49
      return op_not_in_operator_dictionary(op_type);
3485
19.4k
    }
3486
  
3487
19.4k
    let found_op_info = found_op_info.unwrap();
3488
19.4k
    let matching_op_info = find_operator_info(found_op_info, op_type, form.is_some());
3489
19.4k
    if ptr_eq(matching_op_info, &ILLEGAL_OPERATOR_INFO) {
3490
0
      return op_not_in_operator_dictionary(op_type);
3491
    } else {
3492
19.4k
      return matching_op_info;
3493
    }
3494
3495
  
3496
13.9k
    fn compute_type_from_position<'a>(context: &CanonicalizeContext, previous_operator: Option<&'static OperatorInfo>, previous_node: Option<Element<'a>>, next_node: Option<Element<'a>>) -> OperatorTypes {
3497
      // based on choices, pick one that fits the context
3498
      // if there isn't an obvious one, we have parsed the left, but not the right, so discount that
3499
    
3500
      // Trig functions have some special syntax
3501
      // We need to treat '-' as prefix for things like "sin -2x"
3502
      // Need to be careful because (sin - cos)(x) needs an infix '-'
3503
      // Return either the prefix or infix version of the operator
3504
13.9k
      if next_node.is_some() &&
3505
11.9k
         context.is_function_name(get_possible_embellished_node(next_node.unwrap()), None) == FunctionNameCertainty::True {
3506
260
        return OperatorTypes::INFIX;
3507
13.6k
      }
3508
13.6k
      if previous_node.is_some() &&
3509
11.1k
         context.is_function_name(get_possible_embellished_node(previous_node.unwrap()), None) == FunctionNameCertainty::True {
3510
207
        return OperatorTypes::PREFIX;
3511
13.4k
      }
3512
    
3513
      // after that special case, start with the obvious cases...
3514
13.4k
      let operand_on_left = previous_operator.is_none() || 
previous_operator.unwrap()2.94k
.
is_postfix2.94k
(); // operand or postfix operator
3515
13.4k
      let operand_on_right = next_node.is_some() && 
name11.5k
(
get_possible_embellished_node11.5k
(next_node.unwrap())) !="mo"; // FIX: could improve by checking if it is a prefix op
3516
    
3517
13.4k
      if operand_on_left && 
operand_on_right10.5k
{
3518
8.19k
        return OperatorTypes::INFIX; // infix
3519
5.29k
      } else if !operand_on_left && 
operand_on_right2.94k
{
3520
2.75k
        return OperatorTypes::PREFIX; // prefix
3521
2.54k
      } else if operand_on_left && 
!operand_on_right2.34k
{
3522
2.34k
        return OperatorTypes::POSTFIX; // postfix
3523
      } else {
3524
        // either two operators in a row or right hand side not parsed so we don't really know what is right (same is true above)
3525
        // since there is nothing good to return, assume right is an operand after parsing (thus infix case)
3526
196
        return OperatorTypes::INFIX;
3527
      }
3528
13.9k
    }
3529
3530
19.4k
    fn find_operator_info(op_info: &OperatorInfo, op_type: OperatorTypes, from_form_attr: bool) -> &OperatorInfo {
3531
19.4k
      if op_info.is_operator_type(op_type) {
3532
12.9k
        return op_info;
3533
6.45k
      } else if let Some(
next_op_info1.64k
) = op_info.next {
3534
1.64k
        if next_op_info.is_operator_type(op_type) {
3535
730
          return next_op_info;
3536
915
        } else if let Some(
last_op_info256
) = next_op_info.next
3537
256
          && last_op_info.is_operator_type(op_type) {
3538
256
            return last_op_info;
3539
659
          }
3540
4.81k
      }
3541
3542
      // didn't find op_info that matches -- if type is not forced, then return first value (any is probably ok) 
3543
5.47k
      return if from_form_attr {
&ILLEGAL_OPERATOR_INFO0
} else {op_info};
3544
19.4k
    }
3545
  
3546
49
    fn op_not_in_operator_dictionary(op_type: OperatorTypes) -> &'static OperatorInfo {
3547
49
      return match op_type {
3548
16
        OperatorTypes::PREFIX => &DEFAULT_OPERATOR_INFO_PREFIX,
3549
9
        OperatorTypes::POSTFIX => &DEFAULT_OPERATOR_INFO_POSTFIX,
3550
24
        _ => &DEFAULT_OPERATOR_INFO_INFIX, // should only be infix
3551
      };
3552
49
    }
3553
19.4k
  }
3554
  
3555
13.9k
  fn n_vertical_bars_on_right(&self, remaining_children: &[ChildOfElement], vert_bar_ch: &str) -> usize {
3556
    // return the number of children that match 'vert_bar_op' not counting the first element
3557
13.9k
    let mut n = 0;
3558
149k
    for child_of_element in 
remaining_children13.9k
{
3559
149k
      let child = as_element(*child_of_element);
3560
149k
      if name(child) == "mo" {
3561
49.9k
        let operator_str = as_text(child);
3562
49.9k
        if operator_str == vert_bar_ch {
3563
42.7k
          n += 1;
3564
42.7k
        
}7.25k
3565
99.9k
      }
3566
    }
3567
13.9k
    return n;
3568
13.9k
  }
3569
  
3570
  
3571
13.9k
  fn determine_vertical_bar_op<'a>(&self, original_op: &'static OperatorInfo, mo_node: Element<'a>, 
3572
13.9k
        next_child: Option<Element<'a>>,
3573
13.9k
        parse_stack: &'a mut Vec<StackInfo>,
3574
13.9k
        n_vertical_bars_on_right: usize) -> &'static OperatorInfo {
3575
    // if in a prefix location, it is a left fence
3576
    // note:  if there is an operator on the top of the stack, it wants an operand (otherwise it would have been reduced)
3577
13.9k
    let operator_str = as_text(mo_node);
3578
13.9k
    let found_op_info = OPERATORS.get(operator_str);
3579
13.9k
    if found_op_info.is_none() {
3580
48
      return original_op;
3581
13.8k
    }
3582
13.8k
    let op = found_op_info.unwrap();
3583
13.8k
    if !AMBIGUOUS_OPERATORS.contains(operator_str) {
3584
      // debug!("   op is not ambiguous");
3585
13.4k
      return original_op;
3586
401
    };
3587
  
3588
401
    let operator_versions = OperatorVersions::new(op);
3589
401
    if let Some(
prefix360
) = operator_versions.prefix &&
3590
360
       (top(parse_stack).last_child_in_mrow().is_none() || 
!top(parse_stack).is_operand260
) {
3591
      // debug!("   is prefix");
3592
115
      return prefix;
3593
286
    }
3594
    
3595
    // We have either a right fence or an infix operand at the top of the stack
3596
    // If this is already parsed, we'd look to the right to see if there is an operand after this child.
3597
    // But it isn't parsed and there might be a prefix operator which will eventually become an operand, so it is tricky.
3598
    // It is even trickier because we might have an implicit times, so we can't really tell
3599
    // For example:  |x|y|z| which can be '|x| y |z|' or '|x |y| z|', or even | (x|y)|z |'
3600
    // We can't really know what is intended (without @intent).
3601
    // It seems like the case where it could be paired with a matching vertical bar as what most people would choose, so we favor that.
3602
  
3603
    // If there is a matching open vertical bar, it is either at the top of the stack or the entry just below the top
3604
3605
286
    let has_left_match = if let Some(
op_prefix245
) = operator_versions.prefix {
3606
245
      if ptr_eq(top(parse_stack).op_pair.op, op_prefix) {   // match at top of stack? (empty matching bars)
3607
109
        true
3608
136
      } else if parse_stack.len() > 2 {
3609
        // matching op is below top (operand between matching bars) -- pop, peek, push
3610
36
        let old_top = parse_stack.pop().unwrap();   
3611
36
        let top_op = top(parse_stack).op_pair.op;                                 // can only access top, so we need to pop off top and push back later
3612
36
        parse_stack.push(old_top);
3613
36
        ptr_eq(top_op, op_prefix)
3614
      } else {
3615
100
        false
3616
      }
3617
    } else {
3618
41
      false
3619
    };
3620
286
    if let Some(
postfix245
) =operator_versions.postfix && (
next_child245
.
is_none245
() ||
has_left_match130
) {
3621
      // last child in row (must be a close) or we have a left match
3622
      // debug!("   is postfix");
3623
136
      return postfix;
3624
150
    } else if next_child.is_none() {
3625
      // operand on left, so prefer infix version
3626
18
      return if let Some(infix) = operator_versions.infix {infix} else {
op0
};
3627
132
    }
3628
  
3629
132
    let next_child = next_child.unwrap();
3630
132
    if let Some(
prefix109
) = operator_versions.prefix &&
(n_vertical_bars_on_right & 0x1 != 0)109
{
3631
      //  ("   is prefix");
3632
3
      return prefix;   // odd number of vertical bars remain, so consider this the start of a pair
3633
129
    }
3634
  
3635
129
    let next_child = get_possible_embellished_node(next_child);
3636
129
    let next_child_op = if name(next_child) != "mo" {
3637
128
        None
3638
      } else {
3639
1
        let next_next_children = next_child.following_siblings();
3640
1
        let next_next_child = if next_next_children.is_empty() { 
None0
} else { Some( as_element(next_next_children[0]) )};
3641
1
        Some( CanonicalizeContext::find_operator(Some(self), next_child, operator_versions.infix,
3642
1
                  top(parse_stack).last_child_in_mrow(), next_next_child) )
3643
      };
3644
                          
3645
    // If the next child is a prefix op or a left fence, it will reduce to an operand, so don't consider it an operator
3646
129
    if next_child_op.is_some() && 
!next_child_op.unwrap().is_left_fence()1
&&
!next_child_op.unwrap().is_prefix()0
{
3647
0
      if let Some(postfix) =operator_versions.postfix {
3648
        // debug!("   is postfix");
3649
0
        return postfix; 
3650
0
      }
3651
129
    } else if let Some(infix) = operator_versions.infix {
3652
      // debug!("   is infix");
3653
129
      return infix; 
3654
0
    }
3655
  
3656
    // nothing good to match
3657
0
    return op;
3658
13.9k
  }
3659
3660
3661
  // return FunctionNameCertainty::False or Maybe if 'node' is a chemical element and is followed by a state (solid, liquid, ...)
3662
  //  in other words, we are certain this can't be a function since it looks like it is or might be chemistry
3663
1.71k
  fn is_likely_chemical_state<'a>(&self, node: Element<'a>, right_sibling: Element<'a>) -> FunctionNameCertainty {
3664
1.71k
    assert_eq!(name(get_parent(node)), "mrow"); // should be here because we are parsing an mrow
3665
  
3666
    // debug!("   in is_likely_chemical_state: '{}'?",element_summary(node));
3667
1.71k
    let node_chem_likelihood= node.attribute_value(MAYBE_CHEMISTRY);
3668
1.71k
    if node.attribute(MAYBE_CHEMISTRY).is_none() {
3669
1.16k
      return FunctionNameCertainty::True;
3670
549
    }
3671
3672
549
    if name(right_sibling) == "mrow" {    // clean_chemistry_mrow made sure any state-like structure is an mrow
3673
75
      let state_likelihood = likely_chem_state(right_sibling);
3674
75
      if state_likelihood > 0 {
3675
49
        right_sibling.set_attribute_value(MAYBE_CHEMISTRY, state_likelihood.to_string().as_str());
3676
        // at this point, we know both node and right_sibling are positive, so we have at least a maybe
3677
49
        if state_likelihood + node_chem_likelihood.unwrap().parse::<i32>().unwrap() > 2 {
3678
49
          return FunctionNameCertainty::False;
3679
        } else {
3680
0
          return FunctionNameCertainty::Maybe
3681
        }
3682
26
      }
3683
474
    }
3684
3685
500
    return FunctionNameCertainty::True;
3686
1.71k
  }
3687
  
3688
  // Try to figure out whether an <mi> is a function name or not.
3689
  // There are two important cases depending upon whether parens/brackets are used or not.
3690
  // E.g, sin x and f(x)
3691
  // 1. If parens follow the name, then we use a more inclusive set of heuristics as it is more likely a function
3692
  // The heuristics used are:
3693
  //   - it is on the list of known function names (e.g., sin" and "log")
3694
  //   - it is on the list of likely function names (e.g, f, g, h)
3695
  //   - multi-char names that begin with a capital letter (e.g, "Tr")
3696
  //   - there is a single token inside the parens (why else would someone use parens), any name (e.g, a(x))
3697
  //   - if there are multiple comma-separated args
3698
  //
3699
  // 2. If there are no parens, then only names on the known function list are used (e.g., "sin x")
3700
  //
3701
  // If the name if followed by parens but doesn't fit into the above categories, we return a "maybe"
3702
32.0k
  fn is_function_name<'a>(&self, node: Element<'a>, right_siblings: Option<&[ChildOfElement<'a>]>) -> FunctionNameCertainty {
3703
32.0k
    let base_of_name = get_possible_embellished_node(node);
3704
  
3705
    // actually only 'mi' should be legal here, but some systems used 'mtext' for multi-char variables
3706
    // FIX: need to allow for composition of function names. E.g, (f+g)(x) and (f^2/g)'(x)
3707
32.0k
    let node_name = name(base_of_name);
3708
32.0k
    if node_name != "mi" && 
node_name != "mtext"15.7k
{
3709
15.4k
      return FunctionNameCertainty::False;
3710
16.6k
    }
3711
    // whitespace is sometimes added to the mi since braille needs it, so do a trim here to get function name
3712
16.6k
    let base_name = as_text(base_of_name).trim();
3713
16.6k
    if base_name.is_empty() {
3714
2
      return FunctionNameCertainty::False;
3715
16.6k
    }
3716
    // debug!("    is_function_name({}), {} following nodes", base_name, if right_siblings.is_none() {"No".to_string()} else {right_siblings.unwrap().len().to_string()});
3717
16.6k
    return crate::definitions::SPEECH_DEFINITIONS.with(|defs| {
3718
      // names that are always function names (e.g, "sin" and "log")
3719
16.6k
      let defs = defs.borrow();
3720
16.6k
      let names = defs.get_hashset("FunctionNames").unwrap();
3721
      // UEB seems to think "Sin" (etc) is used for "sin", so we move to lower case
3722
16.6k
      if names.contains(&base_name.to_ascii_lowercase()) {
3723
        // debug!("     ...is in FunctionNames");
3724
1.02k
        return FunctionNameCertainty::True; // always treated as function names
3725
15.5k
      }
3726
3727
      // We include shapes as function names so that △ABC makes sense since △ and
3728
      //   the other shapes are not in the operator dictionary
3729
15.5k
      let shapes = defs.get_hashset("GeometryShapes").unwrap();
3730
15.5k
      if shapes.contains(base_name) {
3731
23
        return FunctionNameCertainty::True; // always treated as function names
3732
15.5k
      }
3733
  
3734
15.5k
      if right_siblings.is_none() {
3735
13.8k
        return FunctionNameCertainty::False; // only accept known names, which is tested above
3736
1.71k
      }
3737
3738
      // make sure that what follows starts and ends with parens/brackets
3739
1.71k
      assert_eq!(name(get_parent(node)), "mrow");
3740
1.71k
      let right_siblings = right_siblings.unwrap();
3741
1.71k
      let non_whitespace = right_siblings.iter().enumerate()
3742
1.71k
            .find(|&(_, child)| {
3743
1.71k
              let child = as_element(*child);
3744
1.71k
              name(child) != "mtext" || 
!as_text(child).trim().is_empty()54
3745
1.71k
            });
3746
1.71k
      let right_siblings = if let Some( (i, _) ) = non_whitespace {&right_siblings[i..]} else {
right_siblings0
};
3747
1.71k
      if right_siblings.is_empty() {
3748
        // debug!("     ...right siblings not None, but zero of them");
3749
0
        return FunctionNameCertainty::False;
3750
1.71k
      }
3751
3752
1.71k
      let first_child = as_element(right_siblings[0]);
3753
          
3754
      // clean_chemistry wrapped up a state in an mrow and this is assumed by is_likely_chemical_state()
3755
1.71k
      let chem_state_certainty = self.is_likely_chemical_state(node, first_child);
3756
1.71k
      if chem_state_certainty != FunctionNameCertainty::True {
3757
        // debug!("      ...is_likely_chemical_state says it is a function ={:?}", chem_state_certainty);
3758
49
        return chem_state_certainty;
3759
1.66k
      }
3760
3761
1.66k
      if name(first_child) == "mrow" && 
is_left_paren238
(
as_element238
(
first_child.children()[0]238
)) {
3762
        // debug!("     ...trying again after expanding mrow");
3763
235
        return self.is_function_name(node, Some(&first_child.children()));
3764
1.43k
      }
3765
3766
1.43k
      if right_siblings.len() < 2 {
3767
        // debug!("     ...not enough right siblings");
3768
542
        return FunctionNameCertainty::False; // can't be (...)
3769
892
      }
3770
3771
      // at least two siblings are this point -- check that they are parens/brackets
3772
      // we can only check the open paren/bracket because the right side is unparsed and we don't know the close location
3773
892
      let first_sibling = as_element(right_siblings[0]);
3774
892
      if name(first_sibling) != "mo"  || 
!is_left_paren(first_sibling)384
// '(' or '['
3775
      {
3776
        // debug!("     ...first sibling is not '(' or '['");
3777
522
        return FunctionNameCertainty::False;
3778
370
      }
3779
  
3780
370
      let likely_names = defs.get_hashset("LikelyFunctionNames").unwrap();
3781
370
      if likely_names.contains(base_name) {
3782
206
        return FunctionNameCertainty::True; // don't bother checking contents of parens, consider these as function names
3783
164
      }
3784
  
3785
164
      if is_single_arg(as_text(first_sibling), &right_siblings[1..]) {
3786
        // debug!("      ...is single arg");
3787
64
        return FunctionNameCertainty::True; // if there is only a single arg, why else would you use parens?
3788
100
      };
3789
3790
100
      if is_comma_arg(as_text(first_sibling), &right_siblings[1..]) {
3791
        // debug!("      ...is comma arg");
3792
2
        return FunctionNameCertainty::True; // if there is only a single arg, why else would you use parens?
3793
98
      };
3794
  
3795
      // FIX: should really make sure all the args are marked as MAYBE_CHEMISTRY, but we don't know the matching close paren/bracket
3796
98
      if node.attribute(MAYBE_CHEMISTRY).is_some() &&
3797
34
         as_element(right_siblings[1]).attribute(MAYBE_CHEMISTRY).is_some() {
3798
1
        return FunctionNameCertainty::False;
3799
97
      }
3800
  
3801
      // Names like "Tr" are likely function names, single letter names like "M" or "J" are iffy
3802
      // This needs to be after the chemical state check above to rule out Cl(g), etc
3803
      // This would be better if it were part of 'likely_names' as "[A-Za-z]+", but reg exprs don't work in HashSets.
3804
      // FIX: create our own struct and write appropriate traits for it and then it could work
3805
97
      let mut chars = base_name.chars();
3806
97
      let first_char = chars.next().unwrap();   // we know there is at least one byte in it, hence one char
3807
97
      if chars.next().is_some() && 
first_char4
.
is_uppercase4
() {
3808
        // debug!("      ...is uppercase name");
3809
4
        return FunctionNameCertainty::True;
3810
93
      }
3811
3812
      // debug!("      ...didn't match options to be a function");
3813
      // debug!("Right siblings:\n{}  ", right_siblings.iter().map(|&child| mml_to_string(as_element(child))).collect::<Vec<String>>().join("\n  "));
3814
93
      return if is_name_inside_parens(base_name, right_siblings) {
FunctionNameCertainty::False5
} else {
FunctionNameCertainty::Maybe88
};
3815
16.6k
    });
3816
  
3817
164
    fn is_single_arg(open: &str, following_nodes: &[ChildOfElement]) -> bool {
3818
      // following_nodes are nodes after "("
3819
164
      if following_nodes.is_empty() {
3820
0
        return true;   // "a(" might or might not be a function call -- treat as "is" because we can't see more 
3821
164
      }
3822
  
3823
164
      let first_child = as_element(following_nodes[0]);
3824
164
      if is_matching_right_paren(open, first_child) {
3825
0
        return true;   // no-arg case "a()"
3826
164
      }
3827
  
3828
      // could be really picky and restrict to checking for only mi/mn
3829
      // that might make more sense in stranger cases, but mfrac, msqrt, etc., probably shouldn't have parens if times 
3830
164
      return following_nodes.len() > 1 && 
3831
164
          name(first_child) != "mrow" &&
3832
127
          is_matching_right_paren(open, as_element(following_nodes[1]));
3833
164
    }
3834
  
3835
100
    fn is_comma_arg(open: &str, following_nodes: &[ChildOfElement]) -> bool {
3836
      // following_nodes are nodes after "("
3837
100
      if following_nodes.len() == 1 {
3838
0
        return false;
3839
100
      }
3840
3841
100
      let first_child = as_element(following_nodes[1]);
3842
100
      if name(first_child) == "mrow" {
3843
0
        return is_comma_arg(open, &first_child.children()[..]);
3844
100
      }
3845
3846
      // FIX: this loop is very simplistic and could be improved to count parens, etc., to make sure "," is at top-level
3847
318
      for child in 
following_nodes100
{
3848
318
        let child = as_element(*child);
3849
318
        if name(child) == "mo" {
3850
141
          if as_text(child) == "," {
3851
2
            return true;
3852
139
          }
3853
139
          if is_matching_right_paren(open, child) {
3854
96
            return false;
3855
43
          }
3856
177
        }
3857
      }
3858
      
3859
2
      return false;
3860
100
    }
3861
  
3862
622
    fn is_left_paren(node: Element) -> bool {
3863
622
      if name(node) != "mo" {
3864
1
        return false;
3865
621
      }
3866
621
      let text = as_text(node);
3867
621
      return text == "(" || 
text == "["22
;
3868
622
    }
3869
  
3870
430
    fn is_matching_right_paren(open: &str, node: Element) -> bool {
3871
430
      if name(node) != "mo" {
3872
184
        return false;
3873
246
      }
3874
246
      let text = as_text(node);
3875
      // debug!("         is_matching_right_paren: open={}, close={}", open, text);
3876
246
      return (open == "(" && 
text == ")"244
) || (
open == "["88
&&
text == "]"2
);
3877
430
    }
3878
3879
    /// Returns true if the name of the potential function is inside the parens. In that case, it is very unlikely to be a function call
3880
    /// For example, "n(n+1)"
3881
93
    fn is_name_inside_parens(function_name: &str, right_siblings: &[ChildOfElement]) -> bool {
3882
      // the first child of right_siblings is either '(' or '['
3883
      // right_siblings may extend well beyond the closing parens, so we first break this into finding the contents
3884
      // then we search the contents for the name
3885
93
      match find_contents(right_siblings) {
3886
2
        None => return false,
3887
91
        Some(contents) => return is_name_inside_contents(function_name, contents),
3888
      }
3889
      
3890
3891
93
      fn find_contents<'a>(right_siblings: &'a[ChildOfElement<'a>]) -> Option<&'a[ChildOfElement<'a>]> {
3892
93
        let open_text = as_text(as_element(right_siblings[0]));
3893
93
        let close_text = if open_text == "("  { 
")"91
} else {
"]"2
};
3894
93
        let mut nesting_level = 1;
3895
93
        let mut i = 1;
3896
296
        while i < right_siblings.len() {
3897
294
          let child = as_element(right_siblings[i]);
3898
294
          if name(child) == "mo" {
3899
133
            let op_text = as_text(child);
3900
133
            if op_text == open_text {
3901
0
              nesting_level += 1;
3902
133
            } else if op_text == close_text {
3903
91
              if nesting_level == 1 {
3904
91
                return Some(&right_siblings[1..i]);
3905
0
              } 
3906
0
              nesting_level -= 1;
3907
42
            }
3908
161
          }
3909
203
          i += 1;
3910
        }
3911
2
        return None; // didn't find matching paren
3912
93
      }
3913
3914
134
      fn is_name_inside_contents(function_name: &str, contents: &[ChildOfElement]) -> bool {
3915
304
        for &child in 
contents134
{
3916
304
          let child = as_element(child);
3917
          // debug!("is_name_inside_contents: child={}", mml_to_string(child));
3918
304
          if is_leaf(child) {
3919
261
            let text = as_text(child);
3920
261
            if (name(child) == "mi" || 
name(child) == "mtext"108
) &&
text == function_name163
{
3921
5
              return true;
3922
256
            }
3923
43
          } else if is_name_inside_contents(function_name, &child.children()) {
3924
4
            return true;
3925
39
          }
3926
        }
3927
125
        return false;
3928
134
      }
3929
93
    }
3930
32.0k
  }
3931
  
3932
5.79k
  fn is_mixed_fraction<'a>(&self, integer_part: Element<'a>, fraction_children: &[ChildOfElement<'a>]) -> Result<bool> {
3933
    // do some simple disqualifying checks on the fraction part
3934
5.79k
    if fraction_children.is_empty() {
3935
0
      return Ok( false );
3936
5.79k
    }
3937
5.79k
    let right_child = as_element(fraction_children[0]);
3938
5.79k
    let right_child_name = name(right_child);
3939
5.79k
    if ! (right_child_name == "mfrac" ||
3940
5.68k
       (right_child_name == "mrow" && 
right_child.children().len() == 3218
) ||
3941
5.48k
         (right_child_name == "mn" && 
fraction_children.len() >= 3138
) ) {
3942
5.46k
      return Ok( false );
3943
329
    };
3944
3945
329
    if !is_integer_part_ok(integer_part) {
3946
219
      return Ok( false );
3947
110
    }
3948
    
3949
110
    if right_child_name == "mfrac" {
3950
75
      return Ok( is_mfrac_ok(right_child) );
3951
35
    }
3952
3953
35
    return is_linear_fraction(self, fraction_children);
3954
3955
3956
351
    fn is_int(integer_part: Element) -> bool {
3957
351
      return name(integer_part) == "mn"  && 
!as_text(integer_part).contains(DECIMAL_SEPARATOR)185
;
3958
351
    }
3959
3960
329
    fn is_integer_part_ok(integer_part: Element) -> bool {
3961
      // integer part must be either 'n' or '-n' (in an mrow)
3962
329
      let integer_part_name = name(integer_part);
3963
329
      if integer_part_name == "mrow" {
3964
83
        let children = integer_part.children();
3965
83
        if children.len() == 2 &&
3966
16
           name(as_element(children[0])) == "mo" &&
3967
0
           as_text(as_element(children[0])) == "-" {
3968
0
          let integer_part = as_element(children[1]);
3969
0
          return is_int(integer_part);
3970
83
        }
3971
83
        return false;
3972
246
      };
3973
    
3974
246
      return is_int(integer_part);
3975
329
    }
3976
3977
75
    fn is_mfrac_ok(fraction_part: Element) -> bool {
3978
      // fraction_part needs to have integer numerator and denominator (already tested it is a frac)
3979
75
      let fraction_children = fraction_part.children();
3980
75
      if fraction_children.len() != 2 {
3981
0
        return false;
3982
75
      }
3983
75
      let numerator = as_element(fraction_children[0]);
3984
75
      if name(numerator) != "mn" || 
as_text(numerator)67
.
contains67
(DECIMAL_SEPARATOR) {
3985
8
        return false;
3986
67
      }
3987
67
      let denominator = as_element(fraction_children[1]);
3988
67
      return is_int(denominator);
3989
75
    }
3990
3991
66
    fn is_linear_fraction(canonicalize: &CanonicalizeContext, fraction_children: &[ChildOfElement]) -> Result<bool> {
3992
      // two possibilities
3993
      // 1. '3 / 4' is in an mrow
3994
      // 2. '3 / 4' are three separate elements
3995
66
      let first_child = as_element(fraction_children[0]);
3996
66
      if name(first_child) == "mrow" {
3997
31
        if first_child.children().len() != 3 {
3998
0
          return Ok( false );
3999
31
        }
4000
31
        return is_linear_fraction(canonicalize, &first_child.children())
4001
35
      }
4002
      
4003
      
4004
      // the length has been checked
4005
35
      assert!(fraction_children.len() >= 3);
4006
      
4007
35
      if !is_int(first_child) {
4008
30
        return Ok( false );
4009
5
      }
4010
5
      let slash_part = canonicalize.canonicalize_mrows(as_element(fraction_children[1]))
?0
;
4011
5
      if name(slash_part) == "mo" && as_text(slash_part) == "/" {
4012
3
        let denom = canonicalize.canonicalize_mrows(as_element(fraction_children[2]))
?0
;
4013
3
        return Ok( is_int(denom) );
4014
2
      }
4015
2
      return Ok( false );
4016
66
    }
4017
5.79k
  }
4018
4019
  /// implied comma when two numbers are adjacent and are in a script position
4020
5.72k
  fn is_implied_comma<'a>(&self, prev: Element<'a>, current: Element<'a>, mrow: Element<'a>) -> bool {
4021
5.72k
    if name(prev) != "mn" || 
name(current) != "mn"4.06k
{
4022
5.63k
      return false;
4023
95
    }
4024
4025
95
    assert_eq!(name(mrow), "mrow");
4026
95
    let container = get_parent(mrow);
4027
95
    let name = name(container);
4028
4029
    // test for script position is that it is not the base and hence has a preceding sibling
4030
95
    return (name == "msub" || 
name == "msubsup"14
||
name == "msup"14
) &&
!mrow.preceding_siblings().is_empty()81
;
4031
5.72k
  }
4032
4033
  /// implied separator when two capital letters are adjacent or two chemical elements
4034
5.64k
  fn is_implied_chemical_bond<'a>(&self, prev: Element<'a>, current: Element<'a>) -> bool {
4035
    // debug!("is_implied_chemical_bond: previous: {:?}", prev.preceding_siblings());
4036
    // debug!("is_implied_chemical_bond: following: {:?}", prev.following_siblings());
4037
5.64k
    if prev.attribute(MAYBE_CHEMISTRY).is_none() || 
current514
.attribute(MAYBE_CHEMISTRY).
is_none514
() {
4038
5.18k
      return false;
4039
462
    }
4040
    // ABC example where B and C are chemical elements is why we need to scan further than just checking B and C
4041
    // look for an mi/mtext with @MAYBE_CHEMISTRY until we get to something that can't have it
4042
626
    for child in 
prev462
.
preceding_siblings462
() {
4043
626
      if !is_valid_chemistry(as_element(child)) {
4044
11
        return false;
4045
615
      }
4046
    }
4047
851
    for child in 
current451
.
following_siblings451
() {
4048
851
      if !is_valid_chemistry(as_element(child)) {
4049
32
        return false;
4050
819
      }
4051
    }
4052
419
    return true;   // sequence of all MAYBE_CHEMISTRY
4053
4054
1.47k
    fn is_valid_chemistry(child: Element) -> bool {
4055
1.47k
      let child = get_possible_embellished_node(child);
4056
1.47k
      return child.attribute(MAYBE_CHEMISTRY).is_some() || (
name(child) != "mi"654
&&
name(child) != "mtext"614
);
4057
1.47k
    }
4058
5.64k
  }
4059
4060
  /// implied separator when two capital letters are adjacent or two chemical elements
4061
  /// also for adjacent omission chars
4062
5.22k
  fn is_implied_separator<'a>(&self, prev: Element<'a>, current: Element<'a>) -> bool {
4063
5.22k
    if name(prev) != "mi" || 
name(current) != "mi"516
{
4064
4.83k
      return false;
4065
390
    }
4066
4067
    // trim because whitespace might have gotten stuffed into the <mi>s
4068
390
    let prev_text = as_text(prev).trim();
4069
390
    let current_text = as_text(current).trim();
4070
390
    return prev_text.len() == 1 && 
current_text.len() == 1352
&&
4071
317
         ((is_cap(prev_text) && 
is_cap174
(
current_text174
)) ||
4072
151
          (prev_text=="_" && 
current_text=="_"0
));
4073
4074
4075
491
    fn is_cap(str: &str) -> bool {
4076
491
      assert_eq!(str.len(), 1);
4077
491
      return str.chars().next().unwrap().is_ascii_uppercase();
4078
491
    }
4079
5.22k
  }
4080
  
4081
42
  fn is_invisible_char_element(mathml: Element) -> bool {
4082
42
    if !is_leaf(mathml) {
4083
8
      return false
4084
34
    }
4085
34
    let text = as_text(mathml);
4086
34
    if text.len() != 3 {   // speed hack: invisible chars are three UTF-8 chars
4087
28
      return false;
4088
6
    } 
4089
6
    let ch = text.chars().next().unwrap();
4090
6
    return ('\u{2061}'..='\u{2064}').contains(&ch);
4091
42
  }
4092
4093
  // Add the current operator if it's not n-ary to the stack
4094
  // 'current_child' and it the operator to the stack.
4095
17.7k
  fn shift_stack<'s, 'a:'s, 'op:'a>(
4096
17.7k
        &self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>,
4097
17.7k
        current_child: Element<'a>, 
4098
17.7k
        current_op: OperatorPair<'op>) -> (Element<'a>, OperatorPair<'op>) {
4099
17.7k
    let mut new_current_child = current_child;
4100
17.7k
    let mut new_current_op = current_op.clone();
4101
17.7k
    let previous_op = top(parse_stack).op_pair.clone();
4102
    // debug!(" shift_stack: mrow len={}", top(parse_stack).mrow.children().len().to_string());
4103
    // debug!(" shift_stack: shift on '{}'; ops: prev '{}/{}', cur '{}/{}'",
4104
    //    element_summary(current_child),show_invisible_op_char(previous_op.ch), previous_op.op.priority,
4105
    //    show_invisible_op_char(current_op.ch), current_op.op.priority);
4106
17.7k
    if !current_op.op.is_nary(previous_op.op) {
4107
      // grab operand on top of stack (if there is one) and make it part of the new mrow since current op has higher precedence
4108
      // if operators are the same and are binary, then this push makes them act as left associative
4109
13.0k
      let mut top_of_stack = parse_stack.pop().unwrap();
4110
13.0k
      if top_of_stack.mrow.children().is_empty() || (
!top_of_stack.is_operand12.9k
&&
!current_op.op.is_right_fence()72
) {
4111
138
        // "bad" syntax - no operand on left -- don't grab operand (there is none)
4112
138
        //   just start a new mrow beginning with operator
4113
138
        // FIX -- check this shouldn't happen:  parse_stack.push(top_of_stack);
4114
138
        parse_stack.push( top_of_stack );   // put top back on
4115
138
        parse_stack.push( StackInfo::new(current_child.document()) );
4116
12.8k
      } else if current_op.op.is_right_fence() {
4117
        // likely, but not necessarily, there is a left fence to start the mrow
4118
        // this is like the postfix case except we grab the entire mrow, push on the close, and make that the mrow
4119
        // note:  the code does these operations on the stack for consistency, but it could be optimized without push/popping the stack
4120
1.96k
        let mrow = top_of_stack.mrow;
4121
1.96k
        top_of_stack.add_child_to_mrow(current_child, current_op);
4122
        // debug!("shift_stack: after adding right fence to mrow:\n{}", mml_to_string(mrow));
4123
1.96k
        new_current_op = OperatorPair::new();             // treat matched brackets as operand
4124
1.96k
        new_current_child = mrow;
4125
1.96k
        let children = mrow.children();
4126
1.96k
        let base_of_first_child = get_possible_embellished_node(as_element(children[0]));
4127
        // debug!("looking for left fence: len={}, {:#?}", children.len(), CanonicalizeContext::find_operator(Some(self), base_of_first_child, None, Some(as_element(children[0])), Some(mrow)));
4128
1.96k
        if children.len() == 2 &&
4129
64
           (name(base_of_first_child) != "mo" ||
4130
13
            !CanonicalizeContext::find_operator(Some(self), base_of_first_child, None,
4131
51
                            Some(
as_element13
(children[0])), Some(mrow)).is_left_fence()) {
4132
51
          // the mrow did *not* start with an open (hence no push)
4133
51
          // since parser really wants balanced parens to keep stack state right, we do a push here
4134
51
          parse_stack.push( StackInfo::new(mrow.document()) );
4135
51
        } else {
4136
          // the mrow started with some open fence (which caused a push) -- add the close, pop, and push on the "operand"
4137
1.91k
          new_current_child = self.potentially_lift_script(mrow)
4138
        }
4139
10.9k
      } else if current_op.op.is_postfix() {
4140
81
        // grab the left operand and start a new mrow with it and the operator -- put those back on the stack
4141
81
        // note:  the code does these operations on the stack for consistency, but it could be optimized without push/popping the stack
4142
81
        let previous_child = top_of_stack.remove_last_operand_from_mrow();         // remove operand from mrow
4143
81
        parse_stack.push(top_of_stack);
4144
81
        let mut new_top_of_stack = StackInfo::with_op(&current_child.document(), previous_child, current_op.clone()); // begin new mrow with operand
4145
81
        new_top_of_stack.add_child_to_mrow(current_child, current_op);  // add on operator
4146
81
        new_current_child = new_top_of_stack.mrow;                // grab for pushing on old mrow
4147
81
        new_current_op = OperatorPair::new();               // treat "reduced" postfix operator & operand as an operand
4148
81
        // debug!("shift_stack: after adding postfix to mrow has len: {}", new_current_child.children().len().to_string());
4149
10.8k
      } else {
4150
10.8k
        // normal infix op case -- grab the left operand and start a new mrow with it and the operator
4151
10.8k
        let previous_child = top_of_stack.remove_last_operand_from_mrow();
4152
10.8k
        parse_stack.push(top_of_stack);
4153
10.8k
        parse_stack.push( StackInfo::with_op(&current_child.document(),previous_child, current_op) );
4154
10.8k
      }
4155
4.73k
    }
4156
17.7k
    return (new_current_child, new_current_op);
4157
17.7k
  }
4158
  
4159
  
4160
25.2k
  fn reduce_stack<'s, 'a:'s, 'op:'a>(&self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>, current_priority: usize) {
4161
25.2k
    let mut prev_priority = top(parse_stack).priority();
4162
    // debug!(" reduce_stack: stack len={}, priority: prev={}, cur={}", parse_stack.len(), prev_priority, current_priority);
4163
37.2k
    while current_priority < prev_priority {          // pop off operators until we are back to the right level
4164
12.0k
      if parse_stack.len() == 1 {
4165
0
        break;     // something went wrong -- break before popping too much
4166
12.0k
      }
4167
12.0k
      prev_priority = self.reduce_stack_one_time(parse_stack);
4168
    };
4169
25.2k
  }
4170
4171
12.0k
  fn reduce_stack_one_time<'s, 'a:'s, 'op:'a>(&self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>) -> usize {
4172
12.0k
    let mut top_of_stack = parse_stack.pop().unwrap();
4173
    // debug!(" ..popped len={} op:'{}/{}', operand: {}",
4174
    //    top_of_stack.mrow.children().len(),
4175
    //    show_invisible_op_char(top_of_stack.op_pair.ch), top_of_stack.op_pair.op.priority,
4176
    //    top_of_stack.is_operand);
4177
12.0k
    let mut mrow = top_of_stack.mrow;
4178
12.0k
    if mrow.children().len() == 1 && 
CanonicalizeContext::is_ok_to_merge_mrow_child63
(
mrow63
) {
4179
63
      // should have added at least operator and operand, but input might not be well-formed
4180
63
      // in this case, unwrap the mrow and expose the single child for pushing onto stack
4181
63
      let single_child = top_of_stack.remove_last_operand_from_mrow();
4182
63
      mrow = single_child;
4183
11.9k
    }
4184
4185
12.0k
    let mut top_of_stack = parse_stack.pop().unwrap();
4186
12.0k
    top_of_stack.add_child_to_mrow(mrow, OperatorPair::new());  // mrow on top is "parsed" -- now add it to previous
4187
12.0k
    let prev_priority = top_of_stack.priority();
4188
12.0k
    parse_stack.push(top_of_stack);
4189
12.0k
    return prev_priority;
4190
12.0k
  }
4191
  
4192
5.06k
  fn is_trig_arg<'a, 'op:'a>(&self, previous_child: Element<'a>, current_child: Element<'a>, parse_stack: &mut Vec<StackInfo<'a, 'op>>) -> bool {
4193
    // We have operand-operand and know we want multiplication at this point. 
4194
    // Check for special case where we want multiplication to bind more tightly than function app (e.g, sin 2x, sin -2xy)
4195
    // We only want to do this for simple args
4196
    // debug!("  is_trig_arg: prev {}, current {}, Stack:", element_summary(previous_child), element_summary(current_child));
4197
    // parse_stack.iter().for_each(|stack_info| debug!("    {}", stack_info));
4198
5.06k
    if !IsNode::is_simple(current_child) {
4199
2.98k
      return false;
4200
2.07k
    }
4201
    // This only matters if we are not inside of parens
4202
2.07k
    if IsBracketed::is_bracketed(previous_child, "(", ")", false, false) ||
4203
2.01k
       IsBracketed::is_bracketed(previous_child, "[", "]", false, false) {
4204
63
      return false;
4205
2.01k
    }
4206
  
4207
    // Use lower priority multiplication if current_child is a function (e.g. "cos" in "sin x cos 3y")
4208
    // if !is_trig(current_child) {
4209
2.01k
    if self.is_function_name(current_child, None) == FunctionNameCertainty::True {
4210
1
      return false;
4211
2.01k
    }
4212
    // Three cases:
4213
    // 1. First operand-operand (e.g, sin 2x, where 'current_child' is 'x') -- top of stack is mrow('sin' f_apply '2')
4214
    // 2. Another First operand-operand (e.g, sin -2x, where 'current_child' is 'x') -- top of stack is mrow('-' '2'), next is mrow('sin', f_apply)
4215
    // 3. Subsequent operand-operand (e.g, sin 2xy, where 'current_child' is 'y') -- top of stack is mrow('2' 'times' 'x')
4216
    //    Note: IMPLIED_TIMES_HIGH_PRIORITY is only present if we have a trig function
4217
2.01k
    let op_on_top = &top(parse_stack).op_pair;
4218
2.01k
    if ptr_eq(op_on_top.op, *INVISIBLE_FUNCTION_APPLICATION) {
4219
8
      let function_element = as_element(top(parse_stack).mrow.children()[0]);
4220
8
      return is_trig(function_element);
4221
2.00k
    }
4222
2.00k
    if ptr_eq(op_on_top.op, *PREFIX_MINUS) {
4223
74
      if parse_stack.len() < 2 {
4224
0
        return false;
4225
74
      }
4226
74
      let next_stack_info = &parse_stack[parse_stack.len()-2];
4227
74
      if !ptr_eq(next_stack_info.op_pair.op, *INVISIBLE_FUNCTION_APPLICATION) {
4228
72
        return false;
4229
2
      }
4230
2
      let function_element = as_element(next_stack_info.mrow.children()[0]);
4231
2
      if is_trig(function_element) {
4232
        // want '- 2' to be an mrow; don't want '- 2 x ...' to be the mrow (IMPLIED_TIMES_HIGH_PRIORITY is an internal hack)
4233
1
        self.reduce_stack_one_time(parse_stack);
4234
1
        return true;
4235
1
      }
4236
1
      return false;
4237
1.92k
    }
4238
1.92k
    return ptr_eq(op_on_top.op, &IMPLIED_TIMES_HIGH_PRIORITY);
4239
4240
10
    fn is_trig(node: Element) -> bool {
4241
10
      let base_of_name = get_possible_embellished_node(node);
4242
  
4243
      // actually only 'mi' should be legal here, but some systems used 'mtext' for multi-char variables
4244
10
      let node_name = name(base_of_name);
4245
10
      if node_name != "mi" && 
node_name != "mtext"0
{
4246
0
        return false;
4247
10
      }
4248
      // whitespace is sometimes added to the mi since braille needs it, so do a trim here to get function name
4249
10
      let base_name = as_text(base_of_name).trim();
4250
10
      if base_name.is_empty() {
4251
0
        return false;
4252
10
      }
4253
10
      return crate::definitions::SPEECH_DEFINITIONS.with(|defs| {
4254
        // names that are always function names (e.g, "sin" and "log")
4255
10
        let defs = defs.borrow();
4256
10
        let names = defs.get_hashset("TrigFunctionNames").unwrap();
4257
        // UEB seems to think "Sin" (etc) is used for "sin", so we move to lower case
4258
10
        return names.contains(&base_name.to_ascii_lowercase());
4259
10
      });
4260
10
    }
4261
5.06k
  }
4262
  
4263
  
4264
  /*
4265
    canonicalize_mrows_in_mrow is a simple(ish) operator precedence parser.
4266
    It works by keeping a stack of 'StackInfo':
4267
    'StackInfo' has three parts:
4268
    1. the mrow being build
4269
    2. info about the operator in the mrow being build
4270
    3. bool to say whether the last thing is an operator or an operand
4271
  
4272
    When the op priority increases (eg, have "=" and get "+"), we push on
4273
    1. a new mrow -- if the operator has a left operand, we remove the last node in the mrow and it becomes
4274
       the first (only so far) child of the new mrow
4275
    2. the operator info
4276
  
4277
    When the op priority decreases, we do the following loop until the this new priority > priority on top of stack
4278
    1. pop the StackInfo
4279
    2. add the StackInfo's mrow  as the last child to the new top of the stack
4280
    We also do this when we hit the end of the mrow (we can treat this case as if we have a negative precedence)
4281
  
4282
    +/- are treated as nary operators and don't push/pop in those cases.
4283
    consecutive operands such as nary times are also considered n-ary operators and don't push/pop in those cases.
4284
  */
4285
7.48k
  fn canonicalize_mrows_in_mrow<'a>(&self, mrow: Element<'a>) -> Result<Element<'a>> {
4286
7.48k
    let is_ok_to_merge_child = mrow.children().len() != 1 || 
CanonicalizeContext::is_ok_to_merge_mrow_child56
(
mrow56
);
4287
7.48k
    let saved_mrow_attrs = mrow.attributes(); 
4288
7.48k
    assert_eq!(name(mrow), "mrow");
4289
  
4290
    // FIX: don't touch/canonicalize
4291
    // 1. if intent is given -- anything intent references
4292
    // 2. if the mrow starts or ends with a fence, don't merge into parent (parse children only) -- allows for "]a,b["
4293
7.48k
    let mut parse_stack = vec![StackInfo::new(mrow.document())];
4294
7.48k
    let mut children = mrow.children();
4295
7.48k
    let num_children = children.len();
4296
  
4297
36.7k
    for i_child in 
0..num_children7.48k
{
4298
      // debug!("\nDealing with child #{}: {}", i_child, mml_to_string(as_element(children[i_child])));
4299
36.7k
      let mut current_child = self.canonicalize_mrows(as_element(children[i_child]))
?0
;
4300
36.7k
      children[i_child] = ChildOfElement::Element( current_child );
4301
36.7k
      let base_of_child = get_possible_embellished_node(current_child);
4302
36.7k
      let acts_as_ch = current_child.attribute_value(ACT_AS_OPERATOR);
4303
36.7k
      let mut current_op = OperatorPair::new();
4304
      // figure what the current operator is -- it either comes from the 'mo' (if we have an 'mo') or it is implied
4305
36.7k
      if (name(base_of_child) == "mo" &&
4306
13.9k
          !( base_of_child.children().is_empty() || as_text(base_of_child) == "\u{00A0}" )) || // shouldn't have empty mo node, but...
4307
22.8k
         acts_as_ch.is_some() {
4308
13.9k
        let previous_op = if top(&parse_stack).is_operand {
None10.9k
} else {
Some( top(&parse_stack).op_pair.op )2.95k
};
4309
13.9k
        let next_node = if i_child + 1 < num_children {
Some(11.9k
as_element11.9k
(children[i_child+1]))} else {
None1.99k
};
4310
13.9k
        if let Some(
acts_as_ch20
) = acts_as_ch {
4311
20
          // ∇× (etc) hack, including ∇ being a vector (maybe eventually others)
4312
20
          let temp_mo = create_mathml_element(&current_child.document(), "mo");
4313
20
          temp_mo.set_text(acts_as_ch);
4314
20
          current_op = OperatorPair{
4315
20
            ch: acts_as_ch,
4316
20
            op: CanonicalizeContext::find_operator(Some(self), temp_mo, previous_op,
4317
20
                top(&parse_stack).last_child_in_mrow(), next_node)
4318
20
          };
4319
13.9k
        } else {
4320
13.9k
          current_op = OperatorPair{
4321
13.9k
            ch: as_text(base_of_child),
4322
13.9k
            op: CanonicalizeContext::find_operator(Some(self), base_of_child, previous_op,
4323
13.9k
                top(&parse_stack).last_child_in_mrow(), next_node)
4324
13.9k
          };
4325
13.9k
    
4326
13.9k
          // deal with vertical bars which might be infix, open, or close fences
4327
13.9k
          // note: mrow shrinks as we iterate through it (removing children from it)
4328
13.9k
          current_op.op = self.determine_vertical_bar_op(
4329
13.9k
            current_op.op,
4330
13.9k
            base_of_child,
4331
13.9k
            next_node,
4332
13.9k
            &mut parse_stack,
4333
13.9k
            self.n_vertical_bars_on_right(&children[i_child+1..], current_op.ch)
4334
13.9k
          );
4335
13.9k
        }
4336
      } else {
4337
22.8k
        let previous_child = top(&parse_stack).last_child_in_mrow();
4338
22.8k
        if let Some(
previous_child17.9k
) = previous_child {
4339
17.9k
          let base_of_previous_child = get_possible_embellished_node(previous_child);
4340
17.9k
          let acts_as_ch = previous_child.attribute_value(ACT_AS_OPERATOR);
4341
17.9k
          if name(base_of_previous_child) != "mo" && 
acts_as_ch6.57k
.
is_none6.57k
() {
4342
6.55k
            let likely_function_name = self.is_function_name(previous_child, Some(&children[i_child..]));
4343
6.55k
            if name(base_of_child) == "mtext" && 
as_text(base_of_child) == "\u{00A0}"184
{
4344
1
              base_of_child.set_attribute_value("data-function-likelihood", &(likely_function_name == FunctionNameCertainty::True).to_string());
4345
1
              base_of_child.remove_attribute("data-was-mo");
4346
1
              set_mathml_name(base_of_child, "mo");
4347
1
              let mut top_of_stack = parse_stack.pop().unwrap();
4348
1
              top_of_stack.add_child_to_mrow(current_child, OperatorPair{ ch: "\u{00A0}", op: *INVISIBLE_FUNCTION_APPLICATION});    // whitespace -- make part of mrow to keep out of parse
4349
1
              parse_stack.push(top_of_stack);
4350
1
              continue;
4351
6.55k
            }
4352
            // consecutive operands -- add an invisible operator as appropriate
4353
6.55k
            current_op = if likely_function_name == FunctionNameCertainty::True {
4354
753
                  OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION }
4355
5.79k
                } else if self.is_mixed_fraction(previous_child, &children[i_child..])
?0
{
4356
70
                  OperatorPair{ ch: "\u{2064}", op: *IMPLIED_INVISIBLE_PLUS }
4357
5.72k
                } else if self.is_implied_comma(previous_child, current_child, mrow) {
4358
81
                  OperatorPair{ch: "\u{2063}", op: *IMPLIED_INVISIBLE_COMMA }          
4359
5.64k
                } else if self.is_implied_chemical_bond(previous_child, current_child) {
4360
419
                  OperatorPair{ch: "\u{2063}", op: &IMPLIED_CHEMICAL_BOND }          
4361
5.22k
                } else if self.is_implied_separator(previous_child, current_child) {
4362
166
                  OperatorPair{ch: "\u{2063}", op: &IMPLIED_SEPARATOR_HIGH_PRIORITY }          
4363
5.06k
                } else if self.is_trig_arg(base_of_previous_child, base_of_child, &mut parse_stack) {
4364
9
                  OperatorPair{ch: "\u{2062}", op: &IMPLIED_TIMES_HIGH_PRIORITY }          
4365
                } else {
4366
5.05k
                  OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES }
4367
                };
4368
6.55k
            if let Some(
attr_val262
) = base_of_child.attribute_value(CHANGED_ATTR)
4369
262
              && attr_val == "data-was-mo" {
4370
0
                // it really should be an operator
4371
0
                base_of_child.remove_attribute(CHANGED_ATTR);
4372
0
                set_mathml_name(base_of_child, "mo");
4373
6.55k
              }
4374
6.55k
            if name(base_of_child) == "mo" {
4375
1
              current_op.ch = as_text(base_of_child);
4376
1
              // debug!("  Found whitespace op '{}'/{}", show_invisible_op_char(current_op.ch), current_op.op.priority);
4377
1
            } else {
4378
6.54k
              let implied_mo = create_mo(current_child.document(), current_op.ch, ADDED_ATTR_VALUE);
4379
6.54k
              if likely_function_name == FunctionNameCertainty::Maybe {
4380
33
                implied_mo.set_attribute_value("data-function-guess", "true");
4381
6.51k
              }
4382
              // debug!("  Found implicit op {}/{} [{:?}]", show_invisible_op_char(current_op.ch), current_op.op.priority, likely_function_name);
4383
6.54k
              self.reduce_stack(&mut parse_stack, current_op.op.priority);    
4384
6.54k
              let shift_result = self.shift_stack(&mut parse_stack, implied_mo, current_op.clone());
4385
              // ignore shift_result.0 which is just 'implied_mo'
4386
6.54k
              assert_eq!(implied_mo, shift_result.0);
4387
6.54k
              assert!( ptr_eq(current_op.op, shift_result.1.op) );
4388
6.54k
              let mut top_of_stack = parse_stack.pop().unwrap();
4389
6.54k
              top_of_stack.add_child_to_mrow(implied_mo, current_op);
4390
6.54k
              parse_stack.push(top_of_stack);
4391
6.54k
              current_op = OperatorPair::new(); 
4392
            }
4393
11.3k
          }
4394
4.88k
        }
4395
      }
4396
  
4397
36.7k
      if !ptr_eq(current_op.op, &ILLEGAL_OPERATOR_INFO) {
4398
13.9k
        if current_op.op.is_left_fence() || 
current_op.op12.0k
.
is_prefix12.0k
() {
4399
2.95k
          if top(&parse_stack).is_operand {
4400
            // will end up with duplicate operands -- need to choose operator associated with prev child
4401
            // we use the original input here because in this case, we need to look to the right of the ()s to deal with chemical states
4402
232
            let likely_function_name = self.is_function_name(as_element(children[i_child-1]), Some(&children[i_child..]));
4403
232
            let implied_operator = if likely_function_name== FunctionNameCertainty::True {
4404
98
                OperatorPair{ ch: "\u{2061}", op: *INVISIBLE_FUNCTION_APPLICATION }
4405
              } else {
4406
134
                OperatorPair{ ch: "\u{2062}", op: *IMPLIED_TIMES }
4407
              };
4408
            // debug!("  adding implied {}", if ptr_eq(implied_operator.op,*IMPLIED_TIMES) {"times"} else {"function apply"});
4409
  
4410
232
            let implied_mo = create_mo(current_child.document(), implied_operator.ch, ADDED_ATTR_VALUE);
4411
232
            if likely_function_name == FunctionNameCertainty::Maybe {
4412
55
              implied_mo.set_attribute_value("data-function-guess", "true");
4413
177
            }
4414
232
            self.reduce_stack(&mut parse_stack, implied_operator.op.priority);            let shift_result = self.shift_stack(&mut parse_stack, implied_mo, implied_operator.clone());
4415
            // ignore shift_result.0 which is just 'implied_mo'
4416
232
            assert_eq!(implied_mo, shift_result.0);
4417
232
            assert!( ptr_eq(implied_operator.op, shift_result.1.op) );
4418
232
            let mut top_of_stack = parse_stack.pop().unwrap();
4419
232
            top_of_stack.add_child_to_mrow(implied_mo, implied_operator);
4420
232
            parse_stack.push(top_of_stack);
4421
2.72k
          }
4422
          // starting a new mrow
4423
2.95k
          parse_stack.push( StackInfo::new(current_child.document()) );
4424
        } else {
4425
          // One of infix, postfix, or right fence -- all should have a left operand
4426
          // pop the stack if it is lower precedence (it forms an mrow)
4427
          
4428
          // hack to get linear mixed fractions to parse correctly
4429
10.9k
          if current_op.ch == "/" && 
top(&parse_stack).op_pair.ch == "\u{2064}"41
{
4430
2
              current_op.op = &IMPLIED_PLUS_SLASH_HIGH_PRIORITY;
4431
10.9k
          }
4432
10.9k
          self.reduce_stack(&mut parse_stack, current_op.op.priority);
4433
          // push new operator on stack (already handled n-ary case)
4434
10.9k
          let shift_result = self.shift_stack(&mut parse_stack, current_child, current_op);
4435
10.9k
          current_child = shift_result.0;
4436
10.9k
          current_op = shift_result.1;
4437
        }
4438
22.7k
      }
4439
36.7k
      let mut top_of_stack = parse_stack.pop().unwrap();
4440
36.7k
      top_of_stack.add_child_to_mrow(current_child, current_op);
4441
36.7k
      parse_stack.push(top_of_stack);
4442
    }
4443
  
4444
    // Reached the end -- force reduction of what's left on the stack
4445
7.48k
    self.reduce_stack(&mut parse_stack, LEFT_FENCEPOST.priority);
4446
  
4447
    // We essentially have 'terminator( mrow terminator)'
4448
    //   in other words, we have an extra mrow with one child due to the initial start -- remove it
4449
7.48k
    let mut top_of_stack = parse_stack.pop().unwrap();
4450
7.48k
    assert_eq!(parse_stack.len(), 0);
4451
  
4452
7.48k
    let mut parsed_mrow = top_of_stack.mrow;
4453
7.48k
    assert_eq!( name(top_of_stack.mrow), "mrow");
4454
7.48k
    if parsed_mrow.children().len() == 1 && is_ok_to_merge_child {
4455
7.46k
      parsed_mrow = top_of_stack.remove_last_operand_from_mrow();
4456
7.46k
      // was synthesized, but is really the original top level mrow
4457
7.46k
    
}15
4458
  
4459
7.48k
    parsed_mrow.remove_attribute(CHANGED_ATTR);
4460
7.48k
    return Ok( add_attrs(parsed_mrow, &saved_mrow_attrs) );
4461
7.48k
  }  
4462
}
4463
4464
// ---------------- useful utility functions --------------------
4465
102k
fn top<'s, 'a:'s, 'op:'a>(vec: &'s[StackInfo<'a, 'op>]) -> &'s StackInfo<'a, 'op> {
4466
102k
  return &vec[vec.len()-1];
4467
102k
}
4468
// Replace the attrs of 'mathml' with 'attrs' and keep the global attrs of 'mathml' (i.e, lift 'attrs' to 'mathml' for replacing children)
4469
10.0k
pub fn add_attrs<'a>(mathml: Element<'a>, attrs: &[Attribute]) -> Element<'a> {
4470
  static GLOBAL_ATTRS: phf::Set<&str> = phf_set! {
4471
    "class", "dir", "displaystyle", "id", "mathbackground", "mathcolor", "mathsize",
4472
    "mathvariant", "nonce", "scriptlevel", "style", "tabindex",
4473
    "intent", "arg",
4474
  };
4475
  
4476
  // debug!(   "Adding back {} attr(s) to {}", attrs.len(), name(mathml));
4477
  // remove non-global attrs
4478
10.0k
  for 
attr740
in mathml.attributes() {
4479
740
    let attr_name = attr.name().local_part();
4480
740
    if !( attr_name.starts_with("data-") || 
GLOBAL_ATTRS534
.
contains534
(
attr_name534
) ||
4481
278
          attr_name.starts_with("on") ) {     // allows too much - cheapo way to allow event handlers like "onchange"
4482
278
      mathml.remove_attribute(attr.name());
4483
462
    }
4484
  }
4485
4486
  // add in 'attrs'
4487
10.0k
  for 
attr5.22k
in attrs {
4488
5.22k
    mathml.set_attribute_value(attr.name(), attr.value());
4489
5.22k
  }
4490
10.0k
  return mathml;
4491
10.0k
}
4492
4493
4494
2.91M
pub fn name(node: Element<'_>) -> &str {
4495
2.91M
  return node.name().local_part();
4496
2.91M
}
4497
4498
/// The child of a non-leaf element must be an element
4499
// Note: can't use references as that results in 'returning use of local variable'
4500
1.14M
pub fn as_element(child: ChildOfElement) -> Element {
4501
1.14M
  return match child {
4502
1.14M
    ChildOfElement::Element(e) => e,
4503
    _ => {
4504
0
      panic!("as_element: internal error -- found non-element child (text? '{:?}')", child.text());
4505
    },
4506
  };
4507
1.14M
}
4508
4509
/// The child of a leaf element must be text (previously trimmed)
4510
/// Note: trim() combines all the Text children into a single string
4511
603k
pub fn as_text(leaf_child: Element<'_>) -> &str {
4512
603k
  assert!(is_leaf(leaf_child));
4513
603k
  let children = leaf_child.children();
4514
603k
  if children.is_empty() {
4515
401
    return "";
4516
602k
  }
4517
602k
  assert!(children.len() == 1);
4518
602k
  return match children[0] {
4519
602k
    ChildOfElement::Text(t) => t.text(),
4520
0
    _ => panic!("as_text: internal error -- found non-text child of leaf element"),
4521
  }
4522
603k
}
4523
4524
/// Returns the parent of the argument.
4525
/// Warning: this assumes the parent exists
4526
239k
pub fn get_parent(mathml: Element) -> Element {
4527
239k
  return mathml.parent().unwrap().element().unwrap();
4528
239k
}
4529
4530
#[allow(dead_code)] // for debugging
4531
0
pub fn element_summary(mathml: Element) -> String {
4532
0
  return format!("{}<{}>", name(mathml),
4533
0
                if is_leaf(mathml) {show_invisible_op_char(as_text(mathml)).to_string()}
4534
          else 
4535
0
                     {mathml.children().len().to_string()});
4536
0
}
4537
4538
6.86k
fn create_mo<'a, 'd:'a>(doc: Document<'d>, ch: &'a str, attr_value: &str) -> Element<'d> {
4539
6.86k
  let implied_mo = create_mathml_element(&doc, "mo");
4540
6.86k
  implied_mo.set_attribute_value(CHANGED_ATTR, attr_value);
4541
6.86k
  let mo_text = doc.create_text(ch);
4542
6.86k
  implied_mo.append_child(mo_text);
4543
6.86k
  return implied_mo;
4544
6.86k
}
4545
4546
/// return 'node' or if it is adorned, return its base (recursive)
4547
130k
pub fn get_possible_embellished_node(node: Element) -> Element {
4548
130k
  let mut node = node;
4549
138k
  while IsNode::is_modified(node) {
4550
8.33k
    node = as_element(node.children()[0]);
4551
8.33k
  }
4552
130k
  return node;
4553
130k
}    
4554
4555
#[allow(dead_code)] // for debugging with println
4556
0
fn show_invisible_op_char(ch: &str) -> &str {
4557
0
  return match ch.chars().next().unwrap() {
4558
0
    '\u{2061}' => "&#x2061;",
4559
0
    '\u{2062}' => "&#x2062;",
4560
0
    '\u{2063}' => "&#x2063;",
4561
0
    '\u{2064}' => "&#x2064;",
4562
0
    '\u{E000}' => "&#xE000;",
4563
0
    _        => ch
4564
  };
4565
0
}
4566
4567
4568
#[cfg(test)]
4569
mod canonicalize_tests {
4570
  use crate::errors::Result;
4571
  use crate::{are_strs_canonically_equal_result, are_strs_canonically_equal_with_locale};
4572
4573
#[allow(unused_imports)]
4574
  use super::super::init_logger;
4575
  use super::super::abs_rules_dir_path;
4576
    use super::*;
4577
    use sxd_document::parser;
4578
4579
4580
    #[test]
4581
1
    fn canonical_same() -> Result<()> {
4582
1
        let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
4583
1
        are_strs_canonically_equal_result(target_str, target_str, &[])
4584
1
    }
4585
4586
  #[test]
4587
1
    fn plane1_common() -> Result<()> {
4588
1
        let test_str = "<math>
4589
1
        <mi mathvariant='normal'>sin</mi> <mo>,</mo>    <!-- shouldn't change -->
4590
1
        <mi mathvariant='italic'>bB4</mi> <mo>,</mo>    <!-- shouldn't change -->
4591
1
        <mi mathvariant='bold'>a</mi> <mo>,</mo>      <!-- single char id tests -->
4592
1
        <mi mathvariant='bold'>Z</mi> <mo>,</mo>
4593
1
        <mn mathvariant='bold'>19=&#x1D7D7;</mn> <mo>,</mo> <!-- '=' and plane1 shouldn't change -->
4594
1
        <mn mathvariant='double-struck'>024689</mn> <mo>,</mo>  <!-- '=' and plane1 shouldn't change -->
4595
1
        <mi mathvariant='double-struck'>yzCHNPQRZ</mi> <mo>,</mo>
4596
1
        <mi mathvariant='fraktur'>0yACHIRZ</mi> <mo>,</mo>  <!-- 0 stays as ASCII -->
4597
1
        <mi mathvariant='bold-fraktur'>nC</mi> <mo>,</mo>
4598
1
        <mi mathvariant='script'>ABEFHILMRegow</mi> <mo>,</mo>
4599
1
        <msup>
4600
1
          <mi mathvariant='bold-script'>fG</mi>
4601
1
          <mo mathvariant='bold-script'>*</mo>        <!-- '*' shouldn't change -->
4602
1
        </msup>
4603
1
      </math>";
4604
1
        let target_str = "<math>
4605
1
      <mrow data-changed='added'>
4606
1
        <mi mathvariant='normal'>sin</mi>
4607
1
        <mo >,</mo>
4608
1
        <mi mathvariant='italic'>bB4</mi>
4609
1
        <mo>,</mo>
4610
1
        <mi mathvariant='bold'>𝐚</mi>
4611
1
        <mo>,</mo>
4612
1
        <mi mathvariant='bold'>𝐙</mi>
4613
1
        <mo>,</mo>
4614
1
        <mn mathvariant='bold'>𝟏𝟗=𝟗</mn>
4615
1
        <mo>,</mo>
4616
1
        <mn mathvariant='double-struck'>𝟘𝟚𝟜𝟞𝟠𝟡</mn>
4617
1
        <mo>,</mo>
4618
1
        <mi mathvariant='double-struck'>𝕪𝕫ℂℍℕℙℚℝℤ</mi>
4619
1
        <mo>,</mo>
4620
1
        <mi mathvariant='fraktur'>0𝔶𝔄ℭℌℑℜℨ</mi>
4621
1
        <mo>,</mo>
4622
1
        <mi mathvariant='bold-fraktur'>𝖓𝕮</mi>
4623
1
        <mo>,</mo>
4624
1
        <mi mathvariant='script'>𝒜ℬℰℱℋℐℒℳℛℯℊℴ𝓌</mi>
4625
1
        <mo>,</mo>
4626
1
        <msup>
4627
1
          <mi mathvariant='bold-script'>𝓯𝓖</mi>
4628
1
          <mo mathvariant='bold-script'>*</mo>        <!-- '*' shouldn't change -->
4629
1
        </msup>
4630
1
      </mrow>
4631
1
    </math>";
4632
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4633
1
  }
4634
  
4635
  #[test]
4636
1
    fn plane1_font_styles() -> Result<()> {
4637
1
        let test_str = "<math>
4638
1
        <mi mathvariant='sans-serif'>aA09=</mi> <mo>,</mo>      <!-- '=' shouldn't change -->
4639
1
        <mi mathvariant='bold-sans-serif'>zZ09</mi> <mo>,</mo>  
4640
1
        <mi mathvariant='sans-serif-italic'>azAZ09</mi> <mo>,</mo>  <!-- italic digits don't exist: revert to sans-serif -->
4641
1
        <mi mathvariant='sans-serif-bold-italic'>AZaz09</mi> <mo>,</mo> <!--  italic digits don't exist: revert to just bold -->
4642
1
        <mi mathvariant='monospace'>aA09</mi>
4643
1
      </math>";
4644
1
        let target_str = "<math>
4645
1
        <mrow data-changed='added'>
4646
1
          <mi mathvariant='sans-serif'>𝖺𝖠𝟢𝟫=</mi>
4647
1
          <mo>,</mo>
4648
1
          <mi mathvariant='bold-sans-serif'>𝘇𝗭𝟬𝟵</mi>
4649
1
          <mo>,</mo>
4650
1
          <mi mathvariant='sans-serif-italic'>𝘢𝘻𝘈𝘡𝟢𝟫</mi>
4651
1
          <mo>,</mo>
4652
1
          <mi mathvariant='sans-serif-bold-italic'>𝘼𝙕𝙖𝙯𝟬𝟵</mi>
4653
1
          <mo>,</mo>
4654
1
          <mi mathvariant='monospace'>𝚊𝙰𝟶𝟿</mi>
4655
1
        </mrow>
4656
1
      </math>";
4657
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4658
1
  }
4659
  
4660
  #[test]
4661
1
    fn plane1_greek() -> Result<()> {
4662
1
        let test_str = "<math>
4663
1
        <mi mathvariant='normal'>ΑΩαω∇∂ϵ=</mi> <mo>,</mo>    <!-- shouldn't change -->
4664
1
        <mi mathvariant='italic'>ϴΑΩαω∇∂ϵ</mi> <mo>,</mo>
4665
1
        <mi mathvariant='bold'>ΑΩαωϝϜ</mi> <mo>,</mo> 
4666
1
        <mi mathvariant='double-struck'>Σβ∇</mi> <mo>,</mo>   <!-- shouldn't change -->
4667
1
        <mi mathvariant='fraktur'>ΞΦλϱ</mi> <mo>,</mo>      <!-- shouldn't change -->
4668
1
        <mi mathvariant='bold-fraktur'>ψΓ</mi> <mo>,</mo>   <!-- map to bold -->
4669
1
        <mi mathvariant='script'>μΨ</mi> <mo>,</mo>       <!-- shouldn't change -->
4670
1
        <mi mathvariant='bold-script'>Σπ</mi>         <!-- map to bold -->
4671
1
      </math>";
4672
1
        let target_str = "<math>
4673
1
        <mrow data-changed='added'>
4674
1
          <mi mathvariant='normal'>ΑΩαω∇∂ϵ=</mi>
4675
1
          <mo>,</mo>
4676
1
          <mi mathvariant='italic'>𝛳𝛢𝛺𝛼𝜔𝛻𝜕𝜖</mi>
4677
1
          <mo>,</mo>
4678
1
          <mi mathvariant='bold'>𝚨𝛀𝛂𝛚𝟋𝟊</mi>
4679
1
          <mo>,</mo>
4680
1
          <mi mathvariant='double-struck'>Σβ∇</mi>
4681
1
          <mo>,</mo>
4682
1
          <mi mathvariant='fraktur'>ΞΦλϱ</mi>
4683
1
          <mo>,</mo>
4684
1
          <mi mathvariant='bold-fraktur'>𝛙𝚪</mi>
4685
1
          <mo>,</mo>
4686
1
          <mi mathvariant='script'>μΨ</mi>
4687
1
          <mo>,</mo>
4688
1
          <mi mathvariant='bold-script'>𝚺𝛑</mi>
4689
1
        </mrow>
4690
1
      </math>";
4691
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4692
1
  }
4693
  
4694
  #[test]
4695
1
    fn plane1_greek_font_styles() -> Result<()> {
4696
1
        let test_str = "<math>
4697
1
        <mi mathvariant='sans-serif'>ΑΩαω∇∂ϵ=</mi> <mo>,</mo>      <!-- '=' shouldn't change -->
4698
1
        <mi mathvariant='bold-sans-serif'>ϴ0ΑΩαω∇∂ϵ</mi> <mo>,</mo> 
4699
1
        <mi mathvariant='sans-serif-italic'>aΑΩαω∇∂ϵ</mi> <mo>,</mo> <!-- italic digits don't exist: revert to sans-serif -->
4700
1
        <mi mathvariant='sans-serif-bold-italic'>ZΑΩαωϰϕϱϖ</mi> <mo>,</mo>  <!--  italic digits don't exist: revert to just bold -->
4701
1
        <mi mathvariant='monospace'>zΑΩαω∇∂</mi>
4702
1
      </math>";
4703
1
        let target_str = "<math>
4704
1
        <mrow data-changed='added'>
4705
1
          <mi mathvariant='sans-serif'>ΑΩαω∇∂ϵ=</mi>
4706
1
          <mo>,</mo>
4707
1
          <mi mathvariant='bold-sans-serif'>𝝧𝟬𝝖𝝮𝝰𝞈𝝯𝞉𝞊</mi>
4708
1
          <mo>,</mo>
4709
1
          <mi mathvariant='sans-serif-italic'>𝘢ΑΩαω∇∂ϵ</mi>
4710
1
          <mo>,</mo>
4711
1
          <mi mathvariant='sans-serif-bold-italic'>𝙕𝞐𝞨𝞪𝟂𝟆𝟇𝟈𝟉</mi>
4712
1
          <mo>,</mo>
4713
1
          <mi mathvariant='monospace'>𝚣ΑΩαω∇∂</mi>
4714
1
        </mrow>
4715
1
      </math>";
4716
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4717
1
  }
4718
4719
    #[test]
4720
1
    fn short_and_long_dash() -> Result<()> {
4721
1
        let test_str = "<math><mi>x</mi> <mo>=</mo> <mi>--</mi><mo>+</mo><mtext>----</mtext></math>";
4722
1
        let target_str = "<math>
4723
1
      <mrow data-changed='added'>
4724
1
      <mi>x</mi>
4725
1
      <mo>=</mo>
4726
1
      <mrow data-changed='added'>
4727
1
        <mi>—</mi>
4728
1
        <mo>+</mo>
4729
1
        <mtext>―</mtext>
4730
1
      </mrow>
4731
1
      </mrow>
4732
1
    </math>";
4733
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
4734
1
    }
4735
4736
    #[test]
4737
1
    fn illegal_mathml_element() {
4738
    use crate::interface::*;
4739
1
        let test_str = "<math><foo><mi>f</mi></foo></math>";
4740
1
        let package1 = &parser::parse(test_str).expect("Failed to parse test input");
4741
1
    let mathml = get_element(package1);
4742
1
    trim_element(mathml, false);
4743
1
    assert!(canonicalize(mathml).is_err());
4744
1
    }
4745
4746
    #[test]
4747
1
    fn illegal_mtd_element() {
4748
    use crate::interface::*;
4749
1
        let test_str = "<math>
4750
1
      <mtable>
4751
1
        <mtr>
4752
1
          <mtd>
4753
1
          <mtext></mtext>
4754
1
          </mtd>
4755
1
          <mrow>
4756
1
          <mi>E</mi>
4757
1
          <mo>=</mo>
4758
1
          <mrow>
4759
1
          <mtd>
4760
1
            <mi>m</mi>
4761
1
            <mo>⁢<!--INVISIBLE TIMES--></mo>
4762
1
            <msup>
4763
1
            <mi>c</mi>
4764
1
            <mn>2</mn>
4765
1
            </msup>
4766
1
            </mtd></mrow>
4767
1
          </mrow>
4768
1
          
4769
1
        </mtr>
4770
1
      </mtable>
4771
1
    </math>";
4772
1
        let package1 = &parser::parse(test_str).expect("Failed to parse test input");
4773
1
    let mathml = get_element(package1);
4774
1
    trim_element(mathml, false);
4775
1
    assert!(canonicalize(mathml).is_err());
4776
1
    }
4777
4778
4779
    #[test]
4780
1
    fn a_to_mrow() -> Result<()> {
4781
1
        let test_str = "<math>
4782
1
      <a href='https://www.example.com'>
4783
1
        <mo>(</mo>
4784
1
        <a href='#its_relative'>
4785
1
          <mi>x</mi>
4786
1
          <mo>,</mo>
4787
1
          <mi>y</mi>
4788
1
        </a>
4789
1
        <mo>)</mo>
4790
1
      </a>
4791
1
      </math>
4792
1
";
4793
1
        let target_str = " <math>
4794
1
      <mrow href='https://www.example.com'>
4795
1
        <mo>(</mo>
4796
1
        <mrow href='#its_relative'>
4797
1
        <mi>x</mi>
4798
1
        <mo>,</mo>
4799
1
        <mi>y</mi>
4800
1
        </mrow>
4801
1
        <mo>)</mo>
4802
1
      </mrow>
4803
1
    </math>";
4804
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4805
1
    }
4806
4807
    #[test]
4808
1
    fn mfenced_no_children() -> Result<()> {
4809
1
        let test_str = "<math><mi>f</mi><mfenced><mrow/></mfenced></math>";
4810
1
        let target_str = "<math>
4811
1
      <mrow data-changed='added'>
4812
1
        <mi>f</mi>
4813
1
        <mo data-changed='added'>&#x2061;</mo>
4814
1
        <mrow>
4815
1
          <mo data-changed='from_mfenced'>(</mo>
4816
1
          <mo data-changed='from_mfenced'>)</mo>
4817
1
        </mrow>
4818
1
      </mrow>
4819
1
    </math>";
4820
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4821
1
    }
4822
4823
    #[test]
4824
1
    fn mfenced_one_child() -> Result<()> {
4825
1
        let test_str = "<math><mi>f</mi><mfenced open='[' close=']'><mi>x</mi></mfenced></math>";
4826
1
        let target_str = " <math>
4827
1
      <mrow data-changed='added'>
4828
1
      <mi>f</mi>
4829
1
      <mo data-changed='added'>&#x2061;</mo>
4830
1
      <mrow>
4831
1
        <mo data-changed='from_mfenced'>[</mo>
4832
1
        <mi>x</mi>
4833
1
        <mo data-changed='from_mfenced'>]</mo>
4834
1
      </mrow>
4835
1
      </mrow>
4836
1
    </math>";
4837
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4838
1
    }
4839
4840
    #[test]
4841
1
    fn mfenced_no_attrs() -> Result<()> {
4842
1
        let test_str = "<math><mi>f</mi><mfenced><mrow><mi>x</mi><mo>,</mo><mi>y</mi><mo>,</mo><mi>z</mi></mrow></mfenced></math>";
4843
1
        let target_str = " <math>
4844
1
      <mrow data-changed='added'>
4845
1
      <mi>f</mi>
4846
1
      <mo data-changed='added'>&#x2061;</mo>
4847
1
      <mrow>
4848
1
        <mo data-changed='from_mfenced'>(</mo>
4849
1
        <mrow>
4850
1
        <mi>x</mi>
4851
1
        <mo>,</mo>
4852
1
        <mi>y</mi>
4853
1
        <mo>,</mo>
4854
1
        <mi>z</mi>
4855
1
        </mrow>
4856
1
        <mo data-changed='from_mfenced'>)</mo>
4857
1
      </mrow>
4858
1
      </mrow>
4859
1
    </math>";
4860
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4861
1
    }
4862
4863
    #[test]
4864
1
    fn mfenced_with_separators() -> Result<()> {
4865
1
        let test_str = "<math><mi>f</mi><mfenced separators=',;'><mi>x</mi><mi>y</mi><mi>z</mi><mi>a</mi></mfenced></math>";
4866
1
        let target_str = "<math>
4867
1
      <mrow data-changed='added'>
4868
1
      <mi>f</mi>
4869
1
      <mo data-changed='added'>&#x2061;</mo>
4870
1
      <mrow>
4871
1
        <mo data-changed='from_mfenced'>(</mo>
4872
1
        <mrow data-changed='added'>
4873
1
        <mrow data-changed='added'>
4874
1
          <mi>x</mi>
4875
1
          <mo data-changed='from_mfenced'>,</mo>
4876
1
          <mi>y</mi>
4877
1
        </mrow>
4878
1
        <mo data-changed='from_mfenced'>;</mo>
4879
1
        <mrow data-changed='added'>
4880
1
          <mi>z</mi>
4881
1
          <mo data-changed='from_mfenced'>,</mo>
4882
1
          <mi>a</mi>
4883
1
        </mrow>
4884
1
        </mrow>
4885
1
        <mo data-changed='from_mfenced'>)</mo>
4886
1
      </mrow>
4887
1
      </mrow>
4888
1
    </math>";
4889
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4890
1
    }
4891
4892
    #[test]
4893
1
    fn canonical_one_element_mrow_around_mrow() -> Result<()> {
4894
1
        let test_str = "<math><mrow><mrow><mo>-</mo><mi>a</mi></mrow></mrow></math>";
4895
1
        let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
4896
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4897
1
    }
4898
4899
    #[test]
4900
1
    fn canonical_mtext_in_mtd_477() -> Result<()> {
4901
    // make sure mtext doesn't go away
4902
1
        let test_str = r#"<math>
4903
1
      <mtable>
4904
1
        <mtr>
4905
1
          <mtd>
4906
1
            <mstyle scriptlevel="0">
4907
1
              <mspace width="2em"/>
4908
1
            </mstyle>
4909
1
            <mstyle scriptlevel="0">
4910
1
              <mspace width="1em"/>
4911
1
            </mstyle>
4912
1
          </mtd>
4913
1
        </mtr>
4914
1
      </mtable>
4915
1
    </math>"#;
4916
1
        let target_str = r#"   <math>
4917
1
      <mtable>
4918
1
        <mtr>
4919
1
        <mtd>
4920
1
          <mtext data-width='1' data-following-space-width='4' scriptlevel='0' data-changed='added'> </mtext>
4921
1
        </mtd>
4922
1
        </mtr>
4923
1
      </mtable>
4924
1
    </math>"#;
4925
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4926
1
    }
4927
4928
    #[test]
4929
1
    fn canonical_mtext_in_mtr() -> Result<()> {
4930
    // make sure mtext doesn't go away
4931
1
        let test_str = "<math> <mtable> <mtr> <mtext> </mtext> </mtr> <mtr> <mtext> </mtext> </mtr> </mtable> </math>";
4932
1
        let target_str = "   <math>
4933
1
      <mtable>
4934
1
        <mtr>
4935
1
          <mtext data-changed='empty_content' data-width='0' data-empty-in-2D='true'> </mtext>
4936
1
        </mtr>
4937
1
        <mtr>
4938
1
          <mtext data-changed='empty_content' data-width='0' data-empty-in-2D='true'> </mtext>
4939
1
        </mtr>
4940
1
      </mtable>
4941
1
    </math>";
4942
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4943
1
    }
4944
4945
    #[test]
4946
1
    fn canonical_mtext_in_mtable() -> Result<()> {
4947
    // make sure mtext doesn't go away
4948
1
        let test_str = r"<math> <mtable> <mtr> <mtd> <mi>L</mi> </mtd> <mtd> <mrow> <mi>&lt;mi/&gt;</mi> <mo>=</mo> 
4949
1
            <mrow> <mo>[</mo> <mtable> <mtext> </mtext> </mtable> <mo>]</mo> </mrow> </mrow> </mtd> </mtr> </mtable> </math>";
4950
1
        let target_str = r"<math>
4951
1
      <mtable>
4952
1
      <mtr>
4953
1
        <mtd>
4954
1
        <mi>L</mi>
4955
1
        </mtd>
4956
1
        <mtd>
4957
1
        <mrow>
4958
1
          <mi>&lt;mi/&gt;</mi>
4959
1
          <mo>=</mo>
4960
1
          <mrow>
4961
1
          <mo>[</mo>
4962
1
          <mtable>
4963
1
            <mtext data-changed='empty_content' data-width='0' data-empty-in-2D='true'> </mtext>
4964
1
          </mtable>
4965
1
          <mo>]</mo>
4966
1
          </mrow>
4967
1
        </mrow>
4968
1
        </mtd>
4969
1
      </mtr>
4970
1
      </mtable>
4971
1
    </math>";
4972
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
4973
1
    }
4974
4975
    #[test]
4976
1
    fn mrow_with_intent_and_single_child() -> Result<()> {
4977
    use crate::interface::*;
4978
    use sxd_document::parser;
4979
    use crate::canonicalize::canonicalize;
4980
    // this forces initialization
4981
1
    crate::interface::set_rules_dir(abs_rules_dir_path()).unwrap();
4982
1
    crate::speech::SPEECH_RULES.with(|_| true);
4983
4984
    // we don't want to remove the mrow because the intent on the mi would reference itself
4985
1
        let test = "<math><mrow intent='log($x)'><mi arg='x'>X</mi></mrow></math>"; 
4986
4987
1
    let package1 = &parser::parse(test).expect("Failed to parse test input");
4988
1
    let mathml = get_element(package1);
4989
1
    trim_element(mathml, false);
4990
1
    let mathml_test = canonicalize(mathml).unwrap();
4991
1
    let first_child = as_element( mathml_test.children()[0] );
4992
1
    assert_eq!(name(first_child), "mrow");
4993
1
    assert_eq!(first_child.children().len(), 1);
4994
1
    let mi = as_element(first_child.children()[0]);
4995
1
    assert_eq!(name(mi), "mi");
4996
1
    Ok(())
4997
1
    }
4998
4999
    #[test]
5000
1
    fn empty_mrow_with_intent() -> Result<()> {
5001
    // we don't want to remove the mrow because the intent on the mi would reference itself
5002
    use crate::interface::*;
5003
    use sxd_document::parser;
5004
    use crate::canonicalize::canonicalize;
5005
    // this forces initialization
5006
1
    crate::interface::set_rules_dir(abs_rules_dir_path()).unwrap();
5007
1
    crate::speech::SPEECH_RULES.with(|_| true);
5008
5009
    // we don't want to remove the mrow because the intent needs to stick around
5010
1
        let test = "<math><mrow intent='log(x)'/></math>";
5011
5012
1
    let package1 = &parser::parse(test).expect("Failed to parse test input");
5013
1
    let mathml = get_element(package1);
5014
1
    trim_element(mathml, false);
5015
1
    let mathml_test = canonicalize(mathml).unwrap();
5016
1
    let first_child = as_element( mathml_test.children()[0] );
5017
1
    assert_eq!(name(first_child), "mrow");
5018
1
    assert_eq!(first_child.children().len(), 1);
5019
1
    let mtext = as_element(first_child.children()[0]);
5020
1
    assert_eq!(name(mtext), "mtext");
5021
1
    Ok(())
5022
1
    }
5023
5024
    #[test]
5025
1
    fn mn_with_negative_sign() -> Result<()> {
5026
1
        let test_str = "<math><mfrac>
5027
1
        <mrow><mn>-1</mn></mrow>
5028
1
        <mn>−987</mn>
5029
1
        </mfrac></math>";
5030
1
        let target_str = "<math><mfrac>
5031
1
      <mrow data-changed='added'><mo>-</mo><mn>1</mn></mrow>
5032
1
      <mrow data-changed='added'><mo>-</mo><mn>987</mn></mrow>
5033
1
      </mfrac></math>";
5034
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5035
1
    }
5036
5037
    #[test]
5038
1
    fn mn_with_degree_sign() -> Result<()> {
5039
1
        let test_str = "<math> <mrow> <mi>cos</mi> <mo>⁡</mo> <mrow> <mo>(</mo> <mn>150°</mn> <mo>)</mo> </mrow> </mrow> </math>";
5040
1
        let target_str = "<math>
5041
1
      <mrow>
5042
1
        <mi>cos</mi> <mo>&#x2061;</mo>
5043
1
        <mrow>
5044
1
          <mo>(</mo>
5045
1
          <msup data-changed='added'> <mn>150</mn> <mo>°</mo> </msup>
5046
1
          <mo>)</mo>
5047
1
        </mrow>
5048
1
      </mrow>
5049
1
    </math>";
5050
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5051
1
    }
5052
5053
    #[test]
5054
1
    fn canonical_one_element_mrow_around_mo() -> Result<()> {
5055
1
        let test_str = "<math><mrow><mrow><mo>-</mo></mrow><mi>a</mi></mrow></math>";
5056
1
        let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
5057
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5058
1
    }
5059
5060
    #[test]
5061
1
    fn canonical_flat_to_times_and_plus() -> Result<()> {
5062
1
        let test_str = "<math><mi>c</mi><mo>+</mo><mi>x</mi><mi>y</mi></math>";
5063
1
        let target_str = "<math>
5064
1
    <mrow data-changed='added'><mi>c</mi><mo>+</mo>
5065
1
      <mrow data-changed='added'><mi>x</mi><mo data-changed='added'>&#x2062;</mo><mi>y</mi></mrow>
5066
1
    </mrow></math>";
5067
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5068
1
    }
5069
5070
    #[test]
5071
1
    fn canonical_prefix_and_infix() -> Result<()> {
5072
1
        let test_str = "<math><mrow><mo>-</mo><mi>a</mi><mo>-</mo><mi>b</mi></mrow></math>";
5073
1
        let target_str = "<math>
5074
1
    <mrow>
5075
1
      <mrow data-changed='added'>
5076
1
      <mo>-</mo>
5077
1
      <mi>a</mi>
5078
1
      </mrow>
5079
1
      <mo>-</mo>
5080
1
      <mi>b</mi>
5081
1
    </mrow>
5082
1
     </math>";
5083
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5084
1
    }
5085
5086
5087
    #[test]
5088
1
    fn canonical_prefix_implied_times_prefix() -> Result<()> {
5089
1
        let test_str = "<math><mrow><mo>∂</mo><mi>x</mi><mo>∂</mo><mi>y</mi></mrow></math>";
5090
1
        let target_str = "<math>
5091
1
      <mrow>
5092
1
      <mrow data-changed='added'><mo>∂</mo><mi>x</mi></mrow>
5093
1
      <mo data-changed='added'>&#x2062;</mo>
5094
1
      <mrow data-changed='added'><mo>∂</mo><mi>y</mi></mrow>
5095
1
      </mrow>
5096
1
    </math>";
5097
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5098
1
    }
5099
5100
    #[test]
5101
1
    fn function_with_single_arg() -> Result<()> {
5102
1
        let test_str = "<math><mrow>
5103
1
      <mi>sin</mi><mo>(</mo><mi>x</mi><mo>)</mo>
5104
1
      <mo>+</mo>
5105
1
      <mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo>
5106
1
      <mo>+</mo>
5107
1
      <mi>t</mi><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow>
5108
1
    </mrow></math>";
5109
1
        let target_str = "<math>
5110
1
    <mrow>
5111
1
      <mrow data-changed='added'>
5112
1
      <mi>sin</mi>
5113
1
      <mo data-changed='added'>&#x2061;</mo>
5114
1
      <mrow data-changed='added'>
5115
1
        <mo>(</mo>
5116
1
        <mi>x</mi>
5117
1
        <mo>)</mo>
5118
1
      </mrow>
5119
1
      </mrow>
5120
1
      <mo>+</mo>
5121
1
      <mrow data-changed='added'>
5122
1
      <mi>f</mi>
5123
1
      <mo data-changed='added'>&#x2061;</mo>
5124
1
      <mrow data-changed='added'>
5125
1
        <mo>(</mo>
5126
1
        <mi>x</mi>
5127
1
        <mo>)</mo>
5128
1
      </mrow>
5129
1
      </mrow>
5130
1
      <mo>+</mo>
5131
1
      <mrow data-changed='added'>
5132
1
      <mi>t</mi>
5133
1
      <mo data-changed='added'>&#x2061;</mo>
5134
1
      <mrow>
5135
1
        <mo>(</mo>
5136
1
        <mi>x</mi>
5137
1
        <mo>)</mo>
5138
1
      </mrow>
5139
1
      </mrow>
5140
1
    </mrow>
5141
1
     </math>";
5142
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5143
1
    }
5144
5145
  #[test]
5146
1
  fn maybe_function() -> Result<()> {
5147
1
    let test_str = "<math>
5148
1
        <mrow>
5149
1
          <mi>P</mi>
5150
1
          <mo>(</mo>
5151
1
          <mi>A</mi>
5152
1
          <mo>∩</mo>
5153
1
          <mi>B</mi>
5154
1
          <mo>)</mo>
5155
1
        </mrow>
5156
1
      </math>";
5157
1
    let target_str = "<math>
5158
1
        <mrow>
5159
1
        <mi>P</mi>
5160
1
        <mo data-function-guess='true' data-changed='added'>&#x2062;</mo>
5161
1
        <mrow data-changed='added'>
5162
1
          <mo>(</mo>
5163
1
          <mrow data-changed='added'>
5164
1
          <mi>A</mi>
5165
1
          <mo>∩</mo>
5166
1
          <mi>B</mi>
5167
1
          </mrow>
5168
1
          <mo>)</mo>
5169
1
        </mrow>
5170
1
        </mrow>
5171
1
      </math>";
5172
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5173
1
  }
5174
5175
    #[test]
5176
1
    fn function_with_multiple_args() -> Result<()> {
5177
1
        let test_str = "<math>
5178
1
    <mi>sin</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
5179
1
      <mo>+</mo>
5180
1
     <mi>f</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
5181
1
      <mo>+</mo>
5182
1
     <mi>t</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
5183
1
      <mo>+</mo>
5184
1
     <mi>w</mi><mo>(</mo><mi>x</mi><mo>,</mo><mi>y</mi><mo>)</mo>
5185
1
    </math>";
5186
1
        let target_str = " <math>
5187
1
    <mrow data-changed='added'>
5188
1
    <mrow data-changed='added'>
5189
1
      <mi>sin</mi>
5190
1
      <mo data-changed='added'>&#x2061;</mo>
5191
1
      <mrow data-changed='added'>
5192
1
      <mo>(</mo>
5193
1
      <mrow data-changed='added'>
5194
1
        <mi>x</mi>
5195
1
        <mo>+</mo>
5196
1
        <mi>y</mi>
5197
1
      </mrow>
5198
1
      <mo>)</mo>
5199
1
      </mrow>
5200
1
    </mrow>
5201
1
    <mo>+</mo>
5202
1
    <mrow data-changed='added'>
5203
1
      <mi>f</mi>
5204
1
      <mo data-changed='added'>&#x2061;</mo>
5205
1
      <mrow data-changed='added'>
5206
1
      <mo>(</mo>
5207
1
      <mrow data-changed='added'>
5208
1
        <mi>x</mi>
5209
1
        <mo>+</mo>
5210
1
        <mi>y</mi>
5211
1
      </mrow>
5212
1
      <mo>)</mo>
5213
1
      </mrow>
5214
1
    </mrow>
5215
1
    <mo>+</mo>
5216
1
    <mrow data-changed='added'>
5217
1
      <mi>t</mi>
5218
1
      <mo data-changed='added' data-function-guess='true'>&#x2062;</mo>
5219
1
      <mrow data-changed='added'>
5220
1
      <mo>(</mo>
5221
1
      <mrow data-changed='added'>
5222
1
        <mi>x</mi>
5223
1
        <mo>+</mo>
5224
1
        <mi>y</mi>
5225
1
      </mrow>
5226
1
      <mo>)</mo>
5227
1
      </mrow>
5228
1
    </mrow>
5229
1
    <mo>+</mo>
5230
1
    <mrow data-changed='added'>
5231
1
      <mi>w</mi>
5232
1
      <mo data-changed='added'>&#x2061;</mo>
5233
1
      <mrow data-changed='added'>
5234
1
      <mo>(</mo>
5235
1
      <mrow data-changed='added'>
5236
1
        <mi>x</mi>
5237
1
        <mo>,</mo>
5238
1
        <mi>y</mi>
5239
1
      </mrow>
5240
1
      <mo>)</mo>
5241
1
      </mrow>
5242
1
    </mrow>
5243
1
    </mrow>
5244
1
      </math>";
5245
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5246
1
    }
5247
5248
    #[test]
5249
1
    fn function_with_no_args() -> Result<()> {
5250
1
        let test_str = "<math><mrow>
5251
1
    <mi>sin</mi><mi>x</mi>
5252
1
      <mo>+</mo>
5253
1
     <mi>f</mi><mi>x</mi>
5254
1
      <mo>+</mo>
5255
1
     <mi>t</mi><mi>x</mi>
5256
1
    </mrow></math>";
5257
1
        let target_str = " <math>
5258
1
    <mrow>
5259
1
      <mrow data-changed='added'>
5260
1
      <mi>sin</mi>
5261
1
      <mo data-changed='added'>&#x2061;</mo>
5262
1
      <mi>x</mi>
5263
1
      </mrow>
5264
1
      <mo>+</mo>
5265
1
      <mrow data-changed='added'>
5266
1
      <mi>f</mi>
5267
1
      <mo data-changed='added'>&#x2062;</mo>
5268
1
      <mi>x</mi>
5269
1
      </mrow>
5270
1
      <mo>+</mo>
5271
1
      <mrow data-changed='added'>
5272
1
      <mi>t</mi>
5273
1
      <mo data-changed='added'>&#x2062;</mo>
5274
1
      <mi>x</mi>
5275
1
      </mrow>
5276
1
    </mrow>
5277
1
     </math>";
5278
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5279
5280
1
  }
5281
5282
5283
    #[test]
5284
1
    fn function_call_vs_implied_times() -> Result<()> {
5285
1
        let test_str = "<math><mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo><mi>y</mi></math>";
5286
1
        let target_str = "<math>
5287
1
      <mrow data-changed='added'>
5288
1
        <mrow data-changed='added'>
5289
1
          <mi>f</mi>
5290
1
          <mo data-changed='added'>&#x2061;</mo>
5291
1
          <mrow data-changed='added'> <mo>(</mo> <mi>x</mi> <mo>)</mo> </mrow>
5292
1
        </mrow>
5293
1
      <mo data-changed='added'>&#x2062;</mo>
5294
1
      <mi>y</mi>    </mrow>
5295
1
     </math>";
5296
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5297
1
    }
5298
5299
    #[test]
5300
1
    fn implied_plus() -> Result<()> {
5301
1
        let test_str = "<math><mrow>
5302
1
    <mn>2</mn><mfrac><mn>3</mn><mn>4</mn></mfrac>
5303
1
    </mrow></math>";
5304
1
        let target_str = "<math>
5305
1
      <mrow>
5306
1
        <mn>2</mn>
5307
1
        <mo data-changed='added'>&#x2064;</mo>
5308
1
        <mfrac>
5309
1
          <mn>3</mn>
5310
1
          <mn>4</mn>
5311
1
        </mfrac>
5312
1
      </mrow>
5313
1
    </math>";
5314
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5315
1
    }
5316
5317
    #[test]
5318
1
    fn implied_plus_linear() -> Result<()> {
5319
1
        let test_str = "<math><mrow>
5320
1
      <mn>2</mn><mspace width='0.278em'></mspace><mn>3</mn><mo>/</mo><mn>4</mn>
5321
1
      </mrow></math>";
5322
1
        let target_str = "<math>
5323
1
      <mrow>
5324
1
        <mn>2</mn>
5325
1
        <mo data-changed='added'>&#x2064;</mo>
5326
1
        <mrow data-changed='added'>>
5327
1
          <mn data-previous-space-width='0.278'>3</mn>
5328
1
          <mo>/</mo>
5329
1
          <mn>4</mn>
5330
1
        </mrow>
5331
1
      </mrow>
5332
1
    </math>";
5333
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5334
1
    }
5335
5336
    #[test]
5337
1
    fn implied_plus_linear2() -> Result<()> {
5338
1
        let test_str = "<math><mrow>
5339
1
      <mn>2</mn><mrow><mn>3</mn><mo>/</mo><mn>4</mn></mrow>
5340
1
      </mrow></math>";
5341
1
        let target_str = "<math>
5342
1
      <mrow>
5343
1
        <mn>2</mn>
5344
1
        <mo data-changed='added'>&#x2064;</mo>
5345
1
        <mrow>
5346
1
          <mn>3</mn>
5347
1
          <mo>/</mo>
5348
1
          <mn>4</mn>
5349
1
        </mrow>
5350
1
      </mrow>
5351
1
    </math>";
5352
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5353
1
    }
5354
5355
    #[test]
5356
1
    fn implied_comma() -> Result<()> {
5357
1
        let test_str = "<math><msub><mi>b</mi><mrow><mn>1</mn><mn>2</mn></mrow></msub></math>";
5358
1
        let target_str = "<math>
5359
1
       <msub><mi>b</mi><mrow><mn>1</mn><mo data-changed='added'>&#x2063;</mo><mn>2</mn></mrow></msub>
5360
1
    </math>";
5361
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5362
1
    }
5363
5364
    #[test]
5365
1
    fn no_implied_comma() -> Result<()> {
5366
1
        let test_str = "<math><mfrac><mi>b</mi><mrow><mn>1</mn><mn>2</mn></mrow></mfrac></math>";
5367
1
        let target_str = "<math>
5368
1
       <mfrac><mi>b</mi><mrow><mn>1</mn><mo data-changed='added'>&#x2062;</mo><mn>2</mn></mrow></mfrac>
5369
1
    </math>";
5370
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5371
1
    }
5372
5373
    #[test]
5374
1
    fn vertical_bars() -> Result<()> {
5375
1
        let test_str = "<math>
5376
1
    <mo>|</mo> <mi>x</mi> <mo>|</mo><mo>+</mo><mo>|</mo>
5377
1
     <mi>a</mi><mo>+</mo><mn>1</mn> <mo>|</mo>
5378
1
    </math>";
5379
1
    let target_str = " <math>
5380
1
    <mrow data-changed='added'>
5381
1
    <mrow data-changed='added'>
5382
1
      <mo>|</mo>
5383
1
      <mi>x</mi>
5384
1
      <mo>|</mo>
5385
1
    </mrow>
5386
1
    <mo>+</mo>
5387
1
    <mrow data-changed='added'>
5388
1
      <mo>|</mo>
5389
1
      <mrow data-changed='added'>
5390
1
      <mi>a</mi>
5391
1
      <mo>+</mo>
5392
1
      <mn>1</mn>
5393
1
      </mrow>
5394
1
      <mo>|</mo>
5395
1
    </mrow>
5396
1
    </mrow>
5397
1
   </math>";
5398
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5399
1
    }
5400
5401
5402
    #[test]
5403
1
    fn vertical_bars_nested() -> Result<()> {
5404
1
        let test_str = "<math><mo>|</mo><mi>x</mi><mo>|</mo><mi>y</mi><mo>|</mo><mi>z</mi><mo>|</mo></math>";
5405
1
    let target_str = "<math>
5406
1
    <mrow data-changed='added'>
5407
1
    <mrow data-changed='added'>
5408
1
      <mo>|</mo>
5409
1
      <mi>x</mi>
5410
1
      <mo>|</mo>
5411
1
    </mrow>
5412
1
    <mo data-changed='added'>&#x2062;</mo>
5413
1
    <mi>y</mi>
5414
1
    <mo data-changed='added'>&#x2062;</mo>
5415
1
    <mrow data-changed='added'>
5416
1
      <mo>|</mo>
5417
1
      <mi>z</mi>
5418
1
      <mo>|</mo>
5419
1
    </mrow>
5420
1
    </mrow>
5421
1
   </math>";
5422
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5423
1
    }
5424
5425
    #[test]
5426
1
    fn double_vertical_bars() -> Result<()> {
5427
1
      let test_str = "<math><mrow><mo>||</mo><mi>x</mi><mo>||</mo><mo>||</mo><mi>y</mi><mo>||</mo></mrow></math>";
5428
1
    let target_str = "<math>
5429
1
      <mrow>
5430
1
        <mrow data-changed='added'><mo>‖</mo><mi>x</mi><mo>‖</mo></mrow>
5431
1
        <mo data-changed='added'>&#x2062;</mo>
5432
1
        <mrow data-changed='added'><mo>‖</mo><mi>y</mi><mo>‖</mo></mrow>
5433
1
      </mrow>
5434
1
    </math>";
5435
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5436
1
    }
5437
5438
    #[test]
5439
1
    fn double_vertical_bars_mo() -> Result<()> {
5440
1
      let test_str = "<math><mo>|</mo><mo>|</mo><mi>a</mi><mo>|</mo><mo>|</mo></math>";
5441
1
    let target_str = "<math><mrow data-changed='added'><mo>‖</mo><mi>a</mi><mo>‖</mo></mrow></math>";
5442
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5443
1
    }
5444
5445
    #[test]
5446
1
    fn no_double_vertical_bars_mo() -> Result<()> {
5447
1
      let test_str = "<math><mo>|</mo><mi>x</mi><mo>|</mo><mo>|</mo><mi>y</mi><mo>|</mo></math>";
5448
1
        let target_str = "<math>  <mrow data-changed='added'>
5449
1
        <mrow data-changed='added'><mo>|</mo><mi>x</mi><mo>|</mo></mrow>
5450
1
        <mo data-changed='added'>&#x2062;</mo>
5451
1
        <mrow data-changed='added'><mo>|</mo><mi>y</mi><mo>|</mo></mrow>
5452
1
      </mrow> </math>";
5453
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5454
1
    }
5455
5456
    #[test]
5457
1
    fn vertical_bar_such_that() -> Result<()> {
5458
1
        let test_str = "<math>
5459
1
        <mo>{</mo><mi>x</mi><mo>|</mo><mi>x</mi><mo>&#x2208;</mo><mi>S</mi><mo>}</mo>
5460
1
            </math>";
5461
1
        let target_str = "<math>
5462
1
    <mrow data-changed='added'>
5463
1
      <mo>{</mo>
5464
1
      <mrow data-changed='added'>
5465
1
      <mi>x</mi>
5466
1
      <mo>|</mo>
5467
1
      <mrow data-changed='added'>
5468
1
        <mi>x</mi>
5469
1
        <mo>∈</mo>
5470
1
        <mi>S</mi>
5471
1
      </mrow>
5472
1
      </mrow>
5473
1
      <mo>}</mo>
5474
1
    </mrow>
5475
1
     </math>";
5476
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5477
1
    }
5478
5479
    #[test]
5480
  #[ignore]  // need to figure out a test for this ("|" should have a precedence around ":" since that is an alternative notation for "such that", but "∣" is higher precedence)
5481
0
    fn vertical_bar_divides() -> Result<()> {
5482
0
        let test_str = "<math>
5483
0
    <mi>x</mi><mo>+</mo><mi>y</mi> <mo>|</mo><mn>12</mn>
5484
0
            </math>";
5485
0
        let target_str = "<math>
5486
0
        <mrow data-changed='added'>
5487
0
        <mrow data-changed='added'>
5488
0
          <mi>x</mi>
5489
0
          <mo>+</mo>
5490
0
          <mi>y</mi>
5491
0
        </mrow>
5492
0
        <mo>∣ <!--divides--></mo>
5493
0
        <mn>12</mn>
5494
0
        </mrow>
5495
0
      </math>";
5496
0
        are_strs_canonically_equal_result(test_str, target_str, &[])
5497
0
    }
5498
5499
5500
    #[test]
5501
1
    fn trig_mo() -> Result<()> {
5502
1
        let test_str = "<math><mo>sin</mo><mi>x</mi>
5503
1
        <mo>+</mo><mo>cos</mo><mi>y</mi>
5504
1
        <mo>+</mo><munder><mo>lim</mo><mi>D</mi></munder><mi>y</mi>
5505
1
      </math>";
5506
1
        let target_str = "<math>
5507
1
    <mrow data-changed='added'>
5508
1
      <mrow data-changed='added'>
5509
1
      <mi>sin</mi>
5510
1
      <mo data-changed='added'>&#x2061;</mo>
5511
1
      <mi>x</mi>
5512
1
      </mrow>
5513
1
      <mo>+</mo>
5514
1
      <mrow data-changed='added'>
5515
1
      <mi>cos</mi>
5516
1
      <mo data-changed='added'>&#x2061;</mo>
5517
1
      <mi>y</mi>
5518
1
      </mrow>
5519
1
      <mo>+</mo>
5520
1
      <mrow data-changed='added'>
5521
1
      <munder>
5522
1
        <mi>lim</mi>
5523
1
        <mi>D</mi>
5524
1
      </munder>
5525
1
      <mo data-changed='added'>&#x2061;</mo>
5526
1
      <mi>y</mi>
5527
1
      </mrow>
5528
1
    </mrow>
5529
1
     </math>";
5530
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5531
1
    }
5532
5533
    #[test]
5534
1
    fn trig_mtext() -> Result<()> {
5535
1
        let test_str = "<math><mtext>sin</mtext><mi>x</mi>
5536
1
        <mo>+</mo><mtext>cos</mtext><mi>y</mi>
5537
1
        <mo>+</mo><munder><mtext>lim</mtext><mi>D</mi></munder><mi>y</mi>
5538
1
      </math>";
5539
1
        let target_str = "<math>
5540
1
    <mrow data-changed='added'>
5541
1
      <mrow data-changed='added'>
5542
1
      <mi>sin</mi>
5543
1
      <mo data-changed='added'>&#x2061;</mo>
5544
1
      <mi>x</mi>
5545
1
      </mrow>
5546
1
      <mo>+</mo>
5547
1
      <mrow data-changed='added'>
5548
1
      <mi>cos</mi>
5549
1
      <mo data-changed='added'>&#x2061;</mo>
5550
1
      <mi>y</mi>
5551
1
      </mrow>
5552
1
      <mo>+</mo>
5553
1
      <mrow data-changed='added'>
5554
1
      <munder>
5555
1
        <mi>lim</mi>
5556
1
        <mi>D</mi>
5557
1
      </munder>
5558
1
      <mo data-changed='added'>&#x2061;</mo>
5559
1
      <mi>y</mi>
5560
1
      </mrow>
5561
1
    </mrow>
5562
1
     </math>";
5563
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5564
1
    }
5565
  
5566
    #[test]
5567
1
    fn trig_negative_args() -> Result<()> {
5568
1
        let test_str = "<math><mi>sin</mi><mo>-</mo><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5569
1
        let target_str = "<math>
5570
1
    <mrow data-changed='added'>
5571
1
      <mi>sin</mi>
5572
1
      <mo data-changed='added'>&#x2061;</mo>
5573
1
      <mrow data-changed='added'>
5574
1
      <mrow data-changed='added'>
5575
1
        <mo>-</mo>
5576
1
        <mn>2</mn>
5577
1
      </mrow>
5578
1
      <mo data-changed='added'>&#x2062;</mo>
5579
1
      <mi>π</mi>
5580
1
      <mo data-changed='added'>&#x2062;</mo>
5581
1
      <mi>x</mi>
5582
1
      </mrow>
5583
1
    </mrow>
5584
1
     </math>";
5585
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5586
1
    }
5587
  
5588
    #[test]
5589
1
    fn not_trig_negative_args() -> Result<()> {
5590
    // this is here to make sure that only trig functions get the special treatment
5591
1
        let test_str = "<math><mi>ker</mi><mo>-</mo><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5592
1
        let target_str = "<math>
5593
1
      <mrow data-changed='added'>
5594
1
          <mrow data-changed='added'>
5595
1
          <mi>ker</mi>
5596
1
          <mo data-changed='added'>&#x2061;</mo>
5597
1
          <mrow data-changed='added'>
5598
1
            <mo>-</mo>
5599
1
            <mn>2</mn>
5600
1
          </mrow>
5601
1
          </mrow>
5602
1
        <mo data-changed='added'>&#x2062;</mo>
5603
1
        <mi>π</mi>
5604
1
        <mo data-changed='added'>&#x2062;</mo>
5605
1
        <mi>x</mi>
5606
1
      </mrow>
5607
1
    </math>";
5608
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5609
1
    }
5610
5611
    #[test]
5612
1
    fn trig_args() -> Result<()> {
5613
1
        let test_str = "<math><mi>sin</mi><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5614
1
        let target_str = "<math>
5615
1
    <mrow data-changed='added'>
5616
1
      <mi>sin</mi>
5617
1
      <mo data-changed='added'>&#x2061;</mo>
5618
1
      <mrow data-changed='added'>
5619
1
      <mn>2</mn>
5620
1
      <mo data-changed='added'>&#x2062;</mo>
5621
1
      <mi>π</mi>
5622
1
      <mo data-changed='added'>&#x2062;</mo>
5623
1
      <mi>x</mi>
5624
1
      </mrow>
5625
1
    </mrow>
5626
1
     </math>";
5627
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5628
1
    }
5629
5630
    #[test]
5631
1
    fn not_trig_args() -> Result<()> {
5632
    // this is here to make sure that only trig functions get the special treatment
5633
1
        let test_str = "<math><mi>ker</mi><mn>2</mn><mi>π</mi><mi>x</mi></math>";
5634
1
        let target_str = "<math>
5635
1
    <mrow data-changed='added'>
5636
1
      <mrow data-changed='added'>
5637
1
        <mi>ker</mi>
5638
1
        <mo data-changed='added'>&#x2061;</mo>
5639
1
        <mn>2</mn>
5640
1
      </mrow>
5641
1
      <mo data-changed='added'>&#x2062;</mo>
5642
1
      <mi>π</mi>
5643
1
      <mo data-changed='added'>&#x2062;</mo>
5644
1
      <mi>x</mi>
5645
1
    </mrow>
5646
1
     </math>";
5647
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5648
1
    }
5649
5650
    #[test]
5651
1
    fn trig_trig() -> Result<()> {
5652
1
        let test_str = "<math><mi>sin</mi><mi>x</mi><mi>cos</mi><mi>y</mi></math>";
5653
1
        let target_str = "<math>
5654
1
    <mrow data-changed='added'>
5655
1
      <mrow data-changed='added'>
5656
1
        <mi>sin</mi>
5657
1
        <mo data-changed='added'>&#x2061;</mo>
5658
1
        <mi>x</mi>
5659
1
      </mrow>
5660
1
      <mo data-changed='added'>&#x2062;</mo>
5661
1
      <mrow data-changed='added'>
5662
1
        <mi>cos</mi>
5663
1
        <mo data-changed='added'>&#x2061;</mo>
5664
1
        <mi>y</mi>
5665
1
      </mrow>
5666
1
    </mrow>
5667
1
    </math>";
5668
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5669
1
    }
5670
5671
    #[test]
5672
1
    fn trig_function_composition() -> Result<()> {
5673
1
        let test_str = "<math><mo>(</mo><mi>sin</mi><mo>-</mo><mi>cos</mi><mo>)</mo><mi>x</mi></math>";
5674
1
        let target_str = "<math>
5675
1
    <mrow data-changed='added'>
5676
1
      <mrow data-changed='added'>
5677
1
      <mo>(</mo>
5678
1
      <mrow data-changed='added'>
5679
1
        <mi>sin</mi>
5680
1
        <mo>-</mo>
5681
1
        <mi>cos</mi>
5682
1
      </mrow>
5683
1
      <mo>)</mo>
5684
1
      </mrow>
5685
1
      <mo data-changed='added'>&#x2062;</mo>
5686
1
      <mi>x</mi>
5687
1
    </mrow>
5688
1
     </math>";
5689
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5690
1
    }
5691
5692
  
5693
  #[test]
5694
1
    fn currency_in_leaf_prefix() -> Result<()> {
5695
1
        let test_str = "<math><mn>$8.54</mn></math>";
5696
1
        let target_str = "<math>
5697
1
      <mrow data-changed='added'>
5698
1
      <mi>$</mi>
5699
1
      <mo data-changed='added'>&#x2062;</mo>
5700
1
      <mn>8.54</mn>
5701
1
      </mrow>
5702
1
    </math>";
5703
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5704
1
  }
5705
5706
  #[test]
5707
1
    fn currency_in_leaf_postfix() -> Result<()> {
5708
1
        let test_str = "<math><mn>188,23€</mn></math>";
5709
1
        let target_str = " <math>
5710
1
      <mrow data-changed='added'>
5711
1
        <mo data-changed='added'>&#x2062;</mo>
5712
1
        <mn>188,23</mn>
5713
1
        <mo data-changed='added'>&#x2062;</mo>
5714
1
        <mi>€</mi>
5715
1
      </mrow>
5716
1
    </math>";
5717
1
   are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ",")
5718
1
}
5719
5720
  #[test]
5721
1
    fn currency_in_leaf_infix() -> Result<()> {
5722
1
        let test_str = "<math><mn>1€23</mn></math>";
5723
1
        let target_str = " <math>
5724
1
      <mrow data-changed='added'>
5725
1
        <mn>1</mn>
5726
1
        <mo data-changed='added'>&#x2062;</mo>
5727
1
        <mi>€</mi>
5728
1
        <mo data-changed='added'>&#x2062;</mo>
5729
1
        <mn>23</mn>
5730
1
      </mrow>
5731
1
    </math>";
5732
1
   are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ",")
5733
1
}
5734
  
5735
  #[test]
5736
1
    fn mtext_whitespace_string() -> Result<()> {
5737
1
        let test_str = "<math><mi>t</mi><mtext>&#x00A0;&#x205F;</mtext></math>";
5738
1
        let target_str = "<math><mi data-following-space-width='0.922'>t</mi></math>";
5739
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5740
1
  }
5741
  
5742
  #[test]
5743
1
    fn mtext_whitespace_string_before() -> Result<()> {
5744
1
        let test_str = "<math><mtext>&#x00A0;&#x205F;</mtext><mi>t</mi></math>";
5745
1
        let target_str = "<math><mi data-previous-space-width='0.922'>t</mi></math>";
5746
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
5747
1
  }
5748
  
5749
  #[test]
5750
1
    fn mtext_whitespace_1() -> Result<()> {
5751
1
        let test_str = "<math><mi>t</mi><mtext>&#x00A0;&#x205F;</mtext>
5752
1
        <mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
5753
1
        let target_str = " <math>
5754
1
    <mrow data-changed='added'>
5755
1
      <mi>t</mi>
5756
1
      <mo data-changed='added' data-function-guess='true'>&#x2062;</mo>
5757
1
      <mrow data-previous-space-width='0.922'>
5758
1
      <mo>(</mo>
5759
1
      <mrow data-changed='added'>
5760
1
        <mi>x</mi>
5761
1
        <mo>+</mo>
5762
1
        <mi>y</mi>
5763
1
      </mrow>
5764
1
      <mo>)</mo>
5765
1
      </mrow>
5766
1
    </mrow>
5767
1
     </math>";
5768
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5769
1
  }
5770
  
5771
  #[test]
5772
1
    fn mtext_whitespace_2() -> Result<()> {
5773
1
        let test_str = "<math><mi>f</mi><mtext>&#x00A0;&#x205F;</mtext>
5774
1
        <mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
5775
1
        let target_str = " <math>
5776
1
    <mrow data-changed='added'>
5777
1
      <mi>f</mi>
5778
1
      <mo data-changed='added'>&#x2061;</mo>
5779
1
      <mrow  data-previous-space-width='0.922'>
5780
1
      <mo>(</mo>
5781
1
      <mrow data-changed='added'>
5782
1
        <mi>x</mi>
5783
1
        <mo>+</mo>
5784
1
        <mi>y</mi>
5785
1
      </mrow>
5786
1
      <mo>)</mo>
5787
1
      </mrow>
5788
1
    </mrow>
5789
1
     </math>";
5790
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5791
1
  }
5792
5793
  #[test]
5794
1
    fn remove_mtext_whitespace_3() -> Result<()> {
5795
1
        let test_str = "<math><mi>t</mi>
5796
1
        <mrow><mtext>&#x2009;</mtext><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
5797
1
        let target_str = "<math>
5798
1
    <mrow data-changed='added'>
5799
1
      <mi>t</mi>
5800
1
      <mo data-changed='added' data-function-guess='true'>&#x2062;</mo>
5801
1
      <mrow>
5802
1
      <mo data-previous-space-width='0.167'>(</mo>
5803
1
      <mrow data-changed='added'>
5804
1
        <mi>x</mi>
5805
1
        <mo>+</mo>
5806
1
        <mi>y</mi>
5807
1
      </mrow>
5808
1
      <mo>)</mo>
5809
1
      </mrow>
5810
1
    </mrow>
5811
1
     </math>";
5812
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5813
1
  }
5814
5815
  #[test]
5816
1
    fn do_not_remove_any_whitespace() -> Result<()> {
5817
1
        let test_str = "<math><mfrac>
5818
1
          <mrow><mspace width='3em'/></mrow>
5819
1
          <mtext>&#x2009;</mtext>
5820
1
        </mfrac></math>";
5821
1
        let target_str = " <math>
5822
1
      <mfrac>
5823
1
        <mtext width='3em' data-changed='was-mspace' data-width='3' data-empty-in-2D='true'> </mtext>
5824
1
        <mtext data-width='0.167' data-empty-in-2D='true'> </mtext>
5825
1
      </mfrac>
5826
1
     </math>";
5827
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5828
1
  }
5829
5830
  #[test]
5831
1
    fn remove_mo_whitespace() -> Result<()> {
5832
1
        let test_str = "<math><mi>cos</mi><mo>&#xA0;</mo><mi>x</mi></math>";
5833
1
        let target_str = "<math>
5834
1
        <mrow data-changed='added'>
5835
1
          <mi>cos</mi>
5836
1
          <mo data-changed='added'>&#x2061;</mo>
5837
1
          <mi data-previous-space-width='0.7'>x</mi>
5838
1
        </mrow>
5839
1
        </math>";
5840
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5841
1
  }
5842
5843
  #[test]
5844
1
    fn do_not_remove_some_whitespace() -> Result<()> {
5845
1
        let test_str = "<math><mroot>
5846
1
          <mrow><mi>b</mi><mphantom><mi>y</mi></mphantom></mrow>
5847
1
          <mtext>&#x2009;</mtext>
5848
1
        </mroot></math>";
5849
1
        let target_str = "<math><mroot>
5850
1
        <mi>b</mi>
5851
1
        <mtext data-empty-in-2D='true' data-width='0.167'>&#xA0;</mtext>
5852
1
      </mroot></math>";
5853
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5854
1
  }
5855
5856
  #[test]
5857
1
    fn remove_all_extra_elements() -> Result<()> {
5858
1
        let test_str = "<math><msqrt>
5859
1
          <mstyle> <mi>b</mi> </mstyle>
5860
1
          <mphantom><mi>y</mi></mphantom>
5861
1
          <mtext>&#x2009;</mtext>
5862
1
          <mspace width='3em'/>
5863
1
        </msqrt></math>";
5864
1
        let target_str = "<math><msqrt>
5865
1
        <mi data-following-space-width='3.167'>b</mi>
5866
1
      </msqrt></math>";
5867
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5868
1
  }
5869
5870
  #[test]
5871
1
    fn empty_content() -> Result<()> {
5872
1
        let test_str = "<math></math>";
5873
1
        let target_str = " <math><mtext data-added='missing-content' data-width='0.700'> </mtext></math>";
5874
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5875
1
  }
5876
5877
  #[test]
5878
1
    fn empty_content_after_cleanup() -> Result<()> {
5879
1
        let test_str = "<math><mrow><mphantom><mn>1</mn></mphantom></mrow></math>";
5880
1
        let target_str = " <math><mtext data-added='missing-content' data-width='0'> </mtext></math>";
5881
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5882
1
  }
5883
5884
  #[test]
5885
1
    fn empty_content_fix_num_children() -> Result<()> {
5886
1
        let test_str = "  <math><mfrac><menclose notation='box'><mrow/></menclose><mrow/></mfrac></math>";
5887
1
        let target_str = "<math>
5888
1
    <mfrac>
5889
1
      <menclose notation='box'>
5890
1
      <mtext data-added='missing-content' data-empty-in-2D='true' data-width='0'> </mtext>
5891
1
      </menclose>
5892
1
      <mtext data-changed='empty_content' data-empty-in-2D='true' data-width='0'> </mtext>
5893
1
    </mfrac>
5894
1
     </math>";
5895
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5896
1
  }
5897
5898
5899
  #[test]
5900
1
    fn clean_semantics() -> Result<()> {
5901
    // this comes from LateXML
5902
1
        let test_str = "<math>
5903
1
        <semantics>
5904
1
          <mrow><mi>z</mi></mrow>
5905
1
          <annotation-xml encoding='MathML-Content'>
5906
1
            <ci>𝑧</ci>
5907
1
          </annotation-xml>
5908
1
          <annotation encoding='application/x-tex'>z</annotation>
5909
1
          <annotation encoding='application/x-llamapun'>italic_z</annotation>
5910
1
        </semantics>
5911
1
      </math>";
5912
    // the annotation-xml value is very touchy and must exactly match what mml-to-string() generates for the test to pass
5913
1
    let target_str = " <math>
5914
1
    <mi data-annotation-xml-MathML-Content=' &lt;annotation-xml encoding=&apos;MathML-Content&apos;&gt;
5915
1
  &lt;ci&gt;𝑧&lt;/ci&gt;
5916
1
 &lt;/annotation-xml&gt;
5917
1
' data-annotation-application_slash_x-tex='z' data-annotation-application_slash_x-llamapun='italic_z'>z</mi>
5918
1
     </math>";
5919
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5920
1
  }
5921
5922
  #[test]
5923
1
    fn clean_up_mi_operator() -> Result<()> {
5924
1
        let test_str = "<math><mrow><mi>∠</mi><mi>A</mi><mi>B</mi><mi>C</mi></mrow></math>";
5925
1
        let target_str = " <math>
5926
1
        <mrow>
5927
1
        <mo>∠</mo>
5928
1
        <mrow data-changed='added'>
5929
1
          <mi>A</mi>
5930
1
          <mo data-changed='added'>&#x2063;</mo>
5931
1
          <mi>B</mi>
5932
1
          <mo data-changed='added'>&#x2063;</mo>
5933
1
          <mi>C</mi>
5934
1
        </mrow>
5935
1
        </mrow>
5936
1
      </math>";
5937
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5938
1
  }
5939
5940
5941
  #[test]
5942
1
    fn clean_up_arc() -> Result<()> {
5943
1
        let test_str = "<math><mtext>arc&#xA0;</mtext><mi>cos</mi><mi>x</mi></math>";
5944
1
        let target_str = "<math>
5945
1
      <mrow data-changed='added'>
5946
1
      <mi>arccos</mi>
5947
1
      <mo data-changed='added'>&#x2061;</mo>
5948
1
      <mi>x</mi>
5949
1
      </mrow>
5950
1
    </math>";
5951
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5952
1
  }
5953
5954
  #[test]
5955
1
    fn clean_up_arc_nospace() -> Result<()> {
5956
1
        let test_str = "<math><mtext>arc</mtext><mi>cos</mi><mi>x</mi></math>";
5957
1
        let target_str = "<math>
5958
1
      <mrow data-changed='added'>
5959
1
      <mi>arccos</mi>
5960
1
      <mo data-changed='added'>&#x2061;</mo>
5961
1
      <mi>x</mi>
5962
1
      </mrow>
5963
1
    </math>";
5964
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5965
1
  }
5966
5967
  #[test]
5968
1
    fn roman_numeral() -> Result<()> {
5969
1
        let test_str = "<math><mrow><mtext>XLVIII</mtext> <mo>+</mo><mn>mmxxvi</mn></mrow></math>";
5970
    // turns out there is no need to mark them as Roman Numerals -- thought that was need for braille
5971
1
        let target_str = "<math><mrow>
5972
1
      <mn data-roman-numeral='true' data-number='48'>XLVIII</mn> <mo>+</mo><mn data-roman-numeral='true' data-number='2026'>mmxxvi</mn>
5973
1
      </mrow></math>";
5974
        // let target_str = "<math><mrow><mtext>XLVIII</mtext> <mo>+</mo><mn>mmxxvi</mn></mrow></math>";
5975
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5976
1
  }
5977
5978
  // #[test]
5979
    // fn roman_numeral_context() {
5980
    //     let test_str = "<math><mi>vi</mi><mo>-</mo><mi mathvariant='normal'>i</mi><mo>=</mo><mtext>v</mtext></math>";
5981
    //     let target_str = "<math> <mrow data-changed='added'>
5982
  //    <mrow data-changed='added'><mn data-roman-numeral='true'>vi</mn><mo>-</mo><mn mathvariant='normal' data-roman-numeral='true'>i</mn></mrow> 
5983
  //    <mo>=</mo> <mn data-roman-numeral='true'>v</mn>
5984
  //  </mrow> </math>";
5985
    //     are_strs_canonically_equal_result(test_str, target_str, &[])
5986
  // }
5987
5988
  #[test]
5989
1
    fn not_roman_numeral() -> Result<()> {
5990
1
        let test_str = "<math><mtext>cm</mtext></math>";
5991
    // shouldn't change
5992
1
        let target_str = "<math><mtext>cm</mtext></math>";
5993
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
5994
1
  }
5995
5996
  #[test]
5997
1
    fn digit_block_binary() -> Result<()> {
5998
1
        let test_str = "<math><mo>(</mo><mn>0110</mn><mspace width=\"thickmathspace\"></mspace><mn>1110</mn><mspace width=\"thickmathspace\"></mspace><mn>0110</mn><mo>)</mo></math>";
5999
1
        let target_str = " <math>
6000
1
        <mrow data-changed='added'>
6001
1
        <mo>(</mo>
6002
1
        <mn>0110\u{00A0}1110\u{00A0}0110</mn>
6003
1
        <mo>)</mo>
6004
1
        </mrow>
6005
1
      </math>";
6006
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6007
1
  }
6008
6009
  #[test]
6010
1
    fn digit_block_decimal() -> Result<()> {
6011
1
        let test_str = "<math><mn>8</mn><mo>,</mo><mn>123</mn><mo>,</mo><mn>456</mn><mo>+</mo>
6012
1
                    <mn>4</mn><mo>.</mo><mn>32</mn></math>";
6013
1
        let target_str = " <math>
6014
1
        <mrow data-changed='added'>
6015
1
        <mn>8,123,456</mn>
6016
1
        <mo>+</mo>
6017
1
        <mn>4.32</mn>
6018
1
        </mrow>
6019
1
      </math>";
6020
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6021
1
  }
6022
  #[test]
6023
1
    fn digit_block_comma() -> Result<()> {
6024
1
        let test_str = "<math><mn>8</mn><mo>.</mo><mn>123</mn><mo>.</mo><mn>456</mn><mo>+</mo>
6025
1
                    <mn>4</mn><mo>,</mo><mn>32</mn></math>";
6026
1
        let target_str = " <math>
6027
1
        <mrow data-changed='added'>
6028
1
        <mn>8.123.456</mn>
6029
1
        <mo>+</mo>
6030
1
        <mn>4,32</mn>
6031
1
        </mrow>
6032
1
      </math>";
6033
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6034
1
  }
6035
6036
  #[test]
6037
1
  fn digit_block_int() -> Result<()> {
6038
1
        let test_str = "<math><mn>12</mn><mo>,</mo><mn>345</mn><mo>+</mo>
6039
1
                    <mn>1</mn><mo>,</mo><mn>000</mn></math>";
6040
1
        let target_str = " <math>
6041
1
        <mrow data-changed='added'>
6042
1
        <mn>12,345</mn>
6043
1
        <mo>+</mo>
6044
1
        <mn>1,000</mn>
6045
1
        </mrow>
6046
1
      </math>";
6047
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6048
1
  }
6049
6050
  #[test]
6051
1
  fn digit_block_non_ascii_int() -> Result<()> {
6052
1
        let test_str = "<math><mn>𝟏𝟐</mn><mo>,</mo><mn>3𝟰𝟻</mn><mo>+</mo>
6053
1
                    <mn>𝟙</mn><mo>,</mo><mn>𝟬𝟬𝟬</mn></math>";
6054
1
        let target_str = " <math>
6055
1
        <mrow data-changed='added'>
6056
1
        <mn>𝟏𝟐,3𝟰𝟻</mn>
6057
1
        <mo>+</mo>
6058
1
        <mn>𝟙,𝟬𝟬𝟬</mn>
6059
1
        </mrow>
6060
1
      </math>";
6061
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6062
1
  }
6063
6064
  #[test]
6065
1
  fn digit_block_int_dots() -> Result<()> {
6066
1
        let test_str = "<math><mn>12</mn><mo>.</mo><mn>345</mn><mo>+</mo>
6067
1
                    <mn>1</mn><mo>.</mo><mn>000</mn></math>";
6068
1
        let target_str = " <math>
6069
1
        <mrow data-changed='added'>
6070
1
        <mn>12.345</mn>
6071
1
        <mo>+</mo>
6072
1
        <mn>1.000</mn>
6073
1
        </mrow>
6074
1
      </math>";
6075
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6076
1
  }
6077
6078
  #[test]
6079
1
    fn digit_block_decimal_pt() -> Result<()> {
6080
1
        let test_str = "<math><mn>8</mn><mo>,</mo><mn>123</mn><mo>.</mo>
6081
1
                <mo>+</mo><mn>4</mn><mo>.</mo>
6082
1
                <mo>+</mo><mo>.</mo><mn>01</mn></math>";
6083
1
        let target_str = " <math>
6084
1
        <mrow data-changed='added'>
6085
1
        <mn>8,123.</mn>
6086
1
        <mo>+</mo>
6087
1
        <mn>4.</mn>
6088
1
        <mo>+</mo>
6089
1
        <mn>.01</mn>
6090
1
        </mrow>
6091
1
      </math>";
6092
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6093
1
  }
6094
6095
  #[test]
6096
1
    fn number_with_decimal_pt() -> Result<()> {
6097
    // this is output from WIRIS for "12.3"
6098
1
        let test_str = "<math><mn>12</mn><mo>.</mo><mn>3</mn></math>";
6099
1
        let target_str = "<math><mn>12.3</mn></math>";
6100
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6101
1
  }
6102
6103
  #[test]
6104
1
    fn number_with_comma_decimal_pt() -> Result<()> {
6105
    // this is output from WIRIS for "12.3"
6106
1
        let test_str = "<math><mn>12</mn><mo>,</mo><mn>3</mn></math>";
6107
1
        let target_str = "<math><mn>12,3</mn></math>";
6108
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6109
1
  }
6110
6111
  #[test]
6112
1
    fn addition_with_decimal_point_at_end() -> Result<()> {
6113
    // in this case, the trailing "." is probably a decimal point" -- testing special case combine the "."
6114
    // this comes from WIRIS
6115
1
        let test_str = "<math><mn>1</mn><mo>.</mo><mn>3</mn><mo>+</mo><mn>2</mn><mo>.</mo></math>";
6116
1
        let target_str = "<math><mrow data-changed='added'><mn>1.3</mn><mo>+</mo><mn>2.</mn></mrow></math>";
6117
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6118
1
  }
6119
6120
  #[test]
6121
1
    fn addition_with_decimal_point_at_end_and_comma_decimal_separator() -> Result<()> {
6122
    // in this case, the trailing "." is probably a decimal point" -- testing special case combine the "."
6123
    // this comes from WIRIS
6124
1
        let test_str = "<math><mn>1</mn><mo>,</mo><mn>3</mn><mo>+</mo><mn>2</mn><mo>,</mo></math>";
6125
1
        let target_str = "<math><mrow data-changed='added'><mn>1,3</mn><mo>+</mo><mn>2,</mn></mrow></math>";
6126
1
        are_strs_canonically_equal_with_locale(test_str, target_str, &[], ".", ", ")
6127
1
  }
6128
6129
  #[test]
6130
1
    fn sequence_with_period() -> Result<()> {
6131
    // in this case, we don't want "5." -- testing special case to avoid combining the period.
6132
1
        let test_str = "<math><mn>1</mn><mo>,</mo><mn>3</mn><mo>,</mo><mn>5</mn><mo>.</mo></math>";
6133
1
        let target_str = "<math><mrow data-changed='added'>
6134
1
        <mrow data-changed='added'><mn>1</mn><mo>,</mo><mn>3</mn><mo>,</mo><mn>5</mn></mrow><mo>.</mo>
6135
1
      </mrow></math>";
6136
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6137
1
  }
6138
6139
  #[test]
6140
1
    fn addition_decimal_pt() -> Result<()> {
6141
1
        let test_str = "<math><mo>.</mo><mn>4</mn><mo>=</mo><mn>0</mn><mo>.</mo><mn>4</mn></math>";
6142
1
        let target_str = "<math><mrow data-changed='added'><mn>.4</mn><mo>=</mo><mn>0.4</mn></mrow></math>";
6143
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6144
1
  }
6145
6146
  #[test]
6147
1
    fn fraction_decimal_pt() -> Result<()> {
6148
1
        let test_str = "<math><mfrac><mrow><mn>1</mn><mo>.</mo></mrow><mrow><mn>2</mn><mo>.</mo></mrow></mfrac></math>";
6149
1
        let target_str = "<math><mfrac><mn>1.</mn><mn>2.</mn></mfrac></math>";
6150
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6151
1
  }
6152
6153
  #[test]
6154
1
    fn fraction_decimal_pt_no_split() -> Result<()> {
6155
    // don't split off the '.'
6156
1
        let test_str = "<math><mfrac><mn>1.</mn><mn>2.</mn></mfrac></math>";
6157
1
        let target_str = "<math><mfrac><mn>1.</mn><mn>2.</mn></mfrac></math>";
6158
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6159
1
  }
6160
6161
  #[test]
6162
1
    fn not_digit_block_parens() -> Result<()> {
6163
1
        let test_str = "<math><mo>(</mo><mn>451</mn><mo>,</mo><mn>231</mn><mo>)</mo></math>";
6164
1
        let target_str = " <math> <mrow data-changed='added'>
6165
1
        <mo>(</mo>
6166
1
        <mrow data-changed='added'>
6167
1
        <mn>451</mn> <mo>,</mo> <mn>231</mn>
6168
1
        </mrow>
6169
1
        <mo>)</mo>
6170
1
      </mrow></math>";
6171
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6172
1
  }
6173
6174
  #[test]
6175
1
    fn not_digit_block_parens_mrow() -> Result<()> {
6176
1
        let test_str = "<math><mo>(</mo><mrow><mn>451</mn><mo>,</mo><mn>231</mn></mrow><mo>)</mo></math>";
6177
1
        let target_str = " <math> <mrow data-changed='added'>
6178
1
        <mo>(</mo>
6179
1
        <mrow>
6180
1
        <mn>451</mn> <mo>,</mo> <mn>231</mn>
6181
1
        </mrow>
6182
1
        <mo>)</mo>
6183
1
      </mrow></math>";
6184
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6185
1
  }
6186
6187
  #[test]
6188
1
    fn not_digit_block_decimal() -> Result<()> {
6189
1
    let test_str = "<math><mn>8</mn><mo>,</mo><mn>49</mn><mo>,</mo><mn>456</mn><mo>+</mo>
6190
1
                    <mn>4</mn><mtext> </mtext><mn>32</mn><mo>+</mo>
6191
1
                  <mn>1</mn><mo>,</mo><mn>234</mn><mo>,</mo><mn>56</mn></math>";
6192
1
        let target_str = "<math>
6193
1
        <mrow data-changed='added'>
6194
1
        <mn>8</mn>
6195
1
        <mo>,</mo>
6196
1
        <mn>49</mn>
6197
1
        <mo>,</mo>
6198
1
        <mrow data-changed='added'>
6199
1
          <mn>456</mn>
6200
1
          <mo>+</mo>
6201
1
          <mrow data-changed='added'>
6202
1
          <mn>4</mn>
6203
1
          <mo data-changed='added'>&#x2062;</mo>
6204
1
          <mn>32</mn>
6205
1
          </mrow>
6206
1
          <mo>+</mo>
6207
1
          <mn>1</mn>
6208
1
        </mrow>
6209
1
        <mo>,</mo>
6210
1
        <mn>234</mn>
6211
1
        <mo>,</mo>
6212
1
        <mn>56</mn>
6213
1
        </mrow>
6214
1
      </math>";
6215
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6216
1
  }
6217
6218
  #[test]
6219
1
    fn not_digit_block_ellipsis() -> Result<()> {
6220
1
        let test_str = "<math><mrow><mn>8</mn><mo>,</mo><mn>123</mn><mo>,</mo><mn>456</mn><mo>,</mo>
6221
1
                    <mi>…</mi></mrow></math>";
6222
1
        let target_str = "<math>
6223
1
    <mrow>
6224
1
      <mn>8</mn>
6225
1
      <mo>,</mo>
6226
1
      <mn>123</mn>
6227
1
      <mo>,</mo>
6228
1
      <mn>456</mn>
6229
1
      <mo>,</mo>
6230
1
      <mi>…</mi>
6231
1
    </mrow>
6232
1
     </math>";
6233
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6234
1
  }
6235
6236
  #[test]
6237
1
    fn not_digit_block_negative_numbers_euro() -> Result<()> {
6238
1
        let test_str = "<math><mrow>
6239
1
      <mo>-</mo><mn>1</mn><mo>,</mo>
6240
1
      <mo>-</mo><mn>2</mn><mo>,</mo>
6241
1
      <mo>-</mo><mn>3</mn><mo>,</mo>
6242
1
      <mo>&#x2026;</mo>
6243
1
    </mrow></math>";
6244
1
        let target_str = "<math><mrow>
6245
1
        <mrow data-changed='added'>
6246
1
          <mo>-</mo>
6247
1
          <mn>1</mn>
6248
1
        </mrow>
6249
1
        <mo>,</mo>
6250
1
        <mrow data-changed='added'>
6251
1
          <mo>-</mo>
6252
1
          <mn>2</mn>
6253
1
        </mrow>
6254
1
        <mo>,</mo>
6255
1
        <mrow data-changed='added'>
6256
1
          <mo>-</mo>
6257
1
          <mn>3</mn>
6258
1
        </mrow>
6259
1
        <mo>,</mo>
6260
1
        <mi>…</mi>
6261
1
      </mrow></math>";
6262
1
      are_strs_canonically_equal_with_locale(test_str, target_str, &[], " .", ",")
6263
1
  }
6264
6265
  #[test]
6266
1
    fn ellipsis() -> Result<()> {
6267
1
        let test_str = "<math><mn>5</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>8</mn><mo>,</mo>
6268
1
        <mn>9</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>11</mn><mo>,</mo>
6269
1
        <mn>5</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>8</mn>
6270
1
      </math>";
6271
1
        let target_str = "<math><mrow data-changed='added'>
6272
1
      <mn>5</mn><mo>,</mo><mi>…</mi><mo>,</mo><mn>8</mn><mo>,</mo>
6273
1
      <mn>9</mn><mo>,</mo><mi>…</mi><mo>,</mo><mn>11</mn><mo>,</mo>
6274
1
      <mn>5</mn><mo>,</mo><mrow data-changed='added'><mo>.</mo><mo>.</mo></mrow>
6275
1
      <mo>,</mo><mn>8</mn></mrow></math>";
6276
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6277
1
  }
6278
6279
6280
  #[test]
6281
1
    fn no_merge_271() -> Result<()> {
6282
1
        let test_str = "<math><mrow><mo>{</mo>
6283
1
        <mrow><mn>2</mn><mo>,</mo><mn>4</mn><mo>,</mo><mn>6</mn></mrow>
6284
1
      <mo>}</mo></mrow></math>";
6285
1
        let target_str = "<math><mrow><mo>{</mo>
6286
1
        <mrow><mn>2</mn><mo>,</mo><mn>4</mn><mo>,</mo><mn>6</mn></mrow>
6287
1
      <mo>}</mo></mrow></math>";
6288
1
      are_strs_canonically_equal_with_locale(test_str, target_str, &[], " .", ",")
6289
1
  }
6290
6291
  #[test]
6292
1
    fn not_digit_block_271() -> Result<()> {
6293
1
        let test_str = "<math><mrow>
6294
1
        <mi>…</mi><mo>,</mo>
6295
1
        <mo>-</mo><mn>2</mn><mo>,</mo>
6296
1
        <mo>-</mo><mn>1</mn><mo>,</mo>
6297
1
        <mn>0</mn>
6298
1
      </mrow></math>";
6299
1
        let target_str = "<math> <mrow>
6300
1
      <mi>…</mi>
6301
1
      <mo>,</mo>
6302
1
      <mrow data-changed='added'><mo>-</mo><mn>2</mn></mrow>
6303
1
      <mo>,</mo>
6304
1
      <mrow data-changed='added'><mo>-</mo><mn>1</mn></mrow>
6305
1
      <mo>,</mo>
6306
1
      <mn>0</mn>
6307
1
      </mrow></math>";
6308
1
      are_strs_canonically_equal_with_locale(test_str, target_str, &[], " .", ",")
6309
1
  }
6310
6311
  #[test]
6312
1
    fn merge_decimal_in_list_271() -> Result<()> {
6313
1
        let test_str = "<math><mi>x</mi><mo>,</mo><mn>2</mn><mo>.</mo><mn>5</mn><mi>g</mi><mo>,</mo><mn>3</mn></math>";
6314
1
        let target_str = "<math> <mrow data-changed='added'>
6315
1
        <mi>x</mi>
6316
1
        <mo>,</mo>
6317
1
        <mrow data-changed='added'> <mn>2.5</mn> <mo data-changed='added'>&#x2062;</mo> <mi>g</mi> </mrow>
6318
1
        <mo>,</mo>
6319
1
        <mn>3</mn>
6320
1
      </mrow> </math>";
6321
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6322
1
  }
6323
6324
  #[test]
6325
1
    fn primes_common() -> Result<()> {
6326
1
        let test_str = "<math><msup><mn>5</mn><mo>'</mo></msup>
6327
1
              <msup><mn>5</mn><mo>''</mo></msup>
6328
1
              <msup><mn>8</mn><mrow><mo>'</mo><mo>'</mo></mrow></msup></math>";
6329
1
        let target_str = "<math>
6330
1
        <mrow data-changed='added'>
6331
1
        <msup>
6332
1
          <mn>5</mn>
6333
1
          <mo>′</mo>
6334
1
        </msup>
6335
1
        <mo data-changed='added'>&#x2062;</mo>
6336
1
        <msup>
6337
1
          <mn>5</mn>
6338
1
          <mo>″</mo>
6339
1
        </msup>
6340
1
        <mo data-changed='added'>&#x2062;</mo>
6341
1
        <msup>
6342
1
          <mn>8</mn>
6343
1
          <mo>″</mo>
6344
1
        </msup>
6345
1
        </mrow>
6346
1
      </math>";
6347
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6348
1
  }
6349
6350
  #[test]
6351
1
    fn primes_uncommon() -> Result<()> {
6352
1
        let test_str = "<math><msup><mn>5</mn><mo>''′</mo></msup>
6353
1
              <msup><mn>5</mn><mo>''''</mo></msup>
6354
1
              <msup><mn>8</mn><mrow><mo>′</mo><mo>⁗</mo></mrow></msup></math>";
6355
1
        let target_str = " <math>
6356
1
        <mrow data-changed='added'>
6357
1
        <msup>
6358
1
          <mn>5</mn>
6359
1
          <mo>‴</mo>
6360
1
        </msup>
6361
1
        <mo data-changed='added'>&#x2062;</mo>
6362
1
        <msup>
6363
1
          <mn>5</mn>
6364
1
          <mo>⁗</mo>
6365
1
        </msup>
6366
1
        <mo data-changed='added'>&#x2062;</mo>
6367
1
        <msup>
6368
1
          <mn>8</mn>
6369
1
          <mo>⁗′</mo>
6370
1
        </msup>
6371
1
        </mrow>
6372
1
      </math>";
6373
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6374
1
  }
6375
6376
  #[test]
6377
1
    fn merge_mi_test() -> Result<()> {
6378
1
        let test_str = "<math>
6379
1
      <mi>c</mi><mi>o</mi><mi>s</mi><mo>=</mo>
6380
1
      <mi>w</mi><mi>x</mi><mi>y</mi><mi>z</mi><mo>+</mo>
6381
1
      <mi>n</mi><mi>a</mi><mi>x</mi><mo>+</mo>
6382
1
        <mi>i</mi><mi>ω</mi><mi>t</mi><mo>+</mo>
6383
1
      <mi>f</mi><mi>l</mi><mi>o</mi><mi>w</mi><mo>+</mo>
6384
1
      <mi>m</mi><mi>a</mi><mi>x</mi>
6385
1
    </math> 
6386
1
  ";
6387
1
        let target_str = "<math>
6388
1
    <mrow data-changed='added'>
6389
1
      <mi>cos</mi>
6390
1
      <mo>=</mo>
6391
1
      <mrow data-changed='added'>
6392
1
        <mrow data-changed='added'>
6393
1
          <mi>w</mi>
6394
1
          <mo data-changed='added'>&#x2062;</mo>
6395
1
          <mi>x</mi>
6396
1
          <mo data-changed='added'>&#x2062;</mo>
6397
1
          <mi>y</mi>
6398
1
          <mo data-changed='added'>&#x2062;</mo>
6399
1
          <mi>z</mi>
6400
1
        </mrow>
6401
1
        <mo>+</mo>
6402
1
        <mrow data-changed='added'>
6403
1
          <mi>n</mi>
6404
1
          <mo data-changed='added'>&#x2062;</mo>
6405
1
          <mi>a</mi>
6406
1
          <mo data-changed='added'>&#x2062;</mo>
6407
1
          <mi>x</mi>
6408
1
        </mrow>
6409
1
        <mo>+</mo>
6410
1
        <mrow data-changed='added'>
6411
1
          <mi>i</mi>
6412
1
          <mo data-changed='added'>&#x2062;</mo>
6413
1
          <mi>ω</mi>
6414
1
          <mo data-changed='added'>&#x2062;</mo>
6415
1
          <mi>t</mi>
6416
1
        </mrow>
6417
1
        <mo>+</mo>
6418
1
        <mi>flow</mi>
6419
1
        <mo>+</mo>
6420
1
        <mi>max</mi>
6421
1
      </mrow>
6422
1
      </mrow>
6423
1
    </math>";
6424
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6425
1
  }
6426
6427
  #[test]
6428
1
    fn merge_mi_with_script_test() -> Result<()> {
6429
1
        let test_str = "<math>
6430
1
      <mi>c</mi><mi>o</mi><msup><mi>s</mi><mn>2</mn></msup><mi>y</mi><mo>=</mo>
6431
1
      <mi>l</mi><mi>o</mi><msup><mi>g</mi><mn>2</mn></msup><mi>y</mi><mo>+</mo>
6432
1
      <mi>d</mi><mi>a</mi><msup><mi>g</mi><mn>2</mn></msup>
6433
1
    </math>";
6434
1
        let target_str = "<math>
6435
1
        <mrow data-changed='added'>
6436
1
          <mrow data-changed='added'>
6437
1
            <msup>
6438
1
              <mi>cos</mi>
6439
1
              <mn>2</mn>
6440
1
            </msup>
6441
1
            <mo data-changed='added'>&#x2061;</mo>
6442
1
            <mi>y</mi>
6443
1
          </mrow>
6444
1
          <mo>=</mo>
6445
1
          <mrow data-changed='added'>
6446
1
            <mrow data-changed='added'>
6447
1
              <msup>
6448
1
                <mi>log</mi>
6449
1
                <mn>2</mn>
6450
1
              </msup>
6451
1
              <mo data-changed='added'>&#x2061;</mo>
6452
1
              <mi>y</mi>
6453
1
            </mrow>
6454
1
            <mo>+</mo>
6455
1
            <mrow data-changed='added'>
6456
1
              <mi>d</mi>
6457
1
              <mo data-changed='added'>&#x2062;</mo>
6458
1
              <mi>a</mi>
6459
1
              <mo data-changed='added'>&#x2062;</mo>
6460
1
              <msup>
6461
1
                <mi>g</mi>
6462
1
                <mn>2</mn>
6463
1
              </msup>
6464
1
            </mrow>
6465
1
          </mrow>
6466
1
        </mrow>
6467
1
      </math>";
6468
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6469
1
  }
6470
6471
  #[test]
6472
1
    fn merge_mi_with_script_bug_333_test() -> Result<()> {
6473
1
        let test_str = "<math>
6474
1
      <mi>l</mi><mi>o</mi><msub><mrow><mi>g</mi></mrow><mrow><mn>2</mn></mrow></msub><mo>=</mo>
6475
1
      <mi>l</mi><mi>i</mi><msub><mrow><mi>m</mi></mrow><mrow><mi>n</mi><mo>→</mo><mi>∞</mi></mrow></msub>
6476
1
    </math> 
6477
1
  ";
6478
1
        let target_str = " <math>
6479
1
        <mrow data-changed='added'>
6480
1
        <msub>
6481
1
          <mi>log</mi>
6482
1
          <mn>2</mn>
6483
1
        </msub>
6484
1
        <mo>=</mo>
6485
1
        <msub>
6486
1
          <mi>lim</mi>
6487
1
          <mrow>
6488
1
          <mi>n</mi>
6489
1
          <mo>→</mo>
6490
1
          <mi>∞</mi>
6491
1
          </mrow>
6492
1
        </msub>
6493
1
        </mrow>
6494
1
      </math>";
6495
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6496
1
  }
6497
6498
  #[test]
6499
1
    fn merge_mi_bug_545() -> Result<()> {
6500
1
        let test_str = "<math><mi>S</mi><mi>I</mi><msup><mi>N</mi><mrow><mo>-</mo><mn>1</mn></mrow></msup></math>";
6501
1
        let target_str = "<math><msup><mi mathvariant='normal'>SIN</mi><mrow><mo>-</mo><mn>1</mn></mrow></msup></math>";
6502
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6503
1
  }
6504
6505
  #[test]
6506
1
    fn parent_bug_94() -> Result<()> {
6507
    // This is a test to make sure the crash in the bug report doesn't happen.
6508
    // Note: in the bug, they behavior they would like is a single mn with content "0.02"
6509
    // However, TeX input "1 2 3" will produce three consecutive <mn>s, so merging <mn>s isn't good in general
6510
    // This test 
6511
1
        let test_str = " <math>
6512
1
      <mrow>
6513
1
        <msqrt>
6514
1
          <mrow>
6515
1
            <mstyle mathvariant='bold' mathsize='normal'><mn>0</mn></mstyle>
6516
1
            <mstyle mathvariant='bold' mathsize='normal'><mo>.</mo><mn>0</mn><mn>2</mn></mstyle>
6517
1
          </mrow>
6518
1
        </msqrt>
6519
1
      </mrow>
6520
1
    </math>
6521
1
    ";
6522
1
      let target_str = "<math>
6523
1
      <msqrt>
6524
1
        <mn mathsize='normal' mathvariant='bold' data-changed='added'>0.02</mn>
6525
1
      </msqrt>
6526
1
    </math>";
6527
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6528
1
  }
6529
6530
  #[test]
6531
1
  fn mstyle_merge_bug_272() -> Result<()> {
6532
1
        let test_str = r#"<math>
6533
1
      <msup>
6534
1
        <mstyle mathvariant="bold" mathsize="normal">
6535
1
          <mn>6</mn>
6536
1
        </mstyle>
6537
1
        <mstyle mathvariant="bold" mathsize="normal">
6538
1
          <mn>9</mn>
6539
1
        </mstyle>
6540
1
      </msup>
6541
1
    </math>"#;
6542
1
      let target_str = "<math>
6543
1
      <msup>
6544
1
      <mn mathsize='normal' mathvariant='bold'>𝟔</mn>
6545
1
      <mn mathsize='normal' mathvariant='bold'>𝟗</mn>
6546
1
      </msup>
6547
1
    </math>";
6548
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
6549
1
  }
6550
6551
6552
  #[test]
6553
1
  fn munder_mspace_bug_296() -> Result<()> {
6554
    // this was a "typo" bug that should have looking embellished base
6555
1
        let test_str = r#"<math>
6556
1
      <mrow><mn>5</mn><mfrac><mn>9</mn><mrow><mn>10</mn></mrow></mfrac>
6557
1
        <munder accentunder="true"><mspace width="2.7em" /><mo stretchy="true">_</mo></munder>
6558
1
        </mrow></math>"#;
6559
1
      let target_str = "<math><mrow>
6560
1
        <mrow data-changed='added'>
6561
1
          <mn>5</mn>
6562
1
          <mo data-changed='added'>&#x2064;</mo>
6563
1
          <mfrac> <mn>9</mn><mn>10</mn> </mfrac>
6564
1
        </mrow>
6565
1
        <munder accentunder='true'>
6566
1
          <mo width='2.7em' data-changed='was-mspace' data-width='2.7' data-empty-in-2D='true' data-function-likelihood='false'> </mo>
6567
1
          <mo stretchy='true'>¯</mo>
6568
1
        </munder>
6569
1
      </mrow></math>";
6570
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
6571
1
  }
6572
6573
  #[test]
6574
1
  fn parse_scripted_open_paren_439() -> Result<()> {
6575
    // this was a "typo" bug that should have looking embellished base
6576
1
        let test_str = r#"<math><mrow><msub><mo>(</mo><mn>2</mn></msub><mo>)</mo></mrow></math>"#;
6577
1
      let target_str = "<math><mrow><msub><mo>(</mo><mn>2</mn></msub><mo>)</mo></mrow></math>";
6578
1
    are_strs_canonically_equal_result(test_str, target_str, &[])
6579
1
  }
6580
6581
  #[test]
6582
1
    fn lift_script() -> Result<()> {
6583
1
        let test_str = "<math xmlns='http://www.w3.org/1998/Math/MathML' >
6584
1
    <mrow>
6585
1
      <mstyle scriptlevel='0' displaystyle='true'>
6586
1
      <mrow>
6587
1
        <msqrt>
6588
1
        <munder>
6589
1
          <mo>∑<!-- ∑ --></mo>
6590
1
          <mrow>
6591
1
          <mn>0</mn>
6592
1
          <mo>≤<!-- ≤ --></mo>
6593
1
          <mi>k</mi>
6594
1
          <mo>≤<!-- ≤ --></mo>
6595
1
          <mi>n</mi>
6596
1
          </mrow>
6597
1
        </munder>
6598
1
        <mrow>
6599
1
          <mo stretchy='false'>|</mo>
6600
1
        </mrow>
6601
1
        <msub>
6602
1
          <mi>a</mi>
6603
1
          <mrow>
6604
1
          <mi>k</mi>
6605
1
          </mrow>
6606
1
        </msub>
6607
1
        <msup>
6608
1
          <mrow>
6609
1
          <mo stretchy='false'>|</mo>
6610
1
          </mrow>
6611
1
          <mrow>
6612
1
          <mn>2</mn>
6613
1
          </mrow>
6614
1
        </msup>
6615
1
        </msqrt>
6616
1
      </mrow>
6617
1
      </mstyle>
6618
1
    </mrow>
6619
1
    </math>";
6620
1
        let target_str = "<math>
6621
1
    <msqrt scriptlevel='0' displaystyle='true'>
6622
1
      <mrow data-changed='added'>
6623
1
      <munder>
6624
1
        <mo>∑</mo>
6625
1
        <mrow>
6626
1
        <mn>0</mn>
6627
1
        <mo>≤</mo>
6628
1
        <mi>k</mi>
6629
1
        <mo>≤</mo>
6630
1
        <mi>n</mi>
6631
1
        </mrow>
6632
1
      </munder>
6633
1
      <msup>
6634
1
        <mrow data-changed='added'>
6635
1
        <mo stretchy='false'>|</mo>
6636
1
        <msub>
6637
1
          <mi>a</mi>
6638
1
          <mi>k</mi>
6639
1
        </msub>
6640
1
        <mo stretchy='false'>|</mo>
6641
1
        </mrow>
6642
1
        <mn>2</mn>
6643
1
      </msup>
6644
1
      </mrow>
6645
1
    </msqrt>
6646
1
     </math>";
6647
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6648
1
  }
6649
6650
  #[test]
6651
1
    fn pseudo_scripts() -> Result<()> {
6652
1
        let test_str = "<math><mrow>
6653
1
        <mi>cos</mi><mn>30</mn><mo>°</mo>
6654
1
        <mi>sin</mi><mn>60</mn><mo>′</mo>
6655
1
        </mrow></math>";
6656
1
        let target_str = "<math>
6657
1
    <mrow>
6658
1
      <mrow data-changed='added'>
6659
1
      <mi>cos</mi>
6660
1
      <mo data-changed='added'>&#x2061;</mo>
6661
1
      <msup data-changed='added'><mn>30</mn><mo>°</mo></msup>
6662
1
      </mrow>
6663
1
      <mo data-changed='added'>&#x2062;</mo>
6664
1
      <mrow data-changed='added'>
6665
1
      <mi>sin</mi>
6666
1
      <mo data-changed='added'>&#x2061;</mo>
6667
1
      <msup data-changed='added'><mn>60</mn><mo>′</mo></msup>
6668
1
      </mrow>
6669
1
    </mrow>
6670
1
     </math>";
6671
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6672
1
  }
6673
6674
  #[test]
6675
1
    fn pseudo_scripts_in_mi() -> Result<()> {
6676
1
        let test_str = "<math><mrow><mi>p'</mi><mo>=</mo><mi>µ°C</mi></mrow></math>";
6677
1
        let target_str = "<math><mrow><msup><mi>p</mi><mo>′</mo></msup><mo>=</mo><mi>µ°C</mi></mrow></math>";
6678
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6679
1
  }
6680
6681
  #[test]
6682
1
    fn prescript_only() -> Result<()> {
6683
1
        let test_str = "<math><msub><mtext/><mn>92</mn></msub><mi>U</mi></math>";
6684
1
        let target_str = "<math><mmultiscripts><mi>U</mi><mprescripts/> <mn>92</mn><none/> </mmultiscripts></math>";
6685
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6686
1
  }
6687
6688
  #[test]
6689
1
    fn pre_and_postscript_only() -> Result<()> {
6690
1
        let test_str = "<math>
6691
1
      <msub><mrow/><mn>0</mn></msub>
6692
1
      <msub><mi>F</mi><mn>1</mn></msub>
6693
1
      <mo stretchy='false'>(</mo>
6694
1
      <mi>a</mi><mo>,</mo><mi>b</mi><mo>;</mo><mi>c</mi><mo>;</mo><mi>z</mi>
6695
1
      <mo stretchy='false'>)</mo>
6696
1
    </math>";
6697
1
      let target_str = " <math>
6698
1
      <mrow data-changed='added'>
6699
1
      <mmultiscripts>
6700
1
        <mi>F</mi>
6701
1
        <mn>1</mn>
6702
1
        <none></none>
6703
1
        <mprescripts></mprescripts>
6704
1
        <mn>0</mn>
6705
1
        <none></none>
6706
1
      </mmultiscripts>
6707
1
      <mo data-changed='added'>&#x2061;</mo>
6708
1
      <mrow data-changed='added'>
6709
1
        <mo stretchy='false'>(</mo>
6710
1
        <mrow data-changed='added'>
6711
1
        <mrow data-changed='added'>
6712
1
          <mi>a</mi>
6713
1
          <mo>,</mo>
6714
1
          <mi>b</mi>
6715
1
        </mrow>
6716
1
        <mo>;</mo>
6717
1
        <mi>c</mi>
6718
1
        <mo>;</mo>
6719
1
        <mi>z</mi>
6720
1
        </mrow>
6721
1
        <mo stretchy='false'>)</mo>
6722
1
      </mrow>
6723
1
      </mrow>
6724
1
    </math>";
6725
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6726
1
  }
6727
6728
  #[test]
6729
1
    fn pointless_nones_in_mmultiscripts() -> Result<()> {
6730
1
        let test_str = "<math><mmultiscripts>
6731
1
        <mtext>C</mtext>
6732
1
        <none />
6733
1
        <none />
6734
1
        <mprescripts />
6735
1
        <mn>6</mn>
6736
1
        <mn>14</mn>
6737
1
      </mmultiscripts></math>";
6738
1
        let target_str = "<math>
6739
1
    <mmultiscripts data-chem-formula='6'>
6740
1
    <mtext data-chem-element='1'>C</mtext>
6741
1
    <mprescripts></mprescripts>
6742
1
    <mn>6</mn>
6743
1
    <mn>14</mn>
6744
1
    </mmultiscripts>
6745
1
    </math>";
6746
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6747
1
  }
6748
6749
  #[test]
6750
1
    fn empty_mmultiscripts_485() -> Result<()> {
6751
1
        let test_str = "<math><mmultiscripts>   </mmultiscripts></math>";
6752
1
        let target_str = ""; // shouldn't get to the point of comparing because the input is illegal.
6753
1
        let err = are_strs_canonically_equal_result(test_str, target_str, &[])
6754
1
            .expect_err("empty mmultiscripts should be rejected");
6755
1
        assert!(
6756
1
            err.to_string().contains("mmultiscripts has the wrong number of children:\n <mmultiscripts></mmultiscripts>"),
6757
            "unexpected error message: {err}"
6758
        );
6759
1
        Ok(())
6760
1
  }
6761
6762
  #[test]
6763
1
    fn empty_mmultiscripts_544() -> Result<()> {
6764
1
        let test_str = "<math><mmultiscripts><mrow/><mprescripts></mprescripts><mrow/><mrow/></mmultiscripts></math>";
6765
1
        let target_str = "<math> <mtext data-changed='empty_content' data-width='0'> </mtext></math>";
6766
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6767
1
  }
6768
6769
  #[test]
6770
1
    fn empty_mrows_in_mmultiscripts_306() -> Result<()> {
6771
1
        let test_str = "<math display='block'>
6772
1
      <mmultiscripts intent='_permutation:prefix(_of,$k,_from,$n)'>
6773
1
        <mi>P</mi>
6774
1
        <mi arg='k'>k</mi>
6775
1
        <mrow/>
6776
1
        <mprescripts/>
6777
1
        <mrow/>
6778
1
        <mi arg='n'>n</mi>
6779
1
      </mmultiscripts>
6780
1
    </math>";
6781
1
        let target_str = "<math display='block'>
6782
1
      <mmultiscripts intent='_permutation:prefix(_of,$k,_from,$n)'>
6783
1
        <mi>P</mi>
6784
1
        <mi arg='k'>k</mi>
6785
1
        <none></none>
6786
1
        <mprescripts></mprescripts>
6787
1
        <none></none>
6788
1
        <mi arg='n'>n</mi>
6789
1
      </mmultiscripts>
6790
1
    </math>";
6791
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6792
1
  }
6793
6794
6795
  #[test]
6796
  #[ignore] // this fails -- need to figure out grabbing base from previous or next child
6797
0
    fn tensor() -> Result<()> {
6798
0
        let test_str = "<math>
6799
0
        <msub><mi>R</mi><mi>i</mi></msub>
6800
0
        <msup><mrow/><mi>j</mi></msup>
6801
0
        <msub><mrow/><mi>k</mi></msub>
6802
0
        <msub><mrow/><mi>l</mi></msub>
6803
0
      </math>";
6804
0
    let target_str = "<math>
6805
0
      <mmultiscripts>
6806
0
        <mi> R </mi>
6807
0
        <mi> i </mi>
6808
0
        <none/>
6809
0
        <none/>
6810
0
        <mi> j </mi>
6811
0
        <mi> k </mi>
6812
0
        <none/>
6813
0
        <mi> l </mi>
6814
0
        <none/>
6815
0
      </mmultiscripts>
6816
0
    </math>";
6817
0
        are_strs_canonically_equal_result(test_str, target_str, &[])
6818
0
  }
6819
6820
6821
  #[test]
6822
1
    fn test_nonascii_function_name() -> Result<()> {
6823
1
        let test_str = r#"<math>
6824
1
        <mi mathvariant="bold-italic">x</mi>
6825
1
        <mo>=</mo>
6826
1
        <mn>2</mn>
6827
1
        <mrow>
6828
1
        <mi>𝒔𝒊𝒏</mi>
6829
1
        <mo>&#x2061;</mo>
6830
1
        <mrow><mi mathvariant="bold-italic">t</mi></mrow>
6831
1
        </mrow>
6832
1
        <mo>-</mo>
6833
1
        <mn>1</mn>
6834
1
      </math>"#;
6835
1
    let target_str = r#"<math>
6836
1
      <mrow data-changed='added'>
6837
1
      <mi mathvariant='bold-italic'>𝒙</mi>
6838
1
      <mo>=</mo>
6839
1
      <mrow data-changed='added'>
6840
1
        <mrow data-changed='added'>
6841
1
        <mn>2</mn>
6842
1
        <mo data-changed='added'>&#x2062;</mo>
6843
1
        <mrow>
6844
1
          <mi>sin</mi>
6845
1
          <mo>&#x2061;</mo>
6846
1
          <mi mathvariant='bold-italic'>𝒕</mi>
6847
1
        </mrow>
6848
1
        </mrow>
6849
1
        <mo>-</mo>
6850
1
        <mn>1</mn>
6851
1
      </mrow>
6852
1
      </mrow>
6853
1
    </math>"#;
6854
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6855
1
  }
6856
6857
  #[test]
6858
1
    fn test_nonascii_function_name_as_chars() -> Result<()> {
6859
1
        let test_str = r#"<math display="block">
6860
1
      <mi>&#x1D499;</mi>
6861
1
      <mo>=</mo>
6862
1
      <mrow>
6863
1
        <mrow>
6864
1
          <mi>&#x1D484;</mi>
6865
1
          <mi>&#x1D490;</mi>
6866
1
          <mi>&#x1D494;</mi>
6867
1
        </mrow>
6868
1
        <mo>&#x2061;</mo>
6869
1
        <mrow>
6870
1
          <mi>&#x1D495;</mi>
6871
1
        </mrow>
6872
1
      </mrow>
6873
1
      <mo>+</mo>
6874
1
      <mn>&#x1D7D0;</mn>
6875
1
    </math>"#;
6876
1
    let target_str = r#"<math display='block'>
6877
1
      <mrow data-changed='added'>
6878
1
        <mi>𝒙</mi>
6879
1
        <mo>=</mo>
6880
1
        <mrow data-changed='added'>
6881
1
          <mrow>
6882
1
          <mi>cos</mi>
6883
1
          <mo>&#x2061;</mo>
6884
1
          <mi>𝒕</mi>
6885
1
          </mrow>
6886
1
          <mo>+</mo>
6887
1
          <mn>𝟐</mn>
6888
1
        </mrow>
6889
1
      </mrow>
6890
1
    </math>"#;
6891
1
        are_strs_canonically_equal_result(test_str, target_str, &[])
6892
1
  }
6893
6894
6895
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/chemistry.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/chemistry.rs.html index ce7c50b5..29c0666c 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/chemistry.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/chemistry.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/chemistry.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
3
// Chemistry terms used here:
4
// chemical formula -- this references a molecule (one or more elements with bonds between them), including its state.
5
// chemical equation -- this is a notation specialized to chemistry -- it has concentration, arrows, equality, "addition" along with 
6
//    some special symbols for operators and (mostly) chemical formulas for operands.
7
//    Operand exceptions are the equilibrium constant, numbers, and identifiers.
8
//    Although a chemical equation is a superset of a chemical formula, because we want to distinguish the two (e.g., '=' is in both),
9
//      we require that chemical equation is an mrow
10
//    FIX?? -- can it be an adorned mrow?
11
//    Note: with the current definition, if any element in a potential chem equation is ruled out, the entire mrow is ruled out.
12
//
13
// The general flow is that for every element that looks like a chem formula/equation, we mark it with data-likely-[equation/formula]
14
// After we are done marking "likely", we go back and either delete them or replace them with data-[equation/formula].
15
// Note: anything already marked with data-[equation/formula] doesn't need recomputation later (essentially the result is cached)
16
//
17
// There is a chicken and egg problem with detecting chemistry: to more reliably detect it, we need good structure.
18
// However, to get the structure right (e.,g "=" being a double bond, not equality; chem elements being in 'mi's; ...),
19
//   we need to know "=" is part of a chemical formula.
20
// The imperfect solution used is:
21
//   As the final step of each recursive call to 'clean_mathml',
22
//     1. mi/mtext: is it a chemical element(s) or one of the symbols used in chemical formulas (not equations).
23
//        If so, mark it MAYBE_CHEMISTRY.
24
//     2. msub/msup/msubsup/mmultiscripts: is base marked MAYBE_CHEMISTRY and the scripts are potential adornments, mark it MAYBE_CHEMISTRY
25
//     3. mrows: these take a few passes (remember, they aren't structured properly yet)
26
//        On the assumption that chemistry is not common we implement a "show me" attitude before changing the structure.
27
//        Pass 1:
28
//        a) for any run of mi/mtext that can be re-split into chem elements, split them and mark them if it is at least 3 chars long
29
//        b) if there are any potential chem formula operators (e.g., "=" and ":") and the previous node is marked MAYBE_CHEMISTRY,
30
//           mark this as MAYBE_CHEMISTRY
31
//        Pass 2: (assuming something was marked in pass 1)
32
//        a) find the first marked child and then the last consecutive marked child and trim any mo's from the ends
33
//        b) evaluate the likelihood that the sequence is chemistry
34
//           yes: replace mathml children with new (potentially restructured) children
35
//           no: clear all the marks for the old children
36
// After canonicalization, we take another pass looking for chemical equations and marking them if found.
37
38
use sxd_document::dom::{Element, Document, ChildOfElement};
39
use crate::canonicalize::*;
40
use crate::pretty_print::mml_to_string;
41
use crate::xpath_functions::{is_leaf, IsNode};
42
use regex::Regex;
43
use crate::xpath_functions::IsBracketed;
44
use phf::{phf_map, phf_set};
45
use std::convert::TryInto;
46
#[allow(unused_imports)]
47
use log::{error, debug};
48
use std::collections::HashSet;
49
use std::cmp::Ordering;
50
use crate::errors::*;
51
use std::sync::LazyLock;
52
53
54
pub static NOT_CHEMISTRY: i32 = -10000;  // should overwhelm any positive signal
55
static NOT_CHEMISTRY_THRESHOLD: i32 = -10000/2;  // value for testing -- that way some can be added to NOT_CHEMISTRY and still meet the test
56
static CHEMISTRY_THRESHOLD: i32 = 5;   // if this changes, change CHEMISTRY_THRESHOLD_STR
57
58
59
/// this might be chemistry -- should only exist during canonicalization
60
pub static MAYBE_CHEMISTRY: &str = "data-maybe-chemistry";
61
62
/// Attr flag to indicate chemical equation
63
static CHEM_EQUATION: &str = "data-chem-equation";
64
/// Attr flag to indicate chemical formula
65
static CHEM_FORMULA: &str = "data-chem-formula";
66
/// Attr flag to indicate chemical element
67
static CHEM_ELEMENT: &str = "data-chem-element";
68
static CHEM_FORMULA_OPERATOR: &str = "data-chem-formula-op";
69
static CHEM_EQUATION_OPERATOR: &str = "data-chem-equation-op";
70
static CHEM_STATE: &str = "data-chem-state";
71
72
/// mark a new chem element that happened due to splitting a leaf
73
pub static SPLIT_TOKEN: &str = "data-split";
74
75
/// mark a new chem element that happened due to merging two leaves
76
static MERGED_TOKEN: &str = "data-merged";
77
78
/// these can be in the base of an under/over script
79
6.64k
fn is_chem_equation_arrow(ch: char) -> bool {
80
6.64k
    
matches!6.44k
(ch,
81
        '→' | '➔' | '←' | '⟶' | '⟵' | '⤻' | '⇋' | '⇌' |
82
        '↑' | '↓' | '↿' | '↾' | '⇃' | '⇂' | '⥮' | '⥯' | '⇷' | '⇸' | '⤉' | '⤈' |
83
        '⥂' | '⥄' | '⥃' |
84
        '\u{1f8d0}' | '\u{1f8d1}' | '\u{1f8d2}' | '\u{1f8d3}' | '\u{1f8d4}' | '\u{1f8d5}'  // proposed Unicode equilibrium arrows
85
    )
86
6.64k
}
87
88
// Returns true if the 'property' (should have ":") is in the intent
89
196k
fn has_chem_intent(mathml: Element, property: &str) -> bool {
90
196k
    if let Some(
intent16.9k
) = mathml.attribute_value(INTENT_ATTR) {
91
16.9k
        let head = intent.split('(').next().unwrap();
92
16.9k
        return head.contains(property);
93
179k
    }
94
179k
    return false;
95
196k
}
96
97
26.7k
fn has_inherited_property(mathml: Element, property: &str) -> bool {
98
26.7k
    let mut current = mathml;
99
    loop {
100
101k
        if has_chem_intent(current, property) {
101
0
            return true;
102
101k
        }
103
        // chem might not be temp node without a 'math' parent
104
101k
        if name(current) == "math" || 
current.parent()74.6k
.
is_none74.6k
() {
105
26.7k
            break;
106
74.6k
        }
107
74.6k
        current = get_parent(current);
108
    }
109
26.7k
    return false;
110
26.7k
}
111
112
30.2k
pub fn is_chemistry_off(mathml: Element) -> bool {
113
30.2k
    if has_chem_intent(mathml, ":chemical-formula") || has_chem_intent(mathml, ":chemical-equation") {
114
4
        return false;
115
30.2k
    }
116
30.2k
    let pref_manager = crate::prefs::PreferenceManager::get();
117
30.2k
    return pref_manager.borrow().pref_to_string("Chemistry") == "Off";
118
30.2k
}
119
120
10.1k
pub fn clean_chemistry_mrow(mathml: Element) {
121
10.1k
    if is_chemistry_off(mathml) {
122
0
        return;
123
10.1k
    }
124
    // debug!("clean_chemistry_mrow:\n{}", mml_to_string(mathml));
125
10.1k
    let mut children = mathml.children().iter()
126
31.3k
                .
map10.1k
(|child| as_element(*child))
127
10.1k
                .collect::<Vec<Element>>();
128
10.1k
    if let Some(
new_children246
) = clean_mrow_children_restructure_pass(&children) {
129
246
        mathml.replace_children(&new_children);
130
246
        children = new_children;
131
9.93k
    }
132
10.1k
    clean_mrow_children_mark_pass(&children);
133
10.1k
}
134
135
/// Do some aggressive structural changes and if they make this look like a chemistry formula, mark it as one else remove other marks
136
/// Note: the element is replaced with a new restructured element if it is marked as chemistry
137
///        Pass 1:
138
///        a) for any run of mi/mtext that can be re-split into chem elements, split them and mark them if it is at least 3 chars long.
139
///           Also split "(g)", etc., when in mi/mtext
140
///        b) if there are any potential chem formula operators (e.g., "=" and ":") and the previous node is marked MAYBE_CHEMISTRY,
141
///           mark this as MAYBE_CHEMISTRY
142
10.1k
fn clean_mrow_children_restructure_pass<'a>(old_children: &[Element<'a>]) -> Option<Vec<Element<'a>>> {
143
10.1k
    let mut changed = false;
144
10.1k
    let mut new_children = Vec::with_capacity(2*old_children.len());
145
10.1k
    let mut i = 0;
146
40.7k
    while i < old_children.len() {
147
30.6k
        if let Some(
paren_mrow_aq1
) = clean_aq_state(old_children, i) {
148
1
            new_children.push(paren_mrow_aq);
149
1
            i += 4;                                 // skipping "( a q )"
150
1
            changed = true;
151
1
            continue;
152
        } else {
153
30.6k
            let child = old_children[i];
154
30.6k
            let child_name = name(child);
155
30.6k
            if  child_name == "mi" || (
child_name == "mtext"22.0k
&&
as_text(child).len() < 4228
) {
156
                // break mi/mtext that is done as "(g)", etc. Even if it isn't 'g', 'l', etc., it probably shouldn't be an mi/text.
157
8.62k
                let text = as_text(child);
158
8.62k
                if text.starts_with('(') && 
text4
.
ends_with4
(')') {
159
4
                    let doc = child.document();
160
4
                    let state = create_mathml_element(&doc, "mi");
161
4
                    state.set_text(&text[1..text.len()-1]);
162
4
                    let open = create_mathml_element(&doc, "mo");
163
4
                    open.set_text("(");
164
4
                    let close = create_mathml_element(&doc, "mo");
165
4
                    close.set_text(")");
166
4
                    let mrow = create_mathml_element(&doc, "mrow");
167
4
                    mrow.append_children(&[open,state,close]);
168
4
                    new_children.push(mrow);
169
4
                    i += 1;
170
4
                    changed = true;
171
4
                    continue;
172
8.62k
                }
173
21.9k
            } else if i + 2 < old_children.len() {
174
                // wrap with an mrow if we are not already an 'mrow'
175
9.68k
                let parent = get_parent(child); // safe since 'math' is always at root
176
9.68k
                if !(name(parent) == "mrow" && 
i == 02.86k
&&
old_children.len() == 31.44k
) &&
177
8.68k
                    let Some(
paren_mrow377
) = make_mrow(old_children[i..i+3].try_into().unwrap()) {
178
                        // debug!("make_mrow added mrow");
179
377
                        new_children.push(paren_mrow);
180
377
                        i += 3;
181
377
                        changed = true;
182
377
                        continue;
183
9.30k
                    }
184
12.3k
            }
185
30.2k
            if child_name == "mo" {
186
9.50k
                let likely_chemistry_op = likely_chem_formula_operator(child);
187
                // debug!("clean_mrow_children_restructure_pass -- in mo: likely {}, {}", likely_chemistry_op, mml_to_string(child));
188
9.50k
                if likely_chemistry_op >= 0 {
189
                    // if possible chemistry to left and right, then override text for operator lookup
190
                    // note: on the right, we haven't set chem flag for operators yet, so we skip them
191
2.98k
                    let preceding = child.preceding_siblings();
192
2.98k
                    let following = child.following_siblings();
193
2.98k
                    if !preceding.is_empty() &&
194
1.84k
                       ( has_inherited_property(child, "chemical-formula") ||
195
2.27k
                         
preceding.iter()1.84k
.
all1.84k
(|&child| {
196
2.27k
                            let child = as_element(child);
197
2.27k
                            name(child)=="mn" || 
child2.13k
.attribute(MAYBE_CHEMISTRY).
is_some2.13k
()}) &&
198
574
                            
!following.is_empty()273
&&
following.iter()246
.
all246
(|&child| {
199
574
                                let child = as_element(child);
200
574
                                name(child)=="mo" || 
name(child)=="mn"437
||
child351
.attribute(MAYBE_CHEMISTRY).
is_some351
()
201
574
                            })) {
202
146
                        // "=", etc., should be treated as high priority separators
203
146
                        // debug!("clean_mrow_children_restructure: child = {}", mml_to_string(child));
204
146
                        child.set_attribute_value(CHEMICAL_BOND, "true");
205
146
                        child.set_attribute_value(CHEM_FORMULA_OPERATOR, &likely_chemistry_op.to_string());
206
146
                        child.set_attribute_value(MAYBE_CHEMISTRY, &likely_chemistry_op.to_string());
207
2.83k
                    }
208
6.52k
                } else {
209
6.52k
                    likely_chem_equation_operator(child);   // need to mark MAYBE_CHEMISTRY for CHEMICAL_BOND tests
210
6.52k
                }
211
20.7k
            } else if child_name == "mrow" &&
212
2.05k
                      let Some(
latex_value1
) = child.attribute_value("data-latex") &&
213
1
                      latex_value == r"\mathrel{\longrightleftharpoons}" {
214
0
                child.set_attribute_value("data-unicode", "\u{1f8d2}");
215
0
                child.set_attribute_value(MAYBE_CHEMISTRY, "2");    // same as is_hack_for_missing_arrows()
216
20.7k
            }
217
30.2k
            i += 1;
218
30.2k
            new_children.push(child);
219
        }
220
    }
221
222
10.1k
    return if changed {
Some(new_children)246
} else {
None9.93k
};
223
    
224
225
    /// if it looks like we have ChemFormula ( a q ), merge the 'a' and 'q' together into an 'mi'
226
    /// if not already true, structure '( aq )' into a single mrow (might be other elements on either side)
227
    /// returns the last char matched
228
30.6k
    fn clean_aq_state<'a>(children: &[Element<'a>], i: usize) -> Option<Element<'a>> {
229
30.6k
        if i+3 >= children.len() || (
i > 010.8k
&&
children[i-1]9.38k
.attribute(MAYBE_CHEMISTRY).
is_none9.38k
()) {
230
27.8k
            return None;       // can't be '( a q )' -- not enough elements left or not Chem Formula on left
231
2.79k
        }
232
        
233
        // this is a little sloppy in that we allow matching text in any leaf element, but we can use the same function
234
2.79k
        if is_text(children[i], "(") &&
235
244
           is_text(children[i+1], "a") && 
is_text9
(
children[i+2]9
,
"q"9
) &&
236
1
           is_text(children[i+3], ")") {
237
1
            let mi = create_mathml_element(&children[i].document(), "mi");
238
1
            mi.set_text("aq");
239
1
            return make_mrow([children[i], mi, children[i+3]]);
240
2.79k
        }
241
2.79k
        return None;
242
30.6k
    }
243
244
12.3k
    fn is_text(node: Element, target: &str) -> bool {
245
12.3k
        return is_leaf(node) && 
as_text(node) == target11.1k
;
246
12.3k
    }
247
248
    /// Converts  "( child )" to mrow with those elements as children.
249
    /// This is to make ascertaining whether this is a chemical state easier, but it is correct even if not a chemical state.
250
8.68k
    fn make_mrow(children: [Element; 3]) -> Option<Element> {
251
        // this is a little sloppy in that we allow matching text in any leaf element, but we can use the same function
252
8.68k
        if is_text(children[0], "(") &&
253
631
           is_text(children[2], ")") {
254
378
      let mrow = create_mathml_element(&children[0].document(), "mrow");
255
378
      mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
256
378
      mrow.append_children(children);
257
378
            return Some(mrow);
258
8.31k
        }
259
8.31k
        return None;
260
8.68k
    }
261
10.1k
}
262
263
/// Pass 2: (assuming something was marked in pass 1)
264
/// a) find the first marked child and then the last consecutive marked child and trim any mo's from the ends
265
/// b) evaluate the likelihood that the sequence is chemistry
266
10.1k
fn clean_mrow_children_mark_pass(children: &[Element]) {
267
10.1k
    let mut start = None;
268
30.6k
    for i in 
0..children.len()10.1k
{
269
30.6k
        let child = children[i];
270
30.6k
        if child.attribute(MAYBE_CHEMISTRY).is_some()  {
271
4.64k
            if start.is_none() {
272
3.63k
                if name(child) == "mo" {
273
2.38k
                    // debug!(" start.is_none(): removing MAYBE_CHEMISTRY on {}", as_text(child));
274
2.38k
                    child.remove_attribute(MAYBE_CHEMISTRY);
275
2.38k
                    child.remove_attribute(CHEM_FORMULA_OPERATOR);
276
2.38k
                    child.remove_attribute(CHEM_EQUATION_OPERATOR);
277
2.38k
                    child.remove_attribute(CHEMICAL_BOND);
278
2.38k
                } else {
279
1.25k
                    start = Some(i);
280
1.25k
                }
281
1.00k
            }
282
25.9k
        } else if let Some(
seq_start804
) = start &&
283
804
                  remove_operators_at_end_of_sequence(children, seq_start, i) {
284
804
            start = None;
285
25.1k
        }
286
    }
287
288
10.1k
    if let Some(
seq_start452
) = start {
289
452
        remove_operators_at_end_of_sequence(children, seq_start, children.len());
290
9.73k
    }
291
10.1k
    return;
292
293
294
1.25k
    fn remove_operators_at_end_of_sequence(children: &[Element], start: usize, end: usize) -> bool {
295
        // debug!("  looking for ops at end of {}..{}, last is:{}", start, end, mml_to_string(children[end-1]));
296
1.45k
        for stop in (
start..end1.25k
).
rev1.25k
() {
297
1.45k
            let end_child = children[stop];
298
1.45k
            if name(end_child) == "mo" {
299
202
                end_child.remove_attribute(MAYBE_CHEMISTRY);
300
202
            } else {
301
1.25k
                return true;
302
            }
303
        }
304
0
        return false
305
1.25k
}
306
10.1k
}
307
308
309
/// Very little software gets the token elements for chemistry right.
310
/// Sometimes multiple elements are in a single token (e.g. "NaCl") and sometimes
311
/// a single element is spread across multiple tokens (e.g. "N", "a").
312
/// 
313
/// Here we attempt one or the other repair, but not both on the assumption there is 
314
/// consistency in the error.
315
/// 
316
/// Returns a Vec of the chemical elements or None. If a merge happened, the tree is altered.
317
12.3k
pub fn convert_leaves_to_chem_elements(mathml: Element) -> Option<Vec<Element>> {
318
    // gather up all the consecutive mi/mtext
319
12.3k
    if !(name(mathml) == "mi" || 
name(mathml) == "mtext"942
) {
320
0
        return None;       // do nothing
321
12.3k
    }
322
323
    // we play games with the string to avoid allocation...
324
12.3k
    let token_string = as_text(mathml);
325
12.3k
    if !token_string.is_ascii() {
326
2.67k
        return None;    // chemical elements are ASCII
327
9.62k
    }
328
9.62k
    let doc = mathml.document();
329
9.62k
    if token_string.len() > 1 {   // safe because all chars are ASCII
330
2.54k
        return split_string_chem_element(&doc, mathml);
331
7.08k
    }   
332
7.08k
    let parent = get_parent(mathml);
333
7.08k
    let parent_name = name(parent);
334
7.08k
    if !(parent_name == "mrow" || 
parent_name == "math"4.28k
) { // not canonicalized yet
335
2.57k
        return None;    // only try to merge if in an mrow
336
4.50k
    }
337
4.50k
    let answer = merge_tokens_chem_element(&doc, mathml, &mathml.following_siblings());
338
4.50k
    return answer;
339
340
341
4.50k
    fn merge_tokens_chem_element<'a>(doc: &Document<'a>, leaf: Element<'a>, following_siblings: &[ChildOfElement<'a>]) -> Option<Vec<Element<'a>>> {
342
4.50k
        if following_siblings.is_empty() {
343
1.22k
            return None;
344
3.28k
        }
345
3.28k
        let second_element = as_element(following_siblings[0]);
346
3.28k
        let second_element_name = name(second_element);
347
3.28k
        if second_element_name != "mi" && 
second_element_name != "mtext"3.05k
{
348
3.02k
            return None;
349
256
        }
350
256
        let second_element_text = as_text(second_element);
351
256
        if second_element_text.len() != 1 {
352
57
            return None;
353
199
        }
354
199
        let token_string = as_text(leaf);
355
199
        let chem_token_string = vec![token_string.as_bytes()[0], second_element_text.as_bytes()[0]];
356
199
        if let Some(
chem_element4
) = get_chem_element(doc, &chem_token_string, 2) {
357
4
            chem_element.set_text(as_text(chem_element));
358
4
            chem_element.set_attribute_value(MAYBE_CHEMISTRY, chem_element.attribute_value(MAYBE_CHEMISTRY).unwrap());
359
4
            chem_element.set_attribute_value(MERGED_TOKEN, "true");
360
4
            second_element.remove_from_parent();
361
4
            return Some(vec![chem_element]);
362
195
        }
363
195
        return None;
364
4.50k
    }
365
366
    /// split the string which has been checked to be all ASCII chars
367
2.54k
    fn split_string_chem_element<'a>(doc: &Document<'a>, leaf: Element<'a>) -> Option<Vec<Element<'a>>> {
368
2.54k
        let token_string = as_text(leaf).as_bytes();
369
2.54k
        let token_len = token_string.len();
370
2.54k
        let mut j = 0;
371
2.54k
        let mut new_children = Vec::with_capacity(token_string.len());
372
3.31k
        while j < token_len {
373
            // try elements of length 2 and 1, preferring longer elements (e.g., prefer "Na" over "N")
374
2.94k
            if let Some(
chem_element310
) = get_chem_element(doc, &token_string[j..], 2) {
375
310
                new_children.push(chem_element);
376
310
                j += 2;
377
310
                continue;
378
2.63k
            } else if let Some(
chem_element457
) = get_chem_element(doc, &token_string[j..], 1) {
379
457
                new_children.push(chem_element);
380
457
                j += 1;
381
457
                continue;
382
2.18k
            }
383
2.18k
            return None;    // didn't find a valid chem element
384
        }
385
362
        if new_children.len() <= 1 {
386
231
            return None;
387
131
        }
388
131
        add_attrs(new_children[new_children.len()-1], &leaf.attributes());
389
131
        new_children[new_children.len()-1].set_attribute_value(SPLIT_TOKEN, "true");
390
        // debug!("split_string_chem_element: {} -> {}", String::from_utf8(token_string.to_vec()).unwrap(), new_children.len());
391
131
        return Some(new_children);
392
2.54k
    }
393
394
    /// Returns element or None
395
5.78k
    fn get_chem_element<'a>(doc: &Document<'a>, bytes_str: &[u8], n: usize) -> Option<Element<'a>> {
396
        use std::str;
397
5.78k
        let len = bytes_str.len();
398
5.78k
        if n > len {
399
277
            return None;    // can't be an chemical letter
400
5.50k
        }
401
5.50k
        match str::from_utf8(&bytes_str[..n]) {
402
5.50k
            Ok(chem_element) => {
403
5.50k
                if CHEMICAL_ELEMENT_ELECTRONEGATIVITY.contains_key(chem_element) {
404
771
                    return Some(new_chemical_element(doc, chem_element));
405
4.73k
                }
406
4.73k
                return None;
407
            }
408
0
            Err(_) => return None,
409
        }
410
5.78k
    }
411
412
771
    fn new_chemical_element<'a>(doc: &Document<'a>, chem_element_str: &str) -> Element<'a> {
413
771
        let result = create_mathml_element(doc, "mi");
414
771
        result.set_text(chem_element_str);
415
771
        result.set_attribute_value(MAYBE_CHEMISTRY, if chem_element_str.len() == 1 {
"1"457
} else {
"3"314
});
416
771
        if chem_element_str.len() == 1 {
417
457
            result.set_attribute_value("mathvariant", "normal");
418
457
        
}314
419
771
        return result;
420
771
    }
421
12.3k
}
422
423
/// Looks at the children of the element and uses heuristics to decide whether this is a chemical equation/formula
424
/// If it is, it is marked with either data-chem-equation or data-chem-formula
425
/// This function assumes proper structure
426
/// 
427
/// Returns true if not chemistry -- added attrs, mrows, and leaves are removed in preparation for a second parse
428
5.05k
pub fn scan_and_mark_chemistry(mathml: Element) -> bool {
429
5.05k
    if is_chemistry_off(mathml) {
430
0
        return true;
431
5.05k
    }
432
433
5.05k
    let child = as_element(mathml.children()[0]);
434
    // debug!("scan_and_mark_chemistry:\n{}", mml_to_string(child));
435
5.05k
    assert_eq!(name(mathml), "math");
436
5.05k
    let is_chemistry = if let Some(
latex5
) = mathml.attribute_value("data-latex") {
437
        // MathJax v4 includes this really useful info -- if it starts \ce -- we have Chemistry
438
        // need to determine if it is an equation or a formula
439
5
        latex.trim_start().starts_with(r"\ce") 
440
    } else {
441
5.05k
        has_chem_intent(mathml, ":chemical-formula") || has_chem_intent(mathml, ":chemical-equation")
442
    };
443
444
5.05k
    if is_chemistry || 
is_chemistry_sanity_check5.05k
(
mathml5.05k
) {
445
669
        assert_eq!(mathml.children().len(), 1);
446
669
        let likelihood = likely_chem_formula(child);
447
669
        if likelihood >= CHEMISTRY_THRESHOLD || 
has_chem_intent458
(
mathml458
,
":chemical-formula"458
) {
448
211
            child.set_attribute_value(MAYBE_CHEMISTRY, std::cmp::max(CHEMISTRY_THRESHOLD, likelihood).to_string().as_str());
449
211
            set_marked_chemistry_attr(child, CHEM_FORMULA);
450
458
        }
451
452
669
        if child.attribute(CHEM_FORMULA).is_none() {
453
            // can't be both an equation and a formula...
454
458
            let likelihood = likely_chem_equation(child);
455
458
            if is_chemistry || 
likelihood >= CHEMISTRY_THRESHOLD455
||
has_chem_intent422
(
mathml422
,
":chemical-equation"422
) {
456
36
                child.set_attribute_value(MAYBE_CHEMISTRY, std::cmp::max(CHEMISTRY_THRESHOLD, likelihood).to_string().as_str());
457
36
                set_marked_chemistry_attr(child, CHEM_EQUATION);
458
422
            }
459
211
        }
460
4.38k
    }
461
    // debug!("...after marking:\n{}", mml_to_string(child));
462
463
5.05k
    if child.attribute(CHEM_FORMULA).is_none() && 
child4.84k
.attribute(CHEM_EQUATION).
is_none4.84k
() {
464
4.80k
        if !has_maybe_chemistry(mathml) {
465
3.68k
            return true;    // quick check avoids needing a second parse due to removing added elements
466
1.12k
        }
467
1.12k
        return !is_changed_after_unmarking_chemistry(mathml);
468
    } else {
469
247
        return true;
470
    }
471
5.05k
}
472
473
// returns the marked attr value or None
474
16.2k
fn get_marked_value(mathml: Element) -> Option<i32> {
475
16.2k
    return mathml.attribute_value(MAYBE_CHEMISTRY).map(|value| 
value3.11k
.
parse3.11k
().
unwrap3.11k
());
476
16.2k
}
477
478
/// Sets the attr 'chem'
479
/// Recurse through all the children that have MAYBE_CHEMISTRY set
480
4.24k
fn set_marked_chemistry_attr(mathml: Element, chem: &str) {
481
4.24k
    let tag_name = name(mathml);
482
4.24k
    if let Some(
maybe_attr2.88k
) = mathml.attribute(MAYBE_CHEMISTRY) {
483
2.88k
        maybe_attr.remove_from_parent();
484
485
2.88k
        match tag_name {
486
2.88k
            "mi" | 
"mtext"2.09k
=>
{852
mathml852
.
set_attribute_value852
(
CHEM_ELEMENT852
, maybe_attr.value());},
487
2.03k
            "mo" => {
488
686
                if mathml.attribute(CHEM_FORMULA_OPERATOR).is_none() && 
mathml589
.attribute(CHEM_EQUATION_OPERATOR).
is_none589
(){
489
                    // don't mark as both formula and equation
490
433
                    mathml.set_attribute_value(if chem == CHEM_FORMULA {
CHEM_FORMULA_OPERATOR216
} else {
CHEM_EQUATION_OPERATOR217
}, maybe_attr.value());
491
253
                }
492
            },
493
1.35k
            "mn" => 
()87
,
494
1.26k
            "mrow" | 
"msub"515
|
"msup"275
|
"msubsup"216
|
"mmultiscripts"213
=> {
495
1.25k
                let mut chem_name = chem;
496
1.25k
                if tag_name != "mrow" && 
chem != CHEM_FORMULA505
{
497
                    // look at base -- if an mi/mtext then this is really a chemical formula
498
69
                    let base = as_element(mathml.children()[0]);
499
69
                    let base_name = name(base);
500
69
                    if base_name == "mi" || 
base_name == "mtext"8
{
501
63
                        chem_name = CHEM_FORMULA;
502
63
                    
}6
503
1.18k
                }
504
505
1.25k
                if mathml.attribute(CHEM_FORMULA).is_none() {
506
1.23k
                    // don't mark as both formula and equation
507
1.23k
                    mathml.set_attribute_value(chem_name, maybe_attr.value());
508
1.23k
                
}18
509
3.92k
                for child in 
mathml1.25k
.
children1.25k
() {
510
3.92k
                    set_marked_chemistry_attr(as_element(child), chem);
511
3.92k
                };
512
            }
513
10
            "mfrac" => {
514
0
                let children = mathml.children();
515
                // debug!("mfrac children: {}", mml_to_string(mathml));
516
0
                let numerator_is_chem_equation = IsBracketed::is_bracketed(as_element(children[0]), "[", "]", false, true);
517
0
                let denominator_is_chem_equation = IsBracketed::is_bracketed(as_element(children[1]), "[", "]", false, true);
518
0
                if  numerator_is_chem_equation && denominator_is_chem_equation {
519
0
                    mathml.set_attribute_value(CHEM_EQUATION, "true");
520
0
                }
521
            }
522
10
            _ => error!("Internal error: {tag_name} should not be marked as 'MAYBE_CHEMISTRY'"),
523
        }
524
1.35k
    } else if tag_name == "mrow" {
525
        // could have been added during canonicalization, so never marked. Recurse to the children
526
68
        for child in 
mathml33
.
children33
() {
527
68
            set_marked_chemistry_attr(as_element(child), chem);
528
68
        };
529
1.32k
    }
530
4.24k
}
531
532
/// returns true if MAYBE_CHEMISTRY's occur within the element
533
41.3k
fn has_maybe_chemistry(mathml: Element) -> bool {
534
41.3k
    if mathml.attribute(MAYBE_CHEMISTRY).is_some() {
535
1.12k
        return true;
536
40.2k
    }
537
40.2k
    if !is_leaf(mathml) {
538
36.5k
        for child in 
mathml17.9k
.
children17.9k
() {
539
36.5k
            if has_maybe_chemistry(as_element(child)) {
540
3.15k
                return true;
541
33.3k
            }
542
        }
543
22.2k
    }
544
37.0k
    return false;
545
41.3k
}
546
547
/// Clears MAYBE_CHEMISTRY from this element and its decedents
548
/// Also deletes added mrows and leaves; returns true if anything is deleted
549
19.7k
fn is_changed_after_unmarking_chemistry(mathml: Element) -> bool {
550
19.7k
    mathml.remove_attribute(MAYBE_CHEMISTRY);
551
19.7k
    if is_leaf(mathml) {
552
        // don't bother testing for the attr -- just remove and nothing bad happens if they aren't there
553
13.3k
        mathml.remove_attribute(CHEM_FORMULA_OPERATOR);
554
13.3k
        mathml.remove_attribute(CHEM_EQUATION_OPERATOR);
555
13.3k
        mathml.remove_attribute(CHEMICAL_BOND);
556
13.3k
        if mathml.attribute(MERGED_TOKEN).is_some() {
557
3
            unmerge_element(mathml);
558
3
            return true;    // need to re-parse
559
13.3k
        } else if mathml.attribute(SPLIT_TOKEN).is_some() {
560
33
            if let Err(
err0
) = merge_element(mathml) {
561
0
                panic!("{}", err);
562
33
            }
563
            // debug!("After merge_element:{}", mml_to_string(mathml));
564
            // let parent = get_parent(mathml);
565
            // debug!("After merge_element: -- parent{}", mml_to_string(parent));
566
567
13.3k
        } else if let Some(
changed_value2.14k
) = mathml.attribute_value(CHANGED_ATTR) &&
568
2.14k
                  changed_value == ADDED_ATTR_VALUE &&
569
2.11k
                  name(mathml) != "mtext" {  // a hack fix for #477 (chem never modifies mtext, so this is ok)
570
2.11k
            mathml.remove_from_parent();
571
2.11k
            return true;
572
11.1k
        }
573
11.2k
        return false;
574
6.38k
    } else if IsNode::is_scripted(mathml) &&
575
1.04k
              name(as_element(mathml.children()[0])) == "mi" &&
576
575
              as_element(mathml.children()[0]).attribute(SPLIT_TOKEN).is_some() {
577
        // Undo a split that happened in a scripted element.
578
        // We put the preceding elements into the base and call merge_element on the last element of the base
579
        // The first and/or the last child in the sequence could be a script that needs to be unwrapped
580
1
        let mut parent = get_parent(mathml);   // there is always a "math" node
581
        // debug!("mathml:\n{}", mml_to_string(mathml));
582
        // debug!("parent before merge:\n{}", mml_to_string(parent));
583
        // debug!("grandparent before merge:\n{}", mml_to_string(get_parent(parent)));
584
585
1
        let mut preceding_children = mathml.preceding_siblings();
586
        // could be no preceding children to canonicalization creating mrows (see issue #303), so might need to use parent, etc
587
2
        while preceding_children.is_empty() {
588
1
            preceding_children = parent.preceding_siblings();
589
1
            if name(parent) == "math" {
590
0
                break;  // consider {SIN}^{-1} -- no preceding child
591
1
            }
592
1
            parent = get_parent(parent);
593
        }
594
595
1
        let mut new_script_children = vec![];
596
1
        if !preceding_children.is_empty() {
597
            // deal with the first element (if it needs unwrapping, it has only prescripts)
598
1
            let first_element_of_split = as_element(preceding_children[preceding_children.len()-1]);
599
            // debug!("first_element_of_split: \n{}", mml_to_string(first_element_of_split));
600
1
            if name(first_element_of_split) == "mmultiscripts" {
601
                // take the base and make it the first child of preceding_children (what will get merged)
602
                // put the rest of the elements (the prescripts) at the end of the parent last element (mathml) which must be an mmultiscripts
603
0
                let first_element_children = first_element_of_split.children();
604
0
                assert_eq!(name(mathml), "mmultiscripts");
605
0
                let mut script_children = mathml.children();
606
0
                assert_eq!(name(as_element(script_children[0])), "mi");
607
0
                assert!(!script_children.len().is_multiple_of(2));  // doesn't have <mprescripts/>
608
0
                script_children.push(first_element_children[1]);    // mprescripts
609
0
                script_children.push(first_element_children[2]);    // prescripts subscript
610
0
                script_children.push(first_element_children[3]);    // prescripts superscript
611
612
0
                let base_of_first_element = first_element_children[0];  // base
613
0
                assert_eq!(name(as_element(base_of_first_element)), "mi");
614
0
                let script_base = as_element(script_children[0]);
615
0
                let mut merged_base_text = as_text( as_element(base_of_first_element)).to_string();
616
0
                merged_base_text.push_str(as_text(script_base));
617
0
                script_base.set_text(&merged_base_text);
618
0
                script_base.remove_attribute("mathvariant");
619
0
                script_base.remove_attribute(ADDED_ATTR_VALUE);
620
0
                script_base.remove_attribute(MAYBE_CHEMISTRY);
621
0
                script_base.remove_attribute(SPLIT_TOKEN);
622
0
                mathml.replace_children(script_children);
623
        
624
0
                first_element_of_split.remove_from_parent();
625
0
                return true;
626
1
            }
627
1
            new_script_children.push(ChildOfElement::Element(first_element_of_split));
628
0
        }
629
1
        debug!("mathml after handling preceding children:\n{}", 
mml_to_string0
(
mathml0
));
630
1
        let mut children_of_script = mathml.children();
631
1
        let split_child = as_element(children_of_script[0]);
632
1
        new_script_children.append(&mut children_of_script);
633
1
        mathml.replace_children(new_script_children);     // temporarily has bad number of children 
634
        // debug!("After making bad script:\n{}", mml_to_string(mathml));
635
1
        if let Err(
err0
) = merge_element(split_child) {
636
0
            panic!("{}", err);
637
1
        }
638
1
        return true;
639
    } else {
640
6.37k
        let mut answer = false;
641
18.5k
        for child in 
mathml6.37k
.
children6.37k
() {
642
18.5k
            let child = as_element(child);
643
18.5k
            if name(child) == "mtd" && 
child77
.attribute(MAYBE_CHEMISTRY).
is_some77
() {
644
2
                answer = true;  // each mtd acts as a potential island for chemistry, so don't clear it
645
18.5k
            } else {
646
18.5k
                answer |= is_changed_after_unmarking_chemistry(child);
647
18.5k
            }
648
        }
649
6.37k
        if name(mathml) == "mrow" {
650
3.58k
            if let Some(
changed_value2.86k
) = mathml.attribute_value(CHANGED_ATTR) {
651
                // we added an mrow, we can remove it -- but this might be already processed which is the case if "data-id-added" is true (exists)
652
2.86k
                if changed_value == ADDED_ATTR_VALUE && mathml.attribute("data-id-added").is_none() {
653
                    // mrows get added for several reasons. One of them is to canonicalize elements like msqrt that can have 1 or more children;
654
                    //   those should not get removed because the re-parse doesn't add those
655
                    // Although they would never be added, elements with fixed number of children also shouldn't have the mrow go away
656
                    // We are left with only removing mrows with one child or mrows that are children of mrows (simpler test than ELEMENTS_WITH_ONE_CHILD)
657
2.86k
                    let parent = get_parent(mathml);   // mathml is mrow, so parent always exists
658
2.86k
                    if mathml.children().len() == 1 || 
name(parent) == "mrow"2.84k
{
659
6.26k
                        let 
children2.31k
=
mathml.children().iter()2.31k
.
map2.31k
(|&el| as_element(el)).
collect2.31k
::<Vec<Element>>();
660
2.31k
                        mathml.remove_attribute(CHANGED_ATTR);  // if just one child, the attrs are pushed onto the child
661
                        // debug!("is_changed_after_unmarking: before replace - parent\n{}", mml_to_string(parent));
662
2.31k
                        replace_children(mathml, children);
663
                        // debug!("is_changed_after_unmarking: parent\n{}", mml_to_string(parent));
664
665
557
                    }
666
0
                }
667
720
            }
668
3.58k
            return true;
669
2.79k
        }
670
2.79k
        return answer;
671
    }
672
673
3
    fn unmerge_element(mathml: Element) {
674
        // a merged token occurs when two single letters get merged into one. Here we recreate the two tokens
675
3
        assert!(is_leaf(mathml));
676
        // debug!("unmerge_element: {}", mml_to_string(mathml));
677
3
        let mut token_str = as_text(mathml).chars();
678
3
        let first = create_mathml_element(&mathml.document(), name(mathml));
679
3
        first.set_text(&token_str.next().unwrap().to_string());
680
3
        let second = create_mathml_element(&mathml.document(), name(mathml));
681
3
        second.set_text(&token_str.next().unwrap().to_string());
682
3
        replace_children(mathml, vec![first, second]);
683
3
    }
684
685
    /// Put the split pieces back together (undo the split)
686
34
    fn merge_element(mathml: Element) -> Result<()> {
687
        // debug!("merge_element: {}", mml_to_string(mathml));
688
        // debug!("merge_element parent: {}", mml_to_string(get_parent(mathml)));
689
34
        assert!(is_leaf(mathml));
690
34
        let mut preceding_children = mathml.preceding_siblings();
691
        // debug!("preceding_children: {}", preceding_children.iter().map(|&el| name(as_element(el)).to_string()).collect::<Vec<String>>().join(", "));
692
34
        if preceding_children.is_empty() {
693
            // handle:
694
            // * case where we have mi mmultiscripts mi ... where the second mi needs to join with the first (see test mhchem_so4)
695
            // * case where the child got buried in an added mrow (can only happen one level deep because invisible times should get inserted)
696
0
            let parent = get_parent(mathml);   // mathml is leaf, so parent always exists
697
0
            preceding_children = parent.preceding_siblings();
698
0
            if preceding_children.is_empty() ||
699
0
               !(name(parent) == "mmultiscripts" ||
700
0
                (name(parent) == "mrow" && parent.attribute_value(CHANGED_ATTR).is_some() &&
701
0
                 parent.attribute_value(CHANGED_ATTR).unwrap() == ADDED_ATTR_VALUE)) {
702
0
                    bail!("Internal error: {} should not have been split'", mml_to_string(mathml));
703
0
            }
704
34
        }
705
        // Note: there was an invisible U+2063, but it was removed before we got here
706
        // The parent mrow could have many children that couldn't have been part of a split -- only consider feasible children to split (mi/mtext)
707
        // To figure this out, we walk backwards adding the text in reverse and then reverse that text in the end
708
34
        let mut merged_text = Vec::default();
709
46
        for &child in 
preceding_children.iter()34
.
rev34
() {
710
46
            let child = as_element(child);
711
            // because this is before canonicalization, there could be an mrow with just mi/mtext
712
46
            if name(child) == "mrow" && 
child.children().len() == 10
&&
child.attribute(INTENT_ATTR)0
.
is_none0
() {
713
0
                // "lift" the child up so all the links (e.g., siblings) are correct
714
0
                let child = as_element(child.children()[0]);
715
0
                set_mathml_name(child, name(child));
716
0
                crate::canonicalize::add_attrs(child, &child.attributes());
717
0
                child.replace_children(child.children());
718
46
            }
719
46
            if name(child) != "mi" && 
name(child) != "mtext"12
{
720
12
                break;
721
34
            }
722
34
            merged_text.push(as_text(child));
723
34
            child.remove_from_parent();
724
        }
725
34
        merged_text.reverse();
726
34
        let mut merged_text = merged_text.join("");
727
34
        merged_text.push_str(as_text(mathml));
728
34
        mathml.set_text(&merged_text);
729
34
        mathml.remove_attribute("mathvariant");
730
34
        mathml.remove_attribute(ADDED_ATTR_VALUE);
731
34
        mathml.remove_attribute(MAYBE_CHEMISTRY);
732
34
        mathml.remove_attribute(SPLIT_TOKEN);
733
34
        return Ok( () );
734
34
    }
735
19.7k
}
736
737
/// Returns true only if 'mathml' potentially is chemistry.
738
/// This assumes canonicalization has happened and that 'mathml' is the 'math' element
739
5.05k
fn is_chemistry_sanity_check(mathml: Element) -> bool {
740
    // This does some sanity checking. More can definitely be done
741
    // Checks:
742
    // * there should be chemical elements
743
    // * if the child is an mrow with three children, the operator should be '=' (not CHEMICAL_BOND) or  an arrow
744
    //   in this case, we gather up the elements on the lhs and rhs. The sets should be equal and non-empty.
745
    //   the exception is if there are prescripts, in which as we might have radioactive decay so we don't require the sets to be equal
746
    // * otherwise, we gather up all the chemical elements and make sure the set is non-empty
747
    // * if it isn't an mrow, we leave it to likely_chem_equation() to rule it out
748
5.05k
    assert_eq!(name(mathml), "math");
749
5.05k
    assert_eq!(mathml.children().len(), 1);
750
5.05k
    let mathml = as_element(mathml.children()[0]);
751
5.05k
    if name(mathml) == "mrow" {
752
3.29k
        let mrow_children = mathml.children();
753
3.29k
        if mrow_children.len() == 3 && 
is_arrow_or_equal2.52k
(
as_element2.52k
(
mrow_children[1]2.52k
)) {
754
371
            let mut lhs_elements = HashSet::with_capacity(8);   // likely more than anything we'll encounter -- bigger affects '=' op
755
371
            let lhs_has_prescripts = gather_chemical_elements(as_element(mrow_children[0]), &mut lhs_elements);
756
            // need to include the arrow as it might have the addition of some chemical elements (see UEB/iceb.rs/chem_16_5_2)
757
371
            gather_chemical_elements(as_element(mrow_children[1]), &mut lhs_elements);
758
371
            let mut rhs_elements = HashSet::with_capacity(8);  // likely more than anything we'll encounter -- bigger affects '=' op
759
371
            let rhs_has_prescripts = gather_chemical_elements(as_element(mrow_children[2]), &mut rhs_elements);
760
371
            if lhs_elements.is_empty() {
761
269
                return false;
762
102
            }
763
            // debug!("lhs/rhs elements: {:?}, {:?}", lhs_elements, rhs_elements);
764
            // debug!("lhs/rhs has prescripts: {}, {}", lhs_has_prescripts, rhs_has_prescripts);
765
102
            if lhs_elements == rhs_elements {
766
37
                return !(lhs_has_prescripts ^ rhs_has_prescripts);      // seems reasonable that if the lhs has prescripts, so should the rhs
767
65
            }
768
65
            return lhs_has_prescripts && 
rhs_has_prescripts32
; // non-equal sets only if radioactive decay.
769
2.92k
        }
770
1.76k
    }
771
4.68k
    let mut chem_elements = HashSet::with_capacity(8);   // likely more than anything we'll encounter -- bigger affects '=' op
772
4.68k
    gather_chemical_elements(mathml, &mut chem_elements);
773
4.68k
    return !chem_elements.is_empty();
774
775
    
776
2.52k
    fn is_arrow_or_equal(mathml: Element) -> bool {
777
2.52k
        let base = get_possible_embellished_node(mathml);
778
2.52k
        if name(base) != "mo" || 
mathml.attribute(CHEMICAL_BOND)1.98k
.
is_some1.98k
() {
779
542
            return false;
780
1.98k
        }
781
1.98k
        let text = as_text(base);
782
1.98k
        return text == "=" || 
is_single_char_matching1.67k
(
text1.67k
, is_chem_equation_arrow);
783
784
2.52k
    }
785
786
    /// Gather up all the chemical elements in the element and return true if it has numerical prescripts
787
48.3k
    fn gather_chemical_elements<'a>(mathml: Element<'a>, chem_elements: &mut HashSet<&'a str>) -> bool {
788
48.3k
        match name(mathml) {
789
48.3k
            "mi" | 
"mtext"37.7k
=> {
790
10.8k
                if is_chemical_element(mathml) {
791
1.60k
                    chem_elements.insert(as_text(mathml));
792
9.27k
                }
793
10.8k
                return false;
794
            },
795
37.4k
            "msub" | 
"msup"36.7k
|
"msubsup"35.6k
|
"mmultiscripts"35.5k
=> {
796
2.16k
                gather_chemical_elements(get_possible_embellished_node(mathml), chem_elements);
797
2.16k
                return name(mathml) == "mmultiscripts" &&  
has_numerical_prescripts291
(
mathml291
);
798
            },
799
35.2k
            "semantics" => {
800
0
                return gather_chemical_elements( get_presentation_element(mathml).1, chem_elements );
801
            },
802
35.2k
           _ => if is_leaf(mathml) { return 
false21.5k
;
}13.7k
,
803
        }
804
    
805
        // mrow, msqrt, etc
806
13.7k
        let mut has_prescripts = false;
807
40.3k
        for child in 
mathml13.7k
.
children13.7k
() {
808
40.3k
            let child = as_element(child);
809
40.3k
            has_prescripts |= gather_chemical_elements(child, chem_elements);
810
40.3k
        }
811
13.7k
        return has_prescripts;
812
48.3k
    }
813
814
        /// find the mprescripts child and then check the following siblings for numerical prescripts
815
291
    fn has_numerical_prescripts(mathml: Element) -> bool {
816
291
        let children = mathml.children();
817
        // quick check to see if there is an mprescripts child
818
291
        if !children.len().is_multiple_of(2) { // <mprescripts/> => even number of children
819
129
            return false;
820
162
        }
821
        // we need enumerate because the "step_by" will cause any returned iterator to jump ahead by 2
822
162
        let i_mprescripts = children.iter()
823
162
            .enumerate()
824
162
            .skip(1)
825
162
            .step_by(2)
826
222
            .
find162
(|(_, child)| name(as_element(**child)) == "mprescripts")
827
162
            .map(|(i, _)| i);
828
829
162
        if let Some(i) = i_mprescripts {
830
162
            let subscript = as_element(children[i+1]);  // can be +1/-1 for beta decay
831
162
            let superscript = as_element(children[i+2]);  // mass number, so always >= 0
832
162
            if name(superscript) != "mn" {
833
55
                return false;
834
107
            }
835
107
            return name(subscript) == "mn" ||
836
36
                   (name(subscript) == "mrow" && 
subscript.children().len() == 331
&&
837
0
                    name(as_element(subscript.children()[3])) == "mm" && 
838
0
                    name(as_element(subscript.children()[1])) == "mo" &&
839
0
                    matches!(as_text(as_element(subscript.children()[1])), "+" | "-"));
840
0
        }
841
0
        return false;
842
291
    }
843
5.05k
}
844
845
/// Looks at the children of the element and uses heuristics to decide whether this is a chemical equation.
846
/// This assumes canonicalization of characters has happened
847
713
fn likely_chem_equation(mathml: Element) -> i32 {
848
    // mfrac -- could be a ratio of concentrations
849
713
    if name(mathml) != "mrow" && 
name(mathml) != "mtd"127
&&
name(mathml) != "mfrac"120
{
850
119
        return NOT_CHEMISTRY;
851
594
    }
852
853
    // debug!("start likely_chem_equation:\n{}", mml_to_string(mathml));
854
  // mrow -- check the children to see if we are likely to be a chemical equation
855
856
    // concentrations should either be unscripted or have a superscript that isn't a charge
857
    // they occur in an mrow or mfrac
858
594
    if IsBracketed::is_bracketed(mathml, "[", "]", false, true) {
859
10
        let parent_name = name(get_parent(mathml));
860
10
        if parent_name == "mfrac" || parent_name == "mrow"  || 
parent_name == "math"9
||
861
0
           (parent_name == "msup" && likely_chem_superscript(as_element(mathml.following_siblings()[0])) < 0){
862
10
            return if as_element(mathml.children()[0]).attribute(CHEM_FORMULA).is_some() {
CHEMISTRY_THRESHOLD0
} else {NOT_CHEMISTRY};
863
0
        }
864
584
    }
865
    
866
    // possible improvement -- give bonus points for consecutive (not counting invisible separators) chemical elements on top of the existing points
867
584
  let mut likelihood = 0;           // indicator of likely match
868
584
  let mut has_equilibrium_constant = false;
869
584
    let children = mathml.children();
870
1.22k
  for i in 
0..children.len()584
{
871
1.22k
    let child = as_element(children[i]);
872
        // debug!("   i={}, likelihood={}, child={}", i, likelihood, crate::canonicalize::element_summary(child));
873
1.22k
        if let Some(
likely457
) = get_marked_value(child) {
874
457
            likelihood += likely;
875
457
            continue;
876
771
        }
877
771
    if i == children.len()-1 {
878
195
            let likely = likely_chem_state(child);
879
195
            if likely > 0 {
880
0
                likelihood += likely;
881
0
                break;
882
195
      }
883
            // otherwise, check the last element as normal
884
576
        }
885
771
        let tag_name = name(child);
886
771
        let likely = match tag_name {
887
771
            "mi" => 
likely_chem_element146
(
child146
),
888
625
            "mn" => 
09
, // not much info
889
616
            "mo" | 
"mover"372
|
"munder"352
|
"munderover"308
=>
likely_chem_equation_operator330
(
child330
),
890
286
            "msub" | 
"msup"259
|
"msubsup"254
|
"mmultiscripts"252
=> {
891
38
                if is_equilibrium_constant(child) {
892
0
                    has_equilibrium_constant = true;
893
0
                    2
894
                } else {
895
38
                    likely_adorned_chem_formula(child)
896
                }
897
            },
898
248
            "mfrac" => {
899
0
                if has_equilibrium_constant {
900
0
                    2
901
                } else {
902
0
                    -3    // fraction tend only to appear after an equilibrium constant
903
                }
904
            },
905
248
            "mrow" => {
906
248
                let likely = likely_chem_formula(child);
907
248
                if likely < 0 {
908
248
                    likely_chem_equation(child)
909
                } else {
910
0
                    likely
911
                }     
912
            },
913
            // no need to check for mtr or mtd because they only exist in a table and the recursion is dealt with here.
914
0
            "mtable" => {
915
0
                for mrow in child.children() {
916
0
                    let mrow = as_element(mrow);
917
0
                    for mtd in mrow.children() {
918
0
                        let mtd = as_element(mtd);
919
0
                        let mut likely = likely_chem_formula(mtd);
920
0
                        if likely < CHEMISTRY_THRESHOLD {
921
0
                            likely = likely_chem_equation(mtd);
922
0
                        }     
923
0
                        if likely < CHEMISTRY_THRESHOLD {
924
0
                            is_changed_after_unmarking_chemistry(mtd);
925
0
                        }     
926
                    }
927
                }
928
0
                NOT_CHEMISTRY
929
            },
930
0
            "semantics" => {
931
0
                likely_chem_equation(get_presentation_element(mathml).1)
932
            },
933
0
            _ => NOT_CHEMISTRY,
934
        };
935
771
        if likely >= 0 {
936
164
            child.set_attribute_value(MAYBE_CHEMISTRY, &likely.to_string());
937
607
        }
938
771
        likelihood += likely;
939
771
        if likelihood < NOT_CHEMISTRY_THRESHOLD {
940
396
            return NOT_CHEMISTRY;
941
375
        }
942
    }
943
944
188
    if likelihood >= 0 {
945
108
        mathml.set_attribute_value(MAYBE_CHEMISTRY, &likelihood.to_string());
946
108
    
}80
947
188
    return likelihood;
948
713
}
949
950
951
/// could be a number, a state ("(l)", "(g)", etc), or a number followed by a state
952
1.19k
fn likely_chem_subscript(subscript: Element) -> i32 {
953
1.19k
    let subscript_name = name(subscript);
954
1.19k
    if  subscript_name == "mn" && 
!as_text(subscript).contains('.')676
{
955
674
        return 0;       // not really much chem info about an integer subscript
956
525
    } else if subscript_name == "mi" {
957
328
        let text = as_text(subscript);
958
328
        if text == "s" || 
text == "l"323
||
text == "g"323
||
text == "aq"323
{
959
6
            subscript.set_attribute_value(CHEM_STATE, "true");
960
6
            return 2;
961
322
        }
962
197
    } else if subscript_name == "mrow" {
963
        // debug!("likely_chem_subscript:\n{}", mml_to_string(subscript));
964
184
        let children = subscript.children();
965
184
        if children.len() == 3 && 
IsBracketed::is_bracketed71
(
subscript71
,
"("71
,
")"71
, false, true) {
966
6
            return likely_chem_subscript(as_element(children[1]));
967
178
        }
968
178
        let i_first_child = as_element(children[0]);
969
178
        if children.len() == 2 &&
970
103
           name(i_first_child) == "mn" && 
!as_text(i_first_child).contains('.')81
&&
971
81
           name(as_element(children[1])) == "mrow" &&
972
0
           likely_chem_state(as_element(children[1])) > 0 { // notation used in en.wikipedia.org/wiki/Electrolyte#Formation
973
0
                return 2;
974
178
        }     
975
13
    }
976
    // could be a variable 'n' or something else -- just not likely
977
513
    return -3
978
1.19k
}
979
980
17
fn small_roman_to_number(text: &str) -> &str {
981
    // simplest to do a look up
982
    static ROMAN_TO_NUMBER: phf::Map<&str, &str> = phf_map! {
983
        "I" => "1", "II" => "2", "III" => "3", "IV" => "4", "V" => "5", "VI" => "6", "VII" => "7", "VIII" => "8", "IX" => "9",
984
    };
985
17
    return ROMAN_TO_NUMBER.get(text).unwrap_or(&"");
986
987
17
}
988
989
1.65k
fn likely_chem_superscript(sup: Element) -> i32 {
990
    // either one or more '+'s (or '-'s) or a number followed by +/-
991
    // also could be state (en.wikipedia.org/wiki/Nuclear_chemistry#PUREX_chemistry)
992
    // bullet is radical (en.wikipedia.org/wiki/Radical_(chemistry)#Depiction_in_chemical_reactions); mhchem uses dot operator
993
    //  these can stand alone, be followed by +/- or have a number in front "(2•)-"" [examples from mhchem documentation]
994
    // roman numerals are "oxidation state" and range from -4 to +9
995
3
    static MULTIPLE_PLUS_OR_MINUS_OR_DOT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\++$|^-+$|^\U{2212}+$|^[⋅∙•][-+\U{2212}]*$").unwrap());
996
3
    static SINGLE_PLUS_OR_MINUS_OR_DOT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[+-\U{2212}⋅∙•]$").unwrap());
997
    static DOTS: &[char; 3] = &['⋅', '∙', '•'];
998
1.65k
    let sup_name = name(sup);
999
1.65k
    if sup_name == "mo" && 
MULTIPLE_PLUS_OR_MINUS_OR_DOT226
.
is_match226
(as_text(sup)) {
1000
113
        if as_text(sup).find(DOTS).is_some() {
1001
7
            sup.set_attribute_value(MAYBE_CHEMISTRY, "1");
1002
7
            sup.set_attribute_value(CHEM_FORMULA_OPERATOR, "1");   // value doesn't really matter
1003
106
        }
1004
113
        return if as_text(sup).len()==1 {
198
} else {
215
};
1005
1.54k
    } else if (sup_name == "mi" || 
sup_name == "mn"1.36k
||
sup_name=="mtext"548
) &&
SMALL_UPPER_ROMAN_NUMERAL1.00k
.
is_match1.00k
(as_text(sup)){
1006
17
        sup.set_attribute_value("data-number", small_roman_to_number(as_text(sup)));
1007
17
        sup.set_attribute_value(MAYBE_CHEMISTRY, "2");
1008
17
        return 2;
1009
1.52k
    } else if sup_name == "mrow" {
1010
        // look for something like '2+'
1011
311
        let children = sup.children();
1012
311
        if children.len() == 2 {
1013
177
            let first = as_element(children[0]);
1014
177
            let second = as_element(children[1]);
1015
177
            if name(first) == "mn" && 
name(second) == "mo"79
&&
!as_text(first).contains('.')55
{
1016
55
                let second_text = as_text(second);
1017
55
                if SINGLE_PLUS_OR_MINUS_OR_DOT.is_match(second_text) {
1018
55
                    if second_text.find(DOTS).is_some() {
1019
0
                        second.set_attribute_value(MAYBE_CHEMISTRY, "2");
1020
0
                        second.set_attribute_value(CHEM_FORMULA_OPERATOR, "2");   // value doesn't really matter
1021
55
                    }
1022
55
                    sup.set_attribute_value(MAYBE_CHEMISTRY, "3");
1023
55
                    return 3;   // ending with a +/- makes it likely this is an ion
1024
0
                }
1025
122
            }
1026
134
        }
1027
        // gather up the text and see if it is all +, -, etc
1028
256
        let mut text = "".to_string();
1029
414
        for child in 
&children256
{ // 'children' used later, so need to borrow rather than move
1030
414
            let child = as_element(*child);
1031
414
            if name(child) == "mo" {
1032
169
                text.push_str(as_text(child));
1033
169
            } else {
1034
                // could have something like 'mrow(mrow 2n, -)  (chem example 5-9) -- so fallback to still ok if ends with + or -
1035
245
                let last_super_child = as_element(children[children.len()-1]);
1036
245
                if name(last_super_child) == "mo" {
1037
7
                    let text = as_text(last_super_child);
1038
7
                    if text == "+" || text == "-" {
1039
1
                        sup.set_attribute_value(MAYBE_CHEMISTRY, "3");
1040
1
                        return 3;
1041
6
                    }
1042
238
                }
1043
244
                return NOT_CHEMISTRY;
1044
            }
1045
        }
1046
11
        if MULTIPLE_PLUS_OR_MINUS_OR_DOT.is_match(&text) {
1047
13
            for child in 
children6
{
1048
13
                let child = as_element(child);
1049
13
                if name(child) == "mo" && as_text(child).find(DOTS).is_some() {
1050
0
                    child.set_attribute_value(MAYBE_CHEMISTRY, "1");
1051
0
                    child.set_attribute_value(CHEM_FORMULA_OPERATOR, "1");   // value doesn't really matter
1052
13
                }
1053
            }
1054
6
            let likely = 2*text.len() as i32;
1055
6
            sup.set_attribute_value(MAYBE_CHEMISTRY, &likely.to_string());
1056
6
            return likely;
1057
5
        }
1058
1.21k
    }
1059
1.21k
    return NOT_CHEMISTRY
1060
1.65k
}
1061
1062
1063
/// chem_formula is likely if it is one of:
1064
/// * a (possibly adorned) chemical element
1065
/// * an operator that represents a bond
1066
/// * fences around a chemical formula
1067
/// * an mrow made up of only chemical formulas
1068
15.0k
fn likely_chem_formula(mathml: Element) -> i32 {
1069
    // debug!("start likely_chem_formula:\n{}", mml_to_string(mathml));
1070
15.0k
    if let Some(
value2.65k
) = get_marked_value(mathml) {
1071
2.65k
        return value;       // already marked
1072
12.3k
    }
1073
1074
12.3k
    let tag_name = name(mathml);
1075
12.3k
    let likelihood = match tag_name {
1076
        // a parent may clear the chem flags if something says can't be chemistry (e.g, a non chemically valid script)
1077
12.3k
        "mi" => 
likely_chem_element2.01k
(
mathml2.01k
),
1078
10.3k
        "mo" => 
likely_chem_formula_operator4.48k
(
mathml4.48k
),
1079
5.90k
        "mtext" => 
044
, // definitely need to skip empty mtext, but others are probably neutral also
1080
5.85k
        "mn" => 
01.98k
, // no info
1081
3.87k
        "msub" | 
"msup"3.76k
|
"msubsup"3.70k
|
"mmultiscripts"3.69k
=> {
1082
225
            likely_chem_formula(as_element(mathml.children()[0]));  // set MAYBE_CHEMISTRY attribute
1083
225
            likely_adorned_chem_formula(mathml)
1084
        },
1085
3.64k
        "mrow" => {
1086
3.41k
            let chem_state = likely_chem_state(mathml);
1087
3.41k
            if chem_state > 0 {
1088
18
                chem_state
1089
            } else {
1090
3.39k
                likely_mrow_chem_formula(mathml)
1091
            }
1092
        },
1093
232
        "mfrac" => {
1094
73
            let children = mathml.children();
1095
73
            let num_likely = likely_chem_formula(as_element(children[0]));
1096
73
            let denom_likely = likely_chem_formula(as_element(children[1]));
1097
73
            let likely = num_likely.max(denom_likely);
1098
73
            if likely < CHEMISTRY_THRESHOLD {NOT_CHEMISTRY} else {
likely0
}
1099
        }
1100
159
        "mtd" => {
1101
5
            let mut likely = likely_chem_formula(as_element(mathml.children()[0]));
1102
5
            if likely < CHEMISTRY_THRESHOLD {
1103
4
                likely = likely_chem_equation(mathml);
1104
4
            
}1
1105
5
            likely
1106
        }
1107
154
        "mtable" => {
1108
4
            for mrow in 
mathml2
.
children2
() {
1109
4
                let mrow = as_element(mrow);
1110
5
                for mtd in 
mrow4
.
children4
() {
1111
5
                    let mtd = as_element(mtd);
1112
5
                    let mut likely = likely_chem_formula(mtd);
1113
5
                    if likely < CHEMISTRY_THRESHOLD {
1114
3
                        likely = likely_chem_equation(mtd);
1115
3
                    
}2
1116
5
                    if likely < CHEMISTRY_THRESHOLD {
1117
3
                        is_changed_after_unmarking_chemistry(mtd);
1118
3
                    
}2
1119
                }
1120
            }
1121
2
            NOT_CHEMISTRY
1122
        },
1123
152
        "semantics" => {
1124
0
            likely_chem_formula(get_presentation_element(mathml).1)
1125
        },
1126
        _ => {
1127
152
            if !is_leaf(mathml) {
1128
                // mfrac, msqrt, etc
1129
320
                for child in 
mathml152
.
children152
() {
1130
320
                    let child = as_element(child);
1131
320
                    let likelihood = likely_chem_formula(child);
1132
320
                    if  likelihood > 0 {
1133
77
                        child.set_attribute_value(MAYBE_CHEMISTRY, likelihood.to_string().as_str());
1134
243
                    };
1135
                }
1136
0
            }
1137
            // debug!("NOT_CHEMISTRY:\n{}", mml_to_string(mathml));
1138
152
            NOT_CHEMISTRY
1139
        }
1140
    };
1141
12.3k
    if likelihood >= 0 {
1142
5.09k
        mathml.set_attribute_value(MAYBE_CHEMISTRY, &likelihood.to_string());
1143
7.30k
    }
1144
    // debug!("likely_chem_formula {}:\n{}", likelihood, mml_to_string(mathml));
1145
1146
12.3k
    return likelihood;
1147
1148
3.39k
    fn likely_mrow_chem_formula(mrow: Element) -> i32 {
1149
        // For parens, the only reason to add them is to group the children and then indicate that there is more than one molecule
1150
3.39k
        if IsBracketed::is_bracketed(mrow, "(", ")", false, false) ||
1151
3.14k
           IsBracketed::is_bracketed(mrow, "[", "]", false, false) {
1152
            // If it is bracketed, it should have a subscript to indicate the number of the element.
1153
            // We give a pass to unadorned bracketing chars
1154
310
            if mrow.children().len() != 3 {
1155
0
                return NOT_CHEMISTRY;
1156
310
            }
1157
310
            let contents = as_element(mrow.children()[1]);
1158
310
            let parent = get_parent(mrow);
1159
310
            let parent_is_scripted = IsNode::is_scripted(parent);
1160
310
            if name(contents) != "mrow" && 
!parent_is_scripted82
{
1161
53
                return NOT_CHEMISTRY;
1162
257
            }
1163
257
            let likely = likely_chem_formula(contents);
1164
257
            if parent_is_scripted {
1165
149
                return likely + 3;
1166
            } else {
1167
108
                return likely;
1168
            }
1169
3.08k
        }
1170
1171
3.08k
        let mut likelihood = if is_order_ok(mrow) {
0832
} else {
-42.25k
};
1172
1173
        // check all the children and compute the likelihood of that this is a chemical formula
1174
        // bonus point for consecutive chemical formula children (not counting invisible children)
1175
3.08k
        let mut last_was_likely_formula = 0;        // 0 is false, 1 is true
1176
3.08k
        let mut is_chem_formula = true;              // assume true until we prove otherwise (still want to mark the children)
1177
12.5k
        for child in 
mrow3.08k
.
children3.08k
() {
1178
12.5k
            let child = as_element(child);
1179
12.5k
            let likely = likely_chem_formula(child);
1180
            // debug!("   in mrow: likely={}, likelihood={}", likely, likelihood);
1181
12.5k
            match likely.cmp(&0) {
1182
                Ordering::Greater => { 
1183
2.56k
                    likelihood += likely + last_was_likely_formula;
1184
2.56k
                    last_was_likely_formula = if name(child) == "mo" {
0279
} else {
12.28k
};
1185
                },
1186
5.86k
                Ordering::Less => {
1187
5.86k
                    // debug!("in likely_chem_formula: FALSE: likelihood={}, child\n{}", likelihood, mml_to_string(child));
1188
5.86k
                    is_chem_formula = false;
1189
5.86k
                    last_was_likely_formula = 0;
1190
5.86k
                    likelihood += likely;
1191
5.86k
                },
1192
                Ordering::Equal => {
1193
4.08k
                    if name(child) == "mo" {
1194
2.27k
                        let text = as_text(child);
1195
2.27k
                        if text != "\u{2062}" && 
text != "\u{2063}"466
{ // one of these, we don't change the status
1196
8
                            last_was_likely_formula = 0;
1197
2.26k
                        }
1198
1.81k
                    }
1199
                },
1200
            }
1201
            // debug!("in likely_chem_formula likelihood={}, child\n{}", likelihood, mml_to_string(child));
1202
            // debug!("   likelihood={} (likely={})", likelihood, likely);
1203
        }
1204
1205
3.08k
        if !is_chem_formula || 
likelihood <= NOT_CHEMISTRY832
{
1206
            // the children may have looked have looked right, but something has said "not likely"
1207
2.25k
            return NOT_CHEMISTRY;
1208
832
        } else if likelihood < CHEMISTRY_THRESHOLD && 
is_short_formula387
(
mrow387
) {
1209
                    // debug!("is_short_formula is true for:\n{}", mml_to_string(mrow));
1210
47
                    return CHEMISTRY_THRESHOLD
1211
785
        }
1212
785
        return likelihood;
1213
3.39k
    }
1214
1215
15.0k
}
1216
1217
/// This does some checks that sort of follow IUPAC's "Red Book" in section IR-4.4.
1218
/// Those rules require knowledge that the program doesn't have (e.g., which bond is closest to the central atom).
1219
/// Instead, we mainly use the two main types of orderings: alphabetical and electronegativity.
1220
/// We first do a test to see if this looks like a structural formula -- if so, ordering doesn't apply.
1221
/// If a formula has groupings, each grouping is checked independently of the rest since
1222
///   there are cases where the outer ordering doesn't match the inner ordering.
1223
/// For "generalized salts", we need to split the elements into positive and negative ions, and within each group
1224
///   the order is suppose to be alphabetical but many use electronegativity (the point being there are two separate groups).
1225
/// This site has a nice summary of the rules: https://chemistry.stackexchange.com/questions/537/why-is-arsenous-acid-denoted-h3aso3/538#538
1226
/// Note: "(OH)" doesn't fit with the above, and Susan Jolly suggests allowing any sequence that ends with H, so we allow that.
1227
/// Also, Susan Jolly suggested allowing any compound with C, H, and O
1228
3.08k
fn is_order_ok(mrow: Element) -> bool {
1229
3.08k
    assert_eq!(name(mrow), "mrow");
1230
3.08k
    if let Some(
elements2.32k
) = collect_elements(mrow) {
1231
2.73k
        if 
elements.iter()2.32k
.
any2.32k
(|&e| !CHEMICAL_ELEMENT_ELECTRONEGATIVITY.contains_key(e)) {
1232
1.48k
            return false;
1233
846
        }
1234
846
        let n_elements = elements.len();
1235
846
        if n_elements < 2 {
1236
475
            return true;
1237
371
        } else if has_noble_element(&elements) {
1238
0
            return false;    // noble elements don't form compounds
1239
        } else {
1240
371
            return elements[n_elements-1] == "H"   ||        // special case that includes "OH"
1241
                    // has_non_metal_element(&elements) && !has_non_metal_element(&elements) &&    // must have a metal and non-metal
1242
295
                    has_c_h_o(&elements) ||
1243
291
                    is_structural(&elements) ||
1244
271
                    is_alphabetical(&elements) ||
1245
169
                    is_ordered_by_electronegativity(&elements) ||
1246
12
                    is_generalized_salt(&elements);
1247
        }
1248
    } else {
1249
759
        return false;
1250
    }
1251
3.08k
}
1252
1253
// from https://learnwithdrscott.com/ionic-bond-definition/
1254
// I don't include the noble gases since they don't interact with other elements and are ruled out elsewhere
1255
// fn has_non_metal_element(elements: &[&str]) -> bool {
1256
//     static NON_METAL_ELEMENTS: phf::Set<&str> = phf_set! {
1257
//         "H", "B", "C", "N", "O", "F", "Si", "P", "S", "Cl", "As", "Se", "Br", "Te", "I", "At",
1258
//     };
1259
//     return elements.iter().any(|&e| NON_METAL_ELEMENTS.contains(e));
1260
// }
1261
1262
1263
374
/// Returns true if any symbol in `elements` is a noble gas.
fn has_noble_element(elements: &[&str]) -> bool {
    static NOBLE_GASES: phf::Set<&str> = phf_set! {
        "He", "Ne", "Ar", "Kr", "Xe", "Rn", "Og" // Og might be reactive, but it is unstable
    };
    for &symbol in elements {
        if NOBLE_GASES.contains(symbol) {
            return true;
        }
    }
    false
}
1269
1270
295
/// Returns true if `elements` contains all of "C", "H", and "O" (in any order).
fn has_c_h_o(elements: &[&str]) -> bool {
    ["C", "H", "O"].iter().all(|required| elements.contains(required))
}
1273
1274
1275
295
/// Returns true if some element symbol occurs more than once in `elements`.
/// The caller (`is_order_ok`) uses a repeat as the sign of a structural formula.
///
/// Panics if `elements` has fewer than two entries.
fn is_structural(elements: &[&str]) -> bool {
    assert!(elements.len() > 1);   // already handled

    // debug!("is_structural: {:?}", elements);
    // `insert` returns false for a symbol that is already present, i.e. a repeat
    let mut seen = HashSet::with_capacity(elements.len());
    elements.iter().any(|&symbol| !seen.insert(symbol))
}
1283
1284
/// collect up all the elements in the mrow.
1285
///  Returns the elements (which can be an empty vector) or None if something (right now an operator) rules out them being elements
1286
3.10k
fn collect_elements(mrow: Element<'_>) -> Option<Vec<&str>> {
1287
3.10k
    let mut elements = Vec::with_capacity(mrow.children().len()/2+1);       // don't bother with slots for operators
1288
8.86k
    for child in 
mrow3.10k
.
children3.10k
() {
1289
8.86k
        let child = as_element(child);
1290
8.86k
        match name(child) {
1291
8.86k
            "mi" | 
"mtext"6.18k
=>
elements2.80k
.
push2.80k
(
as_text2.80k
(
child2.80k
)),
1292
6.06k
            "msub" | 
"msup"5.73k
|
"mmultiscripts"5.65k
=> {
1293
584
                let base = as_element(child.children()[0]);
1294
584
                let base_name = name(base);
1295
584
                if base_name == "mi" || 
base_name == "mtext"115
{
1296
514
                    elements.push(as_text(base));
1297
514
                
}70
// else skip and let recursive likely_chem_formula call check the contents
1298
            },
1299
5.48k
            "mo" if 
likely_chem_formula_operator3.22k
(
child3.22k
) <
0759
=> return
None759
,
1300
2.46k
            "mo" => (),
1301
2.25k
            _ => (),    // let loop in likely_chem_formula() deal with all the negatives
1302
        }
1303
    }
1304
2.34k
    return Some(elements);
1305
3.10k
}
1306
1307
/// Checks that the element symbols are in strictly increasing alphabetical order.
/// Actually checks Hill's system: if "C" is present it must come first, optionally followed by "H",
/// and only the symbols after that prefix need to be alphabetical.
///
/// Panics if `elements` has fewer than two entries.
fn is_alphabetical(elements: &[&str]) -> bool {
    assert!(elements.len() > 1);   // already handled
    // debug!("is_alphabetical: {:?}", elements);
    if elements.iter().skip(1).any(|&symbol| symbol == "C") {
        return false;       // "C" must be first if present
    }
    // drop the leading "C" or "C", "H" before comparing neighbors
    let remaining = match elements {
        ["C", "H", rest @ ..] => rest,
        ["C", rest @ ..] => rest,
        all => all,
    };
    // fewer than two remaining symbols gives no windows, so `all` is trivially true
    remaining.windows(2).all(|neighbors| neighbors[0] < neighbors[1])
}
1321
1322
174
fn is_ordered_by_electronegativity(elements: &[&str]) -> bool {
1323
    // HPO_4^2 (Mono-hydrogen phosphate) doesn't fit this pattern, nor does HCO_3^- (Hydrogen carbonate) and some others
1324
    // FIX: drop "H" from the ordering??
1325
174
    assert!(elements.len() > 1);   // already handled
1326
188
    return 
elements.windows(2)174
.
all174
(|pair| CHEMICAL_ELEMENT_ELECTRONEGATIVITY.get(pair[0]).unwrap() < CHEMICAL_ELEMENT_ELECTRONEGATIVITY.get(pair[1]).unwrap());
1327
174
}
1328
1329
12
/// Stub for the "generalized salt" ordering check: it currently rejects every input.
///
/// Panics if `elements` is empty.
fn is_generalized_salt(elements: &[&str]) -> bool {
    assert!(!elements.is_empty());
    false
}
1333
1334
1335
/// Returns the likelihood that the arg is an adorned chem formula
1336
/// Adornments are:
1337
///   superscripts with +/- and optionally a number (charge)
1338
///  numeric subscripts (e.g. H_2)
1339
/// In addition to chemical elements, we include nuclear decay since there is a lot of overlap in notation
1340
/// The nuclear decay notation is mostly taken from https://tinyurl.com/2f6b8e3a
1341
/// Basically it is a chemical element or 'e', 'p', 'n', 'α', 'β', or 'γ' with pre-sub/superscript
1342
/// There is also an instance with a charge on the referenced page, so we allow that also.
1343
/// 
1344
/// Note: https://tinyurl.com/ysmr8cw2 says "++"/"--", etc., is sometimes used in a superscript particle physics instead of a "2"
1345
/// 
1346
/// Note:  msubsup cleaning for an empty script hasn't happened and we consider an empty script a sign of attempting to vertically align sub/superscripts
1347
///
1348
/// Note: 'mathml' is not necessarily canonicalized   
1349
2.85k
pub fn likely_adorned_chem_formula(mathml: Element) -> i32 {
1350
2.85k
    if !
matches!2.85k
(name(mathml), "msub" |
"msup"1.94k
|
"msubsup"546
|
"mmultiscripts"352
) {
1351
1
        return NOT_CHEMISTRY;
1352
2.85k
    }
1353
    // some simple sanity checks on the scripts...
1354
2.85k
    let tag_name = name(mathml);
1355
2.85k
    let children = mathml.children();
1356
2.85k
    let mut likelihood = 0;
1357
2.85k
    let mut is_empty_subscript = false;
1358
    // debug!("likely_adorned_chem_formula:\n{}", mml_to_string(mathml));
1359
2.85k
    if tag_name == "msub" || 
tag_name == "msubsup"1.94k
{
1360
        // subscripts should be just a number, although they could be 'n' or '2n' or other exprs.
1361
1.10k
        let subscript = as_element(children[1]);
1362
1.10k
        is_empty_subscript = name(subscript) == "mtext" && 
as_text(subscript).trim()3
.
is_empty3
();
1363
1.10k
        if !is_empty_subscript {
1364
1.10k
            likelihood += likely_chem_subscript(subscript);
1365
1.10k
        
}3
1366
1.74k
    }
1367
1368
2.85k
    let mut empty_superscript = false;
1369
2.85k
    if tag_name == "msup" || 
tag_name == "msubsup"1.45k
{
1370
        // debug!("likely_adorned_chem_formula: mathml\n{}", mml_to_string(mathml));
1371
1.59k
        let superscript = as_element(children[if tag_name == "msup" {
11.39k
} else {
2194
}]);
1372
1.59k
        empty_superscript = name(superscript) == "mtext" && 
as_text(superscript).trim()13
.
is_empty13
();
1373
1.59k
        if !empty_superscript {
1374
1.58k
            likelihood += likely_chem_superscript(superscript);
1375
1.58k
        
}6
1376
1.26k
    }
1377
2.85k
    if tag_name == "msubsup" && (
is_empty_subscript194
||
empty_superscript191
) {
1378
9
        likelihood += 1; // might be trying to vertically align scripts as in done in chemistry
1379
2.84k
    }
1380
1381
2.85k
    if tag_name == "mmultiscripts" {
1382
        // prescripts are normally positive integers, chem 2.5.1 allows for a superscript for a Lewis dot
1383
        // postscript should be a charge
1384
1385
        let prescripts;
1386
        let postscripts;
1387
351
        if children.len() == 4 && 
name138
(
as_element138
(children[1]))=="mprescripts" { // just prescripts
1388
138
            prescripts = &children[2..4];
1389
138
            postscripts = &children[0..0]; // empty
1390
213
        } else if children.len() == 6 && 
name57
(
as_element57
(children[3]))=="mprescripts" { // pre and postscripts
1391
55
            prescripts = &children[4..6];
1392
55
            postscripts = &children[1..3]; // empty
1393
158
        } else if children.len() == 3 || 
children.len() == 568
{ // just postscripts (simultaneous or offset)
1394
118
            prescripts = &children[0..0]; // empty
1395
118
            postscripts = &children[1..];
1396
118
        } else {
1397
40
            return NOT_CHEMISTRY;
1398
        };
1399
1400
311
        if !prescripts.is_empty() {
1401
193
            let pre_subscript = as_element(prescripts[0]);
1402
193
            let pre_subscript_name = name(pre_subscript);
1403
1404
193
            let pre_superscript = as_element(prescripts[1]);
1405
193
            let pre_superscript_name = name(pre_superscript);
1406
1407
            // deal with special case of 'e' with prescripts of -1 and 0
1408
193
            if is_adorned_electron(children[0], prescripts) {
1409
31
                return 100;     // very likely chemistry
1410
162
            }
1411
162
            let base = as_element(children[0]);
1412
162
            let base_name = name(base);
1413
162
            let 
atomic_number127
= if
matches!154
(base_name, "mi" |
"mtext"41
) &&
1414
154
                                        let Some(
atomic_number127
) = CHEMICAL_ELEMENT_ATOMIC_NUMBER.get(as_text(base)) {
1415
127
                        *atomic_number
1416
                    } else {
1417
35
                        return NOT_CHEMISTRY;
1418
                    };
1419
127
            if pre_superscript_name == "mo" {
1420
                // Lewis dot prescript case
1421
3
                if pre_subscript_name != "none" {
1422
0
                    return NOT_CHEMISTRY;
1423
3
                }
1424
3
                likelihood += likely_chem_superscript(pre_superscript);
1425
124
            } else if pre_superscript_name == "mn" { // must have a pre-superscript (neutrons + protons)
1426
75
                if let Ok(mass) = as_text(pre_superscript).parse::<u32>() {
1427
                    // "drip line" is 1.5 * mass < 3.5 * mass -- it is possible to outside of this range, but VERY unlikely
1428
                    // to avoid floating point, we multiply by 2 and compare to 3 and 7
1429
75
                    if 3*atomic_number < 2*mass && 
2*mass < 7*atomic_number74
{
1430
74
                        likelihood += 3;
1431
74
                    
}1
1432
0
                }
1433
75
                if pre_subscript_name == "mn"  && 
as_text(pre_subscript)71
== atomic_number.to_string() {
1434
69
                        likelihood = CHEMISTRY_THRESHOLD;
1435
69
                
}6
1436
            } else {
1437
49
                return NOT_CHEMISTRY;
1438
            }
1439
118
        }
1440
1441
196
        if !postscripts.is_empty() {
1442
119
            let mut i = 0;
1443
266
            while i < postscripts.len() {
1444
147
                let sub = as_element(postscripts[i]);
1445
                // debug!("sub: {}", mml_to_string(sub));
1446
147
                if name(sub) != "none" {
1447
91
                    likelihood += likely_chem_subscript(sub);
1448
91
                
}56
1449
147
                let sup = as_element(postscripts[i+1]);
1450
147
                if name(sup) != "none" {
1451
65
                    // debug!("sup: {}", mml_to_string(sub));
1452
65
                    likelihood += likely_chem_superscript(sup);
1453
82
                }
1454
147
                i += 2;
1455
            }
1456
77
        }
1457
2.50k
    }
1458
1459
2.69k
    let base = as_element(children[0]);
1460
2.69k
    let base_name = name(base);
1461
2.69k
    if base_name == "mi" || 
base_name == "mtext"822
{
1462
2.05k
        likelihood += likely_chem_element(base);
1463
2.05k
    } else if 
base_name == "mrow"641
{
1464
        // debug!("mrow addition:\n{}", mml_to_string(base));
1465
        // a safe minor canonicalization that allows "short_form" calculations if appropriate
1466
187
        if (IsBracketed::is_bracketed(base, "(", ")", false, false) ||
1467
89
            IsBracketed::is_bracketed(base, "[", "]", false, false)) &&
1468
148
           base.children().len() > 3 {
1469
77
            let inner_mrow = create_mathml_element(&base.document(), "mrow");
1470
77
            inner_mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1471
77
            let mut children = base.children();
1472
77
            let inside_of_parens = children.drain(1..children.len()-1);
1473
77
            inner_mrow.append_children(inside_of_parens);
1474
77
            base.replace_children(vec![children[0], ChildOfElement::Element(inner_mrow), children[children.len()-1]]);
1475
110
        }
1476
187
        likelihood += likely_chem_formula(base);
1477
454
    } else {
1478
454
        likelihood += likely_chem_formula(base);
1479
454
    }
1480
    
1481
    // debug!("returning from likely_adorned_chem_formula: likelihood={}, mathml\n{}", likelihood, mml_to_string(mathml));
1482
2.69k
    return likelihood;
1483
1484
1485
193
    fn is_adorned_electron(base: ChildOfElement, prescripts: &[ChildOfElement]) -> bool {
1486
        // looking for 'e' with prescripts of -1 and 0
1487
193
        let base = as_element(base);
1488
193
        let pre_lower = as_element(prescripts[0]);
1489
193
        let pre_upper = as_element(prescripts[1]);
1490
193
        if (name(base) == "mi" || 
name(base) == "mtext"57
) &&
as_text(base) == "e"185
&&
1491
31
           name(pre_upper) == "mn" && as_text(pre_upper) == "0" && 
1492
31
           name(pre_lower) == "mrow" && pre_lower.children().len() == 2 {
1493
            // looking '-' and '1'
1494
31
            let lower_children = pre_lower.children();
1495
31
            let minus = as_element(lower_children[0]);
1496
31
            let one = as_element(lower_children[1]);
1497
            // not yet normalized, so we need to compare against ASCII minus and u+2212
1498
31
            return name(minus) == "mo" && (as_text(minus) == "-" || as_text(minus) == "−") && 
1499
31
                   name(one) == "mn"   && as_text(one) == "1";
1500
        } else {
1501
162
            return false;
1502
        }
1503
193
    }
1504
2.85k
}
1505
1506
/// useful function to see if the str is a single char matching the predicate
1507
29.6k
fn is_single_char_matching(leaf_text: &str, pred: impl Fn(char) -> bool) -> bool {
1508
29.6k
    let mut chars = leaf_text.chars();
1509
29.6k
    if let Some(ch) = chars.next() && chars.next().is_none() {
1510
29.5k
        return pred(ch);
1511
87
    }
1512
87
    return false;
1513
29.6k
}
1514
1515
17.2k
fn likely_chem_formula_operator(mathml: Element) -> i32 {
1516
    // mostly from chenzhijin.com/en/article/Useful%20Unicode%20for%20Chemists (Arrows and Other)
1517
    // also en.wikipedia.org/wiki/Chemical_formula#Condensed_formula
1518
    #[derive(PartialEq, Eq)]
1519
    enum BondType {DoubleBond, TripleBond}      // options for is_legal_bond()
1520
    // "⋅" is used in GTM 16.2 and en.wikipedia.org/wiki/Cement_chemist_notation -- may want to add some similar chars
1521
    static CHEM_FORMULA_OPERATORS: phf::Set<&str> = phf_set! {
1522
        "-", "\u{2212}", "⋅", ":", "=", "∷", "≡", ":::", "≣", "::::", // bond symbols (need both 2212 and minus because maybe not canonicalized)
1523
        "⋮", // lewis dots, part of "⋮⋮" - triple bond (see Nemeth chem guide 2.5.4)
1524
    };
1525
16.1k
    fn is_chem_formula_ok(ch: char) -> bool {
1526
16.1k
        
matches!9.64k
(ch, '(' | ')' | '[' | ']' | '\u{2062}' | '\u{2063}')
1527
16.1k
    }
1528
1529
17.2k
    assert_eq!(name(mathml), "mo");
1530
17.2k
    let leaf_text = as_text(mathml);
1531
17.2k
    if CHEM_FORMULA_OPERATORS.contains(leaf_text) &&
1532
1.85k
       (has_inherited_property(mathml, "chemical-formula") ||
1533
1.85k
        ( !(leaf_text == "=" || 
leaf_text == "∷"1.02k
) ||
is_legal_bond848
(
mathml848
,
BondType::DoubleBond848
) ) &&
1534
1.05k
        ( !(leaf_text == "≡" || 
leaf_text == ":::"1.03k
) ||
is_legal_bond26
(
mathml26
,
BondType::TripleBond26
) )
1535
       )  {
1536
1.04k
        mathml.set_attribute_value(MAYBE_CHEMISTRY, "1");
1537
1.04k
        mathml.set_attribute_value(CHEM_FORMULA_OPERATOR, "1");
1538
1.04k
        return 1;
1539
16.1k
    } else if is_single_char_matching(leaf_text, is_chem_formula_ok) {
1540
6.49k
        return 0;  // not much info
1541
    } else {
1542
9.67k
        return -3; // still a small chance;
1543
    }
1544
1545
874
    fn is_legal_bond(mathml: Element, bond_type: BondType) -> bool {
1546
874
        let preceding = mathml.preceding_siblings();
1547
874
        let following = mathml.following_siblings();
1548
874
        if preceding.is_empty() || 
following783
.
is_empty783
() {
1549
115
            return false;
1550
759
        }
1551
1552
759
        let mut preceding_element = as_element(preceding[preceding.len()-1]);
1553
        // special check for CH_2 -- double bond is really with C
1554
759
        if bond_type == BondType::DoubleBond && 
name(preceding_element) == "msub"734
&&
1555
31
           preceding.len() > 1 &&  
&11
convert_to_short_form11
(preceding_element).unwrap_or_default() == "H_2" {
1556
2
            preceding_element = as_element(preceding[preceding.len()-2]);
1557
2
            if !is_leaf(preceding_element) || as_text(preceding_element) != "C" {
1558
0
                return false;
1559
2
            }
1560
757
        } else if name(preceding_element) != "mi" && 
name(preceding_element) != "mtext"353
{
1561
320
            return false;
1562
437
        }
1563
439
        let following_element = get_possible_embellished_node(as_element(following[0]));
1564
439
        if name(following_element) != "mi" && 
name(following_element) != "mtext"315
{
1565
313
            return false;
1566
126
        }
1567
126
        let preceding_text = as_text(preceding_element);
1568
126
        let following_text = as_text(following_element);
1569
126
        return match bond_type {
1570
105
            BondType::DoubleBond => is_legal_double_bond(preceding_text, following_text),
1571
21
            BondType::TripleBond => is_legal_triple_bond(preceding_text, following_text),
1572
        };
1573
1574
105
        fn is_legal_double_bond(left: &str, right: &str) -> bool {
1575
            // this is based on table in en.wikipedia.org/wiki/Double_bond#Types_of_double_bonds_between_atoms
1576
            static DOUBLE_BOND_TO_SELF: phf::Set<&str> = phf_set! {
1577
                "C", "O", "N", "S", "Si", "Ge", "Sn", "Pb"
1578
            };
1579
                // "C" => &["O", "N", "S"],
1580
                // "O" => &["N", "S"],
1581
105
            if left == right && 
DOUBLE_BOND_TO_SELF50
.
contains50
(
left50
) {
1582
44
                return true;
1583
61
            }
1584
61
            return match left {
1585
61
                "C" => 
right=="O"3
||
right=="N"2
||
right=="S"2
,
1586
58
                "O" => 
right=="N"1
||
right=="S"1
,
1587
57
                "Si" => 
right=="C"0
,
1588
57
                _ => false,
1589
            }
1590
105
        }
1591
1592
21
        fn is_legal_triple_bond(left: &str, right: &str) -> bool {
1593
            // According to https://tinyurl.com/rkynhwj3 (from physics.org)
1594
            // triple bonds can be formed between any of B, C, N, and O
1595
            // Apparently they can also be forced in other cases, but they are rare.
1596
            // 'B' is from studiousguy.com/triple-bond-examples/
1597
21
            return  (left == "B"  || left == "C"  || 
left == "N"5
||
left == "O"5
) &&
1598
18
                    (right == "B" || right == "C" || 
right == "N"5
||
right == "O"5
);
1599
21
        }
1600
874
    }
1601
17.2k
}
1602
1603
/// This assumes canonicalization of characters has happened
1604
6.85k
fn likely_chem_equation_operator(mathml: Element) -> i32 {
1605
1606
6.73k
    fn is_chem_equation_operator(ch: char) -> bool {
1607
6.73k
        
matches!4.90k
(ch, '+' | '=' | '-' | '·' | '℃' | '°' | '‡' | '∆' | '×' | '\u{2062}')
1608
6.73k
    }
1609
1610
6.85k
    let elem_name = name(mathml);
1611
6.85k
    if elem_name == "munder" || 
elem_name == "mover"6.80k
||
elem_name == "munderover"6.78k
{
1612
86
        let base = as_element(mathml.children()[0]);
1613
86
        if name(base) == "mo" && 
is_single_char_matching64
(
as_text(base)64
, is_chem_equation_arrow) {
1614
1
            base.set_attribute_value(MAYBE_CHEMISTRY, "1");
1615
1
            base.set_attribute_value(CHEM_EQUATION_OPERATOR, "1");
1616
1
            return 1;
1617
85
        } else if elem_name == "mover" && 
is_hack_for_missing_arrows20
(
mathml20
) {
1618
9
            return 2;
1619
        } else {
1620
76
            return NOT_CHEMISTRY;
1621
        }    
1622
6.76k
    }
1623
1624
6.76k
    if name(mathml) == "mo" {
1625
6.76k
        let text = as_text(mathml);
1626
6.76k
        if is_single_char_matching(text, is_chem_equation_operator) || 
is_single_char_matching4.93k
(
text4.93k
, is_chem_equation_arrow) {
1627
1.96k
            mathml.set_attribute_value(MAYBE_CHEMISTRY, "1");
1628
1.96k
            mathml.set_attribute_value(CHEM_EQUATION_OPERATOR, "1");
1629
1.96k
            return 1;
1630
4.79k
        } else if text == "\u{2062}" || text == "\u{2063}" {
1631
            // FIX: the invisible operator between elements should be well-defined, but this likely needs work, so both accepted for now
1632
0
            return 0;
1633
4.79k
        }
1634
0
    }
1635
4.79k
    return -3;  // there is still a chance
1636
1637
    /// Detects output of mhchem for some equilibrium arrows that currently (11/22) don't have Unicode points
1638
    /// See github.com/NSoiffer/MathCAT/issues/60 for the patterns being matched
1639
20
    fn is_hack_for_missing_arrows(mover: Element) -> bool {
1640
20
        assert_eq!(name(mover), "mover");
1641
20
        let children = mover.children();
1642
20
        let base = as_element(children[0]);
1643
20
        let mo_base = if name(base) == "mrow" && 
base.children().len() == 212
{
1644
9
            as_element(base.children()[0])
1645
        } else {
1646
11
            base
1647
        };
1648
20
        let upper = as_element(children[1]);
1649
20
        let mo_upper = if name(upper) == "mrow" && 
upper.children().len() == 29
{
1650
9
            as_element(upper.children()[1])
1651
        } else {
1652
11
            upper
1653
        };
1654
        // slightly sloppy match, but almost certainly good enough
1655
20
        return name(mo_base) == "mo" && 
name(mo_upper) == "mo"9
&&
1656
9
                as_text(mo_base) == "↽" && as_text(mo_upper) == "⇀";
1657
20
        }
1658
6.85k
}
1659
1660
38
fn is_equilibrium_constant(mut mathml: Element) -> bool {
1661
38
    if name(mathml) == "msub" {
1662
27
        mathml = as_element(mathml.children()[0]);
1663
27
    
}11
1664
1665
38
    return name(mathml) == "mi" && 
as_text(mathml) == "K"25
;
1666
38
}
1667
1668
// Oxidation states range from -4 to 9 and are written with (a subset of) roman numerals.
1669
// All instances seem to be upper case that I've seen.
1670
3
static SMALL_UPPER_ROMAN_NUMERAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*^(IX|IV|V?I{0,3})\s*$").unwrap());
1671
1672
/// look for "(s), "(l)", "(g)", "(aq)" (could also use [...])
1673
/// this might be called before canonicalization, but in clean_chemistry_mrow, we made sure "( xxx )" is grouped properly
1674
3.68k
pub fn likely_chem_state(mathml: Element) -> i32 {
1675
    
1676
3.68k
    if IsBracketed::is_bracketed(mathml, "(", ")", false, false) ||
1677
3.30k
       IsBracketed::is_bracketed(mathml, "[", "]", false, false) {
1678
438
        let contents = as_element(mathml.children()[1]);
1679
438
        let contents_name = name(contents);
1680
438
        if contents_name == "mi" || 
contents_name == "mtext"331
{
1681
109
            let text = as_text(contents);
1682
109
            if text == "s" || 
text == "l"102
||
text == "g"102
||
text == "aq"68
{
1683
67
                return text.len() as i32 + 1;       // hack to count chars -- works because all are ASCII 
1684
42
            };
1685
329
        }
1686
3.24k
     }
1687
3.61k
     return NOT_CHEMISTRY;
1688
3.68k
}
1689
1690
/// Returns the likelihood that the arg is an element
1691
16.4k
pub fn likely_chem_element(mathml: Element) -> i32 {
1692
    static NUCLEAR_SYMBOLS: [&str; 6] = ["e", "p", "n", "α", "β","γ"];
1693
1694
16.4k
    assert!(name(mathml) == "mi" || 
name(mathml) == "mtext"1.11k
, "{} is not 'mi' or 'mtext'",
name0
(
mathml0
));
1695
16.4k
    let text = as_text(mathml);
1696
16.4k
    if as_text(mathml).trim().is_empty() {
1697
782
        return 0;   // whitespace
1698
15.6k
    } else if is_chemical_element(mathml) {
1699
        // single letter = 1; single letter with mathvariant="normal" = 2; double = 3 -- all elements are ASCII
1700
2.21k
        return if text.len() == 1 {
1701
1.90k
            if mathml.attribute_value("mathvariant").unwrap_or_default() == "normal" {
2491
} else {
11.41k
}
1702
        } else {
1703
311
            3
1704
        };
1705
13.4k
    } else if NUCLEAR_SYMBOLS.contains(&text) {
1706
659
        return 0;
1707
        // not much special about them;
1708
    } else {
1709
12.7k
        return NOT_CHEMISTRY;
1710
    }
1711
16.4k
}
1712
1713
static SHORT_SINGLE_LETTER_ELEMENT_FORMULAE: phf::Set<&str> = phf_set! {
1714
    // from en.wikipedia.org/wiki/Glossary_of_chemical_formulae (via chem_formula_from_wikipedia.py)
1715
    "BF_3", "BI_3", "BN", "BP", "B_2F_4", "B_2H_6", "B_2O_3", "B_2S_3", "B_4C",
1716
    "CB_4", "CF_4", "CH_2", "CH_4", "CO", "CO_2", "CO_3", "CS_2", "CW", "C_2F_4",
1717
    "C_2H_4", "C_2H_6", "C_2U", "C_2Y", "C_3H_4", "C_3H_6", "C_3H_8", "C_4H_2",
1718
    "C_4H_8", "C_4I_2", "C_6H_6", "C_6N_4", "C_7H_8", "C_8H_8", "DI", "D_2O",
1719
    "FI", "FI_2", "FK", "FN", "FO", "FO_2", "FP", "FS", "FW", "FY", "F_2",
1720
    "F_2N", "F_2O", "F_2O_2", "F_2P", "F_2S", "F_2S_2", "F_2W", "F_2Y", "F_3B",
1721
    "F_3P", "F_3S", "F_3W", "F_3Y", "F_4B_2", "F_4C", "F_4C_2", "F_4N_2",
1722
    "F_4S", "F_4U", "F_4W", "F_5I", "F_5P", "F_5S", "F_5U", "F_5W", "F_6S",
1723
    "F_6W", "F_7I", "HF", "HI", "HK", "HN_3", "H_2", "H_2C", "H_2C_2", "H_2C_4",
1724
    "H_2O", "H_2O_2", "H_2S", "H_3N", "H_3P", "H_4C", "H_4C_2", "H_4C_3",
1725
    "H_4N_2", "H_4N_4", "H_6B_2", "H_6C_2", "H_6C_3", "H_6C_6", "H_8C_3",
1726
    "H_8C_7", "H_8C_8", "ID", "IF", "IF_5", "IF_7", "IH", "IK", "IO_3", "I_2",
1727
    "I_2F", "I_2O_5", "I_2W", "I_3B", "I_3N", "I_3U", "I_3V", "I_4P_2", "I_4W",
1728
    "KH", "KI", "K_2F_2", "K_2O", "K_2O_2", "K_2S", "NB", "NF", "NF_2", "NF_3",
1729
    "NI_3", "NO", "NO_2", "NU", "NV", "N_2", "N_2F_4", "N_2H_2", "N_2H_4",
1730
    "N_2O_3", "N_2O_4", "N_2O_5", "N_3H", "N_4C_6", "N_4H_4", "N_5P_3", "O",
1731
    "OD_2", "OF", "OF_2", "OH_2", "OK_2", "ON", "ON_2", "OT_2", "O_2", "O_2C",
1732
    "O_2F_2", "O_2H_2", "O_2K_2", "O_2N", "O_2S", "O_2U", "O_2W", "O_3",
1733
    "O_3C", "O_3I", "O_3N_2", "O_3S", "O_3U", "O_3V_2", "O_3W", "O_3Y_2",
1734
    "O_5I_2", "O_5N_2", "O_5P_2", "O_5V_2", "O_8U_3", "PB", "PF", "PF_2", "PF_3",
1735
    "PH_3", "PY", "P_2F_4", "P_2I_4", "P_2O_5", "P_2S_3", "P_3N_5", "SF", "SF_2",
1736
    "SF_4", "SF_5", "SF_6", "SH_2", "SK_2", "SO_2", "SO_3", "S_2C", "S_2F_2",
1737
    "S_2W", "S_3B_2", "S_3P_2", "S_3W", "S_3Y_2", "T_2O", "UC_2", "UF_4", "UF_5",
1738
    "UI_3", "UN", "UO_2", "UO_3", "US_2", "U_3O_8", "VI_3", "VN", "V_2O_3",
1739
    "WC", "WF", "WF_2", "WF_3", "WF_4", "WF_5", "WF_6", "WI_2", "WI_4", "WO_2",
1740
    "WS_2", "WS_3", "YB_6", "YC_2", "YF", "YF_2", "YF_3", "YP", "Y_2O_3",
1741
1742
    // from en.wikipedia.org/wiki/Ion#Common_ions (via chem_formula_from_wikipedia.py)
1743
    "CH_3COO^−", "CN^−", "CO_3^2−", "C^−", "C_2O_4^2−", "F^−", "HCOO^−", 
1744
    "HPO_4^2−", "HSO_3^−", "HSO_4^−", "H^+", "H^−", "H_2PO_4^−", "H_3O^+", "I^−", 
1745
    "NH_4^+", "NO_2^−", "NO_3^−", "N^3−", "N_3^−", "OH^−", "O^2−", "O_2^2−", 
1746
    "PO_4^3−", "P^3−", "SO_3^2−", "SO_4^2−", "S^2−", "S_2O_3^2−",
1747
1748
    // from gchem.cm.utexas.edu/canvas.php?target=bonding/ionic/polyatomic-ions.html
1749
    "PO_3^3−", "IO_3^−",
1750
1751
    // others
1752
    "CH_3", /* methyl */
1753
    "NH_3",  // ammonium
1754
};
1755
1756
/// Returns true if the formula is composed of 1 or 2 single letter elements and it matches a known compound/ion
1757
/// This might be called (via likely_adorned_chem_formula) unparsed
1758
387
fn is_short_formula(mrow: Element) -> bool {
1759
387
    assert_eq!(name(mrow), "mrow");
1760
387
    let children = mrow.children();
1761
387
    let n_children = children.len();
1762
387
    if n_children == 0 || n_children > 3 || (
n_children == 3378
&&
name317
(
as_element317
(children[1])) != "mo") {
1763
12
        return false;
1764
375
    }
1765
1766
375
    let first_element = convert_to_short_form( as_element(children[0]) );
1767
375
    if n_children == 1 {
1768
2
        return first_element.is_ok();
1769
373
    }
1770
373
    let second_element = convert_to_short_form( as_element(children[if n_children == 2 {
159
} else {
2314
}]) );
1771
373
    return match (first_element, second_element) {
1772
365
        (Ok(first), Ok(second)) => {
1773
365
            let short_form = first + second.as_str();
1774
            // debug!("short_form: {}", short_form);
1775
365
            return SHORT_SINGLE_LETTER_ELEMENT_FORMULAE.contains(&short_form);
1776
        },
1777
8
        _ => false,
1778
    }
1779
387
}
1780
1781
931
fn convert_to_short_form(mathml: Element) -> Result<String> {
1782
931
    let mathml_name = name(mathml);
1783
931
    return match mathml_name {
1784
931
        "mi" | 
"mtext"441
|
"mn"393
|
"mo"104
=>
Ok( as_text(mathml).to_string() )836
,
1785
95
        "none" => 
Ok( "".to_string() )0
,
1786
95
        "msub" | 
"msup"16
|
"msubsup"13
|
"mmultiscripts"13
=> {
1787
86
            let is_mmultiscripts = mathml_name == "mmultiscripts";
1788
86
            let children = mathml.children();
1789
86
            let mut result = convert_to_short_form(as_element(children[0]))
?0
;
1790
86
            if is_mmultiscripts && 
children.len() != 34
{
1791
0
                bail!("mmultiscripts found with {} children -- not part of chemical formula", children.len());
1792
86
            }
1793
86
            if mathml_name == "msub" || 
mathml_name == "msubsup"7
|| (
is_mmultiscripts7
&&
name4
(
as_element4
(children[1])) != "none") {
1794
83
                result += "_";
1795
83
                result += &convert_to_short_form(as_element(children[1]))
?1
;
1796
3
            }
1797
85
            if mathml_name == "msup" || 
mathml_name == "msubsup"82
|| (
is_mmultiscripts82
&&
name4
(
as_element4
(children[2])) != "none") {
1798
3
                result += "^";
1799
3
                result += &convert_to_short_form(as_element(children[if mathml_name=="msup" {1} else {
20
}]))
?0
;
1800
82
            }
1801
85
            Ok( result )
1802
        },
1803
9
        "mrow" => {
1804
            // the only time this is valid is if the superscript is something like "+" or "2+", so we do a few checks and short circuit false now
1805
9
            let mrow_children = mathml.children();
1806
9
            if mrow_children.len() == 1 || mrow_children.len() == 2 {
1807
0
                let mut result = convert_to_short_form(as_element(mrow_children[0]))?;
1808
0
                if mrow_children.len() == 2 {
1809
0
                    result += &convert_to_short_form(as_element(mrow_children[1]))?;
1810
0
                }
1811
0
                return Ok(result)
1812
            } else {
1813
9
                bail!("mrow found with {} children -- not part of chemical formula", mrow_children.len());
1814
            }
1815
        }
1816
0
        _ => bail!("{} found -- not part of chemical formula", mathml_name),
1817
    }
1818
931
}
1819
1820
/// A map of chemical elements and their relative IUPAC electronegativity (https://i.stack.imgur.com/VCSzW.png)
1821
/// That list uses a horizontal line for the Lanthanide and Actinide Series.
1822
/// Because I had already ordered the elements before realizing that, I opened a gap and started the higher ones again with a '1' in front.
1823
/// The list is missing recent (unstable) elements -- I added them with the same value as the element above them in the periodic table.
1824
static CHEMICAL_ELEMENT_ELECTRONEGATIVITY: phf::Map<&str, u32> = phf_map! {
1825
  "Ac" => 40, "Ag" => 155, "Al" => 163, "Am" => 29, "Ar" => 4, "As" => 172, "At" => 181, "Au" => 154,
1826
    "B" => 164, "Ba" => 14, "Be" => 18, "Bh" => 137, "Bi" => 170, "Bk" => 27, "Br" => 183,
1827
  "C" => 169, "Ca" => 16, "Cd" => 158, "Ce" => 56, "Cf" => 26, "Cl" => 184, "Cm" => 28, "Cn" => 157, "Co" => 148, "Cr" => 136, "Cs" => 8, "Cu" => 156,
1828
    "Db" => 129, "Ds" => 149, "Dy" => 48, 
1829
  "Er" => 46, "Es" => 25, "Eu" => 51, "F" => 185, "Fe" => 144, "Fl" => 165, "Fm" => 24, "Fr" => 7, "Ga" => 162, "Gd" => 50, "Ge" => 167,
1830
  "H" => 175, "He" => 6, "Hf" => 126, "Hg" => 157, "Ho" => 47, "Hs" => 141, "I" => 182, "In" => 161, "Ir" => 146, "K" => 10, "Kr" => 3,
1831
  "La" => 62, "Li" => 12, "Lr" => 19, "Lu" => 41, "Lv" => 176, "Mc" => 170, "Md" => 23, "Mg" => 17, "Mn" => 140, "Mo" => 135, "Mt" => 145, 
1832
  "N" => 174, "Na" => 11, "Nb" => 131, "Nd" => 54, "Ne" => 5, "Nh" => 160, "Ni" => 152, "No" => 22, "Np" => 31, "O" => 180, "Og" => 1, "Os" => 142, 
1833
  "P" => 173, "Pa" => 33, "Pb" => 165, "Pd" => 151, "Pm" => 53, "Po" => 176, "Pr" => 55, "Pt" => 150, "Pu" => 30,
1834
  "Ra" => 13, "Rb" => 9, "Re" => 138, "Rf" => 125, "Rg" => 153, "Rh" => 147, "Rn" => 1, "Ru" => 143, 
1835
  "S" => 179, "Sb" => 171, "Sc" => 124, "Se" => 178, "Sg" => 133, "Si" => 168, "Sm" => 52, "Sn" => 166, "Sr" => 15,
1836
  "Ta" => 130, "Tb" => 49, "Tc" => 139, "Te" => 177, "Th" => 34, "Ti" => 128, "Tl" => 160, "Tm" => 45, "Ts" => 181, 
1837
  "U" => 32, "V" => 132, "W" => 134, "Xe" => 2, "Y" => 123, "Yb" => 44, "Zn" => 159, "Zr" => 127,
1838
    // The following come from E.A. Moore who said to treat them like chemicals 
1839
    // These stand for methyl, ethyl, alkyl, acetyl and phenyl and apparently are quite commonly used ("Ac" is already a chemical)
1840
    // A full(er?) list is at en.wikipedia.org/wiki/Skeletal_formula#Alkyl_groups and in following sections
1841
    "Me" => 0, "Et" => 0, "R" => 0, /* "Ac" => 0, */ "Ph" => 0,
1842
    "X" => 0, /* treated as an unknown */
1843
};
1844
1845
/// Atomic numbers of the chemical elements, keyed by element symbol.
///
/// The 118 entries run from `"H"` => 1 through `"Og"` => 118 and are listed
/// in order of increasing atomic number, in rows of up to ten.
static CHEMICAL_ELEMENT_ATOMIC_NUMBER: phf::Map<&str, u32> = phf_map! {
    "H" => 1, "He" => 2, "Li" => 3, "Be" => 4, "B" => 5, "C" => 6, "N" => 7, "O" => 8, "F" => 9, "Ne" => 10,
    "Na" => 11, "Mg" => 12, "Al" => 13, "Si" => 14, "P" => 15, "S" => 16, "Cl" => 17, "Ar" => 18, "K" => 19, "Ca" => 20,
    "Sc" => 21, "Ti" => 22, "V" => 23, "Cr" => 24, "Mn" => 25, "Fe" => 26, "Co" => 27, "Ni" => 28, "Cu" => 29, "Zn" => 30,
    "Ga" => 31, "Ge" => 32, "As" => 33, "Se" => 34, "Br" => 35, "Kr" => 36, "Rb" => 37, "Sr" => 38, "Y" => 39, "Zr" => 40,
    "Nb" => 41, "Mo" => 42, "Tc" => 43, "Ru" => 44, "Rh" => 45, "Pd" => 46, "Ag" => 47, "Cd" => 48, "In" => 49, "Sn" => 50,
    "Sb" => 51, "Te" => 52, "I" => 53, "Xe" => 54, "Cs" => 55, "Ba" => 56, "La" => 57, "Ce" => 58, "Pr" => 59, "Nd" => 60, 
    "Pm" => 61, "Sm" => 62, "Eu" => 63, "Gd" => 64, "Tb" => 65, "Dy" => 66, "Ho" => 67, "Er" => 68, "Tm" => 69, "Yb" => 70,
    "Lu" => 71, "Hf" => 72, "Ta" => 73, "W" => 74, "Re" => 75, "Os" => 76, "Ir" => 77, "Pt" => 78, "Au" => 79, "Hg" => 80,
    "Tl" => 81, "Pb" => 82, "Bi" => 83, "Po" => 84, "At" => 85, "Rn" => 86, "Fr" => 87, "Ra" => 88, "Ac" => 89, "Th" => 90,
    "Pa" => 91, "U" => 92, "Np" => 93, "Pu" => 94, "Am" => 95, "Cm" => 96, "Bk" => 97, "Cf" => 98, "Es" => 99, "Fm" => 100,
    "Md" => 101, "No" => 102, "Lr" => 103, "Rf" => 104, "Db" => 105, "Sg" => 106, "Bh" => 107, "Hs" => 108, "Mt" => 109, "Ds" => 110,
    "Rg" => 111, "Cn" => 112, "Nh" => 113, "Fl" => 114, "Mc" => 115, "Lv" => 116, "Ts" => 117, "Og" => 118, 
};
1860
1861
26.9k
pub fn is_chemical_element(node: Element) -> bool {
1862
  // FIX: allow name to be in an mrow (e.g., <mi>N</mi><mi>a</mi>
1863
26.9k
  let name = name(node);
1864
26.9k
  if name != "mi" && 
name != "mtext"702
{
1865
71
    return false;
1866
26.9k
  }
1867
1868
26.9k
  let text = as_text(node);
1869
26.9k
  return CHEMICAL_ELEMENT_ELECTRONEGATIVITY.contains_key(text) ||
1870
23.0k
           has_chem_intent(node, "chemical-element") ||
1871
23.0k
           has_inherited_property(node, "chemical-formula");
1872
26.9k
}
1873
1874
1875
#[cfg(test)]
1876
mod chem_tests {
1877
1878
1879
#[allow(unused_imports)]
1880
  use super::super::init_logger;
1881
  use super::super::are_strs_canonically_equal;
1882
    use super::*;
1883
1884
40
    fn parse_mathml_string<F>(test: &str, test_mathml: F) -> bool
1885
40
            where F: Fn(Element) -> bool {
1886
        use sxd_document::parser;
1887
        use crate::interface::{get_element, trim_element};
1888
1889
        
1890
40
        let test = if test.starts_with("<math") {
test0
} else {&format!("<math>{}</math>", test)};
1891
40
        let new_package = parser::parse(test);
1892
40
        if let Err(
e0
) = new_package {
1893
0
            panic!("Invalid MathML input:\n{}\nError is: {}", &test, &e.to_string());
1894
40
        }
1895
1896
40
        let new_package = new_package.unwrap();
1897
40
        let mut mathml = get_element(&new_package);
1898
40
        trim_element(mathml, false);
1899
40
        mathml = as_element(mathml.children()[0]);
1900
40
        return test_mathml(mathml);
1901
40
    }
1902
1903
    #[test]
    fn test_noble_element() {
        // Predicate under test: run `has_noble_element` on the elements collected from the parsed MathML.
        fn has_noble(mathml: Element) -> bool {
            return has_noble_element( &collect_elements(mathml).unwrap() );
        }

        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
        let sodium_chloride = "<mrow> <mi>Na</mi> <mo>&#x2063;</mo> <mi>Cl</mi> </mrow>";
        assert!( !parse_mathml_string(sodium_chloride, has_noble) );

        let argon_chlorine = "<mrow> <mi>Ar</mi> <mo>&#x2063;</mo> <mi>Cl</mi> </mrow>";
        assert!( parse_mathml_string(argon_chlorine, has_noble) );

        let neon = "<mrow> <mi>Ne</mi> </mrow>";
        assert!( parse_mathml_string(neon, has_noble) );
    }
1913
1914
    #[test]
    fn test_alphabetical_order() {
        // Predicate under test: run `is_alphabetical` on the elements collected from the parsed MathML.
        fn alphabetical(mathml: Element) -> bool {
            return is_alphabetical( &collect_elements(mathml).unwrap() );
        }

        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
        let c6_h14 = r#"<mrow>  
            <msub><mi>C</mi><mn>6</mn></msub><mo>&#x2063;</mo> 
            <msub><mi>H</mi><mn>14</mn></msub>
             </mrow>"#;
        assert!( parse_mathml_string(c6_h14, alphabetical) );

        let c6_h12_o6 = r#"<mrow>  
             <msub><mi>C</mi><mn>6</mn></msub><mo>&#x2063;</mo> 
             <msub><mi>H</mi><mn>12</mn></msub><mo>&#x2063;</mo>
             <msub><mi>O</mi><mn>6</mn></msub>
              </mrow>"#;
        assert!( parse_mathml_string(c6_h12_o6, alphabetical) );

        // "C" should be first
        let b_c_o = "<mrow> <mi>B</mi> <mo>&#x2063;</mo> <mi>C</mi> <mo>&#x2063;</mo> <mi>O</mi></mrow>";
        assert!( !parse_mathml_string(b_c_o, alphabetical) );

        // not alphabetical
        let p_b_o = "<mrow> <mi>P</mi> <mo>&#x2063;</mo> <mi>B</mi> <mo>&#x2063;</mo> <mi>O</mi></mrow>";
        assert!( !parse_mathml_string(p_b_o, alphabetical) );
    }
1933
1934
    #[test]
    fn test_is_structural() {
        // Predicate under test: run `is_structural` on the elements collected from the parsed MathML.
        fn structural(mathml: Element) -> bool {
            return is_structural( &collect_elements(mathml).unwrap() );
        }

        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
        let c6_h14 = r#"<mrow>  
            <msub><mi>C</mi><mn>6</mn></msub><mo>&#x2063;</mo> 
            <msub><mi>H</mi><mn>14</mn></msub>
             </mrow>"#;
        assert!( !parse_mathml_string(c6_h14, structural) );

        let b_c_o = "<mrow> <mi>B</mi> <mo>&#x2063;</mo> <mi>C</mi> <mo>&#x2063;</mo> <mi>O</mi></mrow>";
        assert!( !parse_mathml_string(b_c_o, structural) );

        let h_o_h = "<mrow> <mi>H</mi> <mo>&#x2063;</mo> <mi>O</mi> <mo>&#x2063;</mo> <mi>H</mi></mrow>";
        assert!( parse_mathml_string(h_o_h, structural) );

        // Already-annotated input: H2 C = C H2 with a marked chemical bond
        let h2c_ch2 = "<mrow data-chem-formula='9'>
                <mmultiscripts data-chem-formula='1'>
                <mi mathvariant='normal' data-chem-element='1'>H</mi>
                <mn>2</mn>
                <none></none>
                </mmultiscripts>
                <mo data-changed='added'>&#x2063;</mo>
                <mi mathvariant='normal' data-chem-element='1'>C</mi>
                <mo data-chemical-bond='true' data-chem-formula-op='1'>=</mo>
                <mi mathvariant='normal' data-chem-element='1'>C</mi>
                <mo data-changed='added'>&#x2063;</mo>
                <mmultiscripts data-chem-formula='1'>
                <mi mathvariant='normal' data-chem-element='1'>H</mi>
                <mn>2</mn>
                <none></none>
                </mmultiscripts>
            </mrow>";
        assert!( parse_mathml_string(h2c_ch2, structural) );
    }
1965
1966
1967
    #[test]
    fn test_electronegativity_order() {
        // Predicate under test, applied to the parsed element itself.
        fn ordered(mathml: Element) -> bool {
            return is_ordered_by_electronegativity( &collect_elements(mathml).unwrap() );
        }
        // Same predicate, but applied to the first child of the parsed element.
        fn first_child_ordered(mathml: Element) -> bool {
            return is_ordered_by_electronegativity( &collect_elements(as_element(mathml.children()[0])).unwrap() );
        }

        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
        let n_h3 = r#"<mrow>  
            <mi>N</mi><mo>&#x2063;</mo> 
            <msub><mi>H</mi><mn>3</mn></msub>
             </mrow>"#;
        assert!( parse_mathml_string(n_h3, ordered) );

        let o_f2 = r#"<mrow>  
            <mi>O</mi><mo>&#x2063;</mo> 
            <msub><mi>F</mi><mn>2</mn></msub>
             </mrow>"#;
        assert!( parse_mathml_string(o_f2, ordered) );

        let rb15_hg16 = r#"<mrow>  
            <msub><mi>Rb</mi><mn>15</mn></msub><mo>&#x2063;</mo> 
            <msub><mi>Hg</mi><mn>16</mn></msub>
             </mrow>"#;
        assert!( parse_mathml_string(rb15_hg16, ordered) );

        let bracketed_si_as4 = r#" 
            <mrow><msup>
                <mo>[</mo>
                    <mi>Si</mi><mo>&#x2063;</mo> 
                    <msub><mi>As</mi><mn>4</mn></msub>
                <mo>]</mo>
                <mrow><mn>8</mn><mo>-</mo></mrow>
            </msup></mrow>"#;
        assert!( parse_mathml_string(bracketed_si_as4, first_child_ordered) );

        let si_h2_br_cl = r#"<mrow>  
                <mi>Si</mi><mo>&#x2063;</mo> 
                <msub><mi>H</mi><mn>2</mn></msub>
                <mi>Br</mi><mo>&#x2063;</mo> 
                <mi>Cl</mi>
                </mrow>"#;
        assert!( parse_mathml_string(si_h2_br_cl, ordered) );
    }
2002
2003
    #[test]
    fn test_order() {
        // The first two inputs are checked with `is_order_ok`; the last one is scored with `likely_chem_formula`.
        let c2_h4_nh2_2 = r#"<mrow>  
            <msub><mi>C</mi><mn>2</mn></msub><mo>&#x2063;</mo> 
            <msub><mi>H</mi><mn>4</mn></msub><mo>&#x2063;</mo>
            <msub><mrow> <mo>(</mo><mi>N</mi> <mo>&#x2063;</mo> <msub> <mi>H</mi> <mn>2</mn> </msub><mo>)</mo> </mrow><mn>2</mn></msub>
             </mrow>"#;
        assert!( parse_mathml_string(c2_h4_nh2_2, is_order_ok) );

        let fe_o_oh = r#"<mrow>
            <mi>Fe</mi><mo>&#x2063;</mo> 
            <mi>O</mi><mo>&#x2063;</mo> 
            <mrow> <mo>(</mo><mrow><mi>O</mi> <mo>&#x2063;</mo><mi>H</mi> </mrow><mo>)</mo> </mrow>
             </mrow>"#;
        assert!( parse_mathml_string(fe_o_oh, is_order_ok) );

        // NOTE: the "// R-4.4.3.3 ..." text below is part of the string literal, not a Rust comment
        let br_s_c_n = r#"<mrow>  // R-4.4.3.3 -- Chain compound doesn't fit rules but should be accepted
                <mi>Br</mi><mo>&#x2063;</mo> 
                <mi>S</mi><mo>&#x2063;</mo> 
                <mi>C</mi><mo>&#x2063;</mo> 
                <mi>N</mi>
                </mrow>"#;
        assert!( parse_mathml_string(br_s_c_n, |mathml| likely_chem_formula(mathml)==5) );
    }
2025
2026
    #[test]
    fn test_simple_double_bond() {
        // Two-element inputs joined by a double-bond operator, scored with `likely_chem_formula`.

        // just under threshold
        let c_eq_c = r#"<mrow><mi>C</mi><mo>=</mo><mi>C</mi></mrow>"#;
        assert!( parse_mathml_string(c_eq_c, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) );

        let c_dots_o = r#"<mrow><mi>C</mi><mo>∷</mo><mi>O</mi></mrow>"#;
        assert!( parse_mathml_string(c_dots_o, |mathml| likely_chem_formula(mathml)==CHEMISTRY_THRESHOLD) );

        // just under threshold
        let n_eq_n = r#"<mrow><mi>N</mi><mo>=</mo><mi>N</mi></mrow>"#;
        assert!( parse_mathml_string(n_eq_n, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) );

        let sn_eq_sn = r#"<mrow><mi>Sn</mi><mo>=</mo><mi>Sn</mi></mrow>"#;
        assert!( parse_mathml_string(sn_eq_sn, |mathml| likely_chem_formula(mathml) == 8) );

        // just under threshold
        let o_eq_s = r#"<mrow><mi>O</mi><mo>=</mo><mi>S</mi></mrow>"#;
        assert!( parse_mathml_string(o_eq_s, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) );

        // The remaining inputs are expected to be rejected outright
        let k_eq_k = r#"<mrow><mi>K</mi><mo>=</mo><mi>K</mi></mrow>"#;
        assert!( parse_mathml_string(k_eq_k, |mathml| likely_chem_formula(mathml) == NOT_CHEMISTRY) );

        let c_eq_k = r#"<mrow><mi>C</mi><mo>=</mo><mi>K</mi></mrow>"#;
        assert!( parse_mathml_string(c_eq_k, |mathml| likely_chem_formula(mathml) == NOT_CHEMISTRY) );
    }
2043
2044
    #[test]
    fn test_double_bond() {
        // Longer inputs containing an "=" bond, each pinned to an exact `likely_chem_formula` score.
        let ch2_eq_c = r#"<mrow><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mo>=</mo><mi>C</mi></mrow>"#;
        assert!( parse_mathml_string(ch2_eq_c, |mathml| likely_chem_formula(mathml)==8) );

        let ch2_eq_chr = r#"<mrow><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mo>=</mo>
        <mi>C</mi><mi>H</mi><mi>R</mi></mrow>"#;
        assert!( parse_mathml_string(ch2_eq_chr, |mathml| likely_chem_formula(mathml)==12) );

        let h2c_eq_ch2 = r#"<mrow><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mi mathvariant='normal'>C</mi><mo>=</mo>
                <mi>C</mi><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub></mrow>"#;
        assert!( parse_mathml_string(h2c_eq_ch2, |mathml| likely_chem_formula(mathml)==11) );

        let h_n_eq_n_h = r#"<mrow><mi>H</mi><mo>-</mo><mi>N</mi><mo>=</mo><mi>N</mi><mo>-</mo><mi>H</mi></mrow>"#;
        assert!( parse_mathml_string(h_n_eq_n_h, |mathml| likely_chem_formula(mathml)==10) );

        // Same shape as the first input but with H3 instead of H2: expected to be rejected
        let ch3_eq_c = r#"<mrow><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>3</mn></msub><mo>=</mo><mi>C</mi></mrow>"#;
        assert!( parse_mathml_string(ch3_eq_c, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
    }
2059
2060
    #[test]
    #[ignore]   // It would be good to say "not chemistry" for this, but there aren't rules for that at the moment
    fn test_water_bond() {
        // H2O followed by "=O"; prints the computed score and currently pins it to 8.
        let h2o_eq_o = r#"<mrow><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mi mathvariant='normal'>O</mi><mo>=</mo><mi>O</mi></mrow>"#;
        let scores_eight = parse_mathml_string(h2o_eq_o, |mathml| {println!("val={}", likely_chem_formula(mathml)); likely_chem_formula(mathml)==8});
        assert!( scores_eight );
        // assert!( parse_mathml_string(test11, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
    }
2067
2068
2069
    #[test]
    fn test_triple_bond() {
        // Inputs containing a triple-bond operator ("≡" or ":::"), scored with `likely_chem_formula`.
        let c_tri_c = r#"<mrow><mi>C</mi><mo>≡</mo><mi>C</mi></mrow>"#;
        assert!( parse_mathml_string(c_tri_c, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) );

        let c_colons_o = r#"<mrow><mi>C</mi><mo>:::</mo><mi>O</mi></mrow>"#;
        assert!( parse_mathml_string(c_colons_o, |mathml| likely_chem_formula(mathml)==CHEMISTRY_THRESHOLD) );

        let h_c_tri_c_h = r#"<mrow><mi>H</mi><mo>-</mo><mi>C</mi><mo>≡</mo><mi>C</mi><mo>-</mo><mi>H</mi></mrow>"#;
        assert!( parse_mathml_string(h_c_tri_c_h, |mathml| likely_chem_formula(mathml)==10) );

        // NOTE: same input as the previous case
        let h_c_tri_c_h_again = r#"<mrow><mi>H</mi><mo>-</mo><mi>C</mi><mo>≡</mo><mi>C</mi><mo>-</mo><mi>H</mi></mrow>"#;
        assert!( parse_mathml_string(h_c_tri_c_h_again, |mathml| likely_chem_formula(mathml)==10) );

        let n_c_tri_c_n = r#"<mrow><mi>N</mi><mo>-</mo><mi>C</mi><mo>≡</mo><mi>C</mi><mo>-</mo><mi>N</mi></mrow>"#;
        assert!( parse_mathml_string(n_c_tri_c_n, |mathml| likely_chem_formula(mathml)==10) );

        // 1-Propyne
        let propyne = r#"<mrow><mi>H</mi><mo>-</mo><mi>C</mi><mo>≡</mo>
            <mi>C</mi><mo>-</mo><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>3</mn></msub></mrow>"#;
        assert!( parse_mathml_string(propyne, |mathml| likely_chem_formula(mathml)==14) );
        // assert!( parse_mathml_string(test6, |mathml| {println!("val={}", likely_chem_formula(mathml)); likely_chem_formula(mathml)==10}) );

        // The remaining inputs are expected to be rejected outright
        let o_colons_s = r#"<mrow><mi>O</mi><mo>:::</mo><mi>S</mi></mrow>"#;
        assert!( parse_mathml_string(o_colons_s, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );

        let pb_tri_pb = r#"<mrow><mi>Pb</mi><mo>≡</mo><mi>Pb</mi></mrow>"#;
        assert!( parse_mathml_string(pb_tri_pb, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );

        let c_tri_k = r#"<mrow><mi>C</mi><mo>≡</mo><mi>K</mi></mrow>"#;
        assert!( parse_mathml_string(c_tri_k, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
    }
2092
2093
    #[test]
    fn split_mi() {
        // A single <mi>LiF</mi> is expected to come out as separate "Li" and "F" elements.
        let input = "<math><mi>LiF</mi></math>";
        let expected = "<math>
            <mrow data-changed='added' data-chem-formula='5'>
                <mi data-chem-element='3'>Li</mi>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <mi mathvariant='normal' data-split='true' data-chem-element='1'>F</mi>
            </mrow>
       </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2105
2106
    #[test]
    fn no_split_mi() {
        // <mi>HC</mi> is expected to stay a single, unannotated <mi>.
        let input = "<math><mi>HC</mi></math>";
        let expected = "<math>
             <mi>HC</mi>
        </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2114
2115
    #[test]
    fn combine_mi() {
        // Separate <mi>C</mi><mi>l</mi> after an H are expected to be merged into a single "Cl".
        let input = "<math><mi>H</mi><mi>C</mi><mi>l</mi></math>";
        let expected = " <math>
            <mrow data-changed='added' data-chem-formula='5'>
            <mi data-chem-element='1'>H</mi>
            <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
            <mi data-merged='true' data-chem-element='3'>Cl</mi>
            </mrow>
        </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2127
2128
    #[test]
    fn no_combine() {
        // <mi>C</mi><mi>l</mi> on their own are expected to stay separate, joined by an added U+2062.
        let input = "<math><mi>C</mi><mi>l</mi></math>";
        let expected = "<math>
            <mrow data-changed='added'>
                <mi>C</mi>
                <mo data-changed='added'>&#x2062;</mo>
                <mi>l</mi>
            </mrow>
        </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2140
2141
    #[test]
    fn add_script() {
        // "SO" followed by an msub with an empty base: the subscript is expected to end up on the "O".
        let input = "<math> <mi>SO</mi>  <msub> <mrow></mrow> <mn>2</mn> </msub> </math>";
        let expected = "<math>
            <mrow data-changed='added' data-chem-formula='5'>
                <mi mathvariant='normal' data-chem-element='1'>S</mi>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <mmultiscripts data-chem-formula='2'>
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>O</mi>
                    <mn>2</mn>
                    <none></none>
                </mmultiscripts>
            </mrow>
       </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2157
2158
    #[test]
    fn add_script_bug_287() {
        // msubsup whose base is an mrow holding "SO": the scripts are expected to end up on the split-off "O".
        let input = r#"<math><mrow>
            <msubsup>
                <mrow><mi mathvariant="normal">SO</mi></mrow>
                <mn>4</mn>
                <mrow><mn>2</mn><mo>&#x2212;</mo></mrow>
            </msubsup>
            </mrow></math>"#;
        let expected = r#"<math>
            <mrow data-changed='added' data-chem-formula='7'>
                <mi mathvariant='normal' data-chem-element='1'>S</mi>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <msubsup data-chem-formula='5'>
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>O</mi>
                    <mn>4</mn>
                    <mrow data-chem-formula='3'><mn>2</mn><mo>-</mo></mrow>
                </msubsup>
            </mrow>
            </math>"#;
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2180
2181
    #[test]
    fn salt() {
        // Adjacent <mi>Na</mi><mi>Cl</mi> are expected to be marked as a chemical formula.
        let input = "<math><mi>Na</mi><mi>Cl</mi></math>";
        let expected = "<math>
            <mrow data-changed='added' data-chem-formula='7'>
                <mi data-chem-element='3'>Na</mi>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <mi data-chem-element='3'>Cl</mi>
            </mrow>
        </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2193
2194
    #[test]
    fn water() {
        // H2O written with mathvariant='normal' letters is expected to be marked as a chemical formula.
        let input = "<math><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mi mathvariant='normal'>O</mi></math>";
        let expected = "<math>
            <mrow data-changed='added' data-chem-formula='5'>
                <msub data-chem-formula='2'>
                    <mi mathvariant='normal' data-chem-element='2'>H</mi>
                    <mn>2</mn>
                </msub>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
            </mrow>
        </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2209
2210
    #[test]
    fn mhchem_water() {
        // H2O wrapped in extra mrows, with a phantom "A" as the msub base and an mpadded subscript;
        // the expected output attaches the 2 to the H via mmultiscripts.
        let input = "<math>
            <mrow>
            <mrow>
                <mi mathvariant='normal'>H</mi>
            </mrow>
            <msub>
                <mrow>
                <mrow>
                    <mpadded width='0'>
                    <mphantom>
                        <mi>A</mi>
                    </mphantom>
                    </mpadded>
                </mrow>
                </mrow>
                <mrow>
                <mrow>
                    <mpadded height='0'>
                    <mn>2</mn>
                    </mpadded>
                </mrow>
                </mrow>
            </msub>
            <mrow>
                <mi mathvariant='normal'>O</mi>
            </mrow>
            </mrow>
        </math>";
        let expected = "<math>
            <mrow data-chem-formula='5'>
                <mmultiscripts data-chem-formula='2'>
                    <mi mathvariant='normal' data-chem-element='2'>H</mi>
                    <mn>2</mn>
                    <none></none>
                </mmultiscripts>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
            </mrow>
       </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2253
2254
    #[test]
    fn carbon() {
        // not enough to trigger recognition
        let input = "<math><mi>C</mi></math>";
        let expected = " <math>
            <mi>C</mi>
        </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2262
2263
    #[test]
    fn sulfate() {
        // Bracketed SO4 raised to "2−": the expected output marks the whole msup as a chemical formula.
        let input = "<math><mrow><msup>
                <mrow><mo>[</mo><mi>S</mi><msub><mi>O</mi><mn>4</mn></msub><mo>]</mo></mrow>
                <mrow><mn>2</mn><mo>&#x2212;</mo></mrow>
            </msup></mrow></math>";
        let expected = "<math>
        <msup data-chem-formula='9'>
          <mrow data-chem-formula='6'>
            <mo>[</mo>
            <mrow data-changed='added' data-chem-formula='3'>
              <mi data-chem-element='1'>S</mi>
              <mo data-changed='added'>&#x2063;</mo>
              <msub data-chem-formula='1'>
                <mi data-chem-element='1'>O</mi>
                <mn>4</mn>
              </msub>
            </mrow>
            <mo>]</mo>
          </mrow>
          <mrow data-chem-formula='3'>
            <mn>2</mn>
            <mo>-</mo>
          </mrow>
        </msup>
       </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2291
2292
    #[test]
    fn aluminum_sulfate() {
        // Al2 followed by a parenthesized SO4 with subscript 3.
        let input = "<math><mrow><msub><mi>Al</mi><mn>2</mn></msub>
                <msub><mrow><mo>(</mo><mi>S</mi><msub><mi>O</mi><mn>4</mn></msub><mo>)</mo></mrow><mn>3</mn></msub></mrow></math>";
        let expected = " <math>
                <mrow data-chem-formula='10'>
                    <msub data-chem-formula='3'>
                        <mi data-chem-element='3'>Al</mi>
                        <mn>2</mn>
                    </msub>
                    <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                    <msub data-chem-formula='6'>
                        <mrow data-chem-formula='6'>
                        <mo>(</mo>
                        <mrow data-changed='added' data-chem-formula='3'>
                            <mi data-chem-element='1'>S</mi>
                            <mo data-changed='added'>&#x2063;</mo>
                            <msub data-chem-formula='1'>
                            <mi data-chem-element='1'>O</mi>
                            <mn>4</mn>
                            </msub>
                        </mrow>
                        <mo>)</mo>
                        </mrow>
                        <mn>3</mn>
                    </msub>
                </mrow>
            </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2322
2323
    #[test]
    fn ethanol_bonds() {
        // CH3 − CH2 − OH: the U+2212 operators are expected to come out as "-" marked as chemical bonds.
        let input = "<math>
                <mrow>
                    <mi>C</mi>
                    <msub>  <mi>H</mi> <mn>3</mn> </msub>
                    <mo>&#x2212;</mo>
                    <mi>C</mi>
                    <msub>  <mi>H</mi> <mn>2</mn> </msub>
                    <mo>&#x2212;</mo>
                    <mi>O</mi>
                    <mi>H</mi>
                </mrow>
            </math>";
        let expected = "<math>
        <mrow data-chem-formula='13'>
          <mi data-chem-element='1'>C</mi>
          <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
          <msub data-chem-formula='1'>
            <mi data-chem-element='1'>H</mi>
            <mn>3</mn>
          </msub>
          <mo data-chemical-bond='true' data-chem-formula-op='1'>-</mo>
          <mi data-chem-element='1'>C</mi>
          <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
          <msub data-chem-formula='1'>
            <mi data-chem-element='1'>H</mi>
            <mn>2</mn>
          </msub>
          <mo data-chemical-bond='true' data-chem-formula-op='1'>-</mo>
          <mi data-chem-element='1'>O</mi>
          <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
          <mi data-chem-element='1'>H</mi>
        </mrow>
       </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2360
2361
    #[test]
    fn dichlorine_hexoxide() {
        // init_logger();
        // Two bracketed ions side by side: [ClO2] with a "+" superscript and [ClO4] with a "-" superscript.
        let input = "<math><mrow>
            <msup>
            <mrow><mo>[</mo><mi>Cl</mi><msub><mi>O</mi><mn>2</mn></msub><mo>]</mo></mrow>
            <mo>+</mo>
            </msup>
            <msup>
            <mrow><mo>[</mo><mi>Cl</mi><msub><mi>O</mi><mn>4</mn></msub><mo>]</mo></mrow>
            <mo>-</mo>
            </msup>
        </mrow></math>";
        let expected = "<math>
            <mrow data-chem-formula='19'>
                <msup data-chem-formula='9'>
                    <mrow data-chem-formula='8'>
                    <mo>[</mo>
                    <mrow data-changed='added' data-chem-formula='5'>
                        <mi data-chem-element='3'>Cl</mi>
                        <mo data-changed='added'>&#x2063;</mo>
                        <msub data-chem-formula='1'>
                        <mi data-chem-element='1'>O</mi>
                        <mn>2</mn>
                        </msub>
                    </mrow>
                    <mo>]</mo>
                    </mrow>
                    <mo>+</mo>
                </msup>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <msup data-chem-formula='9'>
                    <mrow data-chem-formula='8'>
                    <mo>[</mo>
                    <mrow data-changed='added' data-chem-formula='5'>
                        <mi data-chem-element='3'>Cl</mi>
                        <mo data-changed='added'>&#x2063;</mo>
                        <msub data-chem-formula='1'>
                        <mi data-chem-element='1'>O</mi>
                        <mn>4</mn>
                        </msub>
                    </mrow>
                    <mo>]</mo>
                    </mrow>
                    <mo>-</mo>
                </msup>
            </mrow>
       </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2411
2412
    #[test]
    fn ethylene_with_bond() {
        // H2C = CH2: the "=" is expected to be marked as a chemical bond.
        let input = "<math><mrow>
                <msub><mi>H</mi><mn>2</mn></msub><mi>C</mi>
                <mo>=</mo>
                <mi>C</mi><msub><mi>H</mi><mn>2</mn></msub>
            </mrow></math>";
        let expected = "<math>
            <mrow data-chem-formula='8'>
                <msub data-chem-formula='1'>
                    <mi data-chem-element='1'>H</mi>
                    <mn>2</mn>
                </msub>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <mi data-chem-element='1'>C</mi>
                <mo data-chemical-bond='true' data-chem-formula-op='1'>=</mo>
                <mi data-chem-element='1'>C</mi>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <msub data-chem-formula='1'>
                    <mi data-chem-element='1'>H</mi>
                    <mn>2</mn>
                </msub>
            </mrow>
        </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2438
2439
    #[test]
    fn ferric_chloride_aq() {
        // FeCl3 followed by a parenthesized "aq" given as separate <mo>/<mi> elements.
        let input = "<math><mrow>
            <mi>Fe</mi>
            <msub><mi>Cl</mi><mn>3</mn></msub>
            <mrow><mo>(</mo><mrow><mi>aq</mi></mrow><mo>)</mo></mrow>
        </mrow></math>";
        let expected = "<math>
            <mrow data-chem-formula='11'>
                <mi data-chem-element='3'>Fe</mi>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <msub data-chem-formula='3'>
                    <mi data-chem-element='3'>Cl</mi>
                    <mn>3</mn>
                </msub>
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
                <mrow data-chem-formula='3'>
                    <mo>(</mo>
                    <mi>aq</mi>
                    <mo>)</mo>
                </mrow>
            </mrow>
       </math>";
        let matches = are_strs_canonically_equal(input, expected, &[]);
        assert!(matches);
    }
2464
2465
    #[test]
2466
1
    fn ferric_chloride_aq_as_mi() {
2467
1
        let test = "<math><mrow>
2468
1
            <mi>Fe</mi>
2469
1
            <msub><mi>Cl</mi><mn>3</mn></msub>
2470
1
            <mi>(aq)</mi>
2471
1
        </mrow></math>";
2472
1
        let target = "<math>
2473
1
            <mrow data-chem-formula='11'>
2474
1
                <mi data-chem-element='3'>Fe</mi>
2475
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2476
1
                <msub data-chem-formula='3'>
2477
1
                    <mi data-chem-element='3'>Cl</mi>
2478
1
                    <mn>3</mn>
2479
1
                </msub>
2480
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2481
1
                <mrow data-chem-formula='3'>
2482
1
                    <mo>(</mo>
2483
1
                    <mi>aq</mi>
2484
1
                    <mo>)</mo>
2485
1
                </mrow>
2486
1
            </mrow>
2487
1
        </math>";
2488
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2489
1
    }
2490
2491
    #[test]
2492
1
    fn chemtype_ammonia() {
2493
1
        let test = r#"<math><msub><mi>NH</mi><mn>3</mn></msub></math>"#;
2494
1
        let target = " <math>
2495
1
            <mrow data-changed='added' data-chem-formula='5'>
2496
1
            <mi mathvariant='normal' data-chem-element='1'>N</mi>
2497
1
            <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2498
1
            <msub data-chem-formula='2'>
2499
1
                <mi mathvariant='normal' data-chem-element='1' data-split='true'>H</mi>
2500
1
                <mn>3</mn>
2501
1
            </msub>
2502
1
            </mrow>
2503
1
        </math>";
2504
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2505
1
    }
2506
2507
    #[test]
2508
1
    fn mhchem_ammonia() {
2509
1
        let test = r#"<math>
2510
1
            <mrow>
2511
1
                <mi data-mjx-auto-op="false">NH</mi>
2512
1
                <msub>
2513
1
                    <mpadded width="0">
2514
1
                    <mphantom>
2515
1
                        <mi>A</mi>
2516
1
                    </mphantom>
2517
1
                    </mpadded>
2518
1
                    <mpadded height="0">
2519
1
                    <mn>3</mn>
2520
1
                    </mpadded>
2521
1
                </msub>
2522
1
            </mrow>
2523
1
        </math>"#;
2524
1
        let target = "<math>
2525
1
            <mrow data-chem-formula='5'>
2526
1
                <mi mathvariant='normal' data-chem-element='1'>N</mi>
2527
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2528
1
                <mmultiscripts data-mjx-auto-op='false' data-chem-formula='2'>
2529
1
                <mi mathvariant='normal' data-mjx-auto-op='false' data-split='true' data-chem-element='1'>H</mi>
2530
1
                <mn>3</mn>
2531
1
                <none></none>
2532
1
                </mmultiscripts>
2533
1
            </mrow>
2534
1
            </math>";
2535
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2536
1
    }
2537
2538
    #[test]
2539
1
    fn mhchem_so4() {
2540
1
        let test = "<math>
2541
1
            <mrow>
2542
1
            <mi>SO</mi>
2543
1
            <msub>
2544
1
                <mpadded width='0'>
2545
1
                <mphantom>
2546
1
                    <mi>A</mi>
2547
1
                </mphantom>
2548
1
                </mpadded>
2549
1
                <mpadded height='0'>
2550
1
                <mn>4</mn>
2551
1
                </mpadded>
2552
1
            </msub>
2553
1
            <msup>
2554
1
                <mpadded width='0'>
2555
1
                <mphantom>
2556
1
                    <mi>A</mi>
2557
1
                </mphantom>
2558
1
                </mpadded>
2559
1
                <mrow>
2560
1
                <mn>2</mn>
2561
1
                <mo>&#x2212;</mo>
2562
1
                </mrow>
2563
1
            </msup>
2564
1
            </mrow>
2565
1
        </math>";
2566
1
        let target = "<math>
2567
1
            <mrow data-chem-formula='7'>
2568
1
                <mi mathvariant='normal' data-chem-element='1'>S</mi>
2569
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2570
1
                <mmultiscripts data-chem-formula='5'>
2571
1
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>O</mi>
2572
1
                    <mn>4</mn>
2573
1
                    <none/>
2574
1
                    <none/>
2575
1
                    <mrow data-chem-formula='3'>
2576
1
                    <mn>2</mn>
2577
1
                    <mo>-</mo>
2578
1
                    </mrow>
2579
1
                </mmultiscripts>
2580
1
            </mrow>
2581
1
       </math>";
2582
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2583
1
    }
2584
2585
    #[test]
2586
1
    fn mhchem_short_ion() {
2587
1
        let test = "  <math>
2588
1
                <mrow>
2589
1
                <mi mathvariant='normal'>H</mi>
2590
1
                <msub>
2591
1
                    <mpadded width='0'> <mphantom> <mi>A</mi> </mphantom>  </mpadded>
2592
1
                    <mpadded height='0'> <mn>3</mn></mpadded>
2593
1
                </msub>
2594
1
                <mi mathvariant='normal'>O</mi>
2595
1
                <msup>
2596
1
                    <mpadded width='0'> <mphantom> <mi>A</mi> </mphantom>  </mpadded>
2597
1
                    <mo>+</mo>
2598
1
                </msup>
2599
1
                </mrow>
2600
1
            </math>";
2601
1
        let target = "<math>
2602
1
            <mrow data-chem-formula='6'>
2603
1
                <mmultiscripts data-chem-formula='2'>
2604
1
                    <mi mathvariant='normal' data-chem-element='2'>H</mi>
2605
1
                    <mn>3</mn>
2606
1
                    <none></none>
2607
1
                </mmultiscripts>
2608
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2609
1
                <mmultiscripts data-chem-formula='3'>
2610
1
                    <mi mathvariant='normal' data-chem-element='2'>O</mi>
2611
1
                    <none></none>
2612
1
                    <mo>+</mo>
2613
1
                </mmultiscripts>
2614
1
            </mrow>
2615
1
       </math>";
2616
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2617
1
    }
2618
2619
    #[test]
2620
1
    fn mhchem_ions_and_state() {
2621
1
        let test = "<math>
2622
1
            <mrow>
2623
1
            <mrow>
2624
1
                <mi>Na</mi>
2625
1
            </mrow>
2626
1
            <msup>
2627
1
                <mrow>
2628
1
                <mrow>
2629
1
                    <mpadded width='0'>
2630
1
                    <mphantom>
2631
1
                        <mi>A</mi>
2632
1
                    </mphantom>
2633
1
                    </mpadded>
2634
1
                </mrow>
2635
1
                </mrow>
2636
1
                <mrow>
2637
1
                <mo>+</mo>
2638
1
                </mrow>
2639
1
            </msup>
2640
1
            <mo stretchy='false'>(</mo>
2641
1
            <mrow>
2642
1
                <mi>aq</mi>
2643
1
            </mrow>
2644
1
            <mo stretchy='false'>)</mo>
2645
1
            <mrow>
2646
1
                <mi>Cl</mi>
2647
1
            </mrow>
2648
1
            <msup>
2649
1
                <mrow>
2650
1
                <mrow>
2651
1
                    <mpadded width='0'>
2652
1
                    <mphantom>
2653
1
                        <mi>A</mi>
2654
1
                    </mphantom>
2655
1
                    </mpadded>
2656
1
                </mrow>
2657
1
                </mrow>
2658
1
                <mrow>
2659
1
                <mo>&#x2212;</mo>
2660
1
                </mrow>
2661
1
            </msup>
2662
1
            <mspace width='0.111em'></mspace>
2663
1
            <mo stretchy='false'>(</mo>
2664
1
            <mrow>
2665
1
                <mi>aq</mi>
2666
1
            </mrow>
2667
1
            <mo stretchy='false'>)</mo>
2668
1
            </mrow>
2669
1
            </math>";
2670
1
        let target = "<math>
2671
1
            <mrow data-chem-formula='18'>
2672
1
                <mmultiscripts data-chem-formula='4'>
2673
1
                    <mi data-chem-element='3'>Na</mi>
2674
1
                    <none></none>
2675
1
                    <mo>+</mo>
2676
1
                </mmultiscripts>
2677
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2678
1
                <mrow data-changed='added' data-chem-formula='3'>
2679
1
                    <mo stretchy='false'>(</mo>
2680
1
                    <mi>aq</mi>
2681
1
                    <mo stretchy='false'>)</mo>
2682
1
                </mrow>
2683
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2684
1
                <mmultiscripts data-chem-formula='5'>
2685
1
                    <mi data-chem-element='3'>Cl</mi>
2686
1
                    <none></none>
2687
1
                    <mo>-</mo>
2688
1
                </mmultiscripts>
2689
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2690
1
                <mrow data-changed='added' data-chem-formula='3'>
2691
1
                    <mo stretchy='false' data-previous-space-width='0.111'>(</mo>
2692
1
                    <mi>aq</mi>
2693
1
                    <mo stretchy='false'>)</mo>
2694
1
                </mrow>
2695
1
            </mrow>
2696
1
        </math>";
2697
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2698
1
    }
2699
2700
    #[test]
2701
1
    fn ethylene_with_colon_bond() {
2702
1
        let test = "<math><mrow>
2703
1
                <msub><mi>H</mi><mn>2</mn></msub><mi>C</mi>
2704
1
                <mo>::</mo>
2705
1
                <mi>C</mi><msub><mi>H</mi><mn>2</mn></msub>
2706
1
            </mrow></math>";
2707
1
        let target = "<math>
2708
1
            <mrow data-chem-formula='8'>
2709
1
                <msub data-chem-formula='1'>
2710
1
                    <mi data-chem-element='1'>H</mi>
2711
1
                    <mn>2</mn>
2712
1
                </msub>
2713
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2714
1
                <mi data-chem-element='1'>C</mi>
2715
1
                <mo data-chemical-bond='true' data-chem-formula-op='1'>∷</mo>
2716
1
                <mi data-chem-element='1'>C</mi>
2717
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2718
1
                <msub data-chem-formula='1'>
2719
1
                    <mi data-chem-element='1'>H</mi>
2720
1
                    <mn>2</mn>
2721
1
                </msub>
2722
1
            </mrow>
2723
1
        </math>";
2724
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2725
1
    }
2726
2727
    #[test]
2728
1
    fn mhchem_u238() {
2729
1
        let test = "<math>
2730
1
        <mrow>
2731
1
          <msubsup>
2732
1
            <mrow>
2733
1
              <mrow>
2734
1
                <mpadded width='0'>
2735
1
                  <mphantom>
2736
1
                    <mi>A</mi>
2737
1
                  </mphantom>
2738
1
                </mpadded>
2739
1
              </mrow>
2740
1
            </mrow>
2741
1
            <mrow>
2742
1
              <mrow>
2743
1
                <mpadded height='0' depth='0'>
2744
1
                  <mphantom></mphantom>
2745
1
                </mpadded>
2746
1
              </mrow>
2747
1
            </mrow>
2748
1
            <mrow>
2749
1
              <mrow>
2750
1
                <mpadded height='0' depth='0'>
2751
1
                  <mphantom>
2752
1
                    <mn>238</mn>
2753
1
                  </mphantom>
2754
1
                </mpadded>
2755
1
              </mrow>
2756
1
            </mrow>
2757
1
          </msubsup>
2758
1
          <mspace width='-0.083em' linebreak='nobreak'></mspace>
2759
1
          <msubsup>
2760
1
            <mrow>
2761
1
              <mrow>
2762
1
                <mpadded width='0'>
2763
1
                  <mphantom>
2764
1
                    <mi>A</mi>
2765
1
                  </mphantom>
2766
1
                </mpadded>
2767
1
              </mrow>
2768
1
            </mrow>
2769
1
            <mrow>
2770
1
              <mrow>
2771
1
                <mpadded width='0'>
2772
1
                  <mphantom>
2773
1
                    <mn>2</mn>
2774
1
                  </mphantom>
2775
1
                </mpadded>
2776
1
              </mrow>
2777
1
              <mrow>
2778
1
                <mpadded width='0' lspace='-1width'>
2779
1
                  <mrow>
2780
1
                    <mpadded height='0'></mpadded>
2781
1
                  </mrow>
2782
1
                </mpadded>
2783
1
              </mrow>
2784
1
            </mrow>
2785
1
            <mrow>
2786
1
              <mrow>
2787
1
                <mpadded height='0'>
2788
1
                  <mrow>
2789
1
                    <mpadded width='0'>
2790
1
                      <mphantom>
2791
1
                        <mn>2</mn>
2792
1
                      </mphantom>
2793
1
                    </mpadded>
2794
1
                  </mrow>
2795
1
                </mpadded>
2796
1
              </mrow>
2797
1
              <mrow>
2798
1
                <mpadded width='0' lspace='-1width'>
2799
1
                  <mn>238</mn>
2800
1
                </mpadded>
2801
1
              </mrow>
2802
1
            </mrow>
2803
1
          </msubsup>
2804
1
          <mrow>
2805
1
            <mi mathvariant='normal'>U</mi>
2806
1
          </mrow>
2807
1
        </mrow>
2808
1
      </math>";
2809
1
        let target = " <math>
2810
1
            <mmultiscripts data-previous-space-width='-0.083' data-chem-formula='5'>
2811
1
                <mi mathvariant='normal' data-chem-element='2'>U</mi>
2812
1
                <mprescripts></mprescripts>
2813
1
                <none></none>
2814
1
                <mn>238</mn>
2815
1
            </mmultiscripts>
2816
1
         </math>";
2817
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2818
1
    }
2819
2820
    #[test]
2821
1
    fn mhchem_hcl_aq() {
2822
1
        let test = "<math>
2823
1
        <mrow>
2824
1
          <mn>2</mn>
2825
1
          <mstyle scriptlevel='0'>
2826
1
            <mspace width='0.167em'></mspace>
2827
1
          </mstyle>
2828
1
          <mrow>
2829
1
            <mi>HCl</mi>
2830
1
          </mrow>
2831
1
          <mspace width='0.111em'></mspace>
2832
1
          <mo stretchy='false'>(</mo>
2833
1
          <mrow>
2834
1
            <mi>aq</mi>
2835
1
          </mrow>
2836
1
          <mo stretchy='false'>)</mo>
2837
1
        </mrow>
2838
1
      </math>";
2839
1
        let target = "<math>
2840
1
            <mrow data-chem-formula='9'>
2841
1
                <mn>2</mn>
2842
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2062;</mo>
2843
1
                <mrow data-changed='added' data-chem-formula='9'>
2844
1
                    <mi mathvariant='normal' data-previous-space-width='0.167' data-chem-element='1'>H</mi>
2845
1
                    <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2846
1
                    <mi data-split='true' data-chem-element='3'>Cl</mi>
2847
1
                    <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2848
1
                    <mrow data-changed='added' data-chem-formula='3'>
2849
1
                    <mo stretchy='false' data-previous-space-width='0.111'>(</mo>
2850
1
                    <mi>aq</mi>
2851
1
                    <mo stretchy='false'>)</mo>
2852
1
                    </mrow>
2853
1
                </mrow>
2854
1
            </mrow>
2855
1
        </math>";
2856
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2857
1
    }
2858
2859
    #[test]
2860
1
    fn mhchem_nested_sub() {
2861
        // from \ce{(CH3)3}
2862
1
        let test = "<math>
2863
1
        <mrow>
2864
1
          <mo stretchy='false'>(</mo>
2865
1
          <mrow>
2866
1
            <mi>CH</mi>
2867
1
          </mrow>
2868
1
          <msub>
2869
1
            <mrow>
2870
1
              <mrow>
2871
1
                <mpadded width='0'>
2872
1
                  <mphantom>
2873
1
                    <mi>A</mi>
2874
1
                  </mphantom>
2875
1
                </mpadded>
2876
1
              </mrow>
2877
1
            </mrow>
2878
1
            <mrow>
2879
1
              <mrow>
2880
1
                <mpadded height='0'>
2881
1
                  <mn>3</mn>
2882
1
                </mpadded>
2883
1
              </mrow>
2884
1
            </mrow>
2885
1
          </msub>
2886
1
          <mo stretchy='false'>)</mo>
2887
1
          <msub>
2888
1
            <mrow>
2889
1
              <mrow>
2890
1
                <mpadded width='0'>
2891
1
                  <mphantom>
2892
1
                    <mi>A</mi>
2893
1
                  </mphantom>
2894
1
                </mpadded>
2895
1
              </mrow>
2896
1
            </mrow>
2897
1
            <mrow>
2898
1
              <mrow>
2899
1
                <mpadded height='0'>
2900
1
                  <mn>3</mn>
2901
1
                </mpadded>
2902
1
              </mrow>
2903
1
            </mrow>
2904
1
          </msub>
2905
1
        </mrow>
2906
1
      </math>";
2907
1
    let target = "<math>
2908
1
        <mmultiscripts data-chem-formula='8'>
2909
1
            <mrow data-changed='added' data-chem-formula='8'>
2910
1
                <mo stretchy='false'>(</mo>
2911
1
                <mrow data-changed='added' data-chem-formula='5'>
2912
1
                <mi mathvariant='normal' data-chem-element='1'>C</mi>
2913
1
                <mo data-changed='added'>&#x2063;</mo>
2914
1
                <mmultiscripts data-chem-formula='2'>
2915
1
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>H</mi>
2916
1
                    <mn>3</mn>
2917
1
                    <none></none>
2918
1
                </mmultiscripts>
2919
1
                </mrow>
2920
1
                <mo stretchy='false'>)</mo>
2921
1
            </mrow>
2922
1
            <mn>3</mn>
2923
1
            <none></none>
2924
1
        </mmultiscripts>
2925
1
    </math>";
2926
1
    assert!(are_strs_canonically_equal(test, target, &[]));
2927
1
    }
2928
2929
    #[test]
2930
1
    fn mhchem_isotopes() {
2931
        // from \ce{^{18}O{}^{16}O}
2932
1
        let test = "<math>
2933
1
        <mrow>
2934
1
          <msubsup>
2935
1
            <mpadded width='0'>
2936
1
              <mphantom>
2937
1
                <mi>A</mi>
2938
1
              </mphantom>
2939
1
            </mpadded>
2940
1
            <mpadded height='0' depth='0'>
2941
1
              <mphantom></mphantom>
2942
1
            </mpadded>
2943
1
            <mpadded height='0' depth='0'>
2944
1
              <mphantom>
2945
1
                <mn>18</mn>
2946
1
              </mphantom>
2947
1
            </mpadded>
2948
1
          </msubsup>
2949
1
          <mspace width='-0.083em'></mspace>
2950
1
          <msubsup>
2951
1
            <mpadded width='0'>
2952
1
              <mphantom>
2953
1
                <mi>A</mi>
2954
1
              </mphantom>
2955
1
            </mpadded>
2956
1
            <mrow>
2957
1
              <mpadded width='0'>
2958
1
                <mphantom>
2959
1
                  <mn>2</mn>
2960
1
                </mphantom>
2961
1
              </mpadded>
2962
1
              <mpadded width='0' lspace='-1width'>
2963
1
                <mpadded height='0'></mpadded>
2964
1
              </mpadded>
2965
1
            </mrow>
2966
1
            <mrow>
2967
1
              <mpadded height='0'>
2968
1
                <mpadded width='0'>
2969
1
                  <mphantom>
2970
1
                    <mn>2</mn>
2971
1
                  </mphantom>
2972
1
                </mpadded>
2973
1
              </mpadded>
2974
1
              <mpadded width='0' lspace='-1width'>
2975
1
                <mn>18</mn>
2976
1
              </mpadded>
2977
1
            </mrow>
2978
1
          </msubsup>
2979
1
          <mi mathvariant='normal'>O</mi>
2980
1
          <mspace width='0.111em'></mspace>
2981
1
          <msubsup>
2982
1
            <mpadded width='0'>
2983
1
              <mphantom>
2984
1
                <mi>A</mi>
2985
1
              </mphantom>
2986
1
            </mpadded>
2987
1
            <mpadded height='0' depth='0'>
2988
1
              <mphantom></mphantom>
2989
1
            </mpadded>
2990
1
            <mpadded height='0' depth='0'>
2991
1
              <mphantom>
2992
1
                <mn>16</mn>
2993
1
              </mphantom>
2994
1
            </mpadded>
2995
1
          </msubsup>
2996
1
          <mspace width='-0.083em'></mspace>
2997
1
          <msubsup>
2998
1
            <mpadded width='0'>
2999
1
              <mphantom>
3000
1
                <mi>A</mi>
3001
1
              </mphantom>
3002
1
            </mpadded>
3003
1
            <mrow>
3004
1
              <mpadded width='0'>
3005
1
                <mphantom>
3006
1
                  <mn>2</mn>
3007
1
                </mphantom>
3008
1
              </mpadded>
3009
1
              <mpadded width='0' lspace='-1width'>
3010
1
                <mpadded height='0'></mpadded>
3011
1
              </mpadded>
3012
1
            </mrow>
3013
1
            <mrow>
3014
1
              <mpadded height='0'>
3015
1
                <mpadded width='0'>
3016
1
                  <mphantom>
3017
1
                    <mn>2</mn>
3018
1
                  </mphantom>
3019
1
                </mpadded>
3020
1
              </mpadded>
3021
1
              <mpadded width='0' lspace='-1width'>
3022
1
                <mn>16</mn>
3023
1
              </mpadded>
3024
1
            </mrow>
3025
1
          </msubsup>
3026
1
          <mi mathvariant='normal'>O</mi>
3027
1
        </mrow>
3028
1
      </math>";
3029
1
    let target = "<math>
3030
1
        <mrow data-chem-formula='11'>
3031
1
            <mmultiscripts data-previous-space-width='-0.083' data-chem-formula='5'>
3032
1
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
3033
1
                <mprescripts></mprescripts>
3034
1
                <none></none>
3035
1
                <mn>18</mn>
3036
1
            </mmultiscripts>
3037
1
            <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
3038
1
            <mmultiscripts data-previous-space-width='0.027999999999999997' data-chem-formula='5'>
3039
1
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
3040
1
                <mprescripts></mprescripts>
3041
1
                <none></none>
3042
1
                <mn>16</mn>
3043
1
            </mmultiscripts>
3044
1
        </mrow>
3045
1
    </math>";
3046
1
    assert!(are_strs_canonically_equal(test, target, &[]));
3047
1
    }
3048
3049
    
3050
    #[test]
3051
1
    fn merge_bug_274() {
3052
1
        let test = r#"
3053
1
        <math>
3054
1
            <mrow>
3055
1
                <mtable>
3056
1
                    <mtr>
3057
1
                        <mtd>
3058
1
                            <mrow>
3059
1
                                <msub><mtext>H</mtext><mn>2</mn></msub>
3060
1
                                <mtext>g</mtext>
3061
1
                                <mtext/>
3062
1
                                <mtext>+</mtext>
3063
1
                                <mtext/>
3064
1
                                <msub><mrow><mtext>Cl</mtext></mrow><mn>2</mn></msub>
3065
1
                                <mo stretchy="false">(</mo>
3066
1
                                <mtext>g</mtext>
3067
1
                                <mo stretchy="false">)</mo>
3068
1
                                <mo>&#x2192;</mo>
3069
1
                                <mn>2</mn>
3070
1
                                <mtext>HCl(g)</mtext>
3071
1
                            </mrow>
3072
1
                        </mtd>
3073
1
                    </mtr>
3074
1
                    <mtr>
3075
1
                        <mtd>
3076
1
                            <mrow>
3077
1
                                <mn>1</mn>
3078
1
                                <mo>:</mo>
3079
1
                                <mn>1</mn>
3080
1
                                <mo>:</mo>
3081
1
                                <mn>2</mn>
3082
1
                            </mrow>
3083
1
                        </mtd>
3084
1
                    </mtr>
3085
1
                    <mtr>
3086
1
                        <mtd>
3087
1
                            <mrow>
3088
1
                                <mn>1</mn>
3089
1
                                <mtext/>
3090
1
                                <msub><mtext>H</mtext><mn>2</mn></msub>
3091
1
                                <mtext/>
3092
1
                                <mtext>to</mtext>
3093
1
                                <mtext/>
3094
1
                                <mn>1</mn>
3095
1
                                <mtext/>
3096
1
                                <msub><mrow><mtext>Cl</mtext></mrow><mn>2</mn></msub>
3097
1
                                <mtext/>
3098
1
                                <mtext>to</mtext>
3099
1
                                <mtext/>
3100
1
                                <mtext>2</mtext>
3101
1
                                <mtext/>
3102
1
                                <mtext>HCl</mtext>
3103
1
                            </mrow>
3104
1
                        </mtd>
3105
1
                    </mtr>
3106
1
                </mtable>
3107
1
            </mrow>
3108
1
        </math>
3109
1
        "#;
3110
1
        let target = "
3111
1
            <math>
3112
1
            <mtable>
3113
1
                <mtr>
3114
1
                <mtd data-maybe-chemistry='9'>
3115
1
                    <mrow data-maybe-chemistry='9'>
3116
1
                    <mrow data-changed='added' data-maybe-chemistry='8'>
3117
1
                        <mrow data-changed='added' data-maybe-chemistry='1'>
3118
1
                        <msub data-maybe-chemistry='1'>
3119
1
                            <mtext data-maybe-chemistry='1'>H</mtext>
3120
1
                            <mn>2</mn>
3121
1
                        </msub>
3122
1
                        <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3123
1
                        <mtext data-maybe-chemistry='0'>g</mtext>
3124
1
                        </mrow>
3125
1
                        <mo data-chem-equation-op='1' data-maybe-chemistry='1'>+</mo>
3126
1
                        <mrow data-changed='added' data-maybe-chemistry='6'>
3127
1
                        <msub data-maybe-chemistry='3'>
3128
1
                            <mtext data-maybe-chemistry='3'>Cl</mtext>
3129
1
                            <mn>2</mn>
3130
1
                        </msub>
3131
1
                        <mo data-changed='added' data-maybe-chemistry='0'>&#x2063;</mo>
3132
1
                        <mrow data-changed='added' data-maybe-chemistry='2'>
3133
1
                            <mo stretchy='false'>(</mo>
3134
1
                            <mtext>g</mtext>
3135
1
                            <mo stretchy='false'>)</mo>
3136
1
                        </mrow>
3137
1
                        </mrow>
3138
1
                    </mrow>
3139
1
                    <mo data-chem-equation-op='1' data-maybe-chemistry='1'>→</mo>
3140
1
                    <mrow data-changed='added' data-maybe-chemistry='0'>
3141
1
                        <mn data-maybe-chemistry='0'>2</mn>
3142
1
                        <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3143
1
                        <mtext data-maybe-chemistry='0'>HCl(g)</mtext>
3144
1
                    </mrow>
3145
1
                    </mrow>
3146
1
                </mtd>
3147
1
                </mtr>
3148
1
                <mtr>
3149
1
                <mtd>
3150
1
                    <mrow>
3151
1
                    <mn>1</mn>
3152
1
                    <mo>:</mo>
3153
1
                    <mn>1</mn>
3154
1
                    <mo>:</mo>
3155
1
                    <mn>2</mn>
3156
1
                    </mrow>
3157
1
                </mtd>
3158
1
                </mtr>
3159
1
                <mtr>
3160
1
                <mtd data-maybe-chemistry='7'>
3161
1
                    <mrow data-maybe-chemistry='7'>
3162
1
                    <mn data-maybe-chemistry='0'>1</mn>
3163
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3164
1
                    <msub data-maybe-chemistry='1'>
3165
1
                        <mtext data-maybe-chemistry='1'>H</mtext>
3166
1
                        <mn>2</mn>
3167
1
                    </msub>
3168
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3169
1
                    <mtext data-maybe-chemistry='0'>to</mtext>
3170
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3171
1
                    <mn data-maybe-chemistry='0'>1</mn>
3172
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3173
1
                    <msub data-maybe-chemistry='3'>
3174
1
                        <mtext data-maybe-chemistry='3'>Cl</mtext>
3175
1
                        <mn>2</mn>
3176
1
                    </msub>
3177
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3178
1
                    <mtext data-maybe-chemistry='0'>to</mtext>
3179
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3180
1
                    <mn data-maybe-chemistry='0'>2</mn>
3181
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3182
1
                    <mi data-maybe-chemistry='1' mathvariant='normal'>H</mi>
3183
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3184
1
                    <mi data-maybe-chemistry='3' data-split='true'>Cl</mi>
3185
1
                    </mrow>
3186
1
                </mtd>
3187
1
                </mtr>
3188
1
            </mtable>
3189
1
            </math>
3190
1
        ";
3191
1
        assert!(are_strs_canonically_equal(test, target, &[]));
3192
1
    }
3193
    
3194
    #[test]
3195
1
    fn merge_bug_303() {
3196
1
        let test = r#"
3197
1
            <math>
3198
1
                <mn>2</mn>
3199
1
                <msup><mtext>OH</mtext><mo>−</mo></msup>
3200
1
                <mo stretchy="false">(</mo>
3201
1
                <mtext>aq</mtext>
3202
1
                <mo stretchy="false">)</mo>
3203
1
                <mo>+</mo>
3204
1
                <mtext>C</mtext>
3205
1
                <msup><mtext>u</mtext><mrow><mn>2</mn><mo>+</mo></mrow></msup>
3206
1
            </math>
3207
1
        "#;
3208
1
        let target = "
3209
1
            <math>
3210
1
                <mrow data-changed='added'>
3211
1
                <mrow data-changed='added'>
3212
1
                    <mn>2</mn>
3213
1
                    <mo data-changed='added'>&#x2062;</mo>
3214
1
                    <mrow data-changed='added'>
3215
1
                        <msup><mi>OH</mi><mo>-</mo></msup>
3216
1
                        <mo data-changed='added'>&#x2061;</mo>
3217
1
                        <mrow data-changed='added'>
3218
1
                            <mo stretchy='false'>(</mo>
3219
1
                            <mtext>aq</mtext>
3220
1
                            <mo stretchy='false'>)</mo>
3221
1
                        </mrow>
3222
1
                    </mrow>
3223
1
                </mrow>
3224
1
                <mo>+</mo>
3225
1
                <mrow data-changed='added'>
3226
1
                    <mtext>C</mtext>
3227
1
                    <mo data-changed='added'>&#x2062;</mo>
3228
1
                    <msup> <mtext>u</mtext> <mrow><mn>2</mn><mo>+</mo></mrow> </msup>
3229
1
                </mrow>
3230
1
                </mrow>
3231
1
            </math>
3232
1
           ";
3233
1
        assert!(are_strs_canonically_equal(test, target, &[]));
3234
1
    }
3235
    
3236
    #[test]
3237
1
    fn mtd_assert_bug_393() {
3238
1
        let test = r#"
3239
1
        <math display="block">
3240
1
            <mtable>
3241
1
                <mtr>
3242
1
                <mtd>
3243
1
                    <mrow>
3244
1
                    <mi>A</mi>
3245
1
                    <mi>c</mi>
3246
1
                    </mrow>
3247
1
                </mtd>
3248
1
                <mtd>
3249
1
                    <mi>A</mi>
3250
1
                    <mfenced>
3251
1
                    <mtable>
3252
1
                        <mtr>
3253
1
                        <mtd>
3254
1
                            <mrow>
3255
1
                            <mi>c</mi>
3256
1
                            <mi>n</mi>
3257
1
                            </mrow>
3258
1
                        </mtd>
3259
1
                        </mtr>
3260
1
                    </mtable>
3261
1
                    </mfenced>
3262
1
                </mtd>
3263
1
                </mtr>
3264
1
            </mtable>
3265
1
        </math>"#;
3266
1
        let target = "
3267
1
        <math display='block'>
3268
1
            <mtable>
3269
1
            <mtr>
3270
1
                <mtd>
3271
1
                <mi>A</mi>
3272
1
                <mi>c</mi>
3273
1
                </mtd>
3274
1
                <mtd>
3275
1
                <mrow data-changed='added'>
3276
1
                    <mi>A</mi>
3277
1
                    <mrow>
3278
1
                    <mo data-changed='from_mfenced'>(</mo>
3279
1
                    <mtable>
3280
1
                        <mtr>
3281
1
                        <mtd>
3282
1
                            <mrow>
3283
1
                            <mi>c</mi>
3284
1
                            <mi>n</mi>
3285
1
                            </mrow>
3286
1
                        </mtd>
3287
1
                        </mtr>
3288
1
                    </mtable>
3289
1
                    <mo data-changed='from_mfenced'>)</mo>
3290
1
                    </mrow>
3291
1
                </mrow>
3292
1
                </mtd>
3293
1
            </mtr>
3294
1
            </mtable>
3295
1
        </math>";
3296
1
        assert!(are_strs_canonically_equal(test, target, &[]));
3297
1
    }
3298
3299
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/chemistry.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
3
// Chemistry terms used here:
4
// chemical formula -- this references a molecule (one or more elements with bonds between them), including its state.
5
// chemical equation -- this is a notation specialized to chemistry -- it has concentration, arrows, equality, "addition" along with 
6
//    some special symbols for operators and (mostly) chemical formulas for operands.
7
//    Operand exceptions are the equilibrium constant, numbers, and identifiers.
8
//    Although a chemical equation is a superset of a chemical formula, because we want to distinguish the two (e.g., '=' is in both),
9
//      we require that chemical equation is an mrow
10
//    FIX?? -- can it be an adorned mrow?
11
//    Note: with the current definition, if any element in a potential chem equation is ruled out, the entire mrow is ruled out.
12
//
13
// The general flow is that for every element that looks like a chem formula/equation, we mark it with data-likely-[equation/formula]
14
// After we are done marking "likely", we go back and either delete them or replace them with data-[equation/formula].
15
// Note: anything already marked with data-[equation/formula] doesn't need recomputation later (essentially the result is cached)
16
//
17
// There is a chicken and egg problem with detecting chemistry: to more reliably detect it, we need good structure.
18
// However, to get the structure right (e.,g "=" being a double bond, not equality; chem elements being in 'mi's; ...),
19
//   we need to know "=" is part of a chemical formula.
20
// The imperfect solution used is:
21
//   As the final step of each recursive call to 'clean_mathml',
22
//     1. mi/mtext: is it a chemical element(s) or one of the symbols used in chemical formulas (not equations).
23
//        If so, mark it MAYBE_CHEMISTRY.
24
//     2. msub/msup/msubsup/mmultiscripts: is base marked MAYBE_CHEMISTRY and the scripts are potential adornments, mark it MAYBE_CHEMISTRY
25
//     3. mrows: these take a few passes (remember, they aren't structured properly yet)
26
//        On the assumption that chemistry is not common we implement a "show me" attitude before changing the structure.
27
//        Pass 1:
28
//        a) for any run of mi/mtext that can be re-split into chem elements, split them and mark them if it is at least 3 chars long
29
//        b) if there are any potential chem formula operators (e.g., "=" and ":") and the previous node is marked MAYBE_CHEMISTRY,
30
//           mark this as MAYBE_CHEMISTRY
31
//        Pass 2: (assuming something was marked in pass 1)
32
//        a) find the first marked child and then the last consecutive marked child and trim any mo's from the ends
33
//        b) evaluate the likelihood that the sequence is chemistry
34
//           yes: replace mathml children with new (potentially restructured) children
35
//           no: clear all the marks for the old children
36
// After canonicalization, we take another pass looking for chemical equations and marking them if found.
37
38
use sxd_document::dom::{Element, Document, ChildOfElement};
39
use crate::canonicalize::*;
40
use crate::pretty_print::mml_to_string;
41
use crate::xpath_functions::{is_leaf, IsNode};
42
use regex::Regex;
43
use crate::xpath_functions::IsBracketed;
44
use phf::{phf_map, phf_set};
45
use std::convert::TryInto;
46
#[allow(unused_imports)]
47
use log::{error, debug};
48
use std::collections::HashSet;
49
use std::cmp::Ordering;
50
use crate::errors::*;
51
use std::sync::LazyLock;
52
53
54
pub static NOT_CHEMISTRY: i32 = -10000;  // should overwhelm any positive signal
55
static NOT_CHEMISTRY_THRESHOLD: i32 = -10000/2;  // value for testing -- that way some can be added to NOT_CHEMISTRY and still meet the test
56
static CHEMISTRY_THRESHOLD: i32 = 5;   // if this changes, change CHEMISTRY_THRESHOLD_STR
57
58
59
/// this might be chemistry -- should only exist during canonicalization
60
pub static MAYBE_CHEMISTRY: &str = "data-maybe-chemistry";
61
62
/// Attr flag to indicate chemical equation
63
static CHEM_EQUATION: &str = "data-chem-equation";
64
/// Attr flag to indicate chemical formula
65
static CHEM_FORMULA: &str = "data-chem-formula";
66
/// Attr flag to indicate chemical element
67
static CHEM_ELEMENT: &str = "data-chem-element";
68
static CHEM_FORMULA_OPERATOR: &str = "data-chem-formula-op";
69
static CHEM_EQUATION_OPERATOR: &str = "data-chem-equation-op";
70
static CHEM_STATE: &str = "data-chem-state";
71
72
/// mark a new chem element that happened due to splitting a leaf
73
pub static SPLIT_TOKEN: &str = "data-split";
74
75
/// mark a new chem element that happened due to merging two leaves
76
static MERGED_TOKEN: &str = "data-merged";
77
78
/// these can be in the base of an under/over script
79
6.64k
fn is_chem_equation_arrow(ch: char) -> bool {
80
6.64k
    
matches!6.44k
(ch,
81
        '→' | '➔' | '←' | '⟶' | '⟵' | '⤻' | '⇋' | '⇌' |
82
        '↑' | '↓' | '↿' | '↾' | '⇃' | '⇂' | '⥮' | '⥯' | '⇷' | '⇸' | '⤉' | '⤈' |
83
        '⥂' | '⥄' | '⥃' |
84
        '\u{1f8d0}' | '\u{1f8d1}' | '\u{1f8d2}' | '\u{1f8d3}' | '\u{1f8d4}' | '\u{1f8d5}'  // proposed Unicode equilibrium arrows
85
    )
86
6.64k
}
87
88
// Returns true if the 'property' (should have ":") is in the intent
89
196k
fn has_chem_intent(mathml: Element, property: &str) -> bool {
90
196k
    if let Some(
intent16.9k
) = mathml.attribute_value(INTENT_ATTR) {
91
16.9k
        let head = intent.split('(').next().unwrap();
92
16.9k
        return head.contains(property);
93
179k
    }
94
179k
    return false;
95
196k
}
96
97
26.7k
fn has_inherited_property(mathml: Element, property: &str) -> bool {
98
26.7k
    let mut current = mathml;
99
    loop {
100
101k
        if has_chem_intent(current, property) {
101
0
            return true;
102
101k
        }
103
        // chem might not be temp node without a 'math' parent
104
101k
        if name(current) == "math" || 
current.parent()74.6k
.
is_none74.6k
() {
105
26.7k
            break;
106
74.6k
        }
107
74.6k
        current = get_parent(current);
108
    }
109
26.7k
    return false;
110
26.7k
}
111
112
30.2k
pub fn is_chemistry_off(mathml: Element) -> bool {
113
30.2k
    if has_chem_intent(mathml, ":chemical-formula") || has_chem_intent(mathml, ":chemical-equation") {
114
4
        return false;
115
30.2k
    }
116
30.2k
    let pref_manager = crate::prefs::PreferenceManager::get();
117
30.2k
    return pref_manager.borrow().pref_to_string("Chemistry") == "Off";
118
30.2k
}
119
120
10.1k
pub fn clean_chemistry_mrow(mathml: Element) {
121
10.1k
    if is_chemistry_off(mathml) {
122
0
        return;
123
10.1k
    }
124
    // debug!("clean_chemistry_mrow:\n{}", mml_to_string(mathml));
125
10.1k
    let mut children = mathml.children().iter()
126
31.3k
                .
map10.1k
(|child| as_element(*child))
127
10.1k
                .collect::<Vec<Element>>();
128
10.1k
    if let Some(
new_children246
) = clean_mrow_children_restructure_pass(&children) {
129
246
        mathml.replace_children(&new_children);
130
246
        children = new_children;
131
9.93k
    }
132
10.1k
    clean_mrow_children_mark_pass(&children);
133
10.1k
}
134
135
/// Do some aggressive structural changes and if they make this look like a chemistry formula, mark it as one else remove other marks
136
/// Note: the element is replaced with a new restructured element if it is marked as chemistry
137
///        Pass 1:
138
///        a) for any run of mi/mtext that can be re-split into chem elements, split them and mark them if it is at least 3 chars long.
139
///           Also split "(g)", etc., when in mi/mtext
140
///        b) if there are any potential chem formula operators (e.g., "=" and ":") and the previous node is marked MAYBE_CHEMISTRY,
141
///           mark this as MAYBE_CHEMISTRY
142
10.1k
fn clean_mrow_children_restructure_pass<'a>(old_children: &[Element<'a>]) -> Option<Vec<Element<'a>>> {
143
10.1k
    let mut changed = false;
144
10.1k
    let mut new_children = Vec::with_capacity(2*old_children.len());
145
10.1k
    let mut i = 0;
146
40.7k
    while i < old_children.len() {
147
30.6k
        if let Some(
paren_mrow_aq1
) = clean_aq_state(old_children, i) {
148
1
            new_children.push(paren_mrow_aq);
149
1
            i += 4;                                 // skipping "( a q )"
150
1
            changed = true;
151
1
            continue;
152
        } else {
153
30.6k
            let child = old_children[i];
154
30.6k
            let child_name = name(child);
155
30.6k
            if  child_name == "mi" || (
child_name == "mtext"22.0k
&&
as_text(child).len() < 4228
) {
156
                // break mi/mtext that is done as "(g)", etc. Even if it isn't 'g', 'l', etc., it probably shouldn't be an mi/text.
157
8.62k
                let text = as_text(child);
158
8.62k
                if text.starts_with('(') && 
text4
.
ends_with4
(')') {
159
4
                    let doc = child.document();
160
4
                    let state = create_mathml_element(&doc, "mi");
161
4
                    state.set_text(&text[1..text.len()-1]);
162
4
                    let open = create_mathml_element(&doc, "mo");
163
4
                    open.set_text("(");
164
4
                    let close = create_mathml_element(&doc, "mo");
165
4
                    close.set_text(")");
166
4
                    let mrow = create_mathml_element(&doc, "mrow");
167
4
                    mrow.append_children(&[open,state,close]);
168
4
                    new_children.push(mrow);
169
4
                    i += 1;
170
4
                    changed = true;
171
4
                    continue;
172
8.62k
                }
173
21.9k
            } else if i + 2 < old_children.len() {
174
                // wrap with an mrow if we are not already an 'mrow'
175
9.68k
                let parent = get_parent(child); // safe since 'math' is always at root
176
9.68k
                if !(name(parent) == "mrow" && 
i == 02.86k
&&
old_children.len() == 31.44k
) &&
177
8.68k
                    let Some(
paren_mrow377
) = make_mrow(old_children[i..i+3].try_into().unwrap()) {
178
                        // debug!("make_mrow added mrow");
179
377
                        new_children.push(paren_mrow);
180
377
                        i += 3;
181
377
                        changed = true;
182
377
                        continue;
183
9.30k
                    }
184
12.3k
            }
185
30.2k
            if child_name == "mo" {
186
9.50k
                let likely_chemistry_op = likely_chem_formula_operator(child);
187
                // debug!("clean_mrow_children_restructure_pass -- in mo: likely {}, {}", likely_chemistry_op, mml_to_string(child));
188
9.50k
                if likely_chemistry_op >= 0 {
189
                    // if possible chemistry to left and right, then override text for operator lookup
190
                    // note: on the right, we haven't set chem flag for operators yet, so we skip them
191
2.98k
                    let preceding = child.preceding_siblings();
192
2.98k
                    let following = child.following_siblings();
193
2.98k
                    if !preceding.is_empty() &&
194
1.84k
                       ( has_inherited_property(child, "chemical-formula") ||
195
2.27k
                         
preceding.iter()1.84k
.
all1.84k
(|&child| {
196
2.27k
                            let child = as_element(child);
197
2.27k
                            name(child)=="mn" || 
child2.13k
.attribute(MAYBE_CHEMISTRY).
is_some2.13k
()}) &&
198
574
                            
!following.is_empty()273
&&
following.iter()246
.
all246
(|&child| {
199
574
                                let child = as_element(child);
200
574
                                name(child)=="mo" || 
name(child)=="mn"437
||
child351
.attribute(MAYBE_CHEMISTRY).
is_some351
()
201
574
                            })) {
202
146
                        // "=", etc., should be treated as high priority separators
203
146
                        // debug!("clean_mrow_children_restructure: child = {}", mml_to_string(child));
204
146
                        child.set_attribute_value(CHEMICAL_BOND, "true");
205
146
                        child.set_attribute_value(CHEM_FORMULA_OPERATOR, &likely_chemistry_op.to_string());
206
146
                        child.set_attribute_value(MAYBE_CHEMISTRY, &likely_chemistry_op.to_string());
207
2.83k
                    }
208
6.52k
                } else {
209
6.52k
                    likely_chem_equation_operator(child);   // need to mark MAYBE_CHEMISTRY for CHEMICAL_BOND tests
210
6.52k
                }
211
20.7k
            } else if child_name == "mrow" &&
212
2.05k
                      let Some(
latex_value1
) = child.attribute_value("data-latex") &&
213
1
                      latex_value == r"\mathrel{\longrightleftharpoons}" {
214
0
                child.set_attribute_value("data-unicode", "\u{1f8d2}");
215
0
                child.set_attribute_value(MAYBE_CHEMISTRY, "2");    // same as is_hack_for_missing_arrows()
216
20.7k
            }
217
30.2k
            i += 1;
218
30.2k
            new_children.push(child);
219
        }
220
    }
221
222
10.1k
    return if changed {
Some(new_children)246
} else {
None9.93k
};
223
    
224
225
    /// if it looks like we have ChemFormula ( a q ), merge the 'a' and 'q' together into an 'mi'
226
    /// if not already true, structure '( aq )' into a single mrow (might be other elements on either side)
227
    /// returns the last char matched
228
30.6k
    fn clean_aq_state<'a>(children: &[Element<'a>], i: usize) -> Option<Element<'a>> {
229
30.6k
        if i+3 >= children.len() || (
i > 010.8k
&&
children[i-1]9.38k
.attribute(MAYBE_CHEMISTRY).
is_none9.38k
()) {
230
27.8k
            return None;       // can't be '( a q )' -- not enough elements left or not Chem Formula on left
231
2.79k
        }
232
        
233
        // this is a little sloppy in that we allow matching text in any leaf element, but we can use the same function
234
2.79k
        if is_text(children[i], "(") &&
235
244
           is_text(children[i+1], "a") && 
is_text9
(
children[i+2]9
,
"q"9
) &&
236
1
           is_text(children[i+3], ")") {
237
1
            let mi = create_mathml_element(&children[i].document(), "mi");
238
1
            mi.set_text("aq");
239
1
            return make_mrow([children[i], mi, children[i+3]]);
240
2.79k
        }
241
2.79k
        return None;
242
30.6k
    }
243
244
12.3k
    fn is_text(node: Element, target: &str) -> bool {
245
12.3k
        return is_leaf(node) && 
as_text(node) == target11.1k
;
246
12.3k
    }
247
248
    /// Converts  "( child )" to mrow with those elements as children.
249
    /// This is to make ascertaining whether this is a chemical state easier, but it is correct even if not a chemical state.
250
8.68k
    fn make_mrow(children: [Element; 3]) -> Option<Element> {
251
        // this is a little sloppy in that we allow matching text in any leaf element, but we can use the same function
252
8.68k
        if is_text(children[0], "(") &&
253
631
           is_text(children[2], ")") {
254
378
      let mrow = create_mathml_element(&children[0].document(), "mrow");
255
378
      mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
256
378
      mrow.append_children(children);
257
378
            return Some(mrow);
258
8.31k
        }
259
8.31k
        return None;
260
8.68k
    }
261
10.1k
}
262
263
/// Pass 2: (assuming something was marked in pass 1)
264
/// a) find the first marked child and then the last consecutive marked child and trim any mo's from the ends
265
/// b) evaluate the likelihood that the sequence is chemistry
266
10.1k
fn clean_mrow_children_mark_pass(children: &[Element]) {
267
10.1k
    let mut start = None;
268
30.6k
    for i in 
0..children.len()10.1k
{
269
30.6k
        let child = children[i];
270
30.6k
        if child.attribute(MAYBE_CHEMISTRY).is_some()  {
271
4.64k
            if start.is_none() {
272
3.63k
                if name(child) == "mo" {
273
2.38k
                    // debug!(" start.is_none(): removing MAYBE_CHEMISTRY on {}", as_text(child));
274
2.38k
                    child.remove_attribute(MAYBE_CHEMISTRY);
275
2.38k
                    child.remove_attribute(CHEM_FORMULA_OPERATOR);
276
2.38k
                    child.remove_attribute(CHEM_EQUATION_OPERATOR);
277
2.38k
                    child.remove_attribute(CHEMICAL_BOND);
278
2.38k
                } else {
279
1.25k
                    start = Some(i);
280
1.25k
                }
281
1.00k
            }
282
25.9k
        } else if let Some(
seq_start804
) = start &&
283
804
                  remove_operators_at_end_of_sequence(children, seq_start, i) {
284
804
            start = None;
285
25.1k
        }
286
    }
287
288
10.1k
    if let Some(
seq_start452
) = start {
289
452
        remove_operators_at_end_of_sequence(children, seq_start, children.len());
290
9.73k
    }
291
10.1k
    return;
292
293
294
1.25k
    fn remove_operators_at_end_of_sequence(children: &[Element], start: usize, end: usize) -> bool {
295
        // debug!("  looking for ops at end of {}..{}, last is:{}", start, end, mml_to_string(children[end-1]));
296
1.45k
        for stop in (
start..end1.25k
).
rev1.25k
() {
297
1.45k
            let end_child = children[stop];
298
1.45k
            if name(end_child) == "mo" {
299
202
                end_child.remove_attribute(MAYBE_CHEMISTRY);
300
202
            } else {
301
1.25k
                return true;
302
            }
303
        }
304
0
        return false
305
1.25k
}
306
10.1k
}
307
308
309
/// Very little software gets the token elements for chemistry right.
310
/// Sometimes multiple elements are in a single token (e.g. "NaCl") and sometimes
311
/// a single element is spread across multiple tokens (e.g. "N", "a").
312
/// 
313
/// Here we attempt one or the other repair, but not both on the assumption there is 
314
/// consistency in the error.
315
/// 
316
/// Returns a Vec of the chemical elements or None. If a merge happened, the tree is altered.
317
12.3k
pub fn convert_leaves_to_chem_elements(mathml: Element) -> Option<Vec<Element>> {
318
    // gather up all the consecutive mi/mtext
319
12.3k
    if !(name(mathml) == "mi" || 
name(mathml) == "mtext"942
) {
320
0
        return None;       // do nothing
321
12.3k
    }
322
323
    // we play games with the string to avoid allocation...
324
12.3k
    let token_string = as_text(mathml);
325
12.3k
    if !token_string.is_ascii() {
326
2.67k
        return None;    // chemical elements are ASCII
327
9.62k
    }
328
9.62k
    let doc = mathml.document();
329
9.62k
    if token_string.len() > 1 {   // safe because all chars are ASCII
330
2.54k
        return split_string_chem_element(&doc, mathml);
331
7.08k
    }   
332
7.08k
    let parent = get_parent(mathml);
333
7.08k
    let parent_name = name(parent);
334
7.08k
    if !(parent_name == "mrow" || 
parent_name == "math"4.28k
) { // not canonicalized yet
335
2.57k
        return None;    // only try to merge if in an mrow
336
4.50k
    }
337
4.50k
    let answer = merge_tokens_chem_element(&doc, mathml, &mathml.following_siblings());
338
4.50k
    return answer;
339
340
341
4.50k
    fn merge_tokens_chem_element<'a>(doc: &Document<'a>, leaf: Element<'a>, following_siblings: &[ChildOfElement<'a>]) -> Option<Vec<Element<'a>>> {
342
4.50k
        if following_siblings.is_empty() {
343
1.22k
            return None;
344
3.28k
        }
345
3.28k
        let second_element = as_element(following_siblings[0]);
346
3.28k
        let second_element_name = name(second_element);
347
3.28k
        if second_element_name != "mi" && 
second_element_name != "mtext"3.05k
{
348
3.02k
            return None;
349
256
        }
350
256
        let second_element_text = as_text(second_element);
351
256
        if second_element_text.len() != 1 {
352
57
            return None;
353
199
        }
354
199
        let token_string = as_text(leaf);
355
199
        let chem_token_string = vec![token_string.as_bytes()[0], second_element_text.as_bytes()[0]];
356
199
        if let Some(
chem_element4
) = get_chem_element(doc, &chem_token_string, 2) {
357
4
            chem_element.set_text(as_text(chem_element));
358
4
            chem_element.set_attribute_value(MAYBE_CHEMISTRY, chem_element.attribute_value(MAYBE_CHEMISTRY).unwrap());
359
4
            chem_element.set_attribute_value(MERGED_TOKEN, "true");
360
4
            second_element.remove_from_parent();
361
4
            return Some(vec![chem_element]);
362
195
        }
363
195
        return None;
364
4.50k
    }
365
366
    /// split the string which has been checked to be all ASCII chars
367
2.54k
    fn split_string_chem_element<'a>(doc: &Document<'a>, leaf: Element<'a>) -> Option<Vec<Element<'a>>> {
368
2.54k
        let token_string = as_text(leaf).as_bytes();
369
2.54k
        let token_len = token_string.len();
370
2.54k
        let mut j = 0;
371
2.54k
        let mut new_children = Vec::with_capacity(token_string.len());
372
3.31k
        while j < token_len {
373
            // try elements of length 2 and 1, preferring longer elements (e.g., prefer "Na" over "N")
374
2.94k
            if let Some(
chem_element310
) = get_chem_element(doc, &token_string[j..], 2) {
375
310
                new_children.push(chem_element);
376
310
                j += 2;
377
310
                continue;
378
2.63k
            } else if let Some(
chem_element457
) = get_chem_element(doc, &token_string[j..], 1) {
379
457
                new_children.push(chem_element);
380
457
                j += 1;
381
457
                continue;
382
2.18k
            }
383
2.18k
            return None;    // didn't find a valid chem element
384
        }
385
362
        if new_children.len() <= 1 {
386
231
            return None;
387
131
        }
388
131
        add_attrs(new_children[new_children.len()-1], &leaf.attributes());
389
131
        new_children[new_children.len()-1].set_attribute_value(SPLIT_TOKEN, "true");
390
        // debug!("split_string_chem_element: {} -> {}", String::from_utf8(token_string.to_vec()).unwrap(), new_children.len());
391
131
        return Some(new_children);
392
2.54k
    }
393
394
    /// Returns element or None
395
5.78k
    fn get_chem_element<'a>(doc: &Document<'a>, bytes_str: &[u8], n: usize) -> Option<Element<'a>> {
396
        use std::str;
397
5.78k
        let len = bytes_str.len();
398
5.78k
        if n > len {
399
277
            return None;    // can't be an chemical letter
400
5.50k
        }
401
5.50k
        match str::from_utf8(&bytes_str[..n]) {
402
5.50k
            Ok(chem_element) => {
403
5.50k
                if CHEMICAL_ELEMENT_ELECTRONEGATIVITY.contains_key(chem_element) {
404
771
                    return Some(new_chemical_element(doc, chem_element));
405
4.73k
                }
406
4.73k
                return None;
407
            }
408
0
            Err(_) => return None,
409
        }
410
5.78k
    }
411
412
771
    fn new_chemical_element<'a>(doc: &Document<'a>, chem_element_str: &str) -> Element<'a> {
413
771
        let result = create_mathml_element(doc, "mi");
414
771
        result.set_text(chem_element_str);
415
771
        result.set_attribute_value(MAYBE_CHEMISTRY, if chem_element_str.len() == 1 {
"1"457
} else {
"3"314
});
416
771
        if chem_element_str.len() == 1 {
417
457
            result.set_attribute_value("mathvariant", "normal");
418
457
        
}314
419
771
        return result;
420
771
    }
421
12.3k
}
422
423
/// Looks at the children of the element and uses heuristics to decide whether this is a chemical equation/formula
424
/// If it is, it is marked with either data-chem-equation or data-chem-formula
425
/// This function assumes proper structure
426
/// 
427
/// Returns true if not chemistry -- added attrs, mrows, and leaves are removed in preparation for a second parse
428
5.05k
pub fn scan_and_mark_chemistry(mathml: Element) -> bool {
429
5.05k
    if is_chemistry_off(mathml) {
430
0
        return true;
431
5.05k
    }
432
433
5.05k
    let child = as_element(mathml.children()[0]);
434
    // debug!("scan_and_mark_chemistry:\n{}", mml_to_string(child));
435
5.05k
    assert_eq!(name(mathml), "math");
436
5.05k
    let is_chemistry = if let Some(
latex5
) = mathml.attribute_value("data-latex") {
437
        // MathJax v4 includes this really useful info -- if it starts \ce -- we have Chemistry
438
        // need to determine if it is an equation or a formula
439
5
        latex.trim_start().starts_with(r"\ce") 
440
    } else {
441
5.05k
        has_chem_intent(mathml, ":chemical-formula") || has_chem_intent(mathml, ":chemical-equation")
442
    };
443
444
5.05k
    if is_chemistry || 
is_chemistry_sanity_check5.05k
(
mathml5.05k
) {
445
669
        assert_eq!(mathml.children().len(), 1);
446
669
        let likelihood = likely_chem_formula(child);
447
669
        if likelihood >= CHEMISTRY_THRESHOLD || 
has_chem_intent458
(
mathml458
,
":chemical-formula"458
) {
448
211
            child.set_attribute_value(MAYBE_CHEMISTRY, std::cmp::max(CHEMISTRY_THRESHOLD, likelihood).to_string().as_str());
449
211
            set_marked_chemistry_attr(child, CHEM_FORMULA);
450
458
        }
451
452
669
        if child.attribute(CHEM_FORMULA).is_none() {
453
            // can't be both an equation and a formula...
454
458
            let likelihood = likely_chem_equation(child);
455
458
            if is_chemistry || 
likelihood >= CHEMISTRY_THRESHOLD455
||
has_chem_intent422
(
mathml422
,
":chemical-equation"422
) {
456
36
                child.set_attribute_value(MAYBE_CHEMISTRY, std::cmp::max(CHEMISTRY_THRESHOLD, likelihood).to_string().as_str());
457
36
                set_marked_chemistry_attr(child, CHEM_EQUATION);
458
422
            }
459
211
        }
460
4.38k
    }
461
    // debug!("...after marking:\n{}", mml_to_string(child));
462
463
5.05k
    if child.attribute(CHEM_FORMULA).is_none() && 
child4.84k
.attribute(CHEM_EQUATION).
is_none4.84k
() {
464
4.80k
        if !has_maybe_chemistry(mathml) {
465
3.68k
            return true;    // quick check avoids needing a second parse due to removing added elements
466
1.12k
        }
467
1.12k
        return !is_changed_after_unmarking_chemistry(mathml);
468
    } else {
469
247
        return true;
470
    }
471
5.05k
}
472
473
// returns the marked attr value or None
474
16.2k
fn get_marked_value(mathml: Element) -> Option<i32> {
475
16.2k
    return mathml.attribute_value(MAYBE_CHEMISTRY).map(|value| 
value3.11k
.
parse3.11k
().
unwrap3.11k
());
476
16.2k
}
477
478
/// Sets the attr 'chem'
479
/// Recurse through all the children that have MAYBE_CHEMISTRY set
480
4.24k
fn set_marked_chemistry_attr(mathml: Element, chem: &str) {
481
4.24k
    let tag_name = name(mathml);
482
4.24k
    if let Some(
maybe_attr2.88k
) = mathml.attribute(MAYBE_CHEMISTRY) {
483
2.88k
        maybe_attr.remove_from_parent();
484
485
2.88k
        match tag_name {
486
2.88k
            "mi" | 
"mtext"2.09k
=>
{852
mathml852
.
set_attribute_value852
(
CHEM_ELEMENT852
, maybe_attr.value());},
487
2.03k
            "mo" => {
488
686
                if mathml.attribute(CHEM_FORMULA_OPERATOR).is_none() && 
mathml589
.attribute(CHEM_EQUATION_OPERATOR).
is_none589
(){
489
                    // don't mark as both formula and equation
490
433
                    mathml.set_attribute_value(if chem == CHEM_FORMULA {
CHEM_FORMULA_OPERATOR216
} else {
CHEM_EQUATION_OPERATOR217
}, maybe_attr.value());
491
253
                }
492
            },
493
1.35k
            "mn" => 
()87
,
494
1.26k
            "mrow" | 
"msub"515
|
"msup"275
|
"msubsup"216
|
"mmultiscripts"213
=> {
495
1.25k
                let mut chem_name = chem;
496
1.25k
                if tag_name != "mrow" && 
chem != CHEM_FORMULA505
{
497
                    // look at base -- if an mi/mtext then this is really a chemical formula
498
69
                    let base = as_element(mathml.children()[0]);
499
69
                    let base_name = name(base);
500
69
                    if base_name == "mi" || 
base_name == "mtext"8
{
501
63
                        chem_name = CHEM_FORMULA;
502
63
                    
}6
503
1.18k
                }
504
505
1.25k
                if mathml.attribute(CHEM_FORMULA).is_none() {
506
1.23k
                    // don't mark as both formula and equation
507
1.23k
                    mathml.set_attribute_value(chem_name, maybe_attr.value());
508
1.23k
                
}18
509
3.92k
                for child in 
mathml1.25k
.
children1.25k
() {
510
3.92k
                    set_marked_chemistry_attr(as_element(child), chem);
511
3.92k
                };
512
            }
513
10
            "mfrac" => {
514
0
                let children = mathml.children();
515
                // debug!("mfrac children: {}", mml_to_string(mathml));
516
0
                let numerator_is_chem_equation = IsBracketed::is_bracketed(as_element(children[0]), "[", "]", false, true);
517
0
                let denominator_is_chem_equation = IsBracketed::is_bracketed(as_element(children[1]), "[", "]", false, true);
518
0
                if  numerator_is_chem_equation && denominator_is_chem_equation {
519
0
                    mathml.set_attribute_value(CHEM_EQUATION, "true");
520
0
                }
521
            }
522
10
            _ => error!("Internal error: {tag_name} should not be marked as 'MAYBE_CHEMISTRY'"),
523
        }
524
1.35k
    } else if tag_name == "mrow" {
525
        // could have been added during canonicalization, so never marked. Recurse to the children
526
68
        for child in 
mathml33
.
children33
() {
527
68
            set_marked_chemistry_attr(as_element(child), chem);
528
68
        };
529
1.32k
    }
530
4.24k
}
531
532
/// returns true if MAYBE_CHEMISTRY's occur within the element
533
41.3k
fn has_maybe_chemistry(mathml: Element) -> bool {
534
41.3k
    if mathml.attribute(MAYBE_CHEMISTRY).is_some() {
535
1.12k
        return true;
536
40.2k
    }
537
40.2k
    if !is_leaf(mathml) {
538
36.5k
        for child in 
mathml17.9k
.
children17.9k
() {
539
36.5k
            if has_maybe_chemistry(as_element(child)) {
540
3.15k
                return true;
541
33.3k
            }
542
        }
543
22.2k
    }
544
37.0k
    return false;
545
41.3k
}
546
547
/// Clears MAYBE_CHEMISTRY from this element and its decedents
548
/// Also deletes added mrows and leaves; returns true if anything is deleted
549
19.7k
fn is_changed_after_unmarking_chemistry(mathml: Element) -> bool {
550
19.7k
    mathml.remove_attribute(MAYBE_CHEMISTRY);
551
19.7k
    if is_leaf(mathml) {
552
        // don't bother testing for the attr -- just remove and nothing bad happens if they aren't there
553
13.3k
        mathml.remove_attribute(CHEM_FORMULA_OPERATOR);
554
13.3k
        mathml.remove_attribute(CHEM_EQUATION_OPERATOR);
555
13.3k
        mathml.remove_attribute(CHEMICAL_BOND);
556
13.3k
        if mathml.attribute(MERGED_TOKEN).is_some() {
557
3
            unmerge_element(mathml);
558
3
            return true;    // need to re-parse
559
13.3k
        } else if mathml.attribute(SPLIT_TOKEN).is_some() {
560
33
            if let Err(
err0
) = merge_element(mathml) {
561
0
                panic!("{}", err);
562
33
            }
563
            // debug!("After merge_element:{}", mml_to_string(mathml));
564
            // let parent = get_parent(mathml);
565
            // debug!("After merge_element: -- parent{}", mml_to_string(parent));
566
567
13.3k
        } else if let Some(
changed_value2.14k
) = mathml.attribute_value(CHANGED_ATTR) &&
568
2.14k
                  changed_value == ADDED_ATTR_VALUE &&
569
2.11k
                  name(mathml) != "mtext" {  // a hack fix for #477 (chem never modifies mtext, so this is ok)
570
2.11k
            mathml.remove_from_parent();
571
2.11k
            return true;
572
11.1k
        }
573
11.2k
        return false;
574
6.38k
    } else if IsNode::is_scripted(mathml) &&
575
1.04k
              name(as_element(mathml.children()[0])) == "mi" &&
576
575
              as_element(mathml.children()[0]).attribute(SPLIT_TOKEN).is_some() {
577
        // Undo a split that happened in a scripted element.
578
        // We put the preceding elements into the base and call merge_element on the last element of the base
579
        // The first and/or the last child in the sequence could be a script that needs to be unwrapped
580
1
        let mut parent = get_parent(mathml);   // there is always a "math" node
581
        // debug!("mathml:\n{}", mml_to_string(mathml));
582
        // debug!("parent before merge:\n{}", mml_to_string(parent));
583
        // debug!("grandparent before merge:\n{}", mml_to_string(get_parent(parent)));
584
585
1
        let mut preceding_children = mathml.preceding_siblings();
586
        // could be no preceding children to canonicalization creating mrows (see issue #303), so might need to use parent, etc
587
2
        while preceding_children.is_empty() {
588
1
            preceding_children = parent.preceding_siblings();
589
1
            if name(parent) == "math" {
590
0
                break;  // consider {SIN}^{-1} -- no preceding child
591
1
            }
592
1
            parent = get_parent(parent);
593
        }
594
595
1
        let mut new_script_children = vec![];
596
1
        if !preceding_children.is_empty() {
597
            // deal with the first element (if it needs unwrapping, it has only prescripts)
598
1
            let first_element_of_split = as_element(preceding_children[preceding_children.len()-1]);
599
            // debug!("first_element_of_split: \n{}", mml_to_string(first_element_of_split));
600
1
            if name(first_element_of_split) == "mmultiscripts" {
601
                // take the base and make it the first child of preceding_children (what will get merged)
602
                // put the rest of the elements (the prescripts) at the end of the parent last element (mathml) which must be an mmultiscripts
603
0
                let first_element_children = first_element_of_split.children();
604
0
                assert_eq!(name(mathml), "mmultiscripts");
605
0
                let mut script_children = mathml.children();
606
0
                assert_eq!(name(as_element(script_children[0])), "mi");
607
0
                assert!(!script_children.len().is_multiple_of(2));  // doesn't have <mprescripts/>
608
0
                script_children.push(first_element_children[1]);    // mprescripts
609
0
                script_children.push(first_element_children[2]);    // prescripts subscript
610
0
                script_children.push(first_element_children[3]);    // prescripts superscript
611
612
0
                let base_of_first_element = first_element_children[0];  // base
613
0
                assert_eq!(name(as_element(base_of_first_element)), "mi");
614
0
                let script_base = as_element(script_children[0]);
615
0
                let mut merged_base_text = as_text( as_element(base_of_first_element)).to_string();
616
0
                merged_base_text.push_str(as_text(script_base));
617
0
                script_base.set_text(&merged_base_text);
618
0
                script_base.remove_attribute("mathvariant");
619
0
                script_base.remove_attribute(ADDED_ATTR_VALUE);
620
0
                script_base.remove_attribute(MAYBE_CHEMISTRY);
621
0
                script_base.remove_attribute(SPLIT_TOKEN);
622
0
                mathml.replace_children(script_children);
623
        
624
0
                first_element_of_split.remove_from_parent();
625
0
                return true;
626
1
            }
627
1
            new_script_children.push(ChildOfElement::Element(first_element_of_split));
628
0
        }
629
1
        debug!("mathml after handling preceding children:\n{}", 
mml_to_string0
(
mathml0
));
630
1
        let mut children_of_script = mathml.children();
631
1
        let split_child = as_element(children_of_script[0]);
632
1
        new_script_children.append(&mut children_of_script);
633
1
        mathml.replace_children(new_script_children);     // temporarily has bad number of children 
634
        // debug!("After making bad script:\n{}", mml_to_string(mathml));
635
1
        if let Err(
err0
) = merge_element(split_child) {
636
0
            panic!("{}", err);
637
1
        }
638
1
        return true;
639
    } else {
640
6.37k
        let mut answer = false;
641
18.5k
        for child in 
mathml6.37k
.
children6.37k
() {
642
18.5k
            let child = as_element(child);
643
18.5k
            if name(child) == "mtd" && 
child77
.attribute(MAYBE_CHEMISTRY).
is_some77
() {
644
2
                answer = true;  // each mtd acts as a potential island for chemistry, so don't clear it
645
18.5k
            } else {
646
18.5k
                answer |= is_changed_after_unmarking_chemistry(child);
647
18.5k
            }
648
        }
649
6.37k
        if name(mathml) == "mrow" {
650
3.58k
            if let Some(
changed_value2.86k
) = mathml.attribute_value(CHANGED_ATTR) {
651
                // we added an mrow, we can remove it -- but this might be already processed which is the case if "data-id-added" is true (exists)
652
2.86k
                if changed_value == ADDED_ATTR_VALUE && mathml.attribute("data-id-added").is_none() {
653
                    // mrows get added for several reasons. One of them is to canonicalize elements like msqrt that can have 1 or more children;
654
                    //   those should not get removed because the re-parse doesn't add those
655
                    // Although they would never be added, elements with fixed number of children also shouldn't have the mrow go away
656
                    // We are left with only removing mrows with one child or mrows that are children of mrows (simpler test than ELEMENTS_WITH_ONE_CHILD)
657
2.86k
                    let parent = get_parent(mathml);   // mathml is mrow, so parent always exists
658
2.86k
                    if mathml.children().len() == 1 || 
name(parent) == "mrow"2.84k
{
659
6.26k
                        let 
children2.31k
=
mathml.children().iter()2.31k
.
map2.31k
(|&el| as_element(el)).
collect2.31k
::<Vec<Element>>();
660
2.31k
                        mathml.remove_attribute(CHANGED_ATTR);  // if just one child, the attrs are pushed onto the child
661
                        // debug!("is_changed_after_unmarking: before replace - parent\n{}", mml_to_string(parent));
662
2.31k
                        replace_children(mathml, children);
663
                        // debug!("is_changed_after_unmarking: parent\n{}", mml_to_string(parent));
664
665
557
                    }
666
0
                }
667
720
            }
668
3.58k
            return true;
669
2.79k
        }
670
2.79k
        return answer;
671
    }
672
673
3
    fn unmerge_element(mathml: Element) {
674
        // a merged token occurs when two single letters get merged into one. Here we recreate the two tokens
675
3
        assert!(is_leaf(mathml));
676
        // debug!("unmerge_element: {}", mml_to_string(mathml));
677
3
        let mut token_str = as_text(mathml).chars();
678
3
        let first = create_mathml_element(&mathml.document(), name(mathml));
679
3
        first.set_text(&token_str.next().unwrap().to_string());
680
3
        let second = create_mathml_element(&mathml.document(), name(mathml));
681
3
        second.set_text(&token_str.next().unwrap().to_string());
682
3
        replace_children(mathml, vec![first, second]);
683
3
    }
684
685
    /// Put the split pieces back together (undo the split)
686
34
    fn merge_element(mathml: Element) -> Result<()> {
687
        // debug!("merge_element: {}", mml_to_string(mathml));
688
        // debug!("merge_element parent: {}", mml_to_string(get_parent(mathml)));
689
34
        assert!(is_leaf(mathml));
690
34
        let mut preceding_children = mathml.preceding_siblings();
691
        // debug!("preceding_children: {}", preceding_children.iter().map(|&el| name(as_element(el)).to_string()).collect::<Vec<String>>().join(", "));
692
34
        if preceding_children.is_empty() {
693
            // handle:
694
            // * case where we have mi mmultiscripts mi ... where the second mi needs to join with the first (see test mhchem_so4)
695
            // * case where the child got buried in an added mrow (can only happen one level deep because invisible times should get inserted)
696
0
            let parent = get_parent(mathml);   // mathml is leaf, so parent always exists
697
0
            preceding_children = parent.preceding_siblings();
698
0
            if preceding_children.is_empty() ||
699
0
               !(name(parent) == "mmultiscripts" ||
700
0
                (name(parent) == "mrow" && parent.attribute_value(CHANGED_ATTR).is_some() &&
701
0
                 parent.attribute_value(CHANGED_ATTR).unwrap() == ADDED_ATTR_VALUE)) {
702
0
                    bail!("Internal error: {} should not have been split'", mml_to_string(mathml));
703
0
            }
704
34
        }
705
        // Note: there was an invisible U+2063, but it was removed before we got here
706
        // The parent mrow could have many children that couldn't have been part of a split -- only consider feasible children to split (mi/mtext)
707
        // To figure this out, we walk backwards adding the text in reverse and then reverse that text in the end
708
34
        let mut merged_text = Vec::default();
709
46
        for &child in 
preceding_children.iter()34
.
rev34
() {
710
46
            let child = as_element(child);
711
            // because this is before canonicalization, there could be an mrow with just mi/mtext
712
46
            if name(child) == "mrow" && 
child.children().len() == 10
&&
child.attribute(INTENT_ATTR)0
.
is_none0
() {
713
0
                // "lift" the child up so all the links (e.g., siblings) are correct
714
0
                let child = as_element(child.children()[0]);
715
0
                set_mathml_name(child, name(child));
716
0
                crate::canonicalize::add_attrs(child, &child.attributes());
717
0
                child.replace_children(child.children());
718
46
            }
719
46
            if name(child) != "mi" && 
name(child) != "mtext"12
{
720
12
                break;
721
34
            }
722
34
            merged_text.push(as_text(child));
723
34
            child.remove_from_parent();
724
        }
725
34
        merged_text.reverse();
726
34
        let mut merged_text = merged_text.join("");
727
34
        merged_text.push_str(as_text(mathml));
728
34
        mathml.set_text(&merged_text);
729
34
        mathml.remove_attribute("mathvariant");
730
34
        mathml.remove_attribute(ADDED_ATTR_VALUE);
731
34
        mathml.remove_attribute(MAYBE_CHEMISTRY);
732
34
        mathml.remove_attribute(SPLIT_TOKEN);
733
34
        return Ok( () );
734
34
    }
735
19.7k
}
736
737
/// Returns true only if 'mathml' potentially is chemistry.
738
/// This assumes canonicalization has happened and that 'mathml' is the 'math' element
739
5.05k
fn is_chemistry_sanity_check(mathml: Element) -> bool {
740
    // This does some sanity checking. More can definitely be done
741
    // Checks:
742
    // * there should be chemical elements
743
    // * if the child is an mrow with three children, the operator should be '=' (not CHEMICAL_BOND) or  an arrow
744
    //   in this case, we gather up the elements on the lhs and rhs. The sets should be equal and non-empty.
745
    //   the exception is if there are prescripts, in which as we might have radioactive decay so we don't require the sets to be equal
746
    // * otherwise, we gather up all the chemical elements and make sure the set is non-empty
747
    // * if it isn't an mrow, we leave it to likely_chem_equation() to rule it out
748
5.05k
    assert_eq!(name(mathml), "math");
749
5.05k
    assert_eq!(mathml.children().len(), 1);
750
5.05k
    let mathml = as_element(mathml.children()[0]);
751
5.05k
    if name(mathml) == "mrow" {
752
3.29k
        let mrow_children = mathml.children();
753
3.29k
        if mrow_children.len() == 3 && 
is_arrow_or_equal2.52k
(
as_element2.52k
(
mrow_children[1]2.52k
)) {
754
371
            let mut lhs_elements = HashSet::with_capacity(8);   // likely more than anything we'll encounter -- bigger affects '=' op
755
371
            let lhs_has_prescripts = gather_chemical_elements(as_element(mrow_children[0]), &mut lhs_elements);
756
            // need to include the arrow as it might have the addition of some chemical elements (see UEB/iceb.rs/chem_16_5_2)
757
371
            gather_chemical_elements(as_element(mrow_children[1]), &mut lhs_elements);
758
371
            let mut rhs_elements = HashSet::with_capacity(8);  // likely more than anything we'll encounter -- bigger affects '=' op
759
371
            let rhs_has_prescripts = gather_chemical_elements(as_element(mrow_children[2]), &mut rhs_elements);
760
371
            if lhs_elements.is_empty() {
761
269
                return false;
762
102
            }
763
            // debug!("lhs/rhs elements: {:?}, {:?}", lhs_elements, rhs_elements);
764
            // debug!("lhs/rhs has prescripts: {}, {}", lhs_has_prescripts, rhs_has_prescripts);
765
102
            if lhs_elements == rhs_elements {
766
37
                return !(lhs_has_prescripts ^ rhs_has_prescripts);      // seems reasonable that if the lhs has prescripts, so should the rhs
767
65
            }
768
65
            return lhs_has_prescripts && 
rhs_has_prescripts32
; // non-equal sets only if radioactive decay.
769
2.92k
        }
770
1.76k
    }
771
4.68k
    let mut chem_elements = HashSet::with_capacity(8);   // likely more than anything we'll encounter -- bigger affects '=' op
772
4.68k
    gather_chemical_elements(mathml, &mut chem_elements);
773
4.68k
    return !chem_elements.is_empty();
774
775
    
776
2.52k
    fn is_arrow_or_equal(mathml: Element) -> bool {
777
2.52k
        let base = get_possible_embellished_node(mathml);
778
2.52k
        if name(base) != "mo" || 
mathml.attribute(CHEMICAL_BOND)1.98k
.
is_some1.98k
() {
779
542
            return false;
780
1.98k
        }
781
1.98k
        let text = as_text(base);
782
1.98k
        return text == "=" || 
is_single_char_matching1.67k
(
text1.67k
, is_chem_equation_arrow);
783
784
2.52k
    }
785
786
    /// Gather up all the chemical elements in the element and return true if it has numerical prescripts
787
48.3k
    fn gather_chemical_elements<'a>(mathml: Element<'a>, chem_elements: &mut HashSet<&'a str>) -> bool {
788
48.3k
        match name(mathml) {
789
48.3k
            "mi" | 
"mtext"37.7k
=> {
790
10.8k
                if is_chemical_element(mathml) {
791
1.60k
                    chem_elements.insert(as_text(mathml));
792
9.27k
                }
793
10.8k
                return false;
794
            },
795
37.4k
            "msub" | 
"msup"36.7k
|
"msubsup"35.6k
|
"mmultiscripts"35.5k
=> {
796
2.16k
                gather_chemical_elements(get_possible_embellished_node(mathml), chem_elements);
797
2.16k
                return name(mathml) == "mmultiscripts" &&  
has_numerical_prescripts291
(
mathml291
);
798
            },
799
35.2k
            "semantics" => {
800
0
                return gather_chemical_elements( get_presentation_element(mathml).1, chem_elements );
801
            },
802
35.2k
           _ => if is_leaf(mathml) { return 
false21.5k
;
}13.7k
,
803
        }
804
    
805
        // mrow, msqrt, etc
806
13.7k
        let mut has_prescripts = false;
807
40.3k
        for child in 
mathml13.7k
.
children13.7k
() {
808
40.3k
            let child = as_element(child);
809
40.3k
            has_prescripts |= gather_chemical_elements(child, chem_elements);
810
40.3k
        }
811
13.7k
        return has_prescripts;
812
48.3k
    }
813
814
        /// find the mprescripts child and then check the following siblings for numerical prescripts
815
291
    fn has_numerical_prescripts(mathml: Element) -> bool {
816
291
        let children = mathml.children();
817
        // quick check to see if there is an mprescripts child
818
291
        if !children.len().is_multiple_of(2) { // <mprescripts/> => even number of children
819
129
            return false;
820
162
        }
821
        // we need enumerate because the "step_by" will cause any returned iterator to jump ahead by 2
822
162
        let i_mprescripts = children.iter()
823
162
            .enumerate()
824
162
            .skip(1)
825
162
            .step_by(2)
826
222
            .
find162
(|(_, child)| name(as_element(**child)) == "mprescripts")
827
162
            .map(|(i, _)| i);
828
829
162
        if let Some(i) = i_mprescripts {
830
162
            let subscript = as_element(children[i+1]);  // can be +1/-1 for beta decay
831
162
            let superscript = as_element(children[i+2]);  // mass number, so always >= 0
832
162
            if name(superscript) != "mn" {
833
55
                return false;
834
107
            }
835
107
            return name(subscript) == "mn" ||
836
36
                   (name(subscript) == "mrow" && 
subscript.children().len() == 331
&&
837
0
                    name(as_element(subscript.children()[3])) == "mm" && 
838
0
                    name(as_element(subscript.children()[1])) == "mo" &&
839
0
                    matches!(as_text(as_element(subscript.children()[1])), "+" | "-"));
840
0
        }
841
0
        return false;
842
291
    }
843
5.05k
}
844
845
/// Looks at the children of the element and uses heuristics to decide whether this is a chemical equation.
846
/// This assumes canonicalization of characters has happened
847
713
fn likely_chem_equation(mathml: Element) -> i32 {
848
    // mfrac -- could be a ratio of concentrations
849
713
    if name(mathml) != "mrow" && 
name(mathml) != "mtd"127
&&
name(mathml) != "mfrac"120
{
850
119
        return NOT_CHEMISTRY;
851
594
    }
852
853
    // debug!("start likely_chem_equation:\n{}", mml_to_string(mathml));
854
  // mrow -- check the children to see if we are likely to be a chemical equation
855
856
    // concentrations should either be unscripted or have a superscript that isn't a charge
857
    // they occur in an mrow or mfrac
858
594
    if IsBracketed::is_bracketed(mathml, "[", "]", false, true) {
859
10
        let parent_name = name(get_parent(mathml));
860
10
        if parent_name == "mfrac" || parent_name == "mrow"  || 
parent_name == "math"9
||
861
0
           (parent_name == "msup" && likely_chem_superscript(as_element(mathml.following_siblings()[0])) < 0){
862
10
            return if as_element(mathml.children()[0]).attribute(CHEM_FORMULA).is_some() {
CHEMISTRY_THRESHOLD0
} else {NOT_CHEMISTRY};
863
0
        }
864
584
    }
865
    
866
    // possible improvement -- give bonus points for consecutive (not counting invisible separators) chemical elements on top of the existing points
867
584
  let mut likelihood = 0;           // indicator of likely match
868
584
  let mut has_equilibrium_constant = false;
869
584
    let children = mathml.children();
870
1.22k
  for i in 
0..children.len()584
{
871
1.22k
    let child = as_element(children[i]);
872
        // debug!("   i={}, likelihood={}, child={}", i, likelihood, crate::canonicalize::element_summary(child));
873
1.22k
        if let Some(
likely457
) = get_marked_value(child) {
874
457
            likelihood += likely;
875
457
            continue;
876
771
        }
877
771
    if i == children.len()-1 {
878
195
            let likely = likely_chem_state(child);
879
195
            if likely > 0 {
880
0
                likelihood += likely;
881
0
                break;
882
195
      }
883
            // otherwise, check the last element as normal
884
576
        }
885
771
        let tag_name = name(child);
886
771
        let likely = match tag_name {
887
771
            "mi" => 
likely_chem_element146
(
child146
),
888
625
            "mn" => 
09
, // not much info
889
616
            "mo" | 
"mover"372
|
"munder"352
|
"munderover"308
=>
likely_chem_equation_operator330
(
child330
),
890
286
            "msub" | 
"msup"259
|
"msubsup"254
|
"mmultiscripts"252
=> {
891
38
                if is_equilibrium_constant(child) {
892
0
                    has_equilibrium_constant = true;
893
0
                    2
894
                } else {
895
38
                    likely_adorned_chem_formula(child)
896
                }
897
            },
898
248
            "mfrac" => {
899
0
                if has_equilibrium_constant {
900
0
                    2
901
                } else {
902
0
                    -3    // fraction tend only to appear after an equilibrium constant
903
                }
904
            },
905
248
            "mrow" => {
906
248
                let likely = likely_chem_formula(child);
907
248
                if likely < 0 {
908
248
                    likely_chem_equation(child)
909
                } else {
910
0
                    likely
911
                }     
912
            },
913
            // no need to check for mtr or mtd because they only exist in a table and the recursion is dealt with here.
914
0
            "mtable" => {
915
0
                for mrow in child.children() {
916
0
                    let mrow = as_element(mrow);
917
0
                    for mtd in mrow.children() {
918
0
                        let mtd = as_element(mtd);
919
0
                        let mut likely = likely_chem_formula(mtd);
920
0
                        if likely < CHEMISTRY_THRESHOLD {
921
0
                            likely = likely_chem_equation(mtd);
922
0
                        }     
923
0
                        if likely < CHEMISTRY_THRESHOLD {
924
0
                            is_changed_after_unmarking_chemistry(mtd);
925
0
                        }     
926
                    }
927
                }
928
0
                NOT_CHEMISTRY
929
            },
930
0
            "semantics" => {
931
0
                likely_chem_equation(get_presentation_element(mathml).1)
932
            },
933
0
            _ => NOT_CHEMISTRY,
934
        };
935
771
        if likely >= 0 {
936
164
            child.set_attribute_value(MAYBE_CHEMISTRY, &likely.to_string());
937
607
        }
938
771
        likelihood += likely;
939
771
        if likelihood < NOT_CHEMISTRY_THRESHOLD {
940
396
            return NOT_CHEMISTRY;
941
375
        }
942
    }
943
944
188
    if likelihood >= 0 {
945
108
        mathml.set_attribute_value(MAYBE_CHEMISTRY, &likelihood.to_string());
946
108
    
}80
947
188
    return likelihood;
948
713
}
949
950
951
/// could be a number, a state ("(l)", "(g)", etc), or a number followed by a state
952
1.19k
fn likely_chem_subscript(subscript: Element) -> i32 {
953
1.19k
    let subscript_name = name(subscript);
954
1.19k
    if  subscript_name == "mn" && 
!as_text(subscript).contains('.')676
{
955
674
        return 0;       // not really much chem info about an integer subscript
956
525
    } else if subscript_name == "mi" {
957
328
        let text = as_text(subscript);
958
328
        if text == "s" || 
text == "l"323
||
text == "g"323
||
text == "aq"323
{
959
6
            subscript.set_attribute_value(CHEM_STATE, "true");
960
6
            return 2;
961
322
        }
962
197
    } else if subscript_name == "mrow" {
963
        // debug!("likely_chem_subscript:\n{}", mml_to_string(subscript));
964
184
        let children = subscript.children();
965
184
        if children.len() == 3 && 
IsBracketed::is_bracketed71
(
subscript71
,
"("71
,
")"71
, false, true) {
966
6
            return likely_chem_subscript(as_element(children[1]));
967
178
        }
968
178
        let i_first_child = as_element(children[0]);
969
178
        if children.len() == 2 &&
970
103
           name(i_first_child) == "mn" && 
!as_text(i_first_child).contains('.')81
&&
971
81
           name(as_element(children[1])) == "mrow" &&
972
0
           likely_chem_state(as_element(children[1])) > 0 { // notation used in en.wikipedia.org/wiki/Electrolyte#Formation
973
0
                return 2;
974
178
        }     
975
13
    }
976
    // could be a variable 'n' or something else -- just not likely
977
513
    return -3
978
1.19k
}
979
980
17
fn small_roman_to_number(text: &str) -> &str {
981
    // simplest to do a look up
982
    static ROMAN_TO_NUMBER: phf::Map<&str, &str> = phf_map! {
983
        "I" => "1", "II" => "2", "III" => "3", "IV" => "4", "V" => "5", "VI" => "6", "VII" => "7", "VIII" => "8", "IX" => "9",
984
    };
985
17
    return ROMAN_TO_NUMBER.get(text).unwrap_or(&"");
986
987
17
}
988
989
1.65k
fn likely_chem_superscript(sup: Element) -> i32 {
990
    // either one or more '+'s (or '-'s) or a number followed by +/-
991
    // also could be state (en.wikipedia.org/wiki/Nuclear_chemistry#PUREX_chemistry)
992
    // bullet is radical (en.wikipedia.org/wiki/Radical_(chemistry)#Depiction_in_chemical_reactions); mhchem uses dot operator
993
    //  these can stand alone, be followed by +/- or have a number in front "(2•)-"" [examples from mhchem documentation]
994
    // roman numerals are "oxidation state" and range from -4 to +9
995
3
    static MULTIPLE_PLUS_OR_MINUS_OR_DOT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\++$|^-+$|^\U{2212}+$|^[⋅∙•][-+\U{2212}]*$").unwrap());
996
3
    static SINGLE_PLUS_OR_MINUS_OR_DOT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[+-\U{2212}⋅∙•]$").unwrap());
997
    static DOTS: &[char; 3] = &['⋅', '∙', '•'];
998
1.65k
    let sup_name = name(sup);
999
1.65k
    if sup_name == "mo" && 
MULTIPLE_PLUS_OR_MINUS_OR_DOT226
.
is_match226
(as_text(sup)) {
1000
113
        if as_text(sup).find(DOTS).is_some() {
1001
7
            sup.set_attribute_value(MAYBE_CHEMISTRY, "1");
1002
7
            sup.set_attribute_value(CHEM_FORMULA_OPERATOR, "1");   // value doesn't really matter
1003
106
        }
1004
113
        return if as_text(sup).len()==1 {
198
} else {
215
};
1005
1.54k
    } else if (sup_name == "mi" || 
sup_name == "mn"1.36k
||
sup_name=="mtext"548
) &&
SMALL_UPPER_ROMAN_NUMERAL1.00k
.
is_match1.00k
(as_text(sup)){
1006
17
        sup.set_attribute_value("data-number", small_roman_to_number(as_text(sup)));
1007
17
        sup.set_attribute_value(MAYBE_CHEMISTRY, "2");
1008
17
        return 2;
1009
1.52k
    } else if sup_name == "mrow" {
1010
        // look for something like '2+'
1011
311
        let children = sup.children();
1012
311
        if children.len() == 2 {
1013
177
            let first = as_element(children[0]);
1014
177
            let second = as_element(children[1]);
1015
177
            if name(first) == "mn" && 
name(second) == "mo"79
&&
!as_text(first).contains('.')55
{
1016
55
                let second_text = as_text(second);
1017
55
                if SINGLE_PLUS_OR_MINUS_OR_DOT.is_match(second_text) {
1018
55
                    if second_text.find(DOTS).is_some() {
1019
0
                        second.set_attribute_value(MAYBE_CHEMISTRY, "2");
1020
0
                        second.set_attribute_value(CHEM_FORMULA_OPERATOR, "2");   // value doesn't really matter
1021
55
                    }
1022
55
                    sup.set_attribute_value(MAYBE_CHEMISTRY, "3");
1023
55
                    return 3;   // ending with a +/- makes it likely this is an ion
1024
0
                }
1025
122
            }
1026
134
        }
1027
        // gather up the text and see if it is all +, -, etc
1028
256
        let mut text = "".to_string();
1029
414
        for child in 
&children256
{ // 'children' used later, so need to borrow rather than move
1030
414
            let child = as_element(*child);
1031
414
            if name(child) == "mo" {
1032
169
                text.push_str(as_text(child));
1033
169
            } else {
1034
                // could have something like 'mrow(mrow 2n, -)  (chem example 5-9) -- so fallback to still ok if ends with + or -
1035
245
                let last_super_child = as_element(children[children.len()-1]);
1036
245
                if name(last_super_child) == "mo" {
1037
7
                    let text = as_text(last_super_child);
1038
7
                    if text == "+" || text == "-" {
1039
1
                        sup.set_attribute_value(MAYBE_CHEMISTRY, "3");
1040
1
                        return 3;
1041
6
                    }
1042
238
                }
1043
244
                return NOT_CHEMISTRY;
1044
            }
1045
        }
1046
11
        if MULTIPLE_PLUS_OR_MINUS_OR_DOT.is_match(&text) {
1047
13
            for child in 
children6
{
1048
13
                let child = as_element(child);
1049
13
                if name(child) == "mo" && as_text(child).find(DOTS).is_some() {
1050
0
                    child.set_attribute_value(MAYBE_CHEMISTRY, "1");
1051
0
                    child.set_attribute_value(CHEM_FORMULA_OPERATOR, "1");   // value doesn't really matter
1052
13
                }
1053
            }
1054
6
            let likely = 2*text.len() as i32;
1055
6
            sup.set_attribute_value(MAYBE_CHEMISTRY, &likely.to_string());
1056
6
            return likely;
1057
5
        }
1058
1.21k
    }
1059
1.21k
    return NOT_CHEMISTRY
1060
1.65k
}
1061
1062
1063
/// chem_formula is likely if it is one of:
1064
/// * a (possibly adorned) chemical element
1065
/// * an operator that represents a bond
1066
/// * fences around a chemical formula
1067
/// * an mrow made up of only chemical formulas
1068
15.0k
fn likely_chem_formula(mathml: Element) -> i32 {
1069
    // debug!("start likely_chem_formula:\n{}", mml_to_string(mathml));
1070
15.0k
    if let Some(
value2.65k
) = get_marked_value(mathml) {
1071
2.65k
        return value;       // already marked
1072
12.3k
    }
1073
1074
12.3k
    let tag_name = name(mathml);
1075
12.3k
    let likelihood = match tag_name {
1076
        // a parent may clear the chem flags if something says can't be chemistry (e.g, a non chemically valid script)
1077
12.3k
        "mi" => 
likely_chem_element2.01k
(
mathml2.01k
),
1078
10.3k
        "mo" => 
likely_chem_formula_operator4.48k
(
mathml4.48k
),
1079
5.90k
        "mtext" => 
044
, // definitely need to skip empty mtext, but others are probably neutral also
1080
5.85k
        "mn" => 
01.98k
, // no info
1081
3.87k
        "msub" | 
"msup"3.76k
|
"msubsup"3.70k
|
"mmultiscripts"3.69k
=> {
1082
225
            likely_chem_formula(as_element(mathml.children()[0]));  // set MAYBE_CHEMISTRY attribute
1083
225
            likely_adorned_chem_formula(mathml)
1084
        },
1085
3.64k
        "mrow" => {
1086
3.41k
            let chem_state = likely_chem_state(mathml);
1087
3.41k
            if chem_state > 0 {
1088
18
                chem_state
1089
            } else {
1090
3.39k
                likely_mrow_chem_formula(mathml)
1091
            }
1092
        },
1093
232
        "mfrac" => {
1094
73
            let children = mathml.children();
1095
73
            let num_likely = likely_chem_formula(as_element(children[0]));
1096
73
            let denom_likely = likely_chem_formula(as_element(children[1]));
1097
73
            let likely = num_likely.max(denom_likely);
1098
73
            if likely < CHEMISTRY_THRESHOLD {NOT_CHEMISTRY} else {
likely0
}
1099
        }
1100
159
        "mtd" => {
1101
5
            let mut likely = likely_chem_formula(as_element(mathml.children()[0]));
1102
5
            if likely < CHEMISTRY_THRESHOLD {
1103
4
                likely = likely_chem_equation(mathml);
1104
4
            
}1
1105
5
            likely
1106
        }
1107
154
        "mtable" => {
1108
4
            for mrow in 
mathml2
.
children2
() {
1109
4
                let mrow = as_element(mrow);
1110
5
                for mtd in 
mrow4
.
children4
() {
1111
5
                    let mtd = as_element(mtd);
1112
5
                    let mut likely = likely_chem_formula(mtd);
1113
5
                    if likely < CHEMISTRY_THRESHOLD {
1114
3
                        likely = likely_chem_equation(mtd);
1115
3
                    
}2
1116
5
                    if likely < CHEMISTRY_THRESHOLD {
1117
3
                        is_changed_after_unmarking_chemistry(mtd);
1118
3
                    
}2
1119
                }
1120
            }
1121
2
            NOT_CHEMISTRY
1122
        },
1123
152
        "semantics" => {
1124
0
            likely_chem_formula(get_presentation_element(mathml).1)
1125
        },
1126
        _ => {
1127
152
            if !is_leaf(mathml) {
1128
                // mfrac, msqrt, etc
1129
320
                for child in 
mathml152
.
children152
() {
1130
320
                    let child = as_element(child);
1131
320
                    let likelihood = likely_chem_formula(child);
1132
320
                    if  likelihood > 0 {
1133
77
                        child.set_attribute_value(MAYBE_CHEMISTRY, likelihood.to_string().as_str());
1134
243
                    };
1135
                }
1136
0
            }
1137
            // debug!("NOT_CHEMISTRY:\n{}", mml_to_string(mathml));
1138
152
            NOT_CHEMISTRY
1139
        }
1140
    };
1141
12.3k
    if likelihood >= 0 {
1142
5.09k
        mathml.set_attribute_value(MAYBE_CHEMISTRY, &likelihood.to_string());
1143
7.30k
    }
1144
    // debug!("likely_chem_formula {}:\n{}", likelihood, mml_to_string(mathml));
1145
1146
12.3k
    return likelihood;
1147
1148
3.39k
    fn likely_mrow_chem_formula(mrow: Element) -> i32 {
1149
        // For parens, the only reason to add them is to group the children and then indicate that there is more than one molecule
1150
3.39k
        if IsBracketed::is_bracketed(mrow, "(", ")", false, false) ||
1151
3.14k
           IsBracketed::is_bracketed(mrow, "[", "]", false, false) {
1152
            // If it is bracketed, it should have a subscript to indicate the number of the element.
1153
            // We give a pass to unadorned bracketing chars
1154
310
            if mrow.children().len() != 3 {
1155
0
                return NOT_CHEMISTRY;
1156
310
            }
1157
310
            let contents = as_element(mrow.children()[1]);
1158
310
            let parent = get_parent(mrow);
1159
310
            let parent_is_scripted = IsNode::is_scripted(parent);
1160
310
            if name(contents) != "mrow" && 
!parent_is_scripted82
{
1161
53
                return NOT_CHEMISTRY;
1162
257
            }
1163
257
            let likely = likely_chem_formula(contents);
1164
257
            if parent_is_scripted {
1165
149
                return likely + 3;
1166
            } else {
1167
108
                return likely;
1168
            }
1169
3.08k
        }
1170
1171
3.08k
        let mut likelihood = if is_order_ok(mrow) {
0832
} else {
-42.25k
};
1172
1173
        // check all the children and compute the likelihood of that this is a chemical formula
1174
        // bonus point for consecutive chemical formula children (not counting invisible children)
1175
3.08k
        let mut last_was_likely_formula = 0;        // 0 is false, 1 is true
1176
3.08k
        let mut is_chem_formula = true;              // assume true until we prove otherwise (still want to mark the children)
1177
12.5k
        for child in 
mrow3.08k
.
children3.08k
() {
1178
12.5k
            let child = as_element(child);
1179
12.5k
            let likely = likely_chem_formula(child);
1180
            // debug!("   in mrow: likely={}, likelihood={}", likely, likelihood);
1181
12.5k
            match likely.cmp(&0) {
1182
                Ordering::Greater => { 
1183
2.56k
                    likelihood += likely + last_was_likely_formula;
1184
2.56k
                    last_was_likely_formula = if name(child) == "mo" {
0279
} else {
12.28k
};
1185
                },
1186
5.86k
                Ordering::Less => {
1187
5.86k
                    // debug!("in likely_chem_formula: FALSE: likelihood={}, child\n{}", likelihood, mml_to_string(child));
1188
5.86k
                    is_chem_formula = false;
1189
5.86k
                    last_was_likely_formula = 0;
1190
5.86k
                    likelihood += likely;
1191
5.86k
                },
1192
                Ordering::Equal => {
1193
4.08k
                    if name(child) == "mo" {
1194
2.27k
                        let text = as_text(child);
1195
2.27k
                        if text != "\u{2062}" && 
text != "\u{2063}"466
{ // one of these, we don't change the status
1196
8
                            last_was_likely_formula = 0;
1197
2.26k
                        }
1198
1.81k
                    }
1199
                },
1200
            }
1201
            // debug!("in likely_chem_formula likelihood={}, child\n{}", likelihood, mml_to_string(child));
1202
            // debug!("   likelihood={} (likely={})", likelihood, likely);
1203
        }
1204
1205
3.08k
        if !is_chem_formula || 
likelihood <= NOT_CHEMISTRY832
{
1206
            // the children may have looked have looked right, but something has said "not likely"
1207
2.25k
            return NOT_CHEMISTRY;
1208
832
        } else if likelihood < CHEMISTRY_THRESHOLD && 
is_short_formula387
(
mrow387
) {
1209
                    // debug!("is_short_formula is true for:\n{}", mml_to_string(mrow));
1210
47
                    return CHEMISTRY_THRESHOLD
1211
785
        }
1212
785
        return likelihood;
1213
3.39k
    }
1214
1215
15.0k
}
1216
1217
/// This does some checks that sort of follow IUPAC's "Red Book" in section IR-4.4.
1218
/// Those rules require knowledge that the program doesn't have (e.g., which bond is closest to the central atom).
1219
/// Instead, we mainly use the two main types of orderings: alphabetical and electronegativity.
1220
/// We first do a test to see if this looks like a structural formula -- if so, ordering doesn't apply.
1221
/// If a formula has groupings, each grouping is checked independently of the rest since
1222
///   there are cases where the outer ordering doesn't match the inner ordering.
1223
/// For "generalized salts", we need to split the elements into positive and negative ions, and within each group
1224
///   the order is suppose to be alphabetical but many use electronegativity (the point being there are two separate groups).
1225
/// This site has a nice summary of the rules: https://chemistry.stackexchange.com/questions/537/why-is-arsenous-acid-denoted-h3aso3/538#538
1226
/// Note: "(OH)" doesn't fit with the above, and Susan Jolly suggests allowing any sequence that ends with H, so we allow that.
1227
/// Also, Susan Jolly suggested allowing any compound with C, H, and O
1228
3.08k
fn is_order_ok(mrow: Element) -> bool {
1229
3.08k
    assert_eq!(name(mrow), "mrow");
1230
3.08k
    if let Some(
elements2.32k
) = collect_elements(mrow) {
1231
2.73k
        if 
elements.iter()2.32k
.
any2.32k
(|&e| !CHEMICAL_ELEMENT_ELECTRONEGATIVITY.contains_key(e)) {
1232
1.48k
            return false;
1233
846
        }
1234
846
        let n_elements = elements.len();
1235
846
        if n_elements < 2 {
1236
475
            return true;
1237
371
        } else if has_noble_element(&elements) {
1238
0
            return false;    // noble elements don't form compounds
1239
        } else {
1240
371
            return elements[n_elements-1] == "H"   ||        // special case that includes "OH"
1241
                    // has_non_metal_element(&elements) && !has_non_metal_element(&elements) &&    // must have a metal and non-metal
1242
295
                    has_c_h_o(&elements) ||
1243
291
                    is_structural(&elements) ||
1244
271
                    is_alphabetical(&elements) ||
1245
169
                    is_ordered_by_electronegativity(&elements) ||
1246
12
                    is_generalized_salt(&elements);
1247
        }
1248
    } else {
1249
759
        return false;
1250
    }
1251
3.08k
}
1252
1253
// from https://learnwithdrscott.com/ionic-bond-definition/
1254
// I don't include the noble gases since they don't interact with other elements and are ruled out elsewhere
1255
// fn has_non_metal_element(elements: &[&str]) -> bool {
1256
//     static NON_METAL_ELEMENTS: phf::Set<&str> = phf_set! {
1257
//         "H", "B", "C", "N", "O", "F", "Si", "P", "S", "Cl", "As", "Se", "Br", "Te", "I", "At",
1258
//     };
1259
//     return elements.iter().any(|&e| NON_METAL_ELEMENTS.contains(e));
1260
// }
1261
1262
1263
374
fn has_noble_element(elements: &[&str]) -> bool {
1264
    static NOBLE_ELEMENTS: phf::Set<&str> = phf_set! {
1265
        "He", "Ne", "Ar", "Kr", "Xe", "Rn", "Og" // Og might be reactive, but it is unstable
1266
    };
1267
893
    return 
elements.iter()374
.
any374
(|&e| NOBLE_ELEMENTS.contains(e));
1268
374
}
1269
1270
295
fn has_c_h_o(elements: &[&str]) -> bool {
1271
295
    return elements.contains(&"C") && 
elements39
.
contains39
(
&"H"39
) &&
elements8
.
contains8
(
&"O"8
);
1272
295
}
1273
1274
1275
295
fn is_structural(elements: &[&str]) -> bool {
1276
295
    assert!(elements.len() > 1);   // already handled
1277
1278
    // debug!("is_structural: {:?}", elements);
1279
295
    let mut element_set = HashSet::with_capacity(elements.len());
1280
627
    
elements295
.
iter295
().
for_each295
(|&e| {element_set.insert(e);});
1281
295
    return element_set.len() < elements.len();
1282
295
}
1283
1284
/// collect up all the elements in the mrow.
1285
///  Returns the elements (which can be an empty vector) or None if something (right now an operator) rules out them being elements
1286
3.10k
fn collect_elements(mrow: Element<'_>) -> Option<Vec<&str>> {
1287
3.10k
    let mut elements = Vec::with_capacity(mrow.children().len()/2+1);       // don't bother with slots for operators
1288
8.86k
    for child in 
mrow3.10k
.
children3.10k
() {
1289
8.86k
        let child = as_element(child);
1290
8.86k
        match name(child) {
1291
8.86k
            "mi" | 
"mtext"6.18k
=>
elements2.80k
.
push2.80k
(
as_text2.80k
(
child2.80k
)),
1292
6.06k
            "msub" | 
"msup"5.73k
|
"mmultiscripts"5.65k
=> {
1293
584
                let base = as_element(child.children()[0]);
1294
584
                let base_name = name(base);
1295
584
                if base_name == "mi" || 
base_name == "mtext"115
{
1296
514
                    elements.push(as_text(base));
1297
514
                
}70
// else skip and let recursive likely_chem_formula call check the contents
1298
            },
1299
5.48k
            "mo" if 
likely_chem_formula_operator3.22k
(
child3.22k
) <
0759
=> return
None759
,
1300
2.46k
            "mo" => (),
1301
2.25k
            _ => (),    // let loop in likely_chem_formula() deal with all the negatives
1302
        }
1303
    }
1304
2.34k
    return Some(elements);
1305
3.10k
}
1306
1307
/// check to make sure elements are ordered alphabetically
1308
/// Actually check Hill's system that puts 'C' followed by 'H' first if 'C' is present
1309
275
fn is_alphabetical(elements: &[&str]) -> bool {
1310
275
    assert!(elements.len() > 1);   // already handled
1311
    // debug!("is_alphabetical: {:?}", elements);
1312
275
    let mut elements = elements;
1313
275
    if elements[1..].contains(&"C") {  // "C" must be first if present
1314
22
        return false;
1315
253
    }
1316
253
    if elements[0] == "C" {
1317
10
        elements = if elements[1]=="H" {
&elements[2..]2
} else {
&elements[1..]8
};
1318
243
    }
1319
253
    return elements.len() < 2 || 
elements.windows(2)243
.
all243
(|pair|
pair[0]251
<
pair[1]251
);
1320
275
}
1321
1322
174
fn is_ordered_by_electronegativity(elements: &[&str]) -> bool {
1323
    // HPO_4^2 (Mono-hydrogen phosphate) doesn't fit this pattern, nor does HCO_3^- (Hydrogen carbonate) and some others
1324
    // FIX: drop "H" from the ordering??
1325
174
    assert!(elements.len() > 1);   // already handled
1326
188
    return 
elements.windows(2)174
.
all174
(|pair| CHEMICAL_ELEMENT_ELECTRONEGATIVITY.get(pair[0]).unwrap() < CHEMICAL_ELEMENT_ELECTRONEGATIVITY.get(pair[1]).unwrap());
1327
174
}
1328
1329
12
fn is_generalized_salt(elements: &[&str]) -> bool {
1330
12
    assert!(!elements.is_empty());
1331
12
    return false;
1332
12
}
1333
1334
1335
/// Returns the likelihood that the arg is an adorned chem formula
1336
/// Adornments are:
1337
///   superscripts with +/- and optionally a number (charge)
1338
///  numeric subscripts (e.g. H_2)
1339
/// In addition to chemical elements, we include nuclear decay since there is a lot of overlap in notation
1340
/// The nuclear decay notation is mostly taken from https://tinyurl.com/2f6b8e3a
1341
/// Basically it is a chemical element or 'e', 'p', 'n', 'α', 'β', or 'γ' with pre-sub/superscript
1342
/// There is also an instance with a charge on the referenced page, so we allow that also.
1343
/// 
1344
/// Note: https://tinyurl.com/ysmr8cw2 says "++"/"--", etc., is sometimes used in a superscript particle physics instead of a "2"
1345
/// 
1346
/// Note:  msubsup cleaning for an empty script hasn't happened and we consider an empty script a sign of attempting to vertically align sub/superscripts
1347
///
1348
/// Note: 'mathml' is not necessarily canonicalized   
1349
2.85k
pub fn likely_adorned_chem_formula(mathml: Element) -> i32 {
1350
2.85k
    if !
matches!2.85k
(name(mathml), "msub" |
"msup"1.94k
|
"msubsup"546
|
"mmultiscripts"352
) {
1351
1
        return NOT_CHEMISTRY;
1352
2.85k
    }
1353
    // some simple sanity checks on the scripts...
1354
2.85k
    let tag_name = name(mathml);
1355
2.85k
    let children = mathml.children();
1356
2.85k
    let mut likelihood = 0;
1357
2.85k
    let mut is_empty_subscript = false;
1358
    // debug!("likely_adorned_chem_formula:\n{}", mml_to_string(mathml));
1359
2.85k
    if tag_name == "msub" || 
tag_name == "msubsup"1.94k
{
1360
        // subscripts should be just a number, although they could be 'n' or '2n' or other exprs.
1361
1.10k
        let subscript = as_element(children[1]);
1362
1.10k
        is_empty_subscript = name(subscript) == "mtext" && 
as_text(subscript).trim()3
.
is_empty3
();
1363
1.10k
        if !is_empty_subscript {
1364
1.10k
            likelihood += likely_chem_subscript(subscript);
1365
1.10k
        
}3
1366
1.74k
    }
1367
1368
2.85k
    let mut empty_superscript = false;
1369
2.85k
    if tag_name == "msup" || 
tag_name == "msubsup"1.45k
{
1370
        // debug!("likely_adorned_chem_formula: mathml\n{}", mml_to_string(mathml));
1371
1.59k
        let superscript = as_element(children[if tag_name == "msup" {
11.39k
} else {
2194
}]);
1372
1.59k
        empty_superscript = name(superscript) == "mtext" && 
as_text(superscript).trim()13
.
is_empty13
();
1373
1.59k
        if !empty_superscript {
1374
1.58k
            likelihood += likely_chem_superscript(superscript);
1375
1.58k
        
}6
1376
1.26k
    }
1377
2.85k
    if tag_name == "msubsup" && (
is_empty_subscript194
||
empty_superscript191
) {
1378
9
        likelihood += 1; // might be trying to vertically align scripts as in done in chemistry
1379
2.84k
    }
1380
1381
2.85k
    if tag_name == "mmultiscripts" {
1382
        // prescripts are normally positive integers, chem 2.5.1 allows for a superscript for a Lewis dot
1383
        // postscript should be a charge
1384
1385
        let prescripts;
1386
        let postscripts;
1387
351
        if children.len() == 4 && 
name138
(
as_element138
(children[1]))=="mprescripts" { // just prescripts
1388
138
            prescripts = &children[2..4];
1389
138
            postscripts = &children[0..0]; // empty
1390
213
        } else if children.len() == 6 && 
name57
(
as_element57
(children[3]))=="mprescripts" { // pre and postscripts
1391
55
            prescripts = &children[4..6];
1392
55
            postscripts = &children[1..3]; // empty
1393
158
        } else if children.len() == 3 || 
children.len() == 568
{ // just postscripts (simultaneous or offset)
1394
118
            prescripts = &children[0..0]; // empty
1395
118
            postscripts = &children[1..];
1396
118
        } else {
1397
40
            return NOT_CHEMISTRY;
1398
        };
1399
1400
311
        if !prescripts.is_empty() {
1401
193
            let pre_subscript = as_element(prescripts[0]);
1402
193
            let pre_subscript_name = name(pre_subscript);
1403
1404
193
            let pre_superscript = as_element(prescripts[1]);
1405
193
            let pre_superscript_name = name(pre_superscript);
1406
1407
            // deal with special case of 'e' with prescripts of -1 and 0
1408
193
            if is_adorned_electron(children[0], prescripts) {
1409
31
                return 100;     // very likely chemistry
1410
162
            }
1411
162
            let base = as_element(children[0]);
1412
162
            let base_name = name(base);
1413
162
            let 
atomic_number127
= if
matches!154
(base_name, "mi" |
"mtext"41
) &&
1414
154
                                        let Some(
atomic_number127
) = CHEMICAL_ELEMENT_ATOMIC_NUMBER.get(as_text(base)) {
1415
127
                        *atomic_number
1416
                    } else {
1417
35
                        return NOT_CHEMISTRY;
1418
                    };
1419
127
            if pre_superscript_name == "mo" {
1420
                // Lewis dot prescript case
1421
3
                if pre_subscript_name != "none" {
1422
0
                    return NOT_CHEMISTRY;
1423
3
                }
1424
3
                likelihood += likely_chem_superscript(pre_superscript);
1425
124
            } else if pre_superscript_name == "mn" { // must have a pre-superscript (neutrons + protons)
1426
75
                if let Ok(mass) = as_text(pre_superscript).parse::<u32>() {
1427
                    // "drip line" is 1.5 * mass < 3.5 * mass -- it is possible to outside of this range, but VERY unlikely
1428
                    // to avoid floating point, we multiply by 2 and compare to 3 and 7
1429
75
                    if 3*atomic_number < 2*mass && 
2*mass < 7*atomic_number74
{
1430
74
                        likelihood += 3;
1431
74
                    
}1
1432
0
                }
1433
75
                if pre_subscript_name == "mn"  && 
as_text(pre_subscript)71
== atomic_number.to_string() {
1434
69
                        likelihood = CHEMISTRY_THRESHOLD;
1435
69
                
}6
1436
            } else {
1437
49
                return NOT_CHEMISTRY;
1438
            }
1439
118
        }
1440
1441
196
        if !postscripts.is_empty() {
1442
119
            let mut i = 0;
1443
266
            while i < postscripts.len() {
1444
147
                let sub = as_element(postscripts[i]);
1445
                // debug!("sub: {}", mml_to_string(sub));
1446
147
                if name(sub) != "none" {
1447
91
                    likelihood += likely_chem_subscript(sub);
1448
91
                
}56
1449
147
                let sup = as_element(postscripts[i+1]);
1450
147
                if name(sup) != "none" {
1451
65
                    // debug!("sup: {}", mml_to_string(sub));
1452
65
                    likelihood += likely_chem_superscript(sup);
1453
82
                }
1454
147
                i += 2;
1455
            }
1456
77
        }
1457
2.50k
    }
1458
1459
2.69k
    let base = as_element(children[0]);
1460
2.69k
    let base_name = name(base);
1461
2.69k
    if base_name == "mi" || 
base_name == "mtext"822
{
1462
2.05k
        likelihood += likely_chem_element(base);
1463
2.05k
    } else if 
base_name == "mrow"641
{
1464
        // debug!("mrow addition:\n{}", mml_to_string(base));
1465
        // a safe minor canonicalization that allows "short_form" calculations if appropriate
1466
187
        if (IsBracketed::is_bracketed(base, "(", ")", false, false) ||
1467
89
            IsBracketed::is_bracketed(base, "[", "]", false, false)) &&
1468
148
           base.children().len() > 3 {
1469
77
            let inner_mrow = create_mathml_element(&base.document(), "mrow");
1470
77
            inner_mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
1471
77
            let mut children = base.children();
1472
77
            let inside_of_parens = children.drain(1..children.len()-1);
1473
77
            inner_mrow.append_children(inside_of_parens);
1474
77
            base.replace_children(vec![children[0], ChildOfElement::Element(inner_mrow), children[children.len()-1]]);
1475
110
        }
1476
187
        likelihood += likely_chem_formula(base);
1477
454
    } else {
1478
454
        likelihood += likely_chem_formula(base);
1479
454
    }
1480
    
1481
    // debug!("returning from likely_adorned_chem_formula: likelihood={}, mathml\n{}", likelihood, mml_to_string(mathml));
1482
2.69k
    return likelihood;
1483
1484
1485
193
    fn is_adorned_electron(base: ChildOfElement, prescripts: &[ChildOfElement]) -> bool {
1486
        // looking for 'e' with prescripts of -1 and 0
1487
193
        let base = as_element(base);
1488
193
        let pre_lower = as_element(prescripts[0]);
1489
193
        let pre_upper = as_element(prescripts[1]);
1490
193
        if (name(base) == "mi" || 
name(base) == "mtext"57
) &&
as_text(base) == "e"185
&&
1491
31
           name(pre_upper) == "mn" && as_text(pre_upper) == "0" && 
1492
31
           name(pre_lower) == "mrow" && pre_lower.children().len() == 2 {
1493
            // looking '-' and '1'
1494
31
            let lower_children = pre_lower.children();
1495
31
            let minus = as_element(lower_children[0]);
1496
31
            let one = as_element(lower_children[1]);
1497
            // not yet normalized, so we need to compare against ASCII minus and u+2212
1498
31
            return name(minus) == "mo" && (as_text(minus) == "-" || as_text(minus) == "−") && 
1499
31
                   name(one) == "mn"   && as_text(one) == "1";
1500
        } else {
1501
162
            return false;
1502
        }
1503
193
    }
1504
2.85k
}
1505
1506
/// useful function to see if the str is a single char matching the predicate
1507
29.6k
fn is_single_char_matching(leaf_text: &str, pred: impl Fn(char) -> bool) -> bool {
1508
29.6k
    let mut chars = leaf_text.chars();
1509
29.6k
    if let Some(ch) = chars.next() && chars.next().is_none() {
1510
29.5k
        return pred(ch);
1511
87
    }
1512
87
    return false;
1513
29.6k
}
1514
1515
17.2k
fn likely_chem_formula_operator(mathml: Element) -> i32 {
1516
    // mostly from chenzhijin.com/en/article/Useful%20Unicode%20for%20Chemists (Arrows and Other)
1517
    // also en.wikipedia.org/wiki/Chemical_formula#Condensed_formula
1518
    #[derive(PartialEq, Eq)]
1519
    enum BondType {DoubleBond, TripleBond}      // options for is_legal_bond()
1520
    // "⋅" is used in GTM 16.2 and en.wikipedia.org/wiki/Cement_chemist_notation -- may want to add some similar chars
1521
    static CHEM_FORMULA_OPERATORS: phf::Set<&str> = phf_set! {
1522
        "-", "\u{2212}", "⋅", ":", "=", "∷", "≡", ":::", "≣", "::::", // bond symbols (need both 2212 and minus because maybe not canonicalized)
1523
        "⋮", // lewis dots, part of "⋮⋮" - triple bond (see Nemeth chem guide 2.5.4)
1524
    };
1525
16.1k
    fn is_chem_formula_ok(ch: char) -> bool {
1526
16.1k
        
matches!9.64k
(ch, '(' | ')' | '[' | ']' | '\u{2062}' | '\u{2063}')
1527
16.1k
    }
1528
1529
17.2k
    assert_eq!(name(mathml), "mo");
1530
17.2k
    let leaf_text = as_text(mathml);
1531
17.2k
    if CHEM_FORMULA_OPERATORS.contains(leaf_text) &&
1532
1.85k
       (has_inherited_property(mathml, "chemical-formula") ||
1533
1.85k
        ( !(leaf_text == "=" || 
leaf_text == "∷"1.02k
) ||
is_legal_bond848
(
mathml848
,
BondType::DoubleBond848
) ) &&
1534
1.05k
        ( !(leaf_text == "≡" || 
leaf_text == ":::"1.03k
) ||
is_legal_bond26
(
mathml26
,
BondType::TripleBond26
) )
1535
       )  {
1536
1.04k
        mathml.set_attribute_value(MAYBE_CHEMISTRY, "1");
1537
1.04k
        mathml.set_attribute_value(CHEM_FORMULA_OPERATOR, "1");
1538
1.04k
        return 1;
1539
16.1k
    } else if is_single_char_matching(leaf_text, is_chem_formula_ok) {
1540
6.49k
        return 0;  // not much info
1541
    } else {
1542
9.67k
        return -3; // still a small chance;
1543
    }
1544
1545
874
    fn is_legal_bond(mathml: Element, bond_type: BondType) -> bool {
1546
874
        let preceding = mathml.preceding_siblings();
1547
874
        let following = mathml.following_siblings();
1548
874
        if preceding.is_empty() || 
following783
.
is_empty783
() {
1549
115
            return false;
1550
759
        }
1551
1552
759
        let mut preceding_element = as_element(preceding[preceding.len()-1]);
1553
        // special check for CH_2 -- double bond is really with C
1554
759
        if bond_type == BondType::DoubleBond && 
name(preceding_element) == "msub"734
&&
1555
31
           preceding.len() > 1 &&  
&11
convert_to_short_form11
(preceding_element).unwrap_or_default() == "H_2" {
1556
2
            preceding_element = as_element(preceding[preceding.len()-2]);
1557
2
            if !is_leaf(preceding_element) || as_text(preceding_element) != "C" {
1558
0
                return false;
1559
2
            }
1560
757
        } else if name(preceding_element) != "mi" && 
name(preceding_element) != "mtext"353
{
1561
320
            return false;
1562
437
        }
1563
439
        let following_element = get_possible_embellished_node(as_element(following[0]));
1564
439
        if name(following_element) != "mi" && 
name(following_element) != "mtext"315
{
1565
313
            return false;
1566
126
        }
1567
126
        let preceding_text = as_text(preceding_element);
1568
126
        let following_text = as_text(following_element);
1569
126
        return match bond_type {
1570
105
            BondType::DoubleBond => is_legal_double_bond(preceding_text, following_text),
1571
21
            BondType::TripleBond => is_legal_triple_bond(preceding_text, following_text),
1572
        };
1573
1574
105
        fn is_legal_double_bond(left: &str, right: &str) -> bool {
1575
            // this is based on table in en.wikipedia.org/wiki/Double_bond#Types_of_double_bonds_between_atoms
1576
            static DOUBLE_BOND_TO_SELF: phf::Set<&str> = phf_set! {
1577
                "C", "O", "N", "S", "Si", "Ge", "Sn", "Pb"
1578
            };
1579
                // "C" => &["O", "N", "S"],
1580
                // "O" => &["N", "S"],
1581
105
            if left == right && 
DOUBLE_BOND_TO_SELF50
.
contains50
(
left50
) {
1582
44
                return true;
1583
61
            }
1584
61
            return match left {
1585
61
                "C" => 
right=="O"3
||
right=="N"2
||
right=="S"2
,
1586
58
                "O" => 
right=="N"1
||
right=="S"1
,
1587
57
                "Si" => 
right=="C"0
,
1588
57
                _ => false,
1589
            }
1590
105
        }
1591
1592
21
        fn is_legal_triple_bond(left: &str, right: &str) -> bool {
1593
            // According to https://tinyurl.com/rkynhwj3 (from physics.org)
1594
            // triple bonds can be formed between any of B, C, N, and O
1595
            // Apparently they can also be forced in other cases, but they are rare.
1596
            // 'B' is from studiousguy.com/triple-bond-examples/
1597
21
            return  (left == "B"  || left == "C"  || 
left == "N"5
||
left == "O"5
) &&
1598
18
                    (right == "B" || right == "C" || 
right == "N"5
||
right == "O"5
);
1599
21
        }
1600
874
    }
1601
17.2k
}
1602
1603
/// This assumes canonicalization of characters has happened
1604
6.85k
fn likely_chem_equation_operator(mathml: Element) -> i32 {
1605
1606
6.73k
    fn is_chem_equation_operator(ch: char) -> bool {
1607
6.73k
        
matches!4.90k
(ch, '+' | '=' | '-' | '·' | '℃' | '°' | '‡' | '∆' | '×' | '\u{2062}')
1608
6.73k
    }
1609
1610
6.85k
    let elem_name = name(mathml);
1611
6.85k
    if elem_name == "munder" || 
elem_name == "mover"6.80k
||
elem_name == "munderover"6.78k
{
1612
86
        let base = as_element(mathml.children()[0]);
1613
86
        if name(base) == "mo" && 
is_single_char_matching64
(
as_text(base)64
, is_chem_equation_arrow) {
1614
1
            base.set_attribute_value(MAYBE_CHEMISTRY, "1");
1615
1
            base.set_attribute_value(CHEM_EQUATION_OPERATOR, "1");
1616
1
            return 1;
1617
85
        } else if elem_name == "mover" && 
is_hack_for_missing_arrows20
(
mathml20
) {
1618
9
            return 2;
1619
        } else {
1620
76
            return NOT_CHEMISTRY;
1621
        }    
1622
6.76k
    }
1623
1624
6.76k
    if name(mathml) == "mo" {
1625
6.76k
        let text = as_text(mathml);
1626
6.76k
        if is_single_char_matching(text, is_chem_equation_operator) || 
is_single_char_matching4.93k
(
text4.93k
, is_chem_equation_arrow) {
1627
1.96k
            mathml.set_attribute_value(MAYBE_CHEMISTRY, "1");
1628
1.96k
            mathml.set_attribute_value(CHEM_EQUATION_OPERATOR, "1");
1629
1.96k
            return 1;
1630
4.79k
        } else if text == "\u{2062}" || text == "\u{2063}" {
1631
            // FIX: the invisible operator between elements should be well-defined, but this likely needs work, so both accepted for now
1632
0
            return 0;
1633
4.79k
        }
1634
0
    }
1635
4.79k
    return -3;  // there is still a chance
1636
1637
    /// Detects output of mhchem for some equilibrium arrows that currently (11/22) don't have Unicode points
1638
    /// See github.com/NSoiffer/MathCAT/issues/60 for the patterns being matched
1639
20
    fn is_hack_for_missing_arrows(mover: Element) -> bool {
1640
20
        assert_eq!(name(mover), "mover");
1641
20
        let children = mover.children();
1642
20
        let base = as_element(children[0]);
1643
20
        let mo_base = if name(base) == "mrow" && 
base.children().len() == 212
{
1644
9
            as_element(base.children()[0])
1645
        } else {
1646
11
            base
1647
        };
1648
20
        let upper = as_element(children[1]);
1649
20
        let mo_upper = if name(upper) == "mrow" && 
upper.children().len() == 29
{
1650
9
            as_element(upper.children()[1])
1651
        } else {
1652
11
            upper
1653
        };
1654
        // slightly sloppy match, but almost certainly good enough
1655
20
        return name(mo_base) == "mo" && 
name(mo_upper) == "mo"9
&&
1656
9
                as_text(mo_base) == "↽" && as_text(mo_upper) == "⇀";
1657
20
        }
1658
6.85k
}
1659
1660
38
fn is_equilibrium_constant(mut mathml: Element) -> bool {
1661
38
    if name(mathml) == "msub" {
1662
27
        mathml = as_element(mathml.children()[0]);
1663
27
    
}11
1664
1665
38
    return name(mathml) == "mi" && 
as_text(mathml) == "K"25
;
1666
38
}
1667
1668
// Oxidation states range from -4 to 9 and are written with (a subset of) roman numerals.
1669
// All instances seem to be upper case that I've seen.
1670
3
static SMALL_UPPER_ROMAN_NUMERAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*^(IX|IV|V?I{0,3})\s*$").unwrap());
1671
1672
/// look for "(s), "(l)", "(g)", "(aq)" (could also use [...])
1673
/// this might be called before canonicalization, but in clean_chemistry_mrow, we made sure "( xxx )" is grouped properly
1674
3.68k
pub fn likely_chem_state(mathml: Element) -> i32 {
1675
    
1676
3.68k
    if IsBracketed::is_bracketed(mathml, "(", ")", false, false) ||
1677
3.30k
       IsBracketed::is_bracketed(mathml, "[", "]", false, false) {
1678
438
        let contents = as_element(mathml.children()[1]);
1679
438
        let contents_name = name(contents);
1680
438
        if contents_name == "mi" || 
contents_name == "mtext"331
{
1681
109
            let text = as_text(contents);
1682
109
            if text == "s" || 
text == "l"102
||
text == "g"102
||
text == "aq"68
{
1683
67
                return text.len() as i32 + 1;       // hack to count chars -- works because all are ASCII 
1684
42
            };
1685
329
        }
1686
3.24k
     }
1687
3.61k
     return NOT_CHEMISTRY;
1688
3.68k
}
1689
1690
/// Returns the likelihood that the arg is an element
1691
16.4k
pub fn likely_chem_element(mathml: Element) -> i32 {
1692
    static NUCLEAR_SYMBOLS: [&str; 6] = ["e", "p", "n", "α", "β","γ"];
1693
1694
16.4k
    assert!(name(mathml) == "mi" || 
name(mathml) == "mtext"1.11k
, "{} is not 'mi' or 'mtext'",
name0
(
mathml0
));
1695
16.4k
    let text = as_text(mathml);
1696
16.4k
    if as_text(mathml).trim().is_empty() {
1697
782
        return 0;   // whitespace
1698
15.6k
    } else if is_chemical_element(mathml) {
1699
        // single letter = 1; single letter with mathvariant="normal" = 2; double = 3 -- all elements are ASCII
1700
2.21k
        return if text.len() == 1 {
1701
1.90k
            if mathml.attribute_value("mathvariant").unwrap_or_default() == "normal" {
2491
} else {
11.41k
}
1702
        } else {
1703
311
            3
1704
        };
1705
13.4k
    } else if NUCLEAR_SYMBOLS.contains(&text) {
1706
659
        return 0;
1707
        // not much special about them;
1708
    } else {
1709
12.7k
        return NOT_CHEMISTRY;
1710
    }
1711
16.4k
}
1712
1713
static SHORT_SINGLE_LETTER_ELEMENT_FORMULAE: phf::Set<&str> = phf_set! {
1714
    // from en.wikipedia.org/wiki/Glossary_of_chemical_formulae (via chem_formula_from_wikipedia.py)
1715
    "BF_3", "BI_3", "BN", "BP", "B_2F_4", "B_2H_6", "B_2O_3", "B_2S_3", "B_4C",
1716
    "CB_4", "CF_4", "CH_2", "CH_4", "CO", "CO_2", "CO_3", "CS_2", "CW", "C_2F_4",
1717
    "C_2H_4", "C_2H_6", "C_2U", "C_2Y", "C_3H_4", "C_3H_6", "C_3H_8", "C_4H_2",
1718
    "C_4H_8", "C_4I_2", "C_6H_6", "C_6N_4", "C_7H_8", "C_8H_8", "DI", "D_2O",
1719
    "FI", "FI_2", "FK", "FN", "FO", "FO_2", "FP", "FS", "FW", "FY", "F_2",
1720
    "F_2N", "F_2O", "F_2O_2", "F_2P", "F_2S", "F_2S_2", "F_2W", "F_2Y", "F_3B",
1721
    "F_3P", "F_3S", "F_3W", "F_3Y", "F_4B_2", "F_4C", "F_4C_2", "F_4N_2",
1722
    "F_4S", "F_4U", "F_4W", "F_5I", "F_5P", "F_5S", "F_5U", "F_5W", "F_6S",
1723
    "F_6W", "F_7I", "HF", "HI", "HK", "HN_3", "H_2", "H_2C", "H_2C_2", "H_2C_4",
1724
    "H_2O", "H_2O_2", "H_2S", "H_3N", "H_3P", "H_4C", "H_4C_2", "H_4C_3",
1725
    "H_4N_2", "H_4N_4", "H_6B_2", "H_6C_2", "H_6C_3", "H_6C_6", "H_8C_3",
1726
    "H_8C_7", "H_8C_8", "ID", "IF", "IF_5", "IF_7", "IH", "IK", "IO_3", "I_2",
1727
    "I_2F", "I_2O_5", "I_2W", "I_3B", "I_3N", "I_3U", "I_3V", "I_4P_2", "I_4W",
1728
    "KH", "KI", "K_2F_2", "K_2O", "K_2O_2", "K_2S", "NB", "NF", "NF_2", "NF_3",
1729
    "NI_3", "NO", "NO_2", "NU", "NV", "N_2", "N_2F_4", "N_2H_2", "N_2H_4",
1730
    "N_2O_3", "N_2O_4", "N_2O_5", "N_3H", "N_4C_6", "N_4H_4", "N_5P_3", "O",
1731
    "OD_2", "OF", "OF_2", "OH_2", "OK_2", "ON", "ON_2", "OT_2", "O_2", "O_2C",
1732
    "O_2F_2", "O_2H_2", "O_2K_2", "O_2N", "O_2S", "O_2U", "O_2W", "O_3",
1733
    "O_3C", "O_3I", "O_3N_2", "O_3S", "O_3U", "O_3V_2", "O_3W", "O_3Y_2",
1734
    "O_5I_2", "O_5N_2", "O_5P_2", "O_5V_2", "O_8U_3", "PB", "PF", "PF_2", "PF_3",
1735
    "PH_3", "PY", "P_2F_4", "P_2I_4", "P_2O_5", "P_2S_3", "P_3N_5", "SF", "SF_2",
1736
    "SF_4", "SF_5", "SF_6", "SH_2", "SK_2", "SO_2", "SO_3", "S_2C", "S_2F_2",
1737
    "S_2W", "S_3B_2", "S_3P_2", "S_3W", "S_3Y_2", "T_2O", "UC_2", "UF_4", "UF_5",
1738
    "UI_3", "UN", "UO_2", "UO_3", "US_2", "U_3O_8", "VI_3", "VN", "V_2O_3",
1739
    "WC", "WF", "WF_2", "WF_3", "WF_4", "WF_5", "WF_6", "WI_2", "WI_4", "WO_2",
1740
    "WS_2", "WS_3", "YB_6", "YC_2", "YF", "YF_2", "YF_3", "YP", "Y_2O_3",
1741
1742
    // from en.wikipedia.org/wiki/Ion#Common_ions (via chem_formula_from_wikipedia.py)
1743
    "CH_3COO^−", "CN^−", "CO_3^2−", "C^−", "C_2O_4^2−", "F^−", "HCOO^−", 
1744
    "HPO_4^2−", "HSO_3^−", "HSO_4^−", "H^+", "H^−", "H_2PO_4^−", "H_3O^+", "I^−", 
1745
    "NH_4^+", "NO_2^−", "NO_3^−", "N^3−", "N_3^−", "OH^−", "O^2−", "O_2^2−", 
1746
    "PO_4^3−", "P^3−", "SO_3^2−", "SO_4^2−", "S^2−", "S_2O_3^2−",
1747
1748
    // from gchem.cm.utexas.edu/canvas.php?target=bonding/ionic/polyatomic-ions.html
1749
    "PO_3^3−", "IO_3^−",
1750
1751
    // others
1752
    "CH_3", /* methyl */
1753
    "NH_3",  // ammonium
1754
};
1755
1756
/// Returns true if the formula is composed of 1 or 2 single letter elements and it matches a known compound/ion
1757
/// This might be called (via likely_adorned_chem_formula) unparsed
1758
387
fn is_short_formula(mrow: Element) -> bool {
1759
387
    assert_eq!(name(mrow), "mrow");
1760
387
    let children = mrow.children();
1761
387
    let n_children = children.len();
1762
387
    if n_children == 0 || n_children > 3 || (
n_children == 3378
&&
name317
(
as_element317
(children[1])) != "mo") {
1763
12
        return false;
1764
375
    }
1765
1766
375
    let first_element = convert_to_short_form( as_element(children[0]) );
1767
375
    if n_children == 1 {
1768
2
        return first_element.is_ok();
1769
373
    }
1770
373
    let second_element = convert_to_short_form( as_element(children[if n_children == 2 {
159
} else {
2314
}]) );
1771
373
    return match (first_element, second_element) {
1772
365
        (Ok(first), Ok(second)) => {
1773
365
            let short_form = first + second.as_str();
1774
            // debug!("short_form: {}", short_form);
1775
365
            return SHORT_SINGLE_LETTER_ELEMENT_FORMULAE.contains(&short_form);
1776
        },
1777
8
        _ => false,
1778
    }
1779
387
}
1780
1781
931
fn convert_to_short_form(mathml: Element) -> Result<String> {
1782
931
    let mathml_name = name(mathml);
1783
931
    return match mathml_name {
1784
931
        "mi" | 
"mtext"441
|
"mn"393
|
"mo"104
=>
Ok( as_text(mathml).to_string() )836
,
1785
95
        "none" => 
Ok( "".to_string() )0
,
1786
95
        "msub" | 
"msup"16
|
"msubsup"13
|
"mmultiscripts"13
=> {
1787
86
            let is_mmultiscripts = mathml_name == "mmultiscripts";
1788
86
            let children = mathml.children();
1789
86
            let mut result = convert_to_short_form(as_element(children[0]))
?0
;
1790
86
            if is_mmultiscripts && 
children.len() != 34
{
1791
0
                bail!("mmultiscripts found with {} children -- not part of chemical formula", children.len());
1792
86
            }
1793
86
            if mathml_name == "msub" || 
mathml_name == "msubsup"7
|| (
is_mmultiscripts7
&&
name4
(
as_element4
(children[1])) != "none") {
1794
83
                result += "_";
1795
83
                result += &convert_to_short_form(as_element(children[1]))
?1
;
1796
3
            }
1797
85
            if mathml_name == "msup" || 
mathml_name == "msubsup"82
|| (
is_mmultiscripts82
&&
name4
(
as_element4
(children[2])) != "none") {
1798
3
                result += "^";
1799
3
                result += &convert_to_short_form(as_element(children[if mathml_name=="msup" {1} else {
20
}]))
?0
;
1800
82
            }
1801
85
            Ok( result )
1802
        },
1803
9
        "mrow" => {
1804
            // the only time this is valid is if the superscript is something like "+" or "2+", so we do a few checks and short circuit false now
1805
9
            let mrow_children = mathml.children();
1806
9
            if mrow_children.len() == 1 || mrow_children.len() == 2 {
1807
0
                let mut result = convert_to_short_form(as_element(mrow_children[0]))?;
1808
0
                if mrow_children.len() == 2 {
1809
0
                    result += &convert_to_short_form(as_element(mrow_children[1]))?;
1810
0
                }
1811
0
                return Ok(result)
1812
            } else {
1813
9
                bail!("mrow found with {} children -- not part of chemical formula", mrow_children.len());
1814
            }
1815
        }
1816
0
        _ => bail!("{} found -- not part of chemical formula", mathml_name),
1817
    }
1818
931
}
1819
1820
/// A map of chemical elements and their relative IUPAC electronegativity (https://i.stack.imgur.com/VCSzW.png)
1821
/// That list uses a horizontal line for the Lanthanide and Actinide Series.
1822
/// Because I had already ordered the elements before realizing that, I opened a gap and started the higher ones again with a '1' in front.
1823
/// The list is missing recent (unstable) elements -- I added them with the same value as the element above them in the periodic table.
1824
static CHEMICAL_ELEMENT_ELECTRONEGATIVITY: phf::Map<&str, u32> = phf_map! {
1825
  "Ac" => 40, "Ag" => 155, "Al" => 163, "Am" => 29, "Ar" => 4, "As" => 172, "At" => 181, "Au" => 154,
1826
    "B" => 164, "Ba" => 14, "Be" => 18, "Bh" => 137, "Bi" => 170, "Bk" => 27, "Br" => 183,
1827
  "C" => 169, "Ca" => 16, "Cd" => 158, "Ce" => 56, "Cf" => 26, "Cl" => 184, "Cm" => 28, "Cn" => 157, "Co" => 148, "Cr" => 136, "Cs" => 8, "Cu" => 156,
1828
    "Db" => 129, "Ds" => 149, "Dy" => 48, 
1829
  "Er" => 46, "Es" => 25, "Eu" => 51, "F" => 185, "Fe" => 144, "Fl" => 165, "Fm" => 24, "Fr" => 7, "Ga" => 162, "Gd" => 50, "Ge" => 167,
1830
  "H" => 175, "He" => 6, "Hf" => 126, "Hg" => 157, "Ho" => 47, "Hs" => 141, "I" => 182, "In" => 161, "Ir" => 146, "K" => 10, "Kr" => 3,
1831
  "La" => 62, "Li" => 12, "Lr" => 19, "Lu" => 41, "Lv" => 176, "Mc" => 170, "Md" => 23, "Mg" => 17, "Mn" => 140, "Mo" => 135, "Mt" => 145, 
1832
  "N" => 174, "Na" => 11, "Nb" => 131, "Nd" => 54, "Ne" => 5, "Nh" => 160, "Ni" => 152, "No" => 22, "Np" => 31, "O" => 180, "Og" => 1, "Os" => 142, 
1833
  "P" => 173, "Pa" => 33, "Pb" => 165, "Pd" => 151, "Pm" => 53, "Po" => 176, "Pr" => 55, "Pt" => 150, "Pu" => 30,
1834
  "Ra" => 13, "Rb" => 9, "Re" => 138, "Rf" => 125, "Rg" => 153, "Rh" => 147, "Rn" => 1, "Ru" => 143, 
1835
  "S" => 179, "Sb" => 171, "Sc" => 124, "Se" => 178, "Sg" => 133, "Si" => 168, "Sm" => 52, "Sn" => 166, "Sr" => 15,
1836
  "Ta" => 130, "Tb" => 49, "Tc" => 139, "Te" => 177, "Th" => 34, "Ti" => 128, "Tl" => 160, "Tm" => 45, "Ts" => 181, 
1837
  "U" => 32, "V" => 132, "W" => 134, "Xe" => 2, "Y" => 123, "Yb" => 44, "Zn" => 159, "Zr" => 127,
1838
    // The following come from E.A. Moore who said to treat them like chemicals 
1839
    // These stand for methyl, ethyl, alkyl, acetyl and phenyl and apparently are quite commonly used ("Ac" is already a chemical)
1840
    // A full(er?) list is at en.wikipedia.org/wiki/Skeletal_formula#Alkyl_groups and in following sections
1841
    "Me" => 0, "Et" => 0, "R" => 0, /* "Ac" => 0, */ "Ph" => 0,
1842
    "X" => 0, /* treated as an unknown */
1843
};
1844
1845
// A map of the chemical elements and their atomic numbers
1846
static CHEMICAL_ELEMENT_ATOMIC_NUMBER: phf::Map<&str, u32> = phf_map! {
1847
    "H" => 1, "He" => 2, "Li" => 3, "Be" => 4, "B" => 5, "C" => 6, "N" => 7, "O" => 8, "F" => 9, "Ne" => 10,
1848
    "Na" => 11, "Mg" => 12, "Al" => 13, "Si" => 14, "P" => 15, "S" => 16, "Cl" => 17, "Ar" => 18, "K" => 19, "Ca" => 20,
1849
    "Sc" => 21, "Ti" => 22, "V" => 23, "Cr" => 24, "Mn" => 25, "Fe" => 26, "Co" => 27, "Ni" => 28, "Cu" => 29, "Zn" => 30,
1850
    "Ga" => 31, "Ge" => 32, "As" => 33, "Se" => 34, "Br" => 35, "Kr" => 36, "Rb" => 37, "Sr" => 38, "Y" => 39, "Zr" => 40,
1851
    "Nb" => 41, "Mo" => 42, "Tc" => 43, "Ru" => 44, "Rh" => 45, "Pd" => 46, "Ag" => 47, "Cd" => 48, "In" => 49, "Sn" => 50,
1852
    "Sb" => 51, "Te" => 52, "I" => 53, "Xe" => 54, "Cs" => 55, "Ba" => 56, "La" => 57, "Ce" => 58, "Pr" => 59, "Nd" => 60, 
1853
    "Pm" => 61, "Sm" => 62, "Eu" => 63, "Gd" => 64, "Tb" => 65, "Dy" => 66, "Ho" => 67, "Er" => 68, "Tm" => 69, "Yb" => 70,
1854
    "Lu" => 71, "Hf" => 72, "Ta" => 73, "W" => 74, "Re" => 75, "Os" => 76, "Ir" => 77, "Pt" => 78, "Au" => 79, "Hg" => 80,
1855
    "Tl" => 81, "Pb" => 82, "Bi" => 83, "Po" => 84, "At" => 85, "Rn" => 86, "Fr" => 87, "Ra" => 88, "Ac" => 89, "Th" => 90,
1856
    "Pa" => 91, "U" => 92, "Np" => 93, "Pu" => 94, "Am" => 95, "Cm" => 96, "Bk" => 97, "Cf" => 98, "Es" => 99, "Fm" => 100,
1857
    "Md" => 101, "No" => 102, "Lr" => 103, "Rf" => 104, "Db" => 105, "Sg" => 106, "Bh" => 107, "Hs" => 108, "Mt" => 109, "Ds" => 110,
1858
    "Rg" => 111, "Cn" => 112, "Nh" => 113, "Fl" => 114, "Mc" => 115, "Lv" => 116, "Ts" => 117, "Og" => 118, 
1859
};
1860
1861
26.9k
pub fn is_chemical_element(node: Element) -> bool {
1862
  // FIX: allow name to be in an mrow (e.g., <mi>N</mi><mi>a</mi>
1863
26.9k
  let name = name(node);
1864
26.9k
  if name != "mi" && 
name != "mtext"702
{
1865
71
    return false;
1866
26.9k
  }
1867
1868
26.9k
  let text = as_text(node);
1869
26.9k
  return CHEMICAL_ELEMENT_ELECTRONEGATIVITY.contains_key(text) ||
1870
23.0k
           has_chem_intent(node, "chemical-element") ||
1871
23.0k
           has_inherited_property(node, "chemical-formula");
1872
26.9k
}
1873
1874
1875
#[cfg(test)]
1876
mod chem_tests {
1877
1878
1879
#[allow(unused_imports)]
1880
  use super::super::init_logger;
1881
  use super::super::are_strs_canonically_equal;
1882
    use super::*;
1883
1884
40
    fn parse_mathml_string<F>(test: &str, test_mathml: F) -> bool
1885
40
            where F: Fn(Element) -> bool {
1886
        use sxd_document::parser;
1887
        use crate::interface::{get_element, trim_element};
1888
1889
        
1890
40
        let test = if test.starts_with("<math") {
test0
} else {&format!("<math>{}</math>", test)};
1891
40
        let new_package = parser::parse(test);
1892
40
        if let Err(
e0
) = new_package {
1893
0
            panic!("Invalid MathML input:\n{}\nError is: {}", &test, &e.to_string());
1894
40
        }
1895
1896
40
        let new_package = new_package.unwrap();
1897
40
        let mut mathml = get_element(&new_package);
1898
40
        trim_element(mathml, false);
1899
40
        mathml = as_element(mathml.children()[0]);
1900
40
        return test_mathml(mathml);
1901
40
    }
1902
1903
    #[test]
1904
1
    fn test_noble_element() {
1905
        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
1906
1
        let test = "<mrow> <mi>Na</mi> <mo>&#x2063;</mo> <mi>Cl</mi> </mrow>"; // 
1907
1
        assert!( !parse_mathml_string(test, |mathml| has_noble_element( &collect_elements(mathml).unwrap() )) );
1908
1
        let test = "<mrow> <mi>Ar</mi> <mo>&#x2063;</mo> <mi>Cl</mi> </mrow>"; // 
1909
1
        assert!( parse_mathml_string(test, |mathml| has_noble_element( &collect_elements(mathml).unwrap() )) );
1910
1
        let test = "<mrow> <mi>Ne</mi> </mrow>"; // 
1911
1
        assert!( parse_mathml_string(test, |mathml| has_noble_element( &collect_elements(mathml).unwrap() )) );
1912
1
    }
1913
1914
    #[test]
1915
1
    fn test_alphabetical_order() {
1916
        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
1917
1
        let test = r#"<mrow>  
1918
1
            <msub><mi>C</mi><mn>6</mn></msub><mo>&#x2063;</mo> 
1919
1
            <msub><mi>H</mi><mn>14</mn></msub>
1920
1
             </mrow>"#;
1921
1
        assert!( parse_mathml_string(test, |mathml| is_alphabetical( &collect_elements(mathml).unwrap() )) );
1922
1
        let test = r#"<mrow>  
1923
1
             <msub><mi>C</mi><mn>6</mn></msub><mo>&#x2063;</mo> 
1924
1
             <msub><mi>H</mi><mn>12</mn></msub><mo>&#x2063;</mo>
1925
1
             <msub><mi>O</mi><mn>6</mn></msub>
1926
1
              </mrow>"#;
1927
1
        assert!( parse_mathml_string(test, |mathml| is_alphabetical( &collect_elements(mathml).unwrap() )) );
1928
1
        let test = "<mrow> <mi>B</mi> <mo>&#x2063;</mo> <mi>C</mi> <mo>&#x2063;</mo> <mi>O</mi></mrow>"; // "C" should be first
1929
1
        assert!( !parse_mathml_string(test, |mathml| is_alphabetical( &collect_elements(mathml).unwrap() )) );
1930
1
        let test = "<mrow> <mi>P</mi> <mo>&#x2063;</mo> <mi>B</mi> <mo>&#x2063;</mo> <mi>O</mi></mrow>"; // not alphabetical
1931
1
        assert!( !parse_mathml_string(test, |mathml| is_alphabetical( &collect_elements(mathml).unwrap() )) );
1932
1
    }
1933
1934
    #[test]
1935
1
    fn test_is_structural() {
1936
        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
1937
1
        let test = r#"<mrow>  
1938
1
            <msub><mi>C</mi><mn>6</mn></msub><mo>&#x2063;</mo> 
1939
1
            <msub><mi>H</mi><mn>14</mn></msub>
1940
1
             </mrow>"#;
1941
1
        assert!( !parse_mathml_string(test, |mathml| is_structural( &collect_elements(mathml).unwrap() )) );
1942
1
        let test = "<mrow> <mi>B</mi> <mo>&#x2063;</mo> <mi>C</mi> <mo>&#x2063;</mo> <mi>O</mi></mrow>";
1943
1
        assert!( !parse_mathml_string(test, |mathml| is_structural( &collect_elements(mathml).unwrap() )) );
1944
1
        let test = "<mrow> <mi>H</mi> <mo>&#x2063;</mo> <mi>O</mi> <mo>&#x2063;</mo> <mi>H</mi></mrow>";
1945
1
        assert!( parse_mathml_string(test, |mathml| is_structural( &collect_elements(mathml).unwrap() )) );
1946
1
        let test = "<mrow data-chem-formula='9'>
1947
1
                <mmultiscripts data-chem-formula='1'>
1948
1
                <mi mathvariant='normal' data-chem-element='1'>H</mi>
1949
1
                <mn>2</mn>
1950
1
                <none></none>
1951
1
                </mmultiscripts>
1952
1
                <mo data-changed='added'>&#x2063;</mo>
1953
1
                <mi mathvariant='normal' data-chem-element='1'>C</mi>
1954
1
                <mo data-chemical-bond='true' data-chem-formula-op='1'>=</mo>
1955
1
                <mi mathvariant='normal' data-chem-element='1'>C</mi>
1956
1
                <mo data-changed='added'>&#x2063;</mo>
1957
1
                <mmultiscripts data-chem-formula='1'>
1958
1
                <mi mathvariant='normal' data-chem-element='1'>H</mi>
1959
1
                <mn>2</mn>
1960
1
                <none></none>
1961
1
                </mmultiscripts>
1962
1
            </mrow>";
1963
1
        assert!( parse_mathml_string(test, |mathml| is_structural( &collect_elements(mathml).unwrap() )) );
1964
1
    }
1965
1966
1967
    #[test]
1968
1
    fn test_electronegativity_order() {
1969
        // mathml test strings need to be canonical MathML since we aren't testing canonicalize()
1970
1
        let test = r#"<mrow>  
1971
1
            <mi>N</mi><mo>&#x2063;</mo> 
1972
1
            <msub><mi>H</mi><mn>3</mn></msub>
1973
1
             </mrow>"#;
1974
1
        assert!( parse_mathml_string(test, |mathml| is_ordered_by_electronegativity( &collect_elements(mathml).unwrap() )) );
1975
1
        let test = r#"<mrow>  
1976
1
            <mi>O</mi><mo>&#x2063;</mo> 
1977
1
            <msub><mi>F</mi><mn>2</mn></msub>
1978
1
             </mrow>"#;
1979
1
        assert!( parse_mathml_string(test, |mathml| is_ordered_by_electronegativity( &collect_elements(mathml).unwrap() )) );
1980
1
        let test = r#"<mrow>  
1981
1
            <msub><mi>Rb</mi><mn>15</mn></msub><mo>&#x2063;</mo> 
1982
1
            <msub><mi>Hg</mi><mn>16</mn></msub>
1983
1
             </mrow>"#;
1984
1
        assert!( parse_mathml_string(test, |mathml| is_ordered_by_electronegativity( &collect_elements(mathml).unwrap() )) );
1985
1
        let test = r#" 
1986
1
            <mrow><msup>
1987
1
                <mo>[</mo>
1988
1
                    <mi>Si</mi><mo>&#x2063;</mo> 
1989
1
                    <msub><mi>As</mi><mn>4</mn></msub>
1990
1
                <mo>]</mo>
1991
1
                <mrow><mn>8</mn><mo>-</mo></mrow>
1992
1
            </msup></mrow>"#;
1993
1
        assert!( parse_mathml_string(test, |mathml| is_ordered_by_electronegativity( &collect_elements(as_element(mathml.children()[0])).unwrap() )) );
1994
1
        let test = r#"<mrow>  
1995
1
                <mi>Si</mi><mo>&#x2063;</mo> 
1996
1
                <msub><mi>H</mi><mn>2</mn></msub>
1997
1
                <mi>Br</mi><mo>&#x2063;</mo> 
1998
1
                <mi>Cl</mi>
1999
1
                </mrow>"#;
2000
1
        assert!( parse_mathml_string(test, |mathml| is_ordered_by_electronegativity( &collect_elements(mathml).unwrap() )) );
2001
1
    }
2002
2003
    #[test]
2004
1
    fn test_order() {
2005
1
        let test = r#"<mrow>  
2006
1
            <msub><mi>C</mi><mn>2</mn></msub><mo>&#x2063;</mo> 
2007
1
            <msub><mi>H</mi><mn>4</mn></msub><mo>&#x2063;</mo>
2008
1
            <msub><mrow> <mo>(</mo><mi>N</mi> <mo>&#x2063;</mo> <msub> <mi>H</mi> <mn>2</mn> </msub><mo>)</mo> </mrow><mn>2</mn></msub>
2009
1
             </mrow>"#;
2010
1
        assert!( parse_mathml_string(test, is_order_ok) );
2011
1
        let test = r#"<mrow>
2012
1
            <mi>Fe</mi><mo>&#x2063;</mo> 
2013
1
            <mi>O</mi><mo>&#x2063;</mo> 
2014
1
            <mrow> <mo>(</mo><mrow><mi>O</mi> <mo>&#x2063;</mo><mi>H</mi> </mrow><mo>)</mo> </mrow>
2015
1
             </mrow>"#;
2016
1
        assert!( parse_mathml_string(test, is_order_ok) );
2017
1
        let test = r#"<mrow>  // R-4.4.3.3 -- Chain compound doesn't fit rules but should be accepted
2018
1
                <mi>Br</mi><mo>&#x2063;</mo> 
2019
1
                <mi>S</mi><mo>&#x2063;</mo> 
2020
1
                <mi>C</mi><mo>&#x2063;</mo> 
2021
1
                <mi>N</mi>
2022
1
                </mrow>"#;
2023
1
        assert!( parse_mathml_string(test, |mathml| likely_chem_formula(mathml)==5) );
2024
1
    }
2025
2026
    #[test]
2027
1
    fn test_simple_double_bond() {
2028
1
        let test1 = r#"<mrow><mi>C</mi><mo>=</mo><mi>C</mi></mrow>"#;
2029
1
        assert!( parse_mathml_string(test1, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) ); // just under threshold
2030
1
        let test2 = r#"<mrow><mi>C</mi><mo>∷</mo><mi>O</mi></mrow>"#;
2031
1
        assert!( parse_mathml_string(test2, |mathml| likely_chem_formula(mathml)==CHEMISTRY_THRESHOLD) );
2032
1
        let test3 = r#"<mrow><mi>N</mi><mo>=</mo><mi>N</mi></mrow>"#;
2033
1
        assert!( parse_mathml_string(test3, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) ); // just under threshold
2034
1
        let test4 = r#"<mrow><mi>Sn</mi><mo>=</mo><mi>Sn</mi></mrow>"#;
2035
1
        assert!( parse_mathml_string(test4, |mathml| likely_chem_formula(mathml) == 8) );
2036
1
        let test5 = r#"<mrow><mi>O</mi><mo>=</mo><mi>S</mi></mrow>"#;
2037
1
        assert!( parse_mathml_string(test5, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) );  // just under threshold
2038
1
        let test10 = r#"<mrow><mi>K</mi><mo>=</mo><mi>K</mi></mrow>"#;
2039
1
        assert!( parse_mathml_string(test10, |mathml| likely_chem_formula(mathml) == NOT_CHEMISTRY) );
2040
1
        let test11 = r#"<mrow><mi>C</mi><mo>=</mo><mi>K</mi></mrow>"#;
2041
1
        assert!( parse_mathml_string(test11, |mathml| likely_chem_formula(mathml) == NOT_CHEMISTRY) );
2042
1
    }
2043
2044
    #[test]
2045
1
    fn test_double_bond() {
2046
1
        let test1 = r#"<mrow><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mo>=</mo><mi>C</mi></mrow>"#;
2047
1
        assert!( parse_mathml_string(test1, |mathml| likely_chem_formula(mathml)==8) );
2048
1
        let test2 = r#"<mrow><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mo>=</mo>
2049
1
        <mi>C</mi><mi>H</mi><mi>R</mi></mrow>"#;
2050
1
        assert!( parse_mathml_string(test2, |mathml| likely_chem_formula(mathml)==12) );
2051
1
        let test3 = r#"<mrow><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mi mathvariant='normal'>C</mi><mo>=</mo>
2052
1
                <mi>C</mi><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub></mrow>"#;
2053
1
        assert!( parse_mathml_string(test3, |mathml| likely_chem_formula(mathml)==11) );
2054
1
        let test4 = r#"<mrow><mi>H</mi><mo>-</mo><mi>N</mi><mo>=</mo><mi>N</mi><mo>-</mo><mi>H</mi></mrow>"#;
2055
1
        assert!( parse_mathml_string(test4, |mathml| likely_chem_formula(mathml)==10) );
2056
1
        let test10 = r#"<mrow><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>3</mn></msub><mo>=</mo><mi>C</mi></mrow>"#;
2057
1
        assert!( parse_mathml_string(test10, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
2058
1
    }
2059
2060
    #[test]
2061
    #[ignore]   // It would be good to say "not chemistry" for this, but there aren't rules for that at the moment
2062
0
    fn test_water_bond() {
2063
0
        let test11 = r#"<mrow><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mi mathvariant='normal'>O</mi><mo>=</mo><mi>O</mi></mrow>"#;
2064
0
        assert!( parse_mathml_string(test11, |mathml| {println!("val={}", likely_chem_formula(mathml)); likely_chem_formula(mathml)==8}) );
2065
        // assert!( parse_mathml_string(test11, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
2066
0
    }
2067
2068
2069
    #[test]
2070
1
    fn test_triple_bond() {
2071
1
        let test1 = r#"<mrow><mi>C</mi><mo>≡</mo><mi>C</mi></mrow>"#;
2072
1
        assert!( parse_mathml_string(test1, |mathml| likely_chem_formula(mathml) < CHEMISTRY_THRESHOLD) );
2073
1
        let test2 = r#"<mrow><mi>C</mi><mo>:::</mo><mi>O</mi></mrow>"#;
2074
1
        assert!( parse_mathml_string(test2, |mathml| likely_chem_formula(mathml)==CHEMISTRY_THRESHOLD) );
2075
1
        let test3 = r#"<mrow><mi>H</mi><mo>-</mo><mi>C</mi><mo>≡</mo><mi>C</mi><mo>-</mo><mi>H</mi></mrow>"#;
2076
1
        assert!( parse_mathml_string(test3, |mathml| likely_chem_formula(mathml)==10) );
2077
1
        let test4 = r#"<mrow><mi>H</mi><mo>-</mo><mi>C</mi><mo>≡</mo><mi>C</mi><mo>-</mo><mi>H</mi></mrow>"#;
2078
1
        assert!( parse_mathml_string(test4, |mathml| likely_chem_formula(mathml)==10) );
2079
1
        let test5 = r#"<mrow><mi>N</mi><mo>-</mo><mi>C</mi><mo>≡</mo><mi>C</mi><mo>-</mo><mi>N</mi></mrow>"#;
2080
1
        assert!( parse_mathml_string(test5, |mathml| likely_chem_formula(mathml)==10) );
2081
1
        let test6 = r#"<mrow><mi>H</mi><mo>-</mo><mi>C</mi><mo>≡</mo>
2082
1
            <mi>C</mi><mo>-</mo><mi mathvariant='normal'>C</mi><msub><mi mathvariant='normal'>H</mi><mn>3</mn></msub></mrow>"#; // 1-Propyne
2083
1
        assert!( parse_mathml_string(test6, |mathml| likely_chem_formula(mathml)==14) );
2084
        // assert!( parse_mathml_string(test6, |mathml| {println!("val={}", likely_chem_formula(mathml)); likely_chem_formula(mathml)==10}) );
2085
1
        let test10 = r#"<mrow><mi>O</mi><mo>:::</mo><mi>S</mi></mrow>"#;
2086
1
        assert!( parse_mathml_string(test10, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
2087
1
        let test11 = r#"<mrow><mi>Pb</mi><mo>≡</mo><mi>Pb</mi></mrow>"#;
2088
1
        assert!( parse_mathml_string(test11, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
2089
1
        let test12 = r#"<mrow><mi>C</mi><mo>≡</mo><mi>K</mi></mrow>"#;
2090
1
        assert!( parse_mathml_string(test12, |mathml| likely_chem_formula(mathml)==NOT_CHEMISTRY) );
2091
1
    }
2092
2093
    #[test]
2094
1
    fn split_mi() {
2095
1
        let test = "<math><mi>LiF</mi></math>";
2096
1
        let target = "<math>
2097
1
            <mrow data-changed='added' data-chem-formula='5'>
2098
1
                <mi data-chem-element='3'>Li</mi>
2099
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2100
1
                <mi mathvariant='normal' data-split='true' data-chem-element='1'>F</mi>
2101
1
            </mrow>
2102
1
       </math>";
2103
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2104
1
    }
2105
2106
    #[test]
2107
1
    fn no_split_mi() {
2108
1
        let test = "<math><mi>HC</mi></math>";
2109
1
        let target = "<math>
2110
1
             <mi>HC</mi>
2111
1
        </math>";
2112
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2113
1
    }
2114
2115
    #[test]
2116
1
    fn combine_mi() {
2117
1
        let test = "<math><mi>H</mi><mi>C</mi><mi>l</mi></math>";
2118
1
        let target = " <math>
2119
1
            <mrow data-changed='added' data-chem-formula='5'>
2120
1
            <mi data-chem-element='1'>H</mi>
2121
1
            <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2122
1
            <mi data-merged='true' data-chem-element='3'>Cl</mi>
2123
1
            </mrow>
2124
1
        </math>";
2125
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2126
1
    }
2127
2128
    #[test]
2129
1
    fn no_combine() {
2130
1
        let test = "<math><mi>C</mi><mi>l</mi></math>";
2131
1
        let target = "<math>
2132
1
            <mrow data-changed='added'>
2133
1
                <mi>C</mi>
2134
1
                <mo data-changed='added'>&#x2062;</mo>
2135
1
                <mi>l</mi>
2136
1
            </mrow>
2137
1
        </math>";
2138
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2139
1
    }
2140
2141
    #[test]
2142
1
    fn add_script() {
2143
1
        let test = "<math> <mi>SO</mi>  <msub> <mrow></mrow> <mn>2</mn> </msub> </math>";
2144
1
        let target = "<math>
2145
1
            <mrow data-changed='added' data-chem-formula='5'>
2146
1
                <mi mathvariant='normal' data-chem-element='1'>S</mi>
2147
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2148
1
                <mmultiscripts data-chem-formula='2'>
2149
1
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>O</mi>
2150
1
                    <mn>2</mn>
2151
1
                    <none></none>
2152
1
                </mmultiscripts>
2153
1
            </mrow>
2154
1
       </math>";
2155
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2156
1
    }
2157
2158
    #[test]
2159
1
    fn add_script_bug_287() {
2160
1
        let test = r#"<math><mrow>
2161
1
            <msubsup>
2162
1
                <mrow><mi mathvariant="normal">SO</mi></mrow>
2163
1
                <mn>4</mn>
2164
1
                <mrow><mn>2</mn><mo>&#x2212;</mo></mrow>
2165
1
            </msubsup>
2166
1
            </mrow></math>"#;
2167
1
        let target = r#"<math>
2168
1
            <mrow data-changed='added' data-chem-formula='7'>
2169
1
                <mi mathvariant='normal' data-chem-element='1'>S</mi>
2170
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2171
1
                <msubsup data-chem-formula='5'>
2172
1
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>O</mi>
2173
1
                    <mn>4</mn>
2174
1
                    <mrow data-chem-formula='3'><mn>2</mn><mo>-</mo></mrow>
2175
1
                </msubsup>
2176
1
            </mrow>
2177
1
            </math>"#;
2178
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2179
1
    }
2180
2181
    #[test]
2182
1
    fn salt() {
2183
1
        let test = "<math><mi>Na</mi><mi>Cl</mi></math>";
2184
1
        let target = "<math>
2185
1
            <mrow data-changed='added' data-chem-formula='7'>
2186
1
                <mi data-chem-element='3'>Na</mi>
2187
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2188
1
                <mi data-chem-element='3'>Cl</mi>
2189
1
            </mrow>
2190
1
        </math>";
2191
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2192
1
    }
2193
2194
    #[test]
2195
1
    fn water() {
2196
1
        let test = "<math><msub><mi mathvariant='normal'>H</mi><mn>2</mn></msub><mi mathvariant='normal'>O</mi></math>";
2197
1
        let target = "<math>
2198
1
            <mrow data-changed='added' data-chem-formula='5'>
2199
1
                <msub data-chem-formula='2'>
2200
1
                    <mi mathvariant='normal' data-chem-element='2'>H</mi>
2201
1
                    <mn>2</mn>
2202
1
                </msub>
2203
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2204
1
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
2205
1
            </mrow>
2206
1
        </math>";
2207
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2208
1
    }
2209
2210
    #[test]
2211
1
    fn mhchem_water() {
2212
1
        let test = "<math>
2213
1
            <mrow>
2214
1
            <mrow>
2215
1
                <mi mathvariant='normal'>H</mi>
2216
1
            </mrow>
2217
1
            <msub>
2218
1
                <mrow>
2219
1
                <mrow>
2220
1
                    <mpadded width='0'>
2221
1
                    <mphantom>
2222
1
                        <mi>A</mi>
2223
1
                    </mphantom>
2224
1
                    </mpadded>
2225
1
                </mrow>
2226
1
                </mrow>
2227
1
                <mrow>
2228
1
                <mrow>
2229
1
                    <mpadded height='0'>
2230
1
                    <mn>2</mn>
2231
1
                    </mpadded>
2232
1
                </mrow>
2233
1
                </mrow>
2234
1
            </msub>
2235
1
            <mrow>
2236
1
                <mi mathvariant='normal'>O</mi>
2237
1
            </mrow>
2238
1
            </mrow>
2239
1
        </math>";
2240
1
        let target = "<math>
2241
1
            <mrow data-chem-formula='5'>
2242
1
                <mmultiscripts data-chem-formula='2'>
2243
1
                    <mi mathvariant='normal' data-chem-element='2'>H</mi>
2244
1
                    <mn>2</mn>
2245
1
                    <none></none>
2246
1
                </mmultiscripts>
2247
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2248
1
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
2249
1
            </mrow>
2250
1
       </math>";
2251
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2252
1
    }
2253
2254
    #[test]
2255
1
    fn carbon() {
2256
1
        let test = "<math><mi>C</mi></math>";     // not enough to trigger recognition
2257
1
        let target = " <math>
2258
1
            <mi>C</mi>
2259
1
        </math>";
2260
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2261
1
    }
2262
2263
    #[test]
2264
1
    fn sulfate() {
2265
1
        let test = "<math><mrow><msup>
2266
1
                <mrow><mo>[</mo><mi>S</mi><msub><mi>O</mi><mn>4</mn></msub><mo>]</mo></mrow>
2267
1
                <mrow><mn>2</mn><mo>&#x2212;</mo></mrow>
2268
1
            </msup></mrow></math>";
2269
1
        let target = "<math>
2270
1
        <msup data-chem-formula='9'>
2271
1
          <mrow data-chem-formula='6'>
2272
1
            <mo>[</mo>
2273
1
            <mrow data-changed='added' data-chem-formula='3'>
2274
1
              <mi data-chem-element='1'>S</mi>
2275
1
              <mo data-changed='added'>&#x2063;</mo>
2276
1
              <msub data-chem-formula='1'>
2277
1
                <mi data-chem-element='1'>O</mi>
2278
1
                <mn>4</mn>
2279
1
              </msub>
2280
1
            </mrow>
2281
1
            <mo>]</mo>
2282
1
          </mrow>
2283
1
          <mrow data-chem-formula='3'>
2284
1
            <mn>2</mn>
2285
1
            <mo>-</mo>
2286
1
          </mrow>
2287
1
        </msup>
2288
1
       </math>";
2289
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2290
1
    }
2291
2292
    #[test]
2293
1
    fn aluminum_sulfate() {
2294
1
        let test = "<math><mrow><msub><mi>Al</mi><mn>2</mn></msub>
2295
1
                <msub><mrow><mo>(</mo><mi>S</mi><msub><mi>O</mi><mn>4</mn></msub><mo>)</mo></mrow><mn>3</mn></msub></mrow></math>";
2296
1
        let target = " <math>
2297
1
                <mrow data-chem-formula='10'>
2298
1
                    <msub data-chem-formula='3'>
2299
1
                        <mi data-chem-element='3'>Al</mi>
2300
1
                        <mn>2</mn>
2301
1
                    </msub>
2302
1
                    <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2303
1
                    <msub data-chem-formula='6'>
2304
1
                        <mrow data-chem-formula='6'>
2305
1
                        <mo>(</mo>
2306
1
                        <mrow data-changed='added' data-chem-formula='3'>
2307
1
                            <mi data-chem-element='1'>S</mi>
2308
1
                            <mo data-changed='added'>&#x2063;</mo>
2309
1
                            <msub data-chem-formula='1'>
2310
1
                            <mi data-chem-element='1'>O</mi>
2311
1
                            <mn>4</mn>
2312
1
                            </msub>
2313
1
                        </mrow>
2314
1
                        <mo>)</mo>
2315
1
                        </mrow>
2316
1
                        <mn>3</mn>
2317
1
                    </msub>
2318
1
                </mrow>
2319
1
            </math>";
2320
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2321
1
    }
2322
2323
    #[test]
2324
1
    fn ethanol_bonds() {
2325
1
        let test = "<math>
2326
1
                <mrow>
2327
1
                    <mi>C</mi>
2328
1
                    <msub>  <mi>H</mi> <mn>3</mn> </msub>
2329
1
                    <mo>&#x2212;</mo>
2330
1
                    <mi>C</mi>
2331
1
                    <msub>  <mi>H</mi> <mn>2</mn> </msub>
2332
1
                    <mo>&#x2212;</mo>
2333
1
                    <mi>O</mi>
2334
1
                    <mi>H</mi>
2335
1
                </mrow>
2336
1
            </math>";
2337
1
        let target = "<math>
2338
1
        <mrow data-chem-formula='13'>
2339
1
          <mi data-chem-element='1'>C</mi>
2340
1
          <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2341
1
          <msub data-chem-formula='1'>
2342
1
            <mi data-chem-element='1'>H</mi>
2343
1
            <mn>3</mn>
2344
1
          </msub>
2345
1
          <mo data-chemical-bond='true' data-chem-formula-op='1'>-</mo>
2346
1
          <mi data-chem-element='1'>C</mi>
2347
1
          <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2348
1
          <msub data-chem-formula='1'>
2349
1
            <mi data-chem-element='1'>H</mi>
2350
1
            <mn>2</mn>
2351
1
          </msub>
2352
1
          <mo data-chemical-bond='true' data-chem-formula-op='1'>-</mo>
2353
1
          <mi data-chem-element='1'>O</mi>
2354
1
          <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2355
1
          <mi data-chem-element='1'>H</mi>
2356
1
        </mrow>
2357
1
       </math>";
2358
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2359
1
    }
2360
2361
    #[test]
2362
1
    fn dichlorine_hexoxide() {
2363
        // init_logger();
2364
1
        let test = "<math><mrow>
2365
1
            <msup>
2366
1
            <mrow><mo>[</mo><mi>Cl</mi><msub><mi>O</mi><mn>2</mn></msub><mo>]</mo></mrow>
2367
1
            <mo>+</mo>
2368
1
            </msup>
2369
1
            <msup>
2370
1
            <mrow><mo>[</mo><mi>Cl</mi><msub><mi>O</mi><mn>4</mn></msub><mo>]</mo></mrow>
2371
1
            <mo>-</mo>
2372
1
            </msup>
2373
1
        </mrow></math>";
2374
1
        let target = "<math>
2375
1
            <mrow data-chem-formula='19'>
2376
1
                <msup data-chem-formula='9'>
2377
1
                    <mrow data-chem-formula='8'>
2378
1
                    <mo>[</mo>
2379
1
                    <mrow data-changed='added' data-chem-formula='5'>
2380
1
                        <mi data-chem-element='3'>Cl</mi>
2381
1
                        <mo data-changed='added'>&#x2063;</mo>
2382
1
                        <msub data-chem-formula='1'>
2383
1
                        <mi data-chem-element='1'>O</mi>
2384
1
                        <mn>2</mn>
2385
1
                        </msub>
2386
1
                    </mrow>
2387
1
                    <mo>]</mo>
2388
1
                    </mrow>
2389
1
                    <mo>+</mo>
2390
1
                </msup>
2391
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2392
1
                <msup data-chem-formula='9'>
2393
1
                    <mrow data-chem-formula='8'>
2394
1
                    <mo>[</mo>
2395
1
                    <mrow data-changed='added' data-chem-formula='5'>
2396
1
                        <mi data-chem-element='3'>Cl</mi>
2397
1
                        <mo data-changed='added'>&#x2063;</mo>
2398
1
                        <msub data-chem-formula='1'>
2399
1
                        <mi data-chem-element='1'>O</mi>
2400
1
                        <mn>4</mn>
2401
1
                        </msub>
2402
1
                    </mrow>
2403
1
                    <mo>]</mo>
2404
1
                    </mrow>
2405
1
                    <mo>-</mo>
2406
1
                </msup>
2407
1
            </mrow>
2408
1
       </math>";
2409
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2410
1
    }
2411
2412
    #[test]
2413
1
    fn ethylene_with_bond() {
2414
1
        let test = "<math><mrow>
2415
1
                <msub><mi>H</mi><mn>2</mn></msub><mi>C</mi>
2416
1
                <mo>=</mo>
2417
1
                <mi>C</mi><msub><mi>H</mi><mn>2</mn></msub>
2418
1
            </mrow></math>";
2419
1
        let target = "<math>
2420
1
            <mrow data-chem-formula='8'>
2421
1
                <msub data-chem-formula='1'>
2422
1
                    <mi data-chem-element='1'>H</mi>
2423
1
                    <mn>2</mn>
2424
1
                </msub>
2425
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2426
1
                <mi data-chem-element='1'>C</mi>
2427
1
                <mo data-chemical-bond='true' data-chem-formula-op='1'>=</mo>
2428
1
                <mi data-chem-element='1'>C</mi>
2429
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2430
1
                <msub data-chem-formula='1'>
2431
1
                    <mi data-chem-element='1'>H</mi>
2432
1
                    <mn>2</mn>
2433
1
                </msub>
2434
1
            </mrow>
2435
1
        </math>";
2436
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2437
1
    }
2438
2439
    #[test]
2440
1
    fn ferric_chloride_aq() {
2441
1
        let test = "<math><mrow>
2442
1
            <mi>Fe</mi>
2443
1
            <msub><mi>Cl</mi><mn>3</mn></msub>
2444
1
            <mrow><mo>(</mo><mrow><mi>aq</mi></mrow><mo>)</mo></mrow>
2445
1
        </mrow></math>";
2446
1
        let target = "<math>
2447
1
            <mrow data-chem-formula='11'>
2448
1
                <mi data-chem-element='3'>Fe</mi>
2449
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2450
1
                <msub data-chem-formula='3'>
2451
1
                    <mi data-chem-element='3'>Cl</mi>
2452
1
                    <mn>3</mn>
2453
1
                </msub>
2454
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2455
1
                <mrow data-chem-formula='3'>
2456
1
                    <mo>(</mo>
2457
1
                    <mi>aq</mi>
2458
1
                    <mo>)</mo>
2459
1
                </mrow>
2460
1
            </mrow>
2461
1
       </math>";
2462
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2463
1
    }
2464
2465
    #[test]
2466
1
    fn ferric_chloride_aq_as_mi() {
2467
1
        let test = "<math><mrow>
2468
1
            <mi>Fe</mi>
2469
1
            <msub><mi>Cl</mi><mn>3</mn></msub>
2470
1
            <mi>(aq)</mi>
2471
1
        </mrow></math>";
2472
1
        let target = "<math>
2473
1
            <mrow data-chem-formula='11'>
2474
1
                <mi data-chem-element='3'>Fe</mi>
2475
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2476
1
                <msub data-chem-formula='3'>
2477
1
                    <mi data-chem-element='3'>Cl</mi>
2478
1
                    <mn>3</mn>
2479
1
                </msub>
2480
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2481
1
                <mrow data-chem-formula='3'>
2482
1
                    <mo>(</mo>
2483
1
                    <mi>aq</mi>
2484
1
                    <mo>)</mo>
2485
1
                </mrow>
2486
1
            </mrow>
2487
1
        </math>";
2488
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2489
1
    }
2490
2491
    #[test]
2492
1
    fn chemtype_ammonia() {
2493
1
        let test = r#"<math><msub><mi>NH</mi><mn>3</mn></msub></math>"#;
2494
1
        let target = " <math>
2495
1
            <mrow data-changed='added' data-chem-formula='5'>
2496
1
            <mi mathvariant='normal' data-chem-element='1'>N</mi>
2497
1
            <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2498
1
            <msub data-chem-formula='2'>
2499
1
                <mi mathvariant='normal' data-chem-element='1' data-split='true'>H</mi>
2500
1
                <mn>3</mn>
2501
1
            </msub>
2502
1
            </mrow>
2503
1
        </math>";
2504
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2505
1
    }
2506
2507
    #[test]
2508
1
    fn mhchem_ammonia() {
2509
1
        let test = r#"<math>
2510
1
            <mrow>
2511
1
                <mi data-mjx-auto-op="false">NH</mi>
2512
1
                <msub>
2513
1
                    <mpadded width="0">
2514
1
                    <mphantom>
2515
1
                        <mi>A</mi>
2516
1
                    </mphantom>
2517
1
                    </mpadded>
2518
1
                    <mpadded height="0">
2519
1
                    <mn>3</mn>
2520
1
                    </mpadded>
2521
1
                </msub>
2522
1
            </mrow>
2523
1
        </math>"#;
2524
1
        let target = "<math>
2525
1
            <mrow data-chem-formula='5'>
2526
1
                <mi mathvariant='normal' data-chem-element='1'>N</mi>
2527
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2528
1
                <mmultiscripts data-mjx-auto-op='false' data-chem-formula='2'>
2529
1
                <mi mathvariant='normal' data-mjx-auto-op='false' data-split='true' data-chem-element='1'>H</mi>
2530
1
                <mn>3</mn>
2531
1
                <none></none>
2532
1
                </mmultiscripts>
2533
1
            </mrow>
2534
1
            </math>";
2535
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2536
1
    }
2537
2538
    #[test]
2539
1
    fn mhchem_so4() {
2540
1
        let test = "<math>
2541
1
            <mrow>
2542
1
            <mi>SO</mi>
2543
1
            <msub>
2544
1
                <mpadded width='0'>
2545
1
                <mphantom>
2546
1
                    <mi>A</mi>
2547
1
                </mphantom>
2548
1
                </mpadded>
2549
1
                <mpadded height='0'>
2550
1
                <mn>4</mn>
2551
1
                </mpadded>
2552
1
            </msub>
2553
1
            <msup>
2554
1
                <mpadded width='0'>
2555
1
                <mphantom>
2556
1
                    <mi>A</mi>
2557
1
                </mphantom>
2558
1
                </mpadded>
2559
1
                <mrow>
2560
1
                <mn>2</mn>
2561
1
                <mo>&#x2212;</mo>
2562
1
                </mrow>
2563
1
            </msup>
2564
1
            </mrow>
2565
1
        </math>";
2566
1
        let target = "<math>
2567
1
            <mrow data-chem-formula='7'>
2568
1
                <mi mathvariant='normal' data-chem-element='1'>S</mi>
2569
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2570
1
                <mmultiscripts data-chem-formula='5'>
2571
1
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>O</mi>
2572
1
                    <mn>4</mn>
2573
1
                    <none/>
2574
1
                    <none/>
2575
1
                    <mrow data-chem-formula='3'>
2576
1
                    <mn>2</mn>
2577
1
                    <mo>-</mo>
2578
1
                    </mrow>
2579
1
                </mmultiscripts>
2580
1
            </mrow>
2581
1
       </math>";
2582
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2583
1
    }
2584
2585
    #[test]
2586
1
    fn mhchem_short_ion() {
2587
1
        let test = "  <math>
2588
1
                <mrow>
2589
1
                <mi mathvariant='normal'>H</mi>
2590
1
                <msub>
2591
1
                    <mpadded width='0'> <mphantom> <mi>A</mi> </mphantom>  </mpadded>
2592
1
                    <mpadded height='0'> <mn>3</mn></mpadded>
2593
1
                </msub>
2594
1
                <mi mathvariant='normal'>O</mi>
2595
1
                <msup>
2596
1
                    <mpadded width='0'> <mphantom> <mi>A</mi> </mphantom>  </mpadded>
2597
1
                    <mo>+</mo>
2598
1
                </msup>
2599
1
                </mrow>
2600
1
            </math>";
2601
1
        let target = "<math>
2602
1
            <mrow data-chem-formula='6'>
2603
1
                <mmultiscripts data-chem-formula='2'>
2604
1
                    <mi mathvariant='normal' data-chem-element='2'>H</mi>
2605
1
                    <mn>3</mn>
2606
1
                    <none></none>
2607
1
                </mmultiscripts>
2608
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2609
1
                <mmultiscripts data-chem-formula='3'>
2610
1
                    <mi mathvariant='normal' data-chem-element='2'>O</mi>
2611
1
                    <none></none>
2612
1
                    <mo>+</mo>
2613
1
                </mmultiscripts>
2614
1
            </mrow>
2615
1
       </math>";
2616
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2617
1
    }
2618
2619
    #[test]
2620
1
    fn mhchem_ions_and_state() {
2621
1
        let test = "<math>
2622
1
            <mrow>
2623
1
            <mrow>
2624
1
                <mi>Na</mi>
2625
1
            </mrow>
2626
1
            <msup>
2627
1
                <mrow>
2628
1
                <mrow>
2629
1
                    <mpadded width='0'>
2630
1
                    <mphantom>
2631
1
                        <mi>A</mi>
2632
1
                    </mphantom>
2633
1
                    </mpadded>
2634
1
                </mrow>
2635
1
                </mrow>
2636
1
                <mrow>
2637
1
                <mo>+</mo>
2638
1
                </mrow>
2639
1
            </msup>
2640
1
            <mo stretchy='false'>(</mo>
2641
1
            <mrow>
2642
1
                <mi>aq</mi>
2643
1
            </mrow>
2644
1
            <mo stretchy='false'>)</mo>
2645
1
            <mrow>
2646
1
                <mi>Cl</mi>
2647
1
            </mrow>
2648
1
            <msup>
2649
1
                <mrow>
2650
1
                <mrow>
2651
1
                    <mpadded width='0'>
2652
1
                    <mphantom>
2653
1
                        <mi>A</mi>
2654
1
                    </mphantom>
2655
1
                    </mpadded>
2656
1
                </mrow>
2657
1
                </mrow>
2658
1
                <mrow>
2659
1
                <mo>&#x2212;</mo>
2660
1
                </mrow>
2661
1
            </msup>
2662
1
            <mspace width='0.111em'></mspace>
2663
1
            <mo stretchy='false'>(</mo>
2664
1
            <mrow>
2665
1
                <mi>aq</mi>
2666
1
            </mrow>
2667
1
            <mo stretchy='false'>)</mo>
2668
1
            </mrow>
2669
1
            </math>";
2670
1
        let target = "<math>
2671
1
            <mrow data-chem-formula='18'>
2672
1
                <mmultiscripts data-chem-formula='4'>
2673
1
                    <mi data-chem-element='3'>Na</mi>
2674
1
                    <none></none>
2675
1
                    <mo>+</mo>
2676
1
                </mmultiscripts>
2677
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2678
1
                <mrow data-changed='added' data-chem-formula='3'>
2679
1
                    <mo stretchy='false'>(</mo>
2680
1
                    <mi>aq</mi>
2681
1
                    <mo stretchy='false'>)</mo>
2682
1
                </mrow>
2683
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2684
1
                <mmultiscripts data-chem-formula='5'>
2685
1
                    <mi data-chem-element='3'>Cl</mi>
2686
1
                    <none></none>
2687
1
                    <mo>-</mo>
2688
1
                </mmultiscripts>
2689
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2690
1
                <mrow data-changed='added' data-chem-formula='3'>
2691
1
                    <mo stretchy='false' data-previous-space-width='0.111'>(</mo>
2692
1
                    <mi>aq</mi>
2693
1
                    <mo stretchy='false'>)</mo>
2694
1
                </mrow>
2695
1
            </mrow>
2696
1
        </math>";
2697
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2698
1
    }
2699
2700
    #[test]
2701
1
    fn ethylene_with_colon_bond() {
2702
1
        let test = "<math><mrow>
2703
1
                <msub><mi>H</mi><mn>2</mn></msub><mi>C</mi>
2704
1
                <mo>::</mo>
2705
1
                <mi>C</mi><msub><mi>H</mi><mn>2</mn></msub>
2706
1
            </mrow></math>";
2707
1
        let target = "<math>
2708
1
            <mrow data-chem-formula='8'>
2709
1
                <msub data-chem-formula='1'>
2710
1
                    <mi data-chem-element='1'>H</mi>
2711
1
                    <mn>2</mn>
2712
1
                </msub>
2713
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2714
1
                <mi data-chem-element='1'>C</mi>
2715
1
                <mo data-chemical-bond='true' data-chem-formula-op='1'>∷</mo>
2716
1
                <mi data-chem-element='1'>C</mi>
2717
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2718
1
                <msub data-chem-formula='1'>
2719
1
                    <mi data-chem-element='1'>H</mi>
2720
1
                    <mn>2</mn>
2721
1
                </msub>
2722
1
            </mrow>
2723
1
        </math>";
2724
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2725
1
    }
2726
2727
    #[test]
2728
1
    fn mhchem_u238() {
2729
1
        let test = "<math>
2730
1
        <mrow>
2731
1
          <msubsup>
2732
1
            <mrow>
2733
1
              <mrow>
2734
1
                <mpadded width='0'>
2735
1
                  <mphantom>
2736
1
                    <mi>A</mi>
2737
1
                  </mphantom>
2738
1
                </mpadded>
2739
1
              </mrow>
2740
1
            </mrow>
2741
1
            <mrow>
2742
1
              <mrow>
2743
1
                <mpadded height='0' depth='0'>
2744
1
                  <mphantom></mphantom>
2745
1
                </mpadded>
2746
1
              </mrow>
2747
1
            </mrow>
2748
1
            <mrow>
2749
1
              <mrow>
2750
1
                <mpadded height='0' depth='0'>
2751
1
                  <mphantom>
2752
1
                    <mn>238</mn>
2753
1
                  </mphantom>
2754
1
                </mpadded>
2755
1
              </mrow>
2756
1
            </mrow>
2757
1
          </msubsup>
2758
1
          <mspace width='-0.083em' linebreak='nobreak'></mspace>
2759
1
          <msubsup>
2760
1
            <mrow>
2761
1
              <mrow>
2762
1
                <mpadded width='0'>
2763
1
                  <mphantom>
2764
1
                    <mi>A</mi>
2765
1
                  </mphantom>
2766
1
                </mpadded>
2767
1
              </mrow>
2768
1
            </mrow>
2769
1
            <mrow>
2770
1
              <mrow>
2771
1
                <mpadded width='0'>
2772
1
                  <mphantom>
2773
1
                    <mn>2</mn>
2774
1
                  </mphantom>
2775
1
                </mpadded>
2776
1
              </mrow>
2777
1
              <mrow>
2778
1
                <mpadded width='0' lspace='-1width'>
2779
1
                  <mrow>
2780
1
                    <mpadded height='0'></mpadded>
2781
1
                  </mrow>
2782
1
                </mpadded>
2783
1
              </mrow>
2784
1
            </mrow>
2785
1
            <mrow>
2786
1
              <mrow>
2787
1
                <mpadded height='0'>
2788
1
                  <mrow>
2789
1
                    <mpadded width='0'>
2790
1
                      <mphantom>
2791
1
                        <mn>2</mn>
2792
1
                      </mphantom>
2793
1
                    </mpadded>
2794
1
                  </mrow>
2795
1
                </mpadded>
2796
1
              </mrow>
2797
1
              <mrow>
2798
1
                <mpadded width='0' lspace='-1width'>
2799
1
                  <mn>238</mn>
2800
1
                </mpadded>
2801
1
              </mrow>
2802
1
            </mrow>
2803
1
          </msubsup>
2804
1
          <mrow>
2805
1
            <mi mathvariant='normal'>U</mi>
2806
1
          </mrow>
2807
1
        </mrow>
2808
1
      </math>";
2809
1
        let target = " <math>
2810
1
            <mmultiscripts data-previous-space-width='-0.083' data-chem-formula='5'>
2811
1
                <mi mathvariant='normal' data-chem-element='2'>U</mi>
2812
1
                <mprescripts></mprescripts>
2813
1
                <none></none>
2814
1
                <mn>238</mn>
2815
1
            </mmultiscripts>
2816
1
         </math>";
2817
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2818
1
    }
2819
2820
    #[test]
2821
1
    fn mhchem_hcl_aq() {
2822
1
        let test = "<math>
2823
1
        <mrow>
2824
1
          <mn>2</mn>
2825
1
          <mstyle scriptlevel='0'>
2826
1
            <mspace width='0.167em'></mspace>
2827
1
          </mstyle>
2828
1
          <mrow>
2829
1
            <mi>HCl</mi>
2830
1
          </mrow>
2831
1
          <mspace width='0.111em'></mspace>
2832
1
          <mo stretchy='false'>(</mo>
2833
1
          <mrow>
2834
1
            <mi>aq</mi>
2835
1
          </mrow>
2836
1
          <mo stretchy='false'>)</mo>
2837
1
        </mrow>
2838
1
      </math>";
2839
1
        let target = "<math>
2840
1
            <mrow data-chem-formula='9'>
2841
1
                <mn>2</mn>
2842
1
                <mo data-changed='added' data-chem-formula-op='0'>&#x2062;</mo>
2843
1
                <mrow data-changed='added' data-chem-formula='9'>
2844
1
                    <mi mathvariant='normal' data-previous-space-width='0.167' data-chem-element='1'>H</mi>
2845
1
                    <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2846
1
                    <mi data-split='true' data-chem-element='3'>Cl</mi>
2847
1
                    <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
2848
1
                    <mrow data-changed='added' data-chem-formula='3'>
2849
1
                    <mo stretchy='false' data-previous-space-width='0.111'>(</mo>
2850
1
                    <mi>aq</mi>
2851
1
                    <mo stretchy='false'>)</mo>
2852
1
                    </mrow>
2853
1
                </mrow>
2854
1
            </mrow>
2855
1
        </math>";
2856
1
        assert!(are_strs_canonically_equal(test, target, &[]));
2857
1
    }
2858
2859
    #[test]
2860
1
    fn mhchem_nested_sub() {
2861
        // from \ce{(CH3)3}
2862
1
        let test = "<math>
2863
1
        <mrow>
2864
1
          <mo stretchy='false'>(</mo>
2865
1
          <mrow>
2866
1
            <mi>CH</mi>
2867
1
          </mrow>
2868
1
          <msub>
2869
1
            <mrow>
2870
1
              <mrow>
2871
1
                <mpadded width='0'>
2872
1
                  <mphantom>
2873
1
                    <mi>A</mi>
2874
1
                  </mphantom>
2875
1
                </mpadded>
2876
1
              </mrow>
2877
1
            </mrow>
2878
1
            <mrow>
2879
1
              <mrow>
2880
1
                <mpadded height='0'>
2881
1
                  <mn>3</mn>
2882
1
                </mpadded>
2883
1
              </mrow>
2884
1
            </mrow>
2885
1
          </msub>
2886
1
          <mo stretchy='false'>)</mo>
2887
1
          <msub>
2888
1
            <mrow>
2889
1
              <mrow>
2890
1
                <mpadded width='0'>
2891
1
                  <mphantom>
2892
1
                    <mi>A</mi>
2893
1
                  </mphantom>
2894
1
                </mpadded>
2895
1
              </mrow>
2896
1
            </mrow>
2897
1
            <mrow>
2898
1
              <mrow>
2899
1
                <mpadded height='0'>
2900
1
                  <mn>3</mn>
2901
1
                </mpadded>
2902
1
              </mrow>
2903
1
            </mrow>
2904
1
          </msub>
2905
1
        </mrow>
2906
1
      </math>";
2907
1
    let target = "<math>
2908
1
        <mmultiscripts data-chem-formula='8'>
2909
1
            <mrow data-changed='added' data-chem-formula='8'>
2910
1
                <mo stretchy='false'>(</mo>
2911
1
                <mrow data-changed='added' data-chem-formula='5'>
2912
1
                <mi mathvariant='normal' data-chem-element='1'>C</mi>
2913
1
                <mo data-changed='added'>&#x2063;</mo>
2914
1
                <mmultiscripts data-chem-formula='2'>
2915
1
                    <mi mathvariant='normal' data-split='true' data-chem-element='1'>H</mi>
2916
1
                    <mn>3</mn>
2917
1
                    <none></none>
2918
1
                </mmultiscripts>
2919
1
                </mrow>
2920
1
                <mo stretchy='false'>)</mo>
2921
1
            </mrow>
2922
1
            <mn>3</mn>
2923
1
            <none></none>
2924
1
        </mmultiscripts>
2925
1
    </math>";
2926
1
    assert!(are_strs_canonically_equal(test, target, &[]));
2927
1
    }
2928
2929
    #[test]
2930
1
    fn mhchem_isotopes() {
2931
        // from \ce{^{18}O{}^{16}O}
2932
1
        let test = "<math>
2933
1
        <mrow>
2934
1
          <msubsup>
2935
1
            <mpadded width='0'>
2936
1
              <mphantom>
2937
1
                <mi>A</mi>
2938
1
              </mphantom>
2939
1
            </mpadded>
2940
1
            <mpadded height='0' depth='0'>
2941
1
              <mphantom></mphantom>
2942
1
            </mpadded>
2943
1
            <mpadded height='0' depth='0'>
2944
1
              <mphantom>
2945
1
                <mn>18</mn>
2946
1
              </mphantom>
2947
1
            </mpadded>
2948
1
          </msubsup>
2949
1
          <mspace width='-0.083em'></mspace>
2950
1
          <msubsup>
2951
1
            <mpadded width='0'>
2952
1
              <mphantom>
2953
1
                <mi>A</mi>
2954
1
              </mphantom>
2955
1
            </mpadded>
2956
1
            <mrow>
2957
1
              <mpadded width='0'>
2958
1
                <mphantom>
2959
1
                  <mn>2</mn>
2960
1
                </mphantom>
2961
1
              </mpadded>
2962
1
              <mpadded width='0' lspace='-1width'>
2963
1
                <mpadded height='0'></mpadded>
2964
1
              </mpadded>
2965
1
            </mrow>
2966
1
            <mrow>
2967
1
              <mpadded height='0'>
2968
1
                <mpadded width='0'>
2969
1
                  <mphantom>
2970
1
                    <mn>2</mn>
2971
1
                  </mphantom>
2972
1
                </mpadded>
2973
1
              </mpadded>
2974
1
              <mpadded width='0' lspace='-1width'>
2975
1
                <mn>18</mn>
2976
1
              </mpadded>
2977
1
            </mrow>
2978
1
          </msubsup>
2979
1
          <mi mathvariant='normal'>O</mi>
2980
1
          <mspace width='0.111em'></mspace>
2981
1
          <msubsup>
2982
1
            <mpadded width='0'>
2983
1
              <mphantom>
2984
1
                <mi>A</mi>
2985
1
              </mphantom>
2986
1
            </mpadded>
2987
1
            <mpadded height='0' depth='0'>
2988
1
              <mphantom></mphantom>
2989
1
            </mpadded>
2990
1
            <mpadded height='0' depth='0'>
2991
1
              <mphantom>
2992
1
                <mn>16</mn>
2993
1
              </mphantom>
2994
1
            </mpadded>
2995
1
          </msubsup>
2996
1
          <mspace width='-0.083em'></mspace>
2997
1
          <msubsup>
2998
1
            <mpadded width='0'>
2999
1
              <mphantom>
3000
1
                <mi>A</mi>
3001
1
              </mphantom>
3002
1
            </mpadded>
3003
1
            <mrow>
3004
1
              <mpadded width='0'>
3005
1
                <mphantom>
3006
1
                  <mn>2</mn>
3007
1
                </mphantom>
3008
1
              </mpadded>
3009
1
              <mpadded width='0' lspace='-1width'>
3010
1
                <mpadded height='0'></mpadded>
3011
1
              </mpadded>
3012
1
            </mrow>
3013
1
            <mrow>
3014
1
              <mpadded height='0'>
3015
1
                <mpadded width='0'>
3016
1
                  <mphantom>
3017
1
                    <mn>2</mn>
3018
1
                  </mphantom>
3019
1
                </mpadded>
3020
1
              </mpadded>
3021
1
              <mpadded width='0' lspace='-1width'>
3022
1
                <mn>16</mn>
3023
1
              </mpadded>
3024
1
            </mrow>
3025
1
          </msubsup>
3026
1
          <mi mathvariant='normal'>O</mi>
3027
1
        </mrow>
3028
1
      </math>";
3029
1
    let target = "<math>
3030
1
        <mrow data-chem-formula='11'>
3031
1
            <mmultiscripts data-previous-space-width='-0.083' data-chem-formula='5'>
3032
1
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
3033
1
                <mprescripts></mprescripts>
3034
1
                <none></none>
3035
1
                <mn>18</mn>
3036
1
            </mmultiscripts>
3037
1
            <mo data-changed='added' data-chem-formula-op='0'>&#x2063;</mo>
3038
1
            <mmultiscripts data-previous-space-width='0.027999999999999997' data-chem-formula='5'>
3039
1
                <mi mathvariant='normal' data-chem-element='2'>O</mi>
3040
1
                <mprescripts></mprescripts>
3041
1
                <none></none>
3042
1
                <mn>16</mn>
3043
1
            </mmultiscripts>
3044
1
        </mrow>
3045
1
    </math>";
3046
1
    assert!(are_strs_canonically_equal(test, target, &[]));
3047
1
    }
3048
3049
    
3050
    #[test]
3051
1
    fn merge_bug_274() {
3052
1
        let test = r#"
3053
1
        <math>
3054
1
            <mrow>
3055
1
                <mtable>
3056
1
                    <mtr>
3057
1
                        <mtd>
3058
1
                            <mrow>
3059
1
                                <msub><mtext>H</mtext><mn>2</mn></msub>
3060
1
                                <mtext>g</mtext>
3061
1
                                <mtext/>
3062
1
                                <mtext>+</mtext>
3063
1
                                <mtext/>
3064
1
                                <msub><mrow><mtext>Cl</mtext></mrow><mn>2</mn></msub>
3065
1
                                <mo stretchy="false">(</mo>
3066
1
                                <mtext>g</mtext>
3067
1
                                <mo stretchy="false">)</mo>
3068
1
                                <mo>&#x2192;</mo>
3069
1
                                <mn>2</mn>
3070
1
                                <mtext>HCl(g)</mtext>
3071
1
                            </mrow>
3072
1
                        </mtd>
3073
1
                    </mtr>
3074
1
                    <mtr>
3075
1
                        <mtd>
3076
1
                            <mrow>
3077
1
                                <mn>1</mn>
3078
1
                                <mo>:</mo>
3079
1
                                <mn>1</mn>
3080
1
                                <mo>:</mo>
3081
1
                                <mn>2</mn>
3082
1
                            </mrow>
3083
1
                        </mtd>
3084
1
                    </mtr>
3085
1
                    <mtr>
3086
1
                        <mtd>
3087
1
                            <mrow>
3088
1
                                <mn>1</mn>
3089
1
                                <mtext/>
3090
1
                                <msub><mtext>H</mtext><mn>2</mn></msub>
3091
1
                                <mtext/>
3092
1
                                <mtext>to</mtext>
3093
1
                                <mtext/>
3094
1
                                <mn>1</mn>
3095
1
                                <mtext/>
3096
1
                                <msub><mrow><mtext>Cl</mtext></mrow><mn>2</mn></msub>
3097
1
                                <mtext/>
3098
1
                                <mtext>to</mtext>
3099
1
                                <mtext/>
3100
1
                                <mtext>2</mtext>
3101
1
                                <mtext/>
3102
1
                                <mtext>HCl</mtext>
3103
1
                            </mrow>
3104
1
                        </mtd>
3105
1
                    </mtr>
3106
1
                </mtable>
3107
1
            </mrow>
3108
1
        </math>
3109
1
        "#;
3110
1
        let target = "
3111
1
            <math>
3112
1
            <mtable>
3113
1
                <mtr>
3114
1
                <mtd data-maybe-chemistry='9'>
3115
1
                    <mrow data-maybe-chemistry='9'>
3116
1
                    <mrow data-changed='added' data-maybe-chemistry='8'>
3117
1
                        <mrow data-changed='added' data-maybe-chemistry='1'>
3118
1
                        <msub data-maybe-chemistry='1'>
3119
1
                            <mtext data-maybe-chemistry='1'>H</mtext>
3120
1
                            <mn>2</mn>
3121
1
                        </msub>
3122
1
                        <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3123
1
                        <mtext data-maybe-chemistry='0'>g</mtext>
3124
1
                        </mrow>
3125
1
                        <mo data-chem-equation-op='1' data-maybe-chemistry='1'>+</mo>
3126
1
                        <mrow data-changed='added' data-maybe-chemistry='6'>
3127
1
                        <msub data-maybe-chemistry='3'>
3128
1
                            <mtext data-maybe-chemistry='3'>Cl</mtext>
3129
1
                            <mn>2</mn>
3130
1
                        </msub>
3131
1
                        <mo data-changed='added' data-maybe-chemistry='0'>&#x2063;</mo>
3132
1
                        <mrow data-changed='added' data-maybe-chemistry='2'>
3133
1
                            <mo stretchy='false'>(</mo>
3134
1
                            <mtext>g</mtext>
3135
1
                            <mo stretchy='false'>)</mo>
3136
1
                        </mrow>
3137
1
                        </mrow>
3138
1
                    </mrow>
3139
1
                    <mo data-chem-equation-op='1' data-maybe-chemistry='1'>→</mo>
3140
1
                    <mrow data-changed='added' data-maybe-chemistry='0'>
3141
1
                        <mn data-maybe-chemistry='0'>2</mn>
3142
1
                        <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3143
1
                        <mtext data-maybe-chemistry='0'>HCl(g)</mtext>
3144
1
                    </mrow>
3145
1
                    </mrow>
3146
1
                </mtd>
3147
1
                </mtr>
3148
1
                <mtr>
3149
1
                <mtd>
3150
1
                    <mrow>
3151
1
                    <mn>1</mn>
3152
1
                    <mo>:</mo>
3153
1
                    <mn>1</mn>
3154
1
                    <mo>:</mo>
3155
1
                    <mn>2</mn>
3156
1
                    </mrow>
3157
1
                </mtd>
3158
1
                </mtr>
3159
1
                <mtr>
3160
1
                <mtd data-maybe-chemistry='7'>
3161
1
                    <mrow data-maybe-chemistry='7'>
3162
1
                    <mn data-maybe-chemistry='0'>1</mn>
3163
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3164
1
                    <msub data-maybe-chemistry='1'>
3165
1
                        <mtext data-maybe-chemistry='1'>H</mtext>
3166
1
                        <mn>2</mn>
3167
1
                    </msub>
3168
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3169
1
                    <mtext data-maybe-chemistry='0'>to</mtext>
3170
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3171
1
                    <mn data-maybe-chemistry='0'>1</mn>
3172
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3173
1
                    <msub data-maybe-chemistry='3'>
3174
1
                        <mtext data-maybe-chemistry='3'>Cl</mtext>
3175
1
                        <mn>2</mn>
3176
1
                    </msub>
3177
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3178
1
                    <mtext data-maybe-chemistry='0'>to</mtext>
3179
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3180
1
                    <mn data-maybe-chemistry='0'>2</mn>
3181
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3182
1
                    <mi data-maybe-chemistry='1' mathvariant='normal'>H</mi>
3183
1
                    <mo data-changed='added' data-maybe-chemistry='0'>&#x2062;</mo>
3184
1
                    <mi data-maybe-chemistry='3' data-split='true'>Cl</mi>
3185
1
                    </mrow>
3186
1
                </mtd>
3187
1
                </mtr>
3188
1
            </mtable>
3189
1
            </math>
3190
1
        ";
3191
1
        assert!(are_strs_canonically_equal(test, target, &[]));
3192
1
    }
3193
    
3194
    #[test]
3195
1
    fn merge_bug_303() {
3196
1
        let test = r#"
3197
1
            <math>
3198
1
                <mn>2</mn>
3199
1
                <msup><mtext>OH</mtext><mo>−</mo></msup>
3200
1
                <mo stretchy="false">(</mo>
3201
1
                <mtext>aq</mtext>
3202
1
                <mo stretchy="false">)</mo>
3203
1
                <mo>+</mo>
3204
1
                <mtext>C</mtext>
3205
1
                <msup><mtext>u</mtext><mrow><mn>2</mn><mo>+</mo></mrow></msup>
3206
1
            </math>
3207
1
        "#;
3208
1
        let target = "
3209
1
            <math>
3210
1
                <mrow data-changed='added'>
3211
1
                <mrow data-changed='added'>
3212
1
                    <mn>2</mn>
3213
1
                    <mo data-changed='added'>&#x2062;</mo>
3214
1
                    <mrow data-changed='added'>
3215
1
                        <msup><mi>OH</mi><mo>-</mo></msup>
3216
1
                        <mo data-changed='added'>&#x2061;</mo>
3217
1
                        <mrow data-changed='added'>
3218
1
                            <mo stretchy='false'>(</mo>
3219
1
                            <mtext>aq</mtext>
3220
1
                            <mo stretchy='false'>)</mo>
3221
1
                        </mrow>
3222
1
                    </mrow>
3223
1
                </mrow>
3224
1
                <mo>+</mo>
3225
1
                <mrow data-changed='added'>
3226
1
                    <mtext>C</mtext>
3227
1
                    <mo data-changed='added'>&#x2062;</mo>
3228
1
                    <msup> <mtext>u</mtext> <mrow><mn>2</mn><mo>+</mo></mrow> </msup>
3229
1
                </mrow>
3230
1
                </mrow>
3231
1
            </math>
3232
1
           ";
3233
1
        assert!(are_strs_canonically_equal(test, target, &[]));
3234
1
    }
3235
    
3236
    #[test]
3237
1
    fn mtd_assert_bug_393() {
3238
1
        let test = r#"
3239
1
        <math display="block">
3240
1
            <mtable>
3241
1
                <mtr>
3242
1
                <mtd>
3243
1
                    <mrow>
3244
1
                    <mi>A</mi>
3245
1
                    <mi>c</mi>
3246
1
                    </mrow>
3247
1
                </mtd>
3248
1
                <mtd>
3249
1
                    <mi>A</mi>
3250
1
                    <mfenced>
3251
1
                    <mtable>
3252
1
                        <mtr>
3253
1
                        <mtd>
3254
1
                            <mrow>
3255
1
                            <mi>c</mi>
3256
1
                            <mi>n</mi>
3257
1
                            </mrow>
3258
1
                        </mtd>
3259
1
                        </mtr>
3260
1
                    </mtable>
3261
1
                    </mfenced>
3262
1
                </mtd>
3263
1
                </mtr>
3264
1
            </mtable>
3265
1
        </math>"#;
3266
1
        let target = "
3267
1
        <math display='block'>
3268
1
            <mtable>
3269
1
            <mtr>
3270
1
                <mtd>
3271
1
                <mi>A</mi>
3272
1
                <mi>c</mi>
3273
1
                </mtd>
3274
1
                <mtd>
3275
1
                <mrow data-changed='added'>
3276
1
                    <mi>A</mi>
3277
1
                    <mrow>
3278
1
                    <mo data-changed='from_mfenced'>(</mo>
3279
1
                    <mtable>
3280
1
                        <mtr>
3281
1
                        <mtd>
3282
1
                            <mrow>
3283
1
                            <mi>c</mi>
3284
1
                            <mi>n</mi>
3285
1
                            </mrow>
3286
1
                        </mtd>
3287
1
                        </mtr>
3288
1
                    </mtable>
3289
1
                    <mo data-changed='from_mfenced'>)</mo>
3290
1
                    </mrow>
3291
1
                </mrow>
3292
1
                </mtd>
3293
1
            </mtr>
3294
1
            </mtable>
3295
1
        </math>";
3296
1
        assert!(are_strs_canonically_equal(test, target, &[]));
3297
1
    }
3298
3299
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/definitions.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/definitions.rs.html index ea111d2d..8f813908 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/definitions.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/definitions.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/definitions.rs
Line
Count
Source
1
//! # Definitions module
2
//! This module is responsible for reading in the definitions files and converting them to either vectors or hashmaps so that
3
//! the definitions can be used by the program.
4
//!
5
//! ## Leaked Implementation Details
6
//! There is no escaping some implementation details.
7
//! Because these definitions are stored in global variables, the variables need to be protected
8
//!   in some way so they can be written at runtime when the files are read.
9
//!   This is done by putting them inside of a lock (`thread_local`).
10
//!
11
//! Furthermore, it was necessary to use `RefCell` and `Rc` to deal with interior mutability.
12
//! All of this means that a lock needs to be obtained _and_ the contents borrowed to access a definition.
13
//!
14
//! To minimize the global variable footprint, all of the definitions are put inside of a single global variable [`DEFINITIONS`].
15
//!
16
//! //! Note: some of the variables are `vec`s and some are `hashset`s.
17
//! Numbers are typically vectors so that indexing a digit is easy.
18
//! Others such as `functions_names` are a hashset because you just want to know if an `mi` is a known name or not.
19
//! The functions `as_vec` and `as_hashset` should be used on the appropriate variable.
20
//! ## Names
21
//! The names of "variables" in the definition files use camel case (e.g., "FunctionNames"). In the code, to fit with rust
22
//! naming conventions, snake case is used (e.g, "function_names"). 
23
//!
24
//! See the struct [`Definitions`] for the variables that are read in.
25
#![allow(clippy::needless_return)]
26
27
use yaml_rust::yaml::Hash;
28
use yaml_rust::Yaml;
29
use crate::errors::*;
30
use crate::prefs::*;
31
use std::{cell::RefCell, cell::Ref, cell::RefMut, rc::Rc};
32
use std::path::{Path, PathBuf};
33
use std::collections::{HashMap, HashSet};
34
use crate::shim_filesystem::read_to_string_shim;
35
36
/// An enum to paper over the different types of data access needed.
37
///
38
/// Having a Rc<RefCell<FromFileVariable>> seems a bit complicated in terms of types but...
39
/// 1. The rust book seems to endorse the Rc<RefCell<...>>> approach when there are multiple owners of mutable date.
40
///    See <https://doc.rust-lang.org/book/ch15-05-interior-mutability.html> towards the end
41
/// 2. When a file is read, we need to clear and add data to the structure being read (reassigning could work for clearing).
42
///    When we use the data, we either want to index into it or test if an item is there.
43
///    The structures we use are either a Vec or a HashMap, so we need to abstract that away in `FromFileVariable`.
44
///    Unfortunately, traits don't quite work as an option here:
45
///    *  Vec implements extends (`add`), but there is no test/contains
46
///    *  Hashmap implements `index`, but panics if the item isn't there
47
///
48
/// Because of the above limitations, we introduce the enum [`Contains`] which dispatches appropriately to Vec/Hashmap
49
#[derive(Debug, Clone)]
50
pub enum Contains {
51
    Vec(Rc<RefCell<Vec<String>>>),
52
    Set(Rc<RefCell<HashSet<String>>>),
53
    Map(Rc<RefCell<HashMap<String, String>>>),
54
}
55
56
impl Contains {
57
    // fn add(&mut self, item: String) {
58
    //     match self {
59
    //         Contains::Vec(v) => { v.borrow_mut().push(item); },
60
    //         Contains::Set(s) => { s.borrow_mut().insert(item); }
61
    //     }
62
    // }
63
64
    // fn clear(&mut self) {
65
    //     match self {
66
    //         Contains::Vec(v) => { v.borrow_mut().clear(); },
67
    //         Contains::Set(s) => { s.borrow_mut().clear(); }
68
    //     }
69
    // }
70
}
71
pub type CollectionFromFile = Contains;
72
type VariableDefHashMap = HashMap<String, CollectionFromFile>;
73
74
/// Global structure containing all of the definitions.
75
/// Each field in the structure corresponds to a named value read in from the `definitions.yaml` files.
76
///
77
/// The names of "variables" in the definition files use camel case (e.g., "FunctionNames"). In the code, to fit with rust
78
/// naming conventions, snake case is used (e.g, "function_names").
79
///
80
/// There should only be one instance of this structure ([`DEFINITIONS`])
81
// FIX: this probably can done with a macro to remove all the repetition
82
pub struct Definitions {
83
    pub name_to_var_mapping: VariableDefHashMap,
84
}
85
86
impl Default for Definitions {
87
0
    fn default() -> Self {
88
0
        Definitions {
89
0
            name_to_var_mapping: HashMap::with_capacity(30),
90
0
        }
91
0
    }
92
}
93
94
impl Definitions {
95
5.48k
    fn new() -> Self {
96
5.48k
        Definitions {
97
5.48k
            name_to_var_mapping: HashMap::with_capacity(30),
98
5.48k
        }
99
5.48k
    }
100
101
158k
    pub fn get_hashset(&self, name: &str) -> Option<Ref<'_, HashSet<String>>> {
102
158k
        let names = self.name_to_var_mapping.get(name);
103
151k
        if let Some(Contains::Set(
set151k
)) = names {
104
151k
            return Some(set.borrow());
105
7.29k
        }
106
7.29k
        return None;
107
158k
    }
108
109
15.9k
    pub fn get_hashmap(&self, name: &str) ->  Option<Ref<'_, HashMap<String, String>>> {
110
15.9k
        let names = self.name_to_var_mapping.get(name);
111
15.9k
        if let Some(Contains::Map(map)) = names {
112
15.9k
            return Some(map.borrow());
113
0
        }
114
0
        return None;
115
15.9k
    }
116
117
1.31k
    pub fn get_vec(&self, name: &str) -> Option<Ref<'_, Vec<String>>> {
118
1.31k
        let names = self.name_to_var_mapping.get(name);
119
1.31k
        if let Some(Contains::Vec(vec)) = names {
120
1.31k
            return Some(vec.borrow());
121
0
        }
122
0
        return None;
123
1.31k
    }
124
}
125
126
thread_local!{
127
    /// Global variable containing all of the definitions.
128
    /// See [`Definitions`] for more details.
129
    pub static SPEECH_DEFINITIONS: RefCell<Definitions> = RefCell::new( Definitions::new() );
130
    pub static BRAILLE_DEFINITIONS: RefCell<Definitions> = RefCell::new( Definitions::new() );
131
    pub static DEFINITIONS: &'static std::thread::LocalKey<RefCell<Definitions>> = const { &SPEECH_DEFINITIONS };
132
}
133
134
/// Reads the `definitions.yaml` files specified by current_files -- these are presumed to need updating. 
135
///
136
/// If there is a failure during read, the error is propagated to the caller
137
5.50k
pub fn read_definitions_file(use_speech_defs: bool) -> Result<Vec<PathBuf>> {
138
    // for each file in `locations`, read the contents and process them
139
5.50k
    let pref_manager = PreferenceManager::get();
140
5.50k
    let pref_manager = pref_manager.borrow();
141
5.50k
    let file_path = pref_manager.get_definitions_file(use_speech_defs);
142
5.50k
    let definitions = if use_speech_defs {
&SPEECH_DEFINITIONS4.14k
} else {
&BRAILLE_DEFINITIONS1.35k
};
143
5.50k
    definitions.with( |defs| defs.borrow_mut().name_to_var_mapping.clear() );
144
5.50k
    let mut new_files = vec![file_path.to_path_buf()];
145
5.50k
    let mut files_read = read_one_definitions_file(use_speech_defs, file_path).with_context(|| 
format!0
("in file '{}",
file_path0
.
to_string_lossy0
()))
?0
;
146
5.50k
    new_files.append(&mut files_read);
147
148
    // merge the contents of `TrigFunctions` into a set that contains all the function names (from `AdditionalFunctionNames`).
149
5.50k
    return definitions.with(|defs| {
150
5.50k
        let mut defs = defs.borrow_mut();
151
5.50k
        make_all_set_references_valid(&mut defs);
152
5.50k
        return Ok(new_files);
153
5.50k
    });
154
    
155
156
    /// Make references to all used set be valid by creating empty sets if they weren't defined
157
5.50k
    fn make_all_set_references_valid(defs: &mut RefMut<Definitions>) {
158
        // FIX: this list is created by hand -- it would be better if there was a way to create the list Automatically
159
        // Note: "FunctionNames" is created in build_all_functions_set() if not already set
160
5.50k
        let used_set_names = ["GeometryPrefixOperators", "LikelyFunctionNames", "TrigFunctionNames", "AdditionalFunctionNames", "Arrows", "GeometryShapes"];
161
        // let name_to_mapping = defs.name_to_var_mapping.borrow_mut();
162
33.0k
        for set_name in 
used_set_names5.50k
{
163
33.0k
            if defs.get_hashset(set_name).is_none() {
164
1.74k
                defs.name_to_var_mapping.insert(set_name.to_string(), Contains::Set( Rc::new( RefCell::new( HashSet::with_capacity(0) ) ) ));
165
31.2k
            }
166
        }
167
5.50k
        if defs.get_hashset("FunctionNames").is_none() {
168
5.46k
            let all_functions = build_all_functions_set(defs);
169
5.46k
            defs.name_to_var_mapping.insert("FunctionNames".to_string(), Contains::Set( Rc::new( RefCell::new( all_functions ) ) ));
170
5.46k
        
}41
171
5.50k
    }
172
173
    /// merge "TrigFunctions" and "AdditionalFunctionNames" into a new set named "FunctionNames"
174
5.46k
    fn build_all_functions_set(defs: &mut RefMut<Definitions>) -> HashSet<String> {
175
5.46k
        let trig_functions = defs.get_hashset("TrigFunctionNames").unwrap();
176
5.46k
        let mut all_functions = defs.get_hashset("AdditionalFunctionNames").unwrap().clone();
177
109k
        for trig_name in 
trig_functions.iter()5.46k
{
178
109k
            all_functions.insert(trig_name.clone());
179
109k
        }
180
5.46k
        return all_functions;
181
5.46k
    }
182
5.50k
}
183
184
use crate::speech::*;
185
11.7k
fn read_one_definitions_file(use_speech_defs: bool, path: &Path) -> Result<Vec<PathBuf>> {
186
    // read in the file contents   
187
11.7k
    let definition_file_contents = read_to_string_shim(path)
188
11.7k
            .with_context(|| 
format!0
("trying to read {}",
path0
.
to_str0
().
unwrap0
()))
?0
;
189
190
    // callback to do the work of building up the defined vectors/hashmaps (in 'build_values') from YAML
191
11.7k
    let defs_build_fn = |variable_def_list: &Yaml| {
192
        // Rule::DefinitionList
193
        // debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list));
194
11.7k
        let mut files_read = vec![path.to_path_buf()];
195
11.7k
        let vec = crate::speech::as_vec_checked(variable_def_list)
196
11.7k
                    .with_context(||
format!0
("in file {:?}",
path0
.
to_str0
()))
?0
;
197
175k
        for variable_def in 
vec11.7k
{
198
175k
            if let Some(
mut added_files6.27k
) = build_values(variable_def, use_speech_defs, path).with_context(||
format!0
("in file {:?}",
path0
.
to_str0
()))
?0
{
199
6.27k
                files_read.append(&mut added_files);
200
168k
            }
201
        }
202
11.7k
        return Ok(files_read);
203
11.7k
    };
204
205
    // Convert the file contents to YAML and call the callback
206
11.7k
    return crate::speech::compile_rule(&definition_file_contents, defs_build_fn)
207
11.7k
        .with_context(|| 
format!0
("In file '{}'",
path0
.
to_str0
().
unwrap0
()));
208
11.7k
}
209
210
/// Do the work of converting a single YAML def into the vec/hashset/hashmap
211
/// name: [a, b, c] -- assume an indexed vector
212
/// name: {a, b, c} -- assume a hash set
213
/// name: {a: A, b: B, c: C} -- assume a hashmap
214
/// Returns all the files that were read
215
175k
fn build_values(definition: &Yaml, use_speech_defs: bool, path: &Path) -> Result<Option<Vec<PathBuf>>> {
216
    // Rule::Definition
217
175k
    let dictionary = crate::speech::as_hash_checked(definition)
?0
;
218
175k
    if dictionary.len()!=1 {
219
0
        bail!("Should only be one definition rule: {}", yaml_to_type(definition));
220
175k
    }
221
175k
    let (key, value) = dictionary.iter().next().unwrap();
222
175k
    let def_name = key.as_str().ok_or_else(|| 
anyhow!0
("definition list name '{}' is not a string",
yaml_to_type0
(
key0
)))
?0
;
223
175k
    if def_name == "include" {
224
6.27k
        let do_include_fn = |new_file: &Path| {
225
6.27k
            read_one_definitions_file(use_speech_defs, new_file)
226
6.27k
        };
227
6.27k
        let include_file_name = value.as_str().ok_or_else(|| 
anyhow!0
("definition list include name '{}' is not a string",
yaml_to_type0
(
value0
)))
?0
;
228
6.27k
        return Ok( Some(crate::speech::process_include(path, include_file_name, do_include_fn)
?0
) );
229
168k
    }
230
231
    let result;
232
168k
    if def_name.starts_with("Numbers") || 
def_name110k
.
ends_with110k
("_vec") {
233
58.0k
         result = Contains::Vec( Rc::new( RefCell::new( get_vec_values(value.as_vec().unwrap())
?0
) ) );
234
    } else {
235
        // match value.as_vec() {
236
        //     Some(vec) => {
237
        //         result = Contains::Set( Rc::new( RefCell::new( get_set_values(vec)? ) ) );            },
238
        //     None => {
239
        //         let dict = value.as_hash().ok_or_else(|| anyhow!("definition list value '{}' is not an array or dictionary", yaml_to_type(value)))?;
240
        //         result = Contains::Map( Rc::new( RefCell::new( get_map_values(dict)
241
        //                     .chain_err(||format!("while reading value '{}'", def_name))? ) ) );
242
243
        //     },
244
        // }
245
110k
        let dict = value.as_hash().ok_or_else(|| 
anyhow!0
("definition list value '{}' is not an array or dictionary",
yaml_to_type0
(
value0
)))
?0
;
246
110k
        if dict.is_empty() {
247
15.6k
            result = Contains::Set( Rc::new( RefCell::new( HashSet::with_capacity(0) ) ) );
248
15.6k
        } else {
249
            // peak and see if this is a set or a map
250
95.3k
            let (_, entry_value) = dict.iter().next().unwrap();
251
95.3k
            if entry_value.is_null() {
252
63.4k
                result = Contains::Set( Rc::new( RefCell::new( get_set_values(dict)
253
63.4k
                            .with_context(||
format!0
("while reading value '{def_name}'"))
?0
) ) );
254
            } else {
255
                // peak and see if this is a set or a map
256
31.8k
                let (_, entry_value) = dict.iter().next().unwrap();
257
31.8k
                if entry_value.is_null() {
258
0
                    result = Contains::Set( Rc::new( RefCell::new( get_set_values(dict)
259
0
                                .with_context(||format!("while reading value '{def_name}'"))? ) ) );
260
                } else {
261
31.8k
                    result = Contains::Map( Rc::new( RefCell::new( get_map_values(dict)
262
31.8k
                                .with_context(||
format!0
("while reading value '{def_name}'"))
?0
) ) );
263
                }
264
            }
265
        }
266
    };
267
268
168k
    let definitions = if use_speech_defs {
&SPEECH_DEFINITIONS149k
} else {
&BRAILLE_DEFINITIONS19.3k
};
269
168k
    return definitions.with(|definitions| {
270
168k
        let name_definition_map = &mut definitions.borrow_mut().name_to_var_mapping;
271
168k
        name_definition_map.insert(def_name.to_string(), result);
272
168k
        return Ok(None);
273
168k
    });
274
275
58.0k
    fn get_vec_values(values: &Vec<Yaml>) -> Result<Vec<String>> {
276
58.0k
        let mut result = Vec::with_capacity(values.len());
277
787k
        for yaml_value in 
values58.0k
{
278
787k
            let value = yaml_value.as_str()
279
787k
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
yaml_value0
)))
?0
280
787k
                .to_string();
281
787k
            result.push(value);
282
        }
283
58.0k
        return Ok(result);
284
58.0k
    }
285
286
63.4k
    fn get_set_values(values: &Hash) -> Result<HashSet<String>> {
287
63.4k
        let mut result = HashSet::with_capacity(2*values.len());
288
5.81M
        for (key, value) in 
values63.4k
{
289
5.81M
            let key = key.as_str()
290
5.81M
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
key0
)))
?0
291
5.81M
                .to_string();
292
5.81M
            if let Yaml::Null = value {
293
5.81M
            } else {
294
0
                bail!("list entry '{}' is not a string", yaml_to_type(value));
295
            }
296
5.81M
            result.insert(key);
297
        }
298
63.4k
        return Ok(result);
299
63.4k
    }
300
301
31.8k
    fn get_map_values(values: &Hash) -> Result<HashMap<String, String>> {
302
31.8k
        let mut result = HashMap::with_capacity(2*values.len());
303
1.13M
        for (key, value) in 
values31.8k
{
304
1.13M
            let key = key.as_str()
305
1.13M
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
key0
)))
?0
306
1.13M
                .to_string();
307
1.13M
            let value = value.as_str()
308
1.13M
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
value0
)))
?0
309
1.13M
                .to_string();
310
1.13M
            result.insert(key, value);
311
        }
312
31.8k
        return Ok(result);
313
31.8k
    }
314
175k
}
315
316
317
#[cfg(test)]
318
mod tests {
319
    use super::*;
320
321
    #[test]
322
1
    fn test_vec() {
323
1
        let numbers = r#"[NumbersTens: ["", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]]"#;
324
1
        let defs_build_fn = |variable_def_list: &Yaml| {
325
            // Rule::DefinitionList
326
            //debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list, 0));
327
1
            for variable_def in variable_def_list.as_vec().unwrap() {
328
1
                if let Err(
e0
) = build_values(variable_def, true, Path::new("")) {
329
0
                    bail!("{}", crate::interface::errors_to_string(&e.context(format!("in file {:?}", numbers))));
330
1
                }
331
            }
332
1
            return Ok(vec![]);
333
1
        };
334
1
        compile_rule(numbers, defs_build_fn).unwrap();
335
1
        SPEECH_DEFINITIONS.with(|defs| {
336
1
            let defs = defs.borrow();
337
1
            let names = defs.get_vec("NumbersTens");
338
1
            assert!(names.is_some());
339
1
            let names = names.unwrap();
340
1
            assert_eq!(names.len(), 10);
341
1
            assert_eq!(names[0], "");
342
1
            assert_eq!(names[9], "ninety");
343
1
        });
344
1
    }
345
346
347
    #[test]
348
1
    fn test_set() {
349
1
        let likely_function_names = r#"[LikelyFunctionNames: {"f", "g", "h", "F", "G", "H", "[A-Za-z]+"}]"#;
350
1
        let defs_build_fn = |variable_def_list: &Yaml| {
351
            // Rule::DefinitionList
352
            //debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list, 0));
353
1
            for variable_def in variable_def_list.as_vec().unwrap() {
354
1
                if let Err(
e0
) = build_values(variable_def, true, Path::new("")) {
355
0
                    bail!("{}", crate::interface::errors_to_string(&e.context(format!("in file {:?}", likely_function_names))));
356
1
                }
357
            }
358
1
            return Ok(vec![]);
359
1
        };
360
1
        compile_rule(likely_function_names, defs_build_fn).unwrap();
361
1
        SPEECH_DEFINITIONS.with(|defs| {
362
1
            let defs = defs.borrow();
363
1
            let names = defs.get_hashset("LikelyFunctionNames");
364
1
            assert!(names.is_some());
365
1
            let names = names.unwrap();
366
1
            assert_eq!(names.len(), 7);
367
1
            assert!(names.contains("f"));
368
1
            assert!(!names.contains("a"));
369
1
        });
370
1
    }
371
372
    #[test]
373
1
    fn test_hashmap() {
374
1
        let units = r#"[Units: {"A": "amp", "g": "gram", "m": "meter", "sec": "second"}]"#;
375
1
        let defs_build_fn = |variable_def_list: &Yaml| {
376
            // Rule::DefinitionList
377
            //debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list, 0));
378
1
            for variable_def in variable_def_list.as_vec().unwrap() {
379
1
                if let Err(
e0
) = build_values(variable_def, true, Path::new("")) {
380
0
                    bail!("{}", crate::interface::errors_to_string(&e.context(format!("in file {:?}", units))));
381
1
                }
382
            }
383
1
            return Ok(vec![]);
384
1
        };
385
1
        compile_rule(units, defs_build_fn).unwrap();
386
1
        SPEECH_DEFINITIONS.with(|defs| {
387
1
            let defs = defs.borrow();
388
1
            let names = defs.get_hashmap("Units");
389
1
            assert!(names.is_some());
390
1
            let names = names.unwrap();
391
1
            assert_eq!(names.len(), 4);
392
1
            assert_eq!(names.get("A").unwrap(), "amp");
393
1
            assert_eq!(names.get("sec").unwrap(), "second");
394
1
            assert_eq!(names.get("xxx"), None);
395
1
        });
396
1
    }
397
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/definitions.rs
Line
Count
Source
1
//! # Definitions module
2
//! This module is responsible for reading in the definitions files and converting them to either vectors or hashmaps so that
3
//! the definitions can be used by the program.
4
//!
5
//! ## Leaked Implementation Details
6
//! There is no escaping some implementation details.
7
//! Because these definitions are stored in global variables, the variables need to be protected
8
//!   in some way so they can be written at runtime when the files are read.
9
//!   This is done by putting them inside of a lock (`thread_local`).
10
//!
11
//! Furthermore, it was necessary to use `RefCell` and `Rc` to deal with interior mutability.
12
//! All of this means that a lock needs to be obtained _and_ the contents borrowed to access a definition.
13
//!
14
//! To minimize the global variable footprint, all of the definitions are put inside of a single global variable [`DEFINITIONS`].
15
//!
16
//! //! Note: some of the variables are `vec`s and some are `hashset`s.
17
//! Numbers are typically vectors so that indexing a digit is easy.
18
//! Others such as `functions_names` are a hashset because you just want to know if an `mi` is a known name or not.
19
//! The functions `as_vec` and `as_hashset` should be used on the appropriate variable.
20
//! ## Names
21
//! The names of "variables" in the definition files use camel case (e.g., "FunctionNames"). In the code, to fit with rust
22
//! naming conventions, snake case is used (e.g, "function_names"). 
23
//!
24
//! See the struct [`Definitions`] for the variables that are read in.
25
#![allow(clippy::needless_return)]
26
27
use yaml_rust::yaml::Hash;
28
use yaml_rust::Yaml;
29
use crate::errors::*;
30
use crate::prefs::*;
31
use std::{cell::RefCell, cell::Ref, cell::RefMut, rc::Rc};
32
use std::path::{Path, PathBuf};
33
use std::collections::{HashMap, HashSet};
34
use crate::shim_filesystem::read_to_string_shim;
35
36
/// An enum to paper over the different types of data access needed.
37
///
38
/// Having a Rc<RefCell<FromFileVariable>> seems a bit complicated in terms of types but...
39
/// 1. The rust book seems to endorse the Rc<RefCell<...>>> approach when there are multiple owners of mutable date.
40
///    See <https://doc.rust-lang.org/book/ch15-05-interior-mutability.html> towards the end
41
/// 2. When a file is read, we need to clear and add data to the structure being read (reassigning could work for clearing).
42
///    When we use the data, we either want to index into it or test if an item is there.
43
///    The structures we use are either a Vec or a HashMap, so we need to abstract that away in `FromFileVariable`.
44
///    Unfortunately, traits don't quite work as an option here:
45
///    *  Vec implements extends (`add`), but there is no test/contains
46
///    *  Hashmap implements `index`, but panics if the item isn't there
47
///
48
/// Because of the above limitations, we introduce the enum [`Contains`] which dispatches appropriately to Vec/Hashmap
49
#[derive(Debug, Clone)]
50
pub enum Contains {
51
    Vec(Rc<RefCell<Vec<String>>>),
52
    Set(Rc<RefCell<HashSet<String>>>),
53
    Map(Rc<RefCell<HashMap<String, String>>>),
54
}
55
56
impl Contains {
57
    // fn add(&mut self, item: String) {
58
    //     match self {
59
    //         Contains::Vec(v) => { v.borrow_mut().push(item); },
60
    //         Contains::Set(s) => { s.borrow_mut().insert(item); }
61
    //     }
62
    // }
63
64
    // fn clear(&mut self) {
65
    //     match self {
66
    //         Contains::Vec(v) => { v.borrow_mut().clear(); },
67
    //         Contains::Set(s) => { s.borrow_mut().clear(); }
68
    //     }
69
    // }
70
}
71
pub type CollectionFromFile = Contains;
72
type VariableDefHashMap = HashMap<String, CollectionFromFile>;
73
74
/// Global structure containing all of the definitions.
75
/// Each field in the structure corresponds to a named value read in from the `definitions.yaml` files.
76
///
77
/// The names of "variables" in the definition files use camel case (e.g., "FunctionNames"). In the code, to fit with rust
78
/// naming conventions, snake case is used (e.g, "function_names").
79
///
80
/// There should only be one instance of this structure ([`DEFINITIONS`])
81
// FIX: this probably can done with a macro to remove all the repetition
82
pub struct Definitions {
83
    pub name_to_var_mapping: VariableDefHashMap,
84
}
85
86
impl Default for Definitions {
87
0
    fn default() -> Self {
88
0
        Definitions {
89
0
            name_to_var_mapping: HashMap::with_capacity(30),
90
0
        }
91
0
    }
92
}
93
94
impl Definitions {
95
5.48k
    fn new() -> Self {
96
5.48k
        Definitions {
97
5.48k
            name_to_var_mapping: HashMap::with_capacity(30),
98
5.48k
        }
99
5.48k
    }
100
101
158k
    pub fn get_hashset(&self, name: &str) -> Option<Ref<'_, HashSet<String>>> {
102
158k
        let names = self.name_to_var_mapping.get(name);
103
151k
        if let Some(Contains::Set(
set151k
)) = names {
104
151k
            return Some(set.borrow());
105
7.29k
        }
106
7.29k
        return None;
107
158k
    }
108
109
15.9k
    pub fn get_hashmap(&self, name: &str) ->  Option<Ref<'_, HashMap<String, String>>> {
110
15.9k
        let names = self.name_to_var_mapping.get(name);
111
15.9k
        if let Some(Contains::Map(map)) = names {
112
15.9k
            return Some(map.borrow());
113
0
        }
114
0
        return None;
115
15.9k
    }
116
117
1.31k
    pub fn get_vec(&self, name: &str) -> Option<Ref<'_, Vec<String>>> {
118
1.31k
        let names = self.name_to_var_mapping.get(name);
119
1.31k
        if let Some(Contains::Vec(vec)) = names {
120
1.31k
            return Some(vec.borrow());
121
0
        }
122
0
        return None;
123
1.31k
    }
124
}
125
126
thread_local!{
127
    /// Global variable containing all of the definitions.
128
    /// See [`Definitions`] for more details.
129
    pub static SPEECH_DEFINITIONS: RefCell<Definitions> = RefCell::new( Definitions::new() );
130
    pub static BRAILLE_DEFINITIONS: RefCell<Definitions> = RefCell::new( Definitions::new() );
131
    pub static DEFINITIONS: &'static std::thread::LocalKey<RefCell<Definitions>> = const { &SPEECH_DEFINITIONS };
132
}
133
134
/// Reads the `definitions.yaml` files specified by current_files -- these are presumed to need updating. 
135
///
136
/// If there is a failure during read, the error is propagated to the caller
137
5.50k
pub fn read_definitions_file(use_speech_defs: bool) -> Result<Vec<PathBuf>> {
138
    // for each file in `locations`, read the contents and process them
139
5.50k
    let pref_manager = PreferenceManager::get();
140
5.50k
    let pref_manager = pref_manager.borrow();
141
5.50k
    let file_path = pref_manager.get_definitions_file(use_speech_defs);
142
5.50k
    let definitions = if use_speech_defs {
&SPEECH_DEFINITIONS4.14k
} else {
&BRAILLE_DEFINITIONS1.35k
};
143
5.50k
    definitions.with( |defs| defs.borrow_mut().name_to_var_mapping.clear() );
144
5.50k
    let mut new_files = vec![file_path.to_path_buf()];
145
5.50k
    let mut files_read = read_one_definitions_file(use_speech_defs, file_path).with_context(|| 
format!0
("in file '{}",
file_path0
.
to_string_lossy0
()))
?0
;
146
5.50k
    new_files.append(&mut files_read);
147
148
    // merge the contents of `TrigFunctions` into a set that contains all the function names (from `AdditionalFunctionNames`).
149
5.50k
    return definitions.with(|defs| {
150
5.50k
        let mut defs = defs.borrow_mut();
151
5.50k
        make_all_set_references_valid(&mut defs);
152
5.50k
        return Ok(new_files);
153
5.50k
    });
154
    
155
156
    /// Make references to all used set be valid by creating empty sets if they weren't defined
157
5.50k
    fn make_all_set_references_valid(defs: &mut RefMut<Definitions>) {
158
        // FIX: this list is created by hand -- it would be better if there was a way to create the list Automatically
159
        // Note: "FunctionNames" is created in build_all_functions_set() if not already set
160
5.50k
        let used_set_names = ["GeometryPrefixOperators", "LikelyFunctionNames", "TrigFunctionNames", "AdditionalFunctionNames", "Arrows", "GeometryShapes"];
161
        // let name_to_mapping = defs.name_to_var_mapping.borrow_mut();
162
33.0k
        for set_name in 
used_set_names5.50k
{
163
33.0k
            if defs.get_hashset(set_name).is_none() {
164
1.74k
                defs.name_to_var_mapping.insert(set_name.to_string(), Contains::Set( Rc::new( RefCell::new( HashSet::with_capacity(0) ) ) ));
165
31.2k
            }
166
        }
167
5.50k
        if defs.get_hashset("FunctionNames").is_none() {
168
5.46k
            let all_functions = build_all_functions_set(defs);
169
5.46k
            defs.name_to_var_mapping.insert("FunctionNames".to_string(), Contains::Set( Rc::new( RefCell::new( all_functions ) ) ));
170
5.46k
        
}41
171
5.50k
    }
172
173
    /// merge "TrigFunctions" and "AdditionalFunctionNames" into a new set named "FunctionNames"
174
5.46k
    fn build_all_functions_set(defs: &mut RefMut<Definitions>) -> HashSet<String> {
175
5.46k
        let trig_functions = defs.get_hashset("TrigFunctionNames").unwrap();
176
5.46k
        let mut all_functions = defs.get_hashset("AdditionalFunctionNames").unwrap().clone();
177
109k
        for trig_name in 
trig_functions.iter()5.46k
{
178
109k
            all_functions.insert(trig_name.clone());
179
109k
        }
180
5.46k
        return all_functions;
181
5.46k
    }
182
5.50k
}
183
184
use crate::speech::*;
185
11.7k
fn read_one_definitions_file(use_speech_defs: bool, path: &Path) -> Result<Vec<PathBuf>> {
186
    // read in the file contents   
187
11.7k
    let definition_file_contents = read_to_string_shim(path)
188
11.7k
            .with_context(|| 
format!0
("trying to read {}",
path0
.
to_str0
().
unwrap0
()))
?0
;
189
190
    // callback to do the work of building up the defined vectors/hashmaps (in 'build_values') from YAML
191
11.7k
    let defs_build_fn = |variable_def_list: &Yaml| {
192
        // Rule::DefinitionList
193
        // debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list));
194
11.7k
        let mut files_read = vec![path.to_path_buf()];
195
11.7k
        let vec = crate::speech::as_vec_checked(variable_def_list)
196
11.7k
                    .with_context(||
format!0
("in file {:?}",
path0
.
to_str0
()))
?0
;
197
175k
        for variable_def in 
vec11.7k
{
198
175k
            if let Some(
mut added_files6.27k
) = build_values(variable_def, use_speech_defs, path).with_context(||
format!0
("in file {:?}",
path0
.
to_str0
()))
?0
{
199
6.27k
                files_read.append(&mut added_files);
200
168k
            }
201
        }
202
11.7k
        return Ok(files_read);
203
11.7k
    };
204
205
    // Convert the file contents to YAML and call the callback
206
11.7k
    return crate::speech::compile_rule(&definition_file_contents, defs_build_fn)
207
11.7k
        .with_context(|| 
format!0
("In file '{}'",
path0
.
to_str0
().
unwrap0
()));
208
11.7k
}
209
210
/// Do the work of converting a single YAML def into the vec/hashset/hashmap
211
/// name: [a, b, c] -- assume an indexed vector
212
/// name: {a, b, c} -- assume a hash set
213
/// name: {a: A, b: B, c: C} -- assume a hashmap
214
/// Returns all the files that were read
215
175k
fn build_values(definition: &Yaml, use_speech_defs: bool, path: &Path) -> Result<Option<Vec<PathBuf>>> {
216
    // Rule::Definition
217
175k
    let dictionary = crate::speech::as_hash_checked(definition)
?0
;
218
175k
    if dictionary.len()!=1 {
219
0
        bail!("Should only be one definition rule: {}", yaml_to_type(definition));
220
175k
    }
221
175k
    let (key, value) = dictionary.iter().next().unwrap();
222
175k
    let def_name = key.as_str().ok_or_else(|| 
anyhow!0
("definition list name '{}' is not a string",
yaml_to_type0
(
key0
)))
?0
;
223
175k
    if def_name == "include" {
224
6.27k
        let do_include_fn = |new_file: &Path| {
225
6.27k
            read_one_definitions_file(use_speech_defs, new_file)
226
6.27k
        };
227
6.27k
        let include_file_name = value.as_str().ok_or_else(|| 
anyhow!0
("definition list include name '{}' is not a string",
yaml_to_type0
(
value0
)))
?0
;
228
6.27k
        return Ok( Some(crate::speech::process_include(path, include_file_name, do_include_fn)
?0
) );
229
168k
    }
230
231
    let result;
232
168k
    if def_name.starts_with("Numbers") || 
def_name110k
.
ends_with110k
("_vec") {
233
58.0k
         result = Contains::Vec( Rc::new( RefCell::new( get_vec_values(value.as_vec().unwrap())
?0
) ) );
234
    } else {
235
        // match value.as_vec() {
236
        //     Some(vec) => {
237
        //         result = Contains::Set( Rc::new( RefCell::new( get_set_values(vec)? ) ) );            },
238
        //     None => {
239
        //         let dict = value.as_hash().ok_or_else(|| anyhow!("definition list value '{}' is not an array or dictionary", yaml_to_type(value)))?;
240
        //         result = Contains::Map( Rc::new( RefCell::new( get_map_values(dict)
241
        //                     .chain_err(||format!("while reading value '{}'", def_name))? ) ) );
242
243
        //     },
244
        // }
245
110k
        let dict = value.as_hash().ok_or_else(|| 
anyhow!0
("definition list value '{}' is not an array or dictionary",
yaml_to_type0
(
value0
)))
?0
;
246
110k
        if dict.is_empty() {
247
15.6k
            result = Contains::Set( Rc::new( RefCell::new( HashSet::with_capacity(0) ) ) );
248
15.6k
        } else {
249
            // peak and see if this is a set or a map
250
95.3k
            let (_, entry_value) = dict.iter().next().unwrap();
251
95.3k
            if entry_value.is_null() {
252
63.4k
                result = Contains::Set( Rc::new( RefCell::new( get_set_values(dict)
253
63.4k
                            .with_context(||
format!0
("while reading value '{def_name}'"))
?0
) ) );
254
            } else {
255
                // peak and see if this is a set or a map
256
31.8k
                let (_, entry_value) = dict.iter().next().unwrap();
257
31.8k
                if entry_value.is_null() {
258
0
                    result = Contains::Set( Rc::new( RefCell::new( get_set_values(dict)
259
0
                                .with_context(||format!("while reading value '{def_name}'"))? ) ) );
260
                } else {
261
31.8k
                    result = Contains::Map( Rc::new( RefCell::new( get_map_values(dict)
262
31.8k
                                .with_context(||
format!0
("while reading value '{def_name}'"))
?0
) ) );
263
                }
264
            }
265
        }
266
    };
267
268
168k
    let definitions = if use_speech_defs {
&SPEECH_DEFINITIONS149k
} else {
&BRAILLE_DEFINITIONS19.3k
};
269
168k
    return definitions.with(|definitions| {
270
168k
        let name_definition_map = &mut definitions.borrow_mut().name_to_var_mapping;
271
168k
        name_definition_map.insert(def_name.to_string(), result);
272
168k
        return Ok(None);
273
168k
    });
274
275
58.0k
    fn get_vec_values(values: &Vec<Yaml>) -> Result<Vec<String>> {
276
58.0k
        let mut result = Vec::with_capacity(values.len());
277
787k
        for yaml_value in 
values58.0k
{
278
787k
            let value = yaml_value.as_str()
279
787k
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
yaml_value0
)))
?0
280
787k
                .to_string();
281
787k
            result.push(value);
282
        }
283
58.0k
        return Ok(result);
284
58.0k
    }
285
286
63.4k
    fn get_set_values(values: &Hash) -> Result<HashSet<String>> {
287
63.4k
        let mut result = HashSet::with_capacity(2*values.len());
288
5.81M
        for (key, value) in 
values63.4k
{
289
5.81M
            let key = key.as_str()
290
5.81M
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
key0
)))
?0
291
5.81M
                .to_string();
292
5.81M
            if let Yaml::Null = value {
293
5.81M
            } else {
294
0
                bail!("list entry '{}' is not a string", yaml_to_type(value));
295
            }
296
5.81M
            result.insert(key);
297
        }
298
63.4k
        return Ok(result);
299
63.4k
    }
300
301
31.8k
    fn get_map_values(values: &Hash) -> Result<HashMap<String, String>> {
302
31.8k
        let mut result = HashMap::with_capacity(2*values.len());
303
1.13M
        for (key, value) in 
values31.8k
{
304
1.13M
            let key = key.as_str()
305
1.13M
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
key0
)))
?0
306
1.13M
                .to_string();
307
1.13M
            let value = value.as_str()
308
1.13M
                .ok_or_else(|| 
anyhow!0
("list entry '{}' is not a string",
yaml_to_type0
(
value0
)))
?0
309
1.13M
                .to_string();
310
1.13M
            result.insert(key, value);
311
        }
312
31.8k
        return Ok(result);
313
31.8k
    }
314
175k
}
315
316
317
#[cfg(test)]
318
mod tests {
319
    use super::*;
320
321
    #[test]
322
1
    fn test_vec() {
323
1
        let numbers = r#"[NumbersTens: ["", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]]"#;
324
1
        let defs_build_fn = |variable_def_list: &Yaml| {
325
            // Rule::DefinitionList
326
            //debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list, 0));
327
1
            for variable_def in variable_def_list.as_vec().unwrap() {
328
1
                if let Err(
e0
) = build_values(variable_def, true, Path::new("")) {
329
0
                    bail!("{}", crate::interface::errors_to_string(&e.context(format!("in file {:?}", numbers))));
330
1
                }
331
            }
332
1
            return Ok(vec![]);
333
1
        };
334
1
        compile_rule(numbers, defs_build_fn).unwrap();
335
1
        SPEECH_DEFINITIONS.with(|defs| {
336
1
            let defs = defs.borrow();
337
1
            let names = defs.get_vec("NumbersTens");
338
1
            assert!(names.is_some());
339
1
            let names = names.unwrap();
340
1
            assert_eq!(names.len(), 10);
341
1
            assert_eq!(names[0], "");
342
1
            assert_eq!(names[9], "ninety");
343
1
        });
344
1
    }
345
346
347
    #[test]
348
1
    fn test_set() {
349
1
        let likely_function_names = r#"[LikelyFunctionNames: {"f", "g", "h", "F", "G", "H", "[A-Za-z]+"}]"#;
350
1
        let defs_build_fn = |variable_def_list: &Yaml| {
351
            // Rule::DefinitionList
352
            //debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list, 0));
353
1
            for variable_def in variable_def_list.as_vec().unwrap() {
354
1
                if let Err(
e0
) = build_values(variable_def, true, Path::new("")) {
355
0
                    bail!("{}", crate::interface::errors_to_string(&e.context(format!("in file {:?}", likely_function_names))));
356
1
                }
357
            }
358
1
            return Ok(vec![]);
359
1
        };
360
1
        compile_rule(likely_function_names, defs_build_fn).unwrap();
361
1
        SPEECH_DEFINITIONS.with(|defs| {
362
1
            let defs = defs.borrow();
363
1
            let names = defs.get_hashset("LikelyFunctionNames");
364
1
            assert!(names.is_some());
365
1
            let names = names.unwrap();
366
1
            assert_eq!(names.len(), 7);
367
1
            assert!(names.contains("f"));
368
1
            assert!(!names.contains("a"));
369
1
        });
370
1
    }
371
372
    #[test]
373
1
    fn test_hashmap() {
374
1
        let units = r#"[Units: {"A": "amp", "g": "gram", "m": "meter", "sec": "second"}]"#;
375
1
        let defs_build_fn = |variable_def_list: &Yaml| {
376
            // Rule::DefinitionList
377
            //debug!("variable_def_list {} is\n{}", yaml_to_type(variable_def_list), yaml_to_string(variable_def_list, 0));
378
1
            for variable_def in variable_def_list.as_vec().unwrap() {
379
1
                if let Err(
e0
) = build_values(variable_def, true, Path::new("")) {
380
0
                    bail!("{}", crate::interface::errors_to_string(&e.context(format!("in file {:?}", units))));
381
1
                }
382
            }
383
1
            return Ok(vec![]);
384
1
        };
385
1
        compile_rule(units, defs_build_fn).unwrap();
386
1
        SPEECH_DEFINITIONS.with(|defs| {
387
1
            let defs = defs.borrow();
388
1
            let names = defs.get_hashmap("Units");
389
1
            assert!(names.is_some());
390
1
            let names = names.unwrap();
391
1
            assert_eq!(names.len(), 4);
392
1
            assert_eq!(names.get("A").unwrap(), "amp");
393
1
            assert_eq!(names.get("sec").unwrap(), "second");
394
1
            assert_eq!(names.get("xxx"), None);
395
1
        });
396
1
    }
397
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/infer_intent.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/infer_intent.rs.html index 54264a94..55656ee0 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/infer_intent.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/infer_intent.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/infer_intent.rs
Line
Count
Source
1
//! Use heuristics to infer the intent.
2
//! For example, an `mfrac` with `linethickness=0` would be a binomial
3
//! The inference is added to the MathML
4
//!
5
//! The implementation of the module is on hold until the MathML committee figures out how it wants to do this.
6
#![allow(clippy::needless_return)]
7
8
use sxd_document::dom::{Element, Document, ChildOfElement};
9
use crate::prefs::PreferenceManager;
10
use crate::speech::SpeechRulesWithContext;
11
use crate::canonicalize::{as_element, as_text, name, create_mathml_element, set_mathml_name, INTENT_ATTR, MATHML_FROM_NAME_ATTR};
12
use crate::errors::*;
13
use std::fmt;
14
use std::sync::LazyLock;
15
use crate::pretty_print::mml_to_string;
16
use crate::xpath_functions::is_leaf;
17
use regex::Regex;
18
use phf::phf_set;
19
use log::{debug, error, warn};
20
21
const IMPLICIT_FUNCTION_NAME: &str = "apply-function";
22
23
2.47k
pub fn infer_intent<'r, 'c, 's:'c, 'm:'c>(rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
24
2.47k
    match catch_errors_building_intent(rules_with_context, mathml) {
25
2.45k
        Ok(intent) => return Ok(intent),
26
19
        Err(e) => {
27
            // lookup what we should do for error recovery
28
19
            let intent_preference = rules_with_context.get_rules().pref_manager.borrow().pref_to_string("IntentErrorRecovery");
29
19
            if intent_preference == "Error" {
30
9
                return Err(e);
31
            } else {
32
10
                let saved_intent_attr = mathml.attribute_value(INTENT_ATTR).unwrap();
33
10
                mathml.remove_attribute(INTENT_ATTR);
34
                // can't call intent_from_mathml() because we have already borrowed_mut -- we call a more internal version
35
10
                let intent_tree =  match rules_with_context.match_pattern::<Element<'m>>(mathml)
36
10
                                            .context("Pattern match/replacement failure!") {
37
0
                    Err(e) => Err(e),
38
10
                    Ok(intent) => {
39
10
                        intent.set_attribute_value(INTENT_ATTR, saved_intent_attr); //  so attr can be potentially be viewed later
40
10
                        Ok(intent)
41
                    },
42
                };
43
10
                mathml.set_attribute_value(INTENT_ATTR, saved_intent_attr);
44
10
                return intent_tree;
45
            }
46
        }
47
    }
48
49
2.47k
    fn catch_errors_building_intent<'r, 'c, 's:'c, 'm:'c>(rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
50
2.47k
        if let Some(intent_str) = mathml.attribute_value(INTENT_ATTR) {
51
            // debug!("Before intent: {}", crate::pretty_print::mml_to_string(mathml));
52
2.47k
            let mut lex_state = LexState::init(intent_str.trim())
?0
;
53
2.47k
            let mut intent_offset = 0;
54
2.47k
            let 
result2.46k
= build_intent(rules_with_context, &mut lex_state, mathml, &mut intent_offset)
55
2.47k
                        .with_context(|| 
format!14
("occurs before '{}' in intent attribute value '{}'", lex_state.remaining_str, intent_str))
?14
;
56
2.46k
            if lex_state.token != Token::None {
57
5
                bail!("Error in intent value: extra unparsed intent '{}' in intent attribute value '{}'", lex_state.remaining_str, intent_str);
58
2.45k
            }
59
2.45k
            assert!(lex_state.remaining_str.is_empty());
60
            // debug!("Resulting intent:\n{}", crate::pretty_print::mml_to_string(result));
61
2.45k
            return Ok(result);
62
0
        }
63
0
        bail!("Internal error: infer_intent() called on MathML with no intent arg:\n{}", mml_to_string(mathml));
64
2.47k
    }
65
2.47k
}
66
67
68
static FIXITIES: phf::Set<&str> = phf_set! {
69
    "function", "infix", "prefix", "postfix", "silent", "other",
70
};
71
72
/// Eliminate all but the last fixity property
73
7.63k
pub fn simplify_fixity_properties(properties: &str) -> String {
74
7.63k
    let parts: Vec<&str> = properties.split(':').collect();
75
    // debug!("simplify_fixity_properties {} parts from input: '{}'", parts.len(), properties);
76
7.63k
    let mut fixity_property = "";
77
7.63k
    let mut answer = ":".to_string();
78
19.2k
    for part in 
parts7.63k
{
79
19.2k
        if FIXITIES.contains(part) {
80
1.12k
            fixity_property = part;
81
18.1k
        } else if !part.is_empty() {
82
4.71k
            answer.push_str(part);
83
4.71k
            answer.push(':');
84
13.4k
        }
85
    }
86
7.63k
    if !fixity_property.is_empty() {
87
1.12k
        answer.push_str(fixity_property);
88
1.12k
        answer.push(':');
89
6.51k
    }
90
7.63k
    return answer;
91
7.63k
}
92
93
/// Given the intent add the fixity property for the intent if it isn't given (and one exists)
94
2.72k
fn add_fixity(intent: Element) {
95
2.72k
    let properties = intent.attribute_value(INTENT_PROPERTY).unwrap_or_default();
96
7.47k
    if 
properties.split(":")2.72k
.
all2.72k
(|property| !FIXITIES.contains(property)) {
97
2.63k
        let intent_name = name(intent);
98
2.63k
        crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
99
2.63k
            let definitions = definitions.borrow();
100
2.63k
            if let Some(
definition12
) = definitions.get_hashmap("IntentMappings").unwrap().get(intent_name) &&
101
12
                let Some((fixity, _)) = definition.split_once("=") {
102
12
                    let new_properties = (if properties.is_empty() {":"} else {
properties0
}).to_string() + fixity + ":";
103
12
                    intent.set_attribute_value(INTENT_PROPERTY, &new_properties);
104
                    // debug!("Added fixity: new value '{}'", intent.attribute_value(INTENT_PROPERTY).unwrap());
105
2.62k
                };
106
2.63k
        });
107
90
    }
108
2.72k
}
109
110
111
/// Given some MathML, expand out any intents taking into account their fixity property
112
/// This is recursive
113
363
pub fn add_fixity_children(intent: Element) -> Element {
114
363
    let children = intent.children();
115
363
    if children.is_empty() || (children.len() == 1 && children[0].element().is_none()) {
116
0
        return intent;
117
363
    }
118
119
363
    for child in children {
120
363
        let child = as_element(child);
121
363
        if child.attribute_value(INTENT_ATTR).is_some() {
122
0
            add_fixity_child(child);
123
363
        }
124
    }
125
363
    return intent;
126
127
0
    fn add_fixity_child(mathml: Element) -> Element {        
128
0
        let mut children = mathml.children();
129
0
        if children.is_empty() {
130
0
            return mathml;
131
0
        }
132
        // we also exclude fixity on mtable because they mess up the counts (see 'en::mtable::unknown_mtable_property')
133
0
        if mathml.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or_default() == "mtable" {
134
0
            return mathml;
135
0
        }
136
0
        let doc = mathml.document();
137
0
        let properties = mathml.attribute_value(INTENT_PROPERTY).unwrap_or_default();
138
0
        let fixity = properties.rsplit(':').find(|&property| FIXITIES.contains(property)).unwrap_or_default();
139
0
        let intent_name = name(mathml);
140
    
141
0
        let op_name_id = mathml.attribute_value("id").unwrap_or("new-id");
142
0
        match fixity {
143
0
            "infix" => {
144
0
                let mut new_children = Vec::with_capacity(2*children.len()-1);
145
0
                new_children.push(children[0]);
146
0
                for (i, &child) in children.iter().enumerate().skip(1) {
147
0
                    new_children.push(create_operator_element(intent_name, fixity, op_name_id, i, &doc));
148
0
                    new_children.push(child);
149
0
                }
150
0
                mathml.replace_children(new_children);
151
            },
152
0
            "prefix" => { 
153
0
                children.insert(0, create_operator_element(intent_name, fixity, op_name_id, 1, &doc));                       
154
0
                mathml.replace_children(children);
155
0
            },
156
0
            "postfix" => { 
157
0
                children.push( create_operator_element(intent_name, fixity, op_name_id, 1, &doc));                       
158
0
                mathml.replace_children(children);
159
0
            },
160
0
            "silent" => {
161
0
                // children remain the same -- nothing to do
162
0
            },
163
0
            "other" => {
164
0
                // a special case -- will be handled with specific rules (e.g., intervals need to add "from" and "to", not a single word)
165
0
            },
166
            _ => {  // "function" is the default
167
                // build a function like notation function-name U+2061 <mrow> children </mrow>
168
0
                let mut new_children = Vec::with_capacity(3);
169
0
                let function_name = create_operator_element(intent_name, "function", op_name_id, 1, &doc);
170
0
                new_children.push(function_name);
171
0
                let invisible_apply_function = create_operator_element("mo", "infix", op_name_id, 2, &doc);
172
0
                invisible_apply_function.element().unwrap().set_text("\u{2061}");
173
0
                new_children.push(invisible_apply_function);
174
0
                let mrow_wrapper = create_mathml_element(&doc, "mrow");
175
0
                mrow_wrapper.set_attribute_value("id", (op_name_id.to_string() + "3").as_str());
176
0
                mrow_wrapper.append_children(children);
177
0
                new_children.push(ChildOfElement::Element(mrow_wrapper));
178
0
                mathml.replace_children(new_children);
179
0
                if fixity.is_empty() {
180
0
                    mathml.set_attribute_value(INTENT_PROPERTY, ":function:");
181
0
                }
182
            },
183
        }
184
0
        return mathml;
185
    
186
0
        fn create_operator_element<'a>(intent_name: &str, fixity: &str, id: &str, id_inc: usize, doc: &Document<'a>) -> ChildOfElement<'a> {
187
0
            let intent_name = intent_speech_for_name(intent_name, &PreferenceManager::get().borrow().pref_to_string("NavMode"), fixity);
188
0
            let element = create_mathml_element(doc, &intent_name);
189
0
            element.set_attribute_value("id", &format!("{id}-fixity-{id_inc}"));
190
0
            element.set_attribute_value(MATHML_FROM_NAME_ATTR, "mo");
191
0
            return ChildOfElement::Element(element);
192
0
        }
193
0
    }
194
363
}
195
196
340
pub fn intent_speech_for_name(intent_name: &str, verbosity: &str, fixity: &str) -> String {
197
340
    crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
198
340
        let definitions = definitions.borrow();
199
340
        if let Some(
intent_name_pattern294
) = definitions.get_hashmap("IntentMappings").unwrap().get(intent_name) {
200
            // Split the pattern is:
201
            //   fixity-def [|| fixity-def]*
202
            //   fixity-def := fixity=[open;] verbosity[; close]
203
            //   verbosity := terse | medium | verbose
204
396
            if let Some(
matched_intent294
) =
intent_name_pattern.split("||")294
.
find294
(|&entry| entry.trim().starts_with(fixity)) {
205
294
                let (_, matched_intent) = matched_intent.split_once("=").unwrap_or_default();
206
294
                let parts = matched_intent.trim().split(";").collect::<Vec<&str>>();
207
294
                let mut operator_names = (if parts.len() > 1 {
parts[1]129
} else {
parts[0]165
}).split(":").collect::<Vec<&str>>();
208
294
                match operator_names.len() {
209
236
                    1 => return operator_names[0].trim().to_string(),
210
                    2 | 3 => {
211
58
                        if operator_names.len() == 2 {
212
0
                            warn!("Intent '{intent_name}' has only two operator names, but should have three");
213
0
                            operator_names.push(operator_names[1]);
214
58
                        }
215
58
                        let intent_word = match verbosity {
216
58
                            "Terse" => 
operator_names[0]2
,
217
56
                            "Medium" => 
operator_names[1]54
,
218
2
                            _ => operator_names[2],
219
                        };
220
58
                        return intent_word.trim().to_string();
221
                    },
222
                    _ => {
223
0
                        error!("Intent '{}' has too many ({}) operator names, should only have 2", intent_name, operator_names.len());
224
0
                        return intent_name.to_string();
225
                    },
226
                }
227
0
            }
228
46
        };
229
46
        return intent_name.replace(['_', '-'], " ").trim().to_string();
230
340
    })
231
340
}
232
233
234
235
// intent             := self-property-list | expression
236
// self-property-list := property+ S    
237
// expression         := S ( term property* | application ) S 
238
// term               := concept-or-literal | number | reference 
239
// concept-or-literal := NCName
240
// number             := '-'? \d+ ( '.' \d+ )?
241
// reference          := '$' NCName
242
// application        := expression '(' arguments? S ')'
243
// arguments          := expression ( ',' expression )*
244
// property           := S ':' NCName
245
// S                  := [ \t\n\r]*
246
247
// The practical restrictions of NCName are that it cannot contain several symbol characters like
248
//  !, ", #, $, %, &, ', (, ), *, +, ,, /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~, and whitespace characters
249
//  Furthermore an NCName cannot begin with a number, dot or minus character although they can appear later in an NCName.
250
// NC_NAME defined in www.w3.org/TR/REC-xml/#sec-common-syn, but is complicated
251
//   We follow NC_NAME for the basic latin block, but then allow everything
252
2
static CONCEPT_OR_LITERAL: LazyLock<Regex> = LazyLock::new(|| {
253
2
    Regex::new(r#"^[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#     // NC_NAME but simpler
254
2
    ).unwrap()
255
2
});
256
2
static PROPERTY: LazyLock<Regex> = LazyLock::new(|| {
257
2
    Regex::new(r#"^:[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#    // : NC_NAME
258
2
    ).unwrap()
259
2
});
260
2
static ARG_REF: LazyLock<Regex> = LazyLock::new(|| {
261
2
    Regex::new(r#"^\$[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#   // $ NC_NAME
262
2
    ).unwrap()
263
2
});
264
2
static NUMBER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^-?[0-9]+(\.[0-9]+)?"#).unwrap());
265
266
static TERMINALS_AS_U8: [u8; 3] = [b'(', b',', b')'];
267
// static TERMINALS: [char; 3] = ['(', ',',')'];
268
269
// 'i -- "i" for the lifetime of the INTENT_ATTR string
270
#[derive(Debug, PartialEq, Eq, Clone)]
271
enum Token<'i> {
272
    Terminal(&'i str),  // "(", ",", ")"
273
    Property(&'i str),
274
    ArgRef(&'i str),
275
    ConceptOrLiteral(&'i str),
276
    Number(&'i str),
277
    None,               // out of characters
278
}
279
280
impl fmt::Display for Token<'_> {
281
3
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
282
3
        return write!(f, "{}",
283
3
            match self {
284
3
                Token::Terminal(str) => format!("Terminal('{str}')"),
285
0
                Token::Property(str) => format!("Property({str})"),
286
0
                Token::ArgRef(str) => format!("ArgRef({str})"),
287
0
                Token::ConceptOrLiteral(str) => format!("Literal({str})"),
288
0
                Token::Number(str) => format!("Number({str})"),
289
0
                Token::None => "None".to_string(),
290
            }
291
        );
292
3
    }
293
}
294
295
impl Token<'_> {
296
3.64k
    fn is_terminal(&self, terminal: &str) -> bool {
297
3.64k
        if let Token::Terminal(
value1.02k
) = *self {
298
1.02k
            return value == terminal;
299
        } else {
300
2.61k
            return false;
301
        }
302
3.64k
    }
303
304
5.21k
    fn as_str(&self) -> &str {
305
5.21k
        return match self {
306
0
            Token::Terminal(str) => str,
307
4.79k
            Token::Property(str) => str,
308
226
            Token::ArgRef(str) => str,
309
161
            Token::ConceptOrLiteral(str) => str,
310
29
            Token::Number(str) => str,
311
0
            Token::None => "",
312
        }
313
5.21k
    }
314
}
315
316
struct LexState<'i> {
317
    token: Token<'i>,
318
    remaining_str: &'i str,     // always trimmed
319
}
320
321
impl fmt::Display for LexState<'_> {
322
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
323
0
        return writeln!(f, "token: {}, remaining: '{}'", self.token, self.remaining_str);
324
0
    }
325
}
326
327
impl<'i> LexState<'i> {
328
2.50k
    fn init(str: &'i str) -> Result<LexState<'i>> {
329
2.50k
        let mut lex_state = LexState {  token: Token::None, remaining_str: str.trim() };
330
2.50k
        lex_state.get_next()
?0
;
331
2.50k
        return Ok(lex_state);
332
2.50k
    }
333
334
    // helper function for LexState -- do not call outside of the impl
335
2.82k
    fn set_token(&mut self, str: &'i str) -> Result<()> {
336
        // Note: 'str' is already trimmed
337
2.82k
        if str.is_empty() {
338
0
            self.token = Token::None;
339
2.82k
        } else if TERMINALS_AS_U8.contains(&str.as_bytes()[0]) {
340
0
            self.token = Token::Terminal(str);
341
2.82k
        } else if let Some(
matched_property2.40k
) = PROPERTY.find(str) {
342
2.40k
            self.token = Token::Property(matched_property.as_str());
343
2.40k
        } else if let Some(
matched_arg_ref226
) =
ARG_REF416
.find(str) {
344
226
            self.token = Token::ArgRef(matched_arg_ref.as_str());
345
226
        } else if  let Some(
matched_literal161
) =
CONCEPT_OR_LITERAL190
.find(str) {
346
161
            self.token = Token::ConceptOrLiteral(matched_literal.as_str());
347
161
        } else if  let Some(
matched_number29
) =
NUMBER29
.find(str) {
348
29
            self.token = Token::Number(matched_number.as_str());
349
29
        } else {
350
0
            bail!("Illegal 'intent' syntax: {}", str);
351
        }
352
2.82k
        return Ok( () );
353
2.82k
    }
354
355
5.69k
    fn get_next(&mut self) -> Result<&Token<'_>> {
356
5.69k
        if self.remaining_str.is_empty() {
357
2.48k
            self.token = Token::None;
358
3.21k
        } else if TERMINALS_AS_U8.contains(&self.remaining_str.as_bytes()[0]) {
359
391
            self.token = Token::Terminal(&self.remaining_str[..1]);
360
391
            self.remaining_str = self.remaining_str[1..].trim_start();
361
391
        } else {
362
2.82k
            self.set_token(self.remaining_str)
?0
;
363
2.82k
            self.remaining_str = self.remaining_str[self.token.as_str().len()..].trim_start();
364
}    
365
5.69k
        return Ok(&self.token);
366
5.69k
    }
367
368
3.64k
    fn is_terminal(&self, terminal: &str) -> bool {
369
3.64k
        return self.token.is_terminal(terminal);
370
3.64k
    }
371
}
372
373
2.74k
fn build_intent<'b, 'r, 'c, 's:'c, 'm:'c>(rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
374
2.74k
                                         lex_state: &mut LexState<'b>,
375
2.74k
                                         mathml: Element<'c>,
376
2.74k
                                         intent_offset: &mut u32) -> Result<Element<'m>> {
377
    // intent             := self-property-list | expression
378
    // self-property-list := property+ S    
379
    // expression         := S ( term property* | application ) S 
380
    // term               := concept-or-literal | number | reference 
381
    // concept-or-literal := NCName
382
    // number             := '-'? \d+ ( '.' \d+ )?
383
    // reference          := '$' NCName
384
    // application        := expression '(' arguments? S ')'
385
    //
386
    // When we flatten intent we have this implementation looking for Tokens or '(' [for application]
387
    // Essentially, the grammar we deal with here is:
388
    // intent := property+ | (concept-or-literal | number | reference) property* '('?
389
    // debug!("  start build_intent: state: {}", lex_state);
390
2.74k
    let doc = rules_with_context.get_document();
391
    let mut intent;
392
2.74k
    debug!("    build_intent: start mathml name={}, intent_offset={}", 
name0
(
mathml0
), intent_offset);
393
2.74k
    match lex_state.token {
394
        Token::Property(_) => {
395
            // We only have a property -- we want to keep this tag/element
396
            // There are two paths:
397
            // 1. If there is a function call, then the children are dealt with there
398
            // 2. If there is *no* function call, then the children are kept, which means we return to pattern matching
399
            //    Note: to avoid infinite loop, we need to remove the 'intent' so we don't end up back here; we put it back later
400
2.33k
            let properties = get_properties(lex_state)
?0
; // advance state to see if funcall
401
2.33k
            if lex_state.is_terminal("(") {
402
2
                intent = create_mathml_element(&doc, name(mathml));
403
2
                intent.set_attribute_value(INTENT_PROPERTY, &properties);
404
2
                intent.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
405
2
                intent.set_attribute_value("id", mathml.attribute_value("id")
406
2
                      .ok_or_else(|| 
anyhow!0
("no id on intent function name"))
?0
);
407
            } else {
408
2.32k
                let saved_intent = mathml.attribute_value(INTENT_ATTR).unwrap();
409
2.32k
                mathml.remove_attribute(INTENT_ATTR);
410
2.32k
                mathml.set_attribute_value(INTENT_PROPERTY, &properties);   // needs to be set before the pattern match
411
2.32k
                intent = rules_with_context.match_pattern::<Element<'m>>(mathml)
?0
;
412
                // debug!("Intent after pattern match:\n{}", mml_to_string(intent));
413
2.32k
                mathml.set_attribute_value(INTENT_ATTR, saved_intent);
414
            }
415
2.33k
            add_fixity(intent);
416
2.33k
            return Ok(intent);      // if we start with properties, then there can only be properties
417
        },
418
161
        Token::ConceptOrLiteral(word) | Token::Number(
word28
) => {
419
189
            let leaf_name = if let Token::Number(_) = lex_state.token {
"mn"28
} else {
"mi"161
};
420
189
            intent = create_mathml_element(&doc, leaf_name);
421
            // if the str is part of a larger intent and not the head (e.g., "a" in "f($x, a)", but not the "f" in it), then it is "made up"
422
            // debug!("    Token::ConceptOrLiteral, word={}, leaf_name={}", word, leaf_name);
423
189
            intent.set_attribute_value(MATHML_FROM_NAME_ATTR, 
424
189
                if word == mathml.attribute_value(INTENT_ATTR).unwrap_or_default() {
name(mathml)30
} else {
leaf_name159
});
425
189
            intent.set_text(word);       // '-' and '_' get removed by the rules.
426
189
            if let Some(
id136
) = mathml.attribute_value("id") {
427
136
                intent.set_attribute_value("id", &format!("{}-literal-{}", id, intent_offset));
428
136
                *intent_offset += 1;
429
136
            
}53
430
189
            lex_state.get_next()
?0
;
431
189
            if let Token::Property(_) = lex_state.token {
432
60
                let properties = get_properties(lex_state)
?0
;
433
60
                intent.set_attribute_value(INTENT_PROPERTY, &properties);
434
129
            }
435
        },
436
223
        Token::ArgRef(word) => {
437
223
            intent = match find_arg(rules_with_context, &word[1..], mathml, intent_offset, true, false)
?1
{
438
221
                Some(e) => {
439
221
                    lex_state.get_next()
?0
;
440
221
                    e
441
                },
442
1
                None => bail!("intent arg '{}' not found", word),
443
            };
444
221
            if let Token::Property(_) = lex_state.token {
445
3
                let properties = get_properties(lex_state)
?0
;
446
3
                intent.set_attribute_value(INTENT_PROPERTY, &properties);
447
218
            }
448
        },
449
3
        _ => bail!("Illegal 'intent' syntax: found {}", lex_state.token),
450
    };
451
410
    if lex_state.is_terminal("(") {
452
136
        intent = build_function(intent, rules_with_context, lex_state, mathml, intent_offset)
?15
;
453
274
    }
454
    // debug!("    end build_intent: state: {}     piece: {}", lex_state, mml_to_string(intent));
455
395
    add_fixity(intent);
456
395
    return Ok(intent);
457
2.74k
}
458
459
pub const INTENT_PROPERTY: &str = "data-intent-property";
460
461
/// Get all the properties, stopping we don't have any more
462
/// Returns the string of the properties terminated with an additional ":"
463
2.39k
fn get_properties(lex_state: &mut LexState) -> Result<String> {
464
    // return the 'hint' leaving the state
465
2.39k
    assert!(matches!(lex_state.token, Token::Property(str) if str.starts_with(':')));
466
2.39k
    let mut properties = String::with_capacity(60);
467
2.39k
    properties.push_str(lex_state.token.as_str());
468
    loop {
469
2.40k
        let token = lex_state.get_next()
?0
;
470
2.40k
        if let Token::Property(
property11
) = token {
471
11
            properties.push_str(property);
472
11
        } else {
473
2.39k
            properties.push(':');
474
            // debug!("      get_properties: returns {}", properties);
475
2.39k
            return Ok(simplify_fixity_properties(&properties));
476
        }
477
    }
478
2.39k
}
479
480
/// Build a function 'f(...)' where '...' can be empty
481
///
482
/// Also handles nested functions like f(...)(...)
483
/// 
484
/// Start state: at '('
485
/// 
486
/// End state: after ')'
487
136
fn build_function<'b, 'r, 'c, 's:'c, 'm:'c>(
488
136
            function_name: Element<'m>,
489
136
            rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
490
136
            lex_state: &mut LexState<'b>,
491
136
            mathml: Element<'c>,
492
136
            intent_offset: &mut u32) -> Result<Element<'m>> {
493
    // debug!("  start build_function: name: {}, state: {}", name(function_name), lex_state);
494
    // application := intent '(' arguments? S ')'  where 'function_name' is 'intent'
495
136
    assert!(lex_state.is_terminal("("));
496
136
    let mut function = function_name;
497
136
    function.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
498
260
    while lex_state.is_terminal("(") {
499
139
        lex_state.get_next()
?0
;
500
139
        if lex_state.is_terminal(")") {
501
            // grammar requires at least one argument
502
9
            bail!("Illegal 'intent' syntax: missing argument for intent name '{}'", name(function_name));
503
130
        }
504
130
        let 
children125
= build_arguments(rules_with_context, lex_state, mathml, intent_offset)
?5
;
505
125
        function = lift_function_name(rules_with_context.get_document(), function, children);
506
507
125
        if !lex_state.is_terminal(")") {
508
1
            bail!("Illegal 'intent' syntax: missing ')' for intent name '{}'", name(function_name));
509
124
        }
510
124
        lex_state.get_next()
?0
;
511
    }
512
513
    // debug!("  end build_function/# children: {}, #state: {}  ..[bfa] function name: {}",
514
        // function.children().len(), lex_state, mml_to_string(function));
515
121
    return Ok(function);
516
136
}
517
518
// process all the args of a function
519
// Start state: after '('
520
// End state: on ')'
521
130
fn build_arguments<'b, 'r, 'c, 's:'c, 'm:'c>(
522
130
            rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
523
130
            lex_state: &mut LexState<'b>,
524
130
            mathml: Element<'c>,
525
130
            intent_offset: &mut u32) -> Result<Vec<Element<'m>>> {
526
    // arguments := intent ( ',' intent )*' 
527
    // debug!("    start build_args state: {}", lex_state);
528
529
    // there is at least one arg
530
130
    let mut children = Vec::with_capacity(lex_state.remaining_str.len()/3 + 1);   // conservative estimate ('3' - "$x,");
531
130
    children.
push127
( build_intent(rules_with_context, lex_state, mathml, intent_offset)
?3
); // arg before ','
532
    // debug!("  build_args: # children {};  state: {}", children.len(), lex_state);
533
534
239
    while lex_state.is_terminal(",") {
535
114
        lex_state.get_next()
?0
;
536
114
        children.
push112
( build_intent(rules_with_context, lex_state, mathml, intent_offset)
?2
); // arg before ','
537
        // debug!("    build_args, # children {};  state: {}", children.len(), lex_state);
538
    }
539
540
    // debug!("    end build_args, # children {};  state: {}", children.len(), lex_state);
541
125
    return Ok(children);
542
130
}
543
544
/// lift the children up to LITERAL_NAME
545
125
fn lift_function_name<'m>(doc: Document<'m>, function_name: Element<'m>, children: Vec<Element<'m>>) -> Element<'m> {
546
    // debug!("    lift_function_name: {}", name(function_name));
547
    // debug!("    lift_function_name: {}", mml_to_string(function_name));
548
125
    if name(function_name) == "mi" || 
name(function_name) == "mn"4
{ // FIX -- really want to test for all leaves, but not "data-from-mathml"
549
        // simple/normal case of f(x,y)
550
        // don't want to say that this is a leaf -- doing so messes up because it potentially has children
551
121
        set_mathml_name(function_name, as_text(function_name));
552
121
        function_name.set_text("");
553
121
        function_name.replace_children(children);
554
129
        if 
name(function_name)121
.
find121
(|ch| ch!='_' &&
ch!='-'108
).
is_none121
() {
555
14
            let properties = function_name.attribute_value(INTENT_PROPERTY).unwrap_or(":").to_owned();
556
14
            function_name.set_attribute_value(INTENT_PROPERTY, &(properties + "silent:"));
557
107
        }
558
121
        return function_name;
559
4
    } else if function_name.children().is_empty() {
560
        // "...  :property(...)" -- no function name
561
0
        function_name.replace_children(children);
562
0
        return function_name;
563
    } else {
564
        // more complicated case of nested name: f(x)(y,z)
565
        // create an apply_function(f(x), y, z)
566
4
        let result = create_mathml_element(&doc, IMPLICIT_FUNCTION_NAME);
567
4
        result.set_attribute_value(MATHML_FROM_NAME_ATTR, "mrow");
568
4
        result.append_child(function_name);
569
4
        result.append_children(children);
570
4
        return result;
571
    }
572
125
}
573
574
575
/// look for @arg=name in mathml
576
/// if 'check_intent', then look at an @intent for this element (typically false for non-recursive calls)
577
946
fn find_arg<'r, 'c, 's:'c, 'm:'c>(
578
946
    rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
579
946
    name: &str,
580
946
    mathml: Element<'c>,
581
946
    intent_offset: &mut u32,
582
946
    skip_self: bool,
583
946
    no_check_inside: bool) -> Result<Option<Element<'m>>> {
584
    // debug!("Looking for '{}' in\n{}", name, mml_to_string(mathml));
585
946
    if !skip_self &&
586
723
        let Some(
arg_val411
) = mathml.attribute_value("arg") {
587
            // debug!("looking for '{}', found arg='{}'", name, arg_val);
588
411
            if name == arg_val {
589
                // check to see if this mathml has an intent value -- if so the value is the value of its intent value
590
222
                if let Some(
intent_str28
) = mathml.attribute_value(INTENT_ATTR) {
591
28
                    let mut lex_state = LexState::init(intent_str.trim())
?0
;
592
28
                    return Ok( Some( build_intent(rules_with_context, &mut lex_state, mathml, intent_offset)
?1
) );
593
                } else {
594
194
                    return Ok( Some( rules_with_context.match_pattern::<Element<'m>>(mathml)
?0
) );
595
                }
596
189
            } else if no_check_inside {
597
189
                return Ok(None);       // don't look inside 'arg'
598
0
            }
599
535
        }
600
601
535
    if no_check_inside && 
mathml.attribute_value(INTENT_ATTR)312
.
is_some312
() {
602
2
        return Ok(None);           // don't look inside 'intent'
603
533
    }
604
605
533
    if is_leaf(mathml){
606
121
        return Ok(None);
607
412
    }
608
609
723
    for child in 
mathml412
.
children412
() {
610
723
        let child = as_element(child);
611
723
        if let Some(
element396
) = find_arg(rules_with_context, name, child, intent_offset, false, true)
?1
{
612
396
            return Ok( Some(element) );
613
326
        }
614
    }
615
616
15
    return Ok(None);               // not present
617
946
}
618
619
#[cfg(test)]
620
mod tests {
621
    #[allow(unused_imports)]
622
    use crate::init_logger;
623
    use log::debug;
624
    use sxd_document::parser;
625
626
627
27
    fn test_intent(mathml: &str, target: &str, intent_error_recovery: &str) -> bool {
628
    use crate::interface::*;
629
        use crate::pretty_print::mml_to_string;
630
    // this forces initialization
631
27
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
632
        // crate::speech::SpeechRules::initialize_all_rules().unwrap();
633
27
        set_preference("IntentErrorRecovery", intent_error_recovery).unwrap();
634
27
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();      // avoids possibility of "LiteralSpeak"
635
27
        let package1 = &parser::parse(mathml).expect("Failed to parse test input");
636
27
        let mathml = get_element(package1);
637
27
        trim_element(mathml, false);
638
27
        debug!("test:\n{}", 
mml_to_string0
(
mathml0
));
639
        
640
27
        let package2 = &parser::parse(target).expect("Failed to parse target input");
641
27
        let target = get_element(package2);
642
27
        trim_element(target,true);
643
27
        debug!("target:\n{}", 
mml_to_string0
(
target0
));
644
645
27
        let 
result18
= match crate::speech::intent_from_mathml(mathml, package2.as_document()) {
646
18
            Ok(e) => e,
647
9
            Err(e) => {
648
9
                debug!("{}", 
crate::interface::errors_to_string0
(
&e0
));
649
9
                return false;       // could be intentional failure
650
            }
651
        };
652
18
        debug!("result:\n{}", 
mml_to_string0
(
result0
));
653
18
        match is_same_element(result, target, &[]) {
654
18
      Ok(_) => return true,
655
0
      Err(e) => panic!("{}:\nresult: {}target: {}", e, mml_to_string(result), mml_to_string(target)),
656
    }
657
27
    }
658
659
    #[test]
660
1
    fn infer_binomial() {
661
1
        let mathml = "<mrow intent='binomial($n, $m)'>
662
1
                <mo>(</mo>
663
1
                <mfrac linethickness='0'> <mn arg='n'>7</mn> <mn arg='m'>3</mn> </mfrac>
664
1
                <mo>)</mo>
665
1
            </mrow>";
666
1
        let intent = "<binomial data-from-mathml='mrow' data-intent-property=':infix:'> <mn data-from-mathml='mn' arg='n'>7</mn> <mn data-from-mathml='mn' arg='m'>3</mn>  </binomial>";
667
1
        assert!(test_intent(mathml, intent, "Error"));
668
1
    }
669
670
    #[test]
671
1
    fn infer_binomial_intent_arg() {
672
1
        let mathml = "<msubsup intent='$op($n,$m)'>
673
1
                <mi arg='op' intent='binomial'>C</mi>
674
1
                <mi arg='n'>n</mi>
675
1
                <mi arg='m'>m</mi>
676
1
            </msubsup>";
677
1
        let intent = "<binomial data-from-mathml='msubsup' data-intent-property=':infix:'> <mi data-from-mathml='mi' arg='n'>n</mi> <mi data-from-mathml='mi' arg='m'>m</mi></binomial>";
678
1
        assert!(test_intent(mathml, intent, "Error"));
679
1
    }
680
681
    #[test]
682
1
    fn silent_underscore() {
683
1
        let mathml = "<mrow><mi intent='__-'>silent</mi><mo>+</mo><mi>e</mi></mrow>";
684
1
        let intent = "<mrow data-from-mathml='mrow'>
685
1
                                <mi data-from-mathml='mi'>__-</mi>
686
1
                                <mo data-from-mathml='mo'>+</mo>
687
1
                                <mi data-from-mathml='mi'>e</mi>
688
1
                            </mrow>";
689
1
        assert!(test_intent(mathml, intent, "Error"));
690
1
    }
691
692
693
    #[test]
694
1
    fn silent_underscore_function() {
695
1
        let mathml = "<mrow intent='__-_(speak, this)'></mrow>";
696
1
        let intent = "<__-_ data-from-mathml='mrow' data-intent-property=':silent:'>
697
1
                                <mi data-from-mathml='mi'>speak</mi>
698
1
                                <mi data-from-mathml='mi'>this</mi>
699
1
                            </__-_>";
700
1
        assert!(test_intent(mathml, intent, "Error"));
701
1
    }
702
703
    #[test]
704
1
    fn intent_multiple_properties() {
705
1
        let mathml = "<mrow intent='foo:silent:int(bar:positive-int:int, $a:foo:bar:foo-bar, $b:number)'>
706
1
                <mi arg='a'>a</mi>
707
1
                <mo arg='p' intent='plus'>+</mo>
708
1
                <mi arg='b' intent=':negative-int:int'>b</mi>
709
1
            </mrow>";
710
1
        let intent = "<foo data-intent-property=':int:silent:' data-from-mathml='mrow'>
711
1
                                <mi data-from-mathml='mi' data-intent-property=':positive-int:int:'>bar</mi>
712
1
                                <mi data-from-mathml='mi' arg='a' data-intent-property=':foo:bar:foo-bar:'>a</mi>
713
1
                                <mi data-from-mathml='mi' arg='b' data-intent-property=':number:'>b</mi>
714
1
                            </foo>";
715
1
        assert!(test_intent(mathml, intent, "Error"));
716
1
    }
717
    #[test]
718
1
    fn intent_nest_no_arg_call() {
719
1
        let mathml = "<mrow intent='foo(bar())'>
720
1
                <mi arg='a'>a</mi>
721
1
                <mo arg='p' intent='plus'>+</mo>
722
1
                <mi arg='b'>b</mi>
723
1
                <mo arg='f' intent='factorial'>!</mo>
724
1
            </mrow>";
725
1
        let intent = "<foo><bar></bar></foo>";
726
1
        assert!(!test_intent(mathml, intent, "Error"));
727
1
    }
728
729
    #[test]
730
1
    fn intent_hints() {
731
1
        let mathml = "<mrow intent='foo:silent(bar:postfix(3))'>
732
1
                <mi arg='a'>a</mi>
733
1
                <mo arg='p' intent='plus'>+</mo>
734
1
                <mi arg='b'>b</mi>
735
1
                <mo arg='f' intent='factorial'>!</mo>
736
1
            </mrow>";
737
1
        let intent = "<foo data-intent-property=':silent:' data-from-mathml='mrow'>
738
1
                                <bar data-intent-property=':postfix:' data-from-mathml='mrow'>
739
1
                                    <mn data-from-mathml='mn'>3</mn>
740
1
                                </bar>
741
1
                            </foo>";
742
1
        assert!(test_intent(mathml, intent, "Error"));
743
1
    }
744
    
745
    #[test]
746
1
    fn intent_hints_and_type() {
747
1
        let mathml = "<mrow intent='foo:is-foolish:function($b)'>
748
1
                <mi arg='a'>a</mi>
749
1
                <mo arg='p' intent='plus'>+</mo>
750
1
                <mi intent='b:int' arg='b'>b</mi>
751
1
                <mo arg='f' intent='factorial'>!</mo>
752
1
            </mrow>";
753
1
        let intent = "<foo data-intent-property=':is-foolish:function:' data-from-mathml='mrow'>
754
1
                                <mi data-intent-property=':int:' data-from-mathml='mi'>b</mi>
755
1
                            </foo>";
756
1
        assert!(test_intent(mathml, intent, "Error"));
757
1
    }
758
759
    #[test]
760
1
    fn intent_in_intent_first_arg() {
761
1
        let mathml = "<mrow intent='p(f(b), a)'>
762
1
                <mi arg='a'>a</mi>
763
1
                <mo arg='p' intent='plus'>+</mo>
764
1
                <mi arg='b'>b</mi>
765
1
                <mo arg='f' intent='factorial'>!</mo>
766
1
            </mrow>";
767
1
        let intent = "<p data-from-mathml='mrow'>
768
1
                                <f data-from-mathml='mrow'>
769
1
                                    <mi data-from-mathml='mi'>b</mi>
770
1
                                </f>
771
1
                                <mi data-from-mathml='mi'>a</mi>
772
1
                            </p>";
773
1
        assert!(test_intent(mathml, intent, "Error"));
774
1
    }
775
776
    #[test]
777
1
    fn intent_in_intent_second_arg() {
778
1
        let mathml = "<mrow intent='$p(a,$f(b))'>
779
1
                <mi arg='a'>a</mi>
780
1
                <mo arg='p' intent='plus'>+</mo>
781
1
                <mi arg='b'>b</mi>
782
1
                <mo arg='f' intent='factorial'>!</mo>
783
1
            </mrow>";
784
1
        let intent = "<plus data-from-mathml='mrow' data-intent-property=':infix:'>
785
1
                                <mi data-from-mathml='mi'>a</mi>
786
1
                                <factorial data-from-mathml='mrow'>
787
1
                                    <mi data-from-mathml='mi'>b</mi>
788
1
                                </factorial>
789
1
                            </plus>";
790
1
        assert!(test_intent(mathml, intent, "Error"));
791
1
    }
792
793
    #[test]
794
1
    fn intent_with_whitespace() {
795
1
        let mathml = "<mrow intent='  $arrow    ( $a ,  $b,$c )  '>
796
1
                <mi arg='a'>A</mi>
797
1
                <mover>
798
1
                    <mo movablelimits='false' arg='arrow' intent='map'>⟶</mo>
799
1
                    <mo arg='U2245' intent='congruence'>≅</mo>
800
1
                </mover>
801
1
                <mi arg='b'>B</mi>
802
1
                <mi arg='c'>C</mi>
803
1
            </mrow>";
804
1
        let intent = "<map data-from-mathml='mrow'> <mi data-from-mathml='mi' arg='a'>A</mi> <mi data-from-mathml='mi' arg='b'>B</mi> <mi data-from-mathml='mi' arg='c'>C</mi> </map>";
805
1
        assert!(test_intent(mathml, intent, "Error"));
806
1
    }
807
808
    #[test]
809
1
    fn intent_template_at_toplevel() {
810
1
        let mathml = "<msup intent='$H $n'>
811
1
            <mi arg='H' mathvariant='normal'>H</mi>
812
1
            <mn arg='n'>2</mn>
813
1
            </msup>";
814
1
        let intent = "<mrow><mi arg='H' mathvariant='normal'>H</mi><mn arg='n'>2</mn></mrow>";
815
1
        assert!(!test_intent(mathml, intent, "Error"));
816
1
    }
817
818
    #[test]
819
1
    fn intent_with_nested_indirect_head() {
820
1
        let mathml = "<mrow intent='$op($a,$b)'>
821
1
                <mi arg='a'>A</mi>
822
1
                <mover arg='op' intent='$ra($cong)'>
823
1
                    <mo movablelimits='false' arg='ra' intent='map'>⟶</mo>
824
1
                    <mo arg='cong' intent='congruence'>≅</mo>
825
1
                </mover>
826
1
                <mi arg='b'>B</mi>
827
1
            </mrow>";
828
1
        let intent = "<apply-function data-from-mathml='mrow'>
829
1
                                <map data-from-mathml='mrow'>
830
1
                                    <mi data-from-mathml='mo'>congruence</mi>
831
1
                                </map>
832
1
                                <mi data-from-mathml='mi' arg='a'>A</mi>
833
1
                                <mi data-from-mathml='mi' arg='b'>B</mi>
834
1
                            </apply-function>";
835
1
        assert!(test_intent(mathml, intent, "Error"));
836
1
    }
837
838
    #[test]
839
1
    fn intent_with_literals() {
840
1
        let mathml = "<mrow intent='vector(1, 0.0, 0.1, -23, -0.1234, last)'>
841
1
                <mi>x</mi>
842
1
            </mrow>";
843
1
        let intent = "<vector data-from-mathml='mrow' data-intent-property=':function:'>
844
1
                                <mn data-from-mathml='mn'>1</mn>
845
1
                                <mn data-from-mathml='mn'>0.0</mn>
846
1
                                <mn data-from-mathml='mn'>0.1</mn>
847
1
                                <mn data-from-mathml='mn'>-23</mn>
848
1
                                <mn data-from-mathml='mn'>-0.1234</mn>
849
1
                                <mi data-from-mathml='mi'>last</mi>
850
1
                            </vector>";
851
1
        assert!(test_intent(mathml, intent, "Error"));
852
1
    }
853
854
    #[test]
855
1
    fn intent_with_template_literals() {
856
1
        let mathml = "<mrow intent='1 0.0 0.1 -23 -0.1234 last'>
857
1
                <mi>x</mi>
858
1
            </mrow>";
859
1
        let intent = "<mrow><mn>1</mn><mn>0.</mn><mn>.1</mn><mn>-23</mn><mn>-.1234</mn><mi>last</mi></mrow>";
860
1
        assert!(!test_intent(mathml, intent, "Error"));
861
1
    }
862
863
    #[test]
864
1
    fn intent_with_nested_head() {
865
1
        let mathml = "<mrow intent='$ra($cong)($a,$b)'>
866
1
                <mi arg='a'>A</mi>
867
1
                <mover>
868
1
                    <mo movablelimits='false' arg='ra' intent='map'>⟶</mo>
869
1
                    <mo arg='cong' intent='congruence'>≅</mo>
870
1
                </mover>
871
1
                <mi arg='b'>B</mi>
872
1
            </mrow>";
873
1
        let intent = "<apply-function data-from-mathml='mrow'>
874
1
                                <map data-from-mathml='mrow'>
875
1
                                    <mi data-from-mathml='mo'>congruence</mi>
876
1
                                </map>
877
1
                                <mi data-from-mathml='mi' arg='a'>A</mi>
878
1
                                <mi data-from-mathml='mi' arg='b'>B</mi>
879
1
                            </apply-function>";
880
1
        assert!(test_intent(mathml, intent, "Error"));
881
1
    }
882
883
884
    #[test]
885
1
    fn intent_with_nested_head_and_hints() {
886
1
        let mathml = "<mrow intent='pre:prefix(in:infix($a, x))(post:postfix($b))'>
887
1
                <mi arg='a'>A</mi>
888
1
                <mover>
889
1
                    <mo intent='map'>⟶</mo>
890
1
                    <mo intent='congruence'>≅</mo>
891
1
                </mover>
892
1
                <mi arg='b'>B</mi>
893
1
            </mrow>";
894
1
        let intent = "<apply-function data-from-mathml='mrow'>
895
1
                <pre data-intent-property=':prefix:' data-from-mathml='mrow'>
896
1
                    <in data-intent-property=':infix:' data-from-mathml='mrow'>
897
1
                        <mi data-from-mathml='mi' arg='a'>A</mi>
898
1
                        <mi data-from-mathml='mi'>x</mi>
899
1
                    </in>
900
1
                </pre>
901
1
                <post data-intent-property=':postfix:' data-from-mathml='mrow'>
902
1
                    <mi data-from-mathml='mi' arg='b'>B</mi>
903
1
                </post>
904
1
            </apply-function>";
905
1
        assert!(test_intent(mathml, intent, "Error"));
906
1
    }
907
908
909
    #[test]
910
1
    fn intent_double_indirect_head() {
911
1
        let mathml = "<mrow intent='$m:prefix($c)($a,$b)'>
912
1
                <mi arg='a'>A</mi>
913
1
                <mover>
914
1
                    <mo movablelimits='false' arg='m' intent='map'>⟶</mo>
915
1
                    <mo arg='c' intent='congruence'>≅</mo>
916
1
                </mover>
917
1
                <mi arg='b'>B</mi>
918
1
            </mrow>";
919
1
        let intent = "<apply-function data-from-mathml='mrow'>
920
1
                                <map data-intent-property=':prefix:' data-from-mathml='mrow'>
921
1
                                    <mi data-from-mathml='mo'>congruence</mi>
922
1
                                </map>
923
1
                                <mi data-from-mathml='mi' arg='a'>A</mi>
924
1
                                <mi data-from-mathml='mi' arg='b'>B</mi>
925
1
                            </apply-function>";
926
1
        assert!(test_intent(mathml, intent, "Error"));
927
1
    }
928
929
    #[test]
930
1
    fn intent_missing_open() {
931
1
        let mathml = "<mrow intent='$p $a,$f($b))'>
932
1
                <mi arg='a'>a</mi>
933
1
                <mo arg='p' intent='plus'>+</mo>
934
1
                <mi arg='b'>b</mi>
935
1
                <mo arg='f' intent='factorial'>!</mo>
936
1
            </mrow>";
937
1
        let intent = "<plus> <mi arg='a'>a</mi> <factorial><mi arg='b'>b</mi></factorial> </plus>";
938
1
        assert!(!test_intent(mathml, intent, "Error"));
939
1
    }
940
941
    #[test]
942
1
    fn intent_no_comma() {
943
1
        let mathml = "<mrow intent='$p($a $f($b))'>
944
1
                <mi arg='a'>a</mi>
945
1
                <mo arg='p' intent='plus'>+</mo>
946
1
                <mi arg='b'>b</mi>
947
1
                <mo arg='f' intent='factorial'>!</mo>
948
1
            </mrow>";
949
1
        let intent = "<plus>
950
1
                <mrow>
951
1
                    <mi arg='a'>a</mi>
952
1
                    <factorial> <mi arg='b'>b</mi> </factorial>
953
1
                </mrow>
954
1
            </plus>";
955
1
        assert!(!test_intent(mathml, intent, "Error"));
956
1
    }
957
958
    #[test]
959
1
    fn intent_no_arg() {
960
1
        let mathml = "<mrow intent='factorial()'>
961
1
                <mi arg='a'>a</mi>
962
1
                <mo arg='p' intent='plus'>+</mo>
963
1
                <mi arg='b'>b</mi>
964
1
                <mo arg='f' intent='factorial'>!</mo>
965
1
            </mrow>";
966
1
        let target = "<factorial></factorial>";
967
1
        assert!(!test_intent(mathml, target, "Error"));
968
1
    }
969
970
    #[test]
971
1
    fn intent_illegal_no_arg() {
972
1
        let mathml = "<mrow intent='factorial(()))'>
973
1
                <mi arg='a'>a</mi>
974
1
                <mo arg='p' intent='plus'>+</mo>
975
1
                <mi arg='b'>b</mi>
976
1
                <mo arg='f' intent='factorial'>!</mo>
977
1
            </mrow>";
978
1
        let target = "<factorial></factorial>";
979
1
        assert!(!test_intent(mathml, target, "Error"));
980
1
    }
981
982
    #[test]
983
1
    fn intent_illegal_no_arg_ignore() {
984
1
        let mathml = "<mrow intent='factorial()'>
985
1
                <mi arg='a'>a</mi>
986
1
                <mo arg='p' intent='plus'>+</mo>
987
1
                <mi arg='b'>b</mi>
988
1
                <mo arg='f' intent='factorial'>!</mo>
989
1
            </mrow>";
990
1
        let target = "<mrow data-from-mathml='mrow' intent='factorial()'>
991
1
                                <mi data-from-mathml='mi' arg='a'>a</mi>
992
1
                                <mi data-from-mathml='mo'>plus</mi>
993
1
                                <mi data-from-mathml='mi' arg='b'>b</mi>
994
1
                                <mi data-from-mathml='mo'>factorial</mi>
995
1
                            </mrow>";
996
1
        assert!(test_intent(mathml, target, "IgnoreIntent"));
997
1
    }
998
999
    #[test]
1000
1
    fn intent_illegal_self_ref() {
1001
1
        let mathml = "<mrow intent='foo:is-foolish:function($b)'>
1002
1
                <mi intent='$b:int' arg='b'>b</mi>
1003
1
            </mrow>";
1004
1
        let target = "<foo data-intent-property=':function:' data-intent-type='is-foolish'><mi data-intent-type='int'>b</mi></foo>";
1005
1
        assert!(!test_intent(mathml, target, "Error"));
1006
1
    }
1007
1008
    #[test]
1009
1
    fn infer_missing_second_arg() {
1010
1
        let mathml = "<mrow intent='binomial($n,)'>
1011
1
                <mo>(</mo>
1012
1
                <mfrac linethickness='0'> <mn arg='n'>7</mn> <mn arg='m'>3</mn> </mfrac>
1013
1
                <mo>)</mo>
1014
1
            </mrow>";
1015
1
        let target = "<binomial data-intent-property='binomial($n,)'> \n
1016
1
                             <mn data-from-mathml='mn' arg='n'>7</mn> <mn data-from-mathml='mn' arg='m'>3</mn>  </binomial>";
1017
1
        assert!(!test_intent(mathml, target, "Error"));
1018
1
    }
1019
1020
    #[test]
1021
1
    fn infer_missing_second_arg_ignore() {
1022
1
        let mathml = "<mrow intent='binomial($n,)'>
1023
1
                <mo>(</mo>
1024
1
                <mfrac linethickness='0'> <mn arg='n'>7</mn> <mn arg='m'>3</mn> </mfrac>
1025
1
                <mo>)</mo>
1026
1
            </mrow>";
1027
1
        let target = "<mrow data-from-mathml='mrow' intent='binomial($n,)'>
1028
1
                <mo data-from-mathml='mo'>(</mo>
1029
1
                <fraction data-from-mathml='mfrac' linethickness='0'> <mn data-from-mathml='mn' arg='n'>7</mn> <mn data-from-mathml='mn' arg='m'>3</mn> </fraction>
1030
1
                <mo data-from-mathml='mo'>)</mo>
1031
1
            </mrow>";
1032
1
        assert!(test_intent(mathml, target, "IgnoreIntent"));
1033
1
    }   
1034
1035
    #[test]
1036
1
    fn plane1_char_in_concept_name() {
1037
1
        let mathml = "<math><mrow><mo intent='🐇'>&#x1F407;</mo><mi>X</mi></mrow></math>";
1038
1
        let intent = "<math data-from-mathml='math'>
1039
1
                                <mrow data-from-mathml='mrow'>
1040
1
                                    <mi data-from-mathml='mo'>🐇</mi>
1041
1
                                    <mi data-from-mathml='mi'>X</mi>
1042
1
                                </mrow>
1043
1
                            </math>";
1044
1
        assert!(test_intent(mathml, intent, "Error"));
1045
1
    }   
1046
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/infer_intent.rs
Line
Count
Source
1
//! Use heuristics to infer the intent.
2
//! For example, an `mfrac` with `linethickness=0` would be a binomial
3
//! The inference is added to the MathML
4
//!
5
//! The implementation of the module is on hold until the MathML committee figures out how it wants to do this.
6
#![allow(clippy::needless_return)]
7
8
use sxd_document::dom::{Element, Document, ChildOfElement};
9
use crate::prefs::PreferenceManager;
10
use crate::speech::SpeechRulesWithContext;
11
use crate::canonicalize::{as_element, as_text, name, create_mathml_element, set_mathml_name, INTENT_ATTR, MATHML_FROM_NAME_ATTR};
12
use crate::errors::*;
13
use std::fmt;
14
use std::sync::LazyLock;
15
use crate::pretty_print::mml_to_string;
16
use crate::xpath_functions::is_leaf;
17
use regex::Regex;
18
use phf::phf_set;
19
use log::{debug, error, warn};
20
21
const IMPLICIT_FUNCTION_NAME: &str = "apply-function";
22
23
2.47k
pub fn infer_intent<'r, 'c, 's:'c, 'm:'c>(rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
24
2.47k
    match catch_errors_building_intent(rules_with_context, mathml) {
25
2.45k
        Ok(intent) => return Ok(intent),
26
19
        Err(e) => {
27
            // lookup what we should do for error recovery
28
19
            let intent_preference = rules_with_context.get_rules().pref_manager.borrow().pref_to_string("IntentErrorRecovery");
29
19
            if intent_preference == "Error" {
30
9
                return Err(e);
31
            } else {
32
10
                let saved_intent_attr = mathml.attribute_value(INTENT_ATTR).unwrap();
33
10
                mathml.remove_attribute(INTENT_ATTR);
34
                // can't call intent_from_mathml() because we have already borrowed_mut -- we call a more internal version
35
10
                let intent_tree =  match rules_with_context.match_pattern::<Element<'m>>(mathml)
36
10
                                            .context("Pattern match/replacement failure!") {
37
0
                    Err(e) => Err(e),
38
10
                    Ok(intent) => {
39
10
                        intent.set_attribute_value(INTENT_ATTR, saved_intent_attr); //  so attr can be potentially be viewed later
40
10
                        Ok(intent)
41
                    },
42
                };
43
10
                mathml.set_attribute_value(INTENT_ATTR, saved_intent_attr);
44
10
                return intent_tree;
45
            }
46
        }
47
    }
48
49
2.47k
    fn catch_errors_building_intent<'r, 'c, 's:'c, 'm:'c>(rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
50
2.47k
        if let Some(intent_str) = mathml.attribute_value(INTENT_ATTR) {
51
            // debug!("Before intent: {}", crate::pretty_print::mml_to_string(mathml));
52
2.47k
            let mut lex_state = LexState::init(intent_str.trim())
?0
;
53
2.47k
            let mut intent_offset = 0;
54
2.47k
            let 
result2.46k
= build_intent(rules_with_context, &mut lex_state, mathml, &mut intent_offset)
55
2.47k
                        .with_context(|| 
format!14
("occurs before '{}' in intent attribute value '{}'", lex_state.remaining_str, intent_str))
?14
;
56
2.46k
            if lex_state.token != Token::None {
57
5
                bail!("Error in intent value: extra unparsed intent '{}' in intent attribute value '{}'", lex_state.remaining_str, intent_str);
58
2.45k
            }
59
2.45k
            assert!(lex_state.remaining_str.is_empty());
60
            // debug!("Resulting intent:\n{}", crate::pretty_print::mml_to_string(result));
61
2.45k
            return Ok(result);
62
0
        }
63
0
        bail!("Internal error: infer_intent() called on MathML with no intent arg:\n{}", mml_to_string(mathml));
64
2.47k
    }
65
2.47k
}
66
67
68
static FIXITIES: phf::Set<&str> = phf_set! {
69
    "function", "infix", "prefix", "postfix", "silent", "other",
70
};
71
72
/// Eliminate all but the last fixity property
73
7.63k
pub fn simplify_fixity_properties(properties: &str) -> String {
74
7.63k
    let parts: Vec<&str> = properties.split(':').collect();
75
    // debug!("simplify_fixity_properties {} parts from input: '{}'", parts.len(), properties);
76
7.63k
    let mut fixity_property = "";
77
7.63k
    let mut answer = ":".to_string();
78
19.2k
    for part in 
parts7.63k
{
79
19.2k
        if FIXITIES.contains(part) {
80
1.12k
            fixity_property = part;
81
18.1k
        } else if !part.is_empty() {
82
4.71k
            answer.push_str(part);
83
4.71k
            answer.push(':');
84
13.4k
        }
85
    }
86
7.63k
    if !fixity_property.is_empty() {
87
1.12k
        answer.push_str(fixity_property);
88
1.12k
        answer.push(':');
89
6.51k
    }
90
7.63k
    return answer;
91
7.63k
}
92
93
/// Given the intent add the fixity property for the intent if it isn't given (and one exists)
94
2.72k
fn add_fixity(intent: Element) {
95
2.72k
    let properties = intent.attribute_value(INTENT_PROPERTY).unwrap_or_default();
96
7.47k
    if 
properties.split(":")2.72k
.
all2.72k
(|property| !FIXITIES.contains(property)) {
97
2.63k
        let intent_name = name(intent);
98
2.63k
        crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
99
2.63k
            let definitions = definitions.borrow();
100
2.63k
            if let Some(
definition12
) = definitions.get_hashmap("IntentMappings").unwrap().get(intent_name) &&
101
12
                let Some((fixity, _)) = definition.split_once("=") {
102
12
                    let new_properties = (if properties.is_empty() {":"} else {
properties0
}).to_string() + fixity + ":";
103
12
                    intent.set_attribute_value(INTENT_PROPERTY, &new_properties);
104
                    // debug!("Added fixity: new value '{}'", intent.attribute_value(INTENT_PROPERTY).unwrap());
105
2.62k
                };
106
2.63k
        });
107
90
    }
108
2.72k
}
109
110
111
/// Given some MathML, expand out any intents taking into account their fixity property
112
/// This is recursive
113
363
pub fn add_fixity_children(intent: Element) -> Element {
114
363
    let children = intent.children();
115
363
    if children.is_empty() || (children.len() == 1 && children[0].element().is_none()) {
116
0
        return intent;
117
363
    }
118
119
363
    for child in children {
120
363
        let child = as_element(child);
121
363
        if child.attribute_value(INTENT_ATTR).is_some() {
122
0
            add_fixity_child(child);
123
363
        }
124
    }
125
363
    return intent;
126
127
0
    fn add_fixity_child(mathml: Element) -> Element {        
128
0
        let mut children = mathml.children();
129
0
        if children.is_empty() {
130
0
            return mathml;
131
0
        }
132
        // we also exclude fixity on mtable because they mess up the counts (see 'en::mtable::unknown_mtable_property')
133
0
        if mathml.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or_default() == "mtable" {
134
0
            return mathml;
135
0
        }
136
0
        let doc = mathml.document();
137
0
        let properties = mathml.attribute_value(INTENT_PROPERTY).unwrap_or_default();
138
0
        let fixity = properties.rsplit(':').find(|&property| FIXITIES.contains(property)).unwrap_or_default();
139
0
        let intent_name = name(mathml);
140
    
141
0
        let op_name_id = mathml.attribute_value("id").unwrap_or("new-id");
142
0
        match fixity {
143
0
            "infix" => {
144
0
                let mut new_children = Vec::with_capacity(2*children.len()-1);
145
0
                new_children.push(children[0]);
146
0
                for (i, &child) in children.iter().enumerate().skip(1) {
147
0
                    new_children.push(create_operator_element(intent_name, fixity, op_name_id, i, &doc));
148
0
                    new_children.push(child);
149
0
                }
150
0
                mathml.replace_children(new_children);
151
            },
152
0
            "prefix" => { 
153
0
                children.insert(0, create_operator_element(intent_name, fixity, op_name_id, 1, &doc));                       
154
0
                mathml.replace_children(children);
155
0
            },
156
0
            "postfix" => { 
157
0
                children.push( create_operator_element(intent_name, fixity, op_name_id, 1, &doc));                       
158
0
                mathml.replace_children(children);
159
0
            },
160
0
            "silent" => {
161
0
                // children remain the same -- nothing to do
162
0
            },
163
0
            "other" => {
164
0
                // a special case -- will be handled with specific rules (e.g., intervals need to add "from" and "to", not a single word)
165
0
            },
166
            _ => {  // "function" is the default
167
                // build a function like notation function-name U+2061 <mrow> children </mrow>
168
0
                let mut new_children = Vec::with_capacity(3);
169
0
                let function_name = create_operator_element(intent_name, "function", op_name_id, 1, &doc);
170
0
                new_children.push(function_name);
171
0
                let invisible_apply_function = create_operator_element("mo", "infix", op_name_id, 2, &doc);
172
0
                invisible_apply_function.element().unwrap().set_text("\u{2061}");
173
0
                new_children.push(invisible_apply_function);
174
0
                let mrow_wrapper = create_mathml_element(&doc, "mrow");
175
0
                mrow_wrapper.set_attribute_value("id", (op_name_id.to_string() + "3").as_str());
176
0
                mrow_wrapper.append_children(children);
177
0
                new_children.push(ChildOfElement::Element(mrow_wrapper));
178
0
                mathml.replace_children(new_children);
179
0
                if fixity.is_empty() {
180
0
                    mathml.set_attribute_value(INTENT_PROPERTY, ":function:");
181
0
                }
182
            },
183
        }
184
0
        return mathml;
185
    
186
0
        fn create_operator_element<'a>(intent_name: &str, fixity: &str, id: &str, id_inc: usize, doc: &Document<'a>) -> ChildOfElement<'a> {
187
0
            let intent_name = intent_speech_for_name(intent_name, &PreferenceManager::get().borrow().pref_to_string("NavMode"), fixity);
188
0
            let element = create_mathml_element(doc, &intent_name);
189
0
            element.set_attribute_value("id", &format!("{id}-fixity-{id_inc}"));
190
0
            element.set_attribute_value(MATHML_FROM_NAME_ATTR, "mo");
191
0
            return ChildOfElement::Element(element);
192
0
        }
193
0
    }
194
363
}
195
196
340
pub fn intent_speech_for_name(intent_name: &str, verbosity: &str, fixity: &str) -> String {
197
340
    crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
198
340
        let definitions = definitions.borrow();
199
340
        if let Some(
intent_name_pattern294
) = definitions.get_hashmap("IntentMappings").unwrap().get(intent_name) {
200
            // Split the pattern is:
201
            //   fixity-def [|| fixity-def]*
202
            //   fixity-def := fixity=[open;] verbosity[; close]
203
            //   verbosity := terse | medium | verbose
204
396
            if let Some(
matched_intent294
) =
intent_name_pattern.split("||")294
.
find294
(|&entry| entry.trim().starts_with(fixity)) {
205
294
                let (_, matched_intent) = matched_intent.split_once("=").unwrap_or_default();
206
294
                let parts = matched_intent.trim().split(";").collect::<Vec<&str>>();
207
294
                let mut operator_names = (if parts.len() > 1 {
parts[1]129
} else {
parts[0]165
}).split(":").collect::<Vec<&str>>();
208
294
                match operator_names.len() {
209
236
                    1 => return operator_names[0].trim().to_string(),
210
                    2 | 3 => {
211
58
                        if operator_names.len() == 2 {
212
0
                            warn!("Intent '{intent_name}' has only two operator names, but should have three");
213
0
                            operator_names.push(operator_names[1]);
214
58
                        }
215
58
                        let intent_word = match verbosity {
216
58
                            "Terse" => 
operator_names[0]2
,
217
56
                            "Medium" => 
operator_names[1]54
,
218
2
                            _ => operator_names[2],
219
                        };
220
58
                        return intent_word.trim().to_string();
221
                    },
222
                    _ => {
223
0
                        error!("Intent '{}' has too many ({}) operator names, should only have 2", intent_name, operator_names.len());
224
0
                        return intent_name.to_string();
225
                    },
226
                }
227
0
            }
228
46
        };
229
46
        return intent_name.replace(['_', '-'], " ").trim().to_string();
230
340
    })
231
340
}
232
233
234
235
// intent             := self-property-list | expression
236
// self-property-list := property+ S    
237
// expression         := S ( term property* | application ) S 
238
// term               := concept-or-literal | number | reference 
239
// concept-or-literal := NCName
240
// number             := '-'? \d+ ( '.' \d+ )?
241
// reference          := '$' NCName
242
// application        := expression '(' arguments? S ')'
243
// arguments          := expression ( ',' expression )*
244
// property           := S ':' NCName
245
// S                  := [ \t\n\r]*
246
247
// The practical restrictions of NCName are that it cannot contain several symbol characters like
248
//  !, ", #, $, %, &, ', (, ), *, +, ,, /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~, and whitespace characters
249
//  Furthermore an NCName cannot begin with a number, dot or minus character although they can appear later in an NCName.
250
// NC_NAME defined in www.w3.org/TR/REC-xml/#sec-common-syn, but is complicated
251
//   We follow NC_NAME for the basic latin block, but then allow everything
252
2
static CONCEPT_OR_LITERAL: LazyLock<Regex> = LazyLock::new(|| {
253
2
    Regex::new(r#"^[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#     // NC_NAME but simpler
254
2
    ).unwrap()
255
2
});
256
2
static PROPERTY: LazyLock<Regex> = LazyLock::new(|| {
257
2
    Regex::new(r#"^:[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#    // : NC_NAME
258
2
    ).unwrap()
259
2
});
260
2
static ARG_REF: LazyLock<Regex> = LazyLock::new(|| {
261
2
    Regex::new(r#"^\$[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*"#   // $ NC_NAME
262
2
    ).unwrap()
263
2
});
264
2
static NUMBER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^-?[0-9]+(\.[0-9]+)?"#).unwrap());
265
266
static TERMINALS_AS_U8: [u8; 3] = [b'(', b',', b')'];
267
// static TERMINALS: [char; 3] = ['(', ',',')'];
268
269
// 'i -- "i" for the lifetime of the INTENT_ATTR string
270
#[derive(Debug, PartialEq, Eq, Clone)]
271
enum Token<'i> {
272
    Terminal(&'i str),  // "(", ",", ")"
273
    Property(&'i str),
274
    ArgRef(&'i str),
275
    ConceptOrLiteral(&'i str),
276
    Number(&'i str),
277
    None,               // out of characters
278
}
279
280
impl fmt::Display for Token<'_> {
281
3
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
282
3
        return write!(f, "{}",
283
3
            match self {
284
3
                Token::Terminal(str) => format!("Terminal('{str}')"),
285
0
                Token::Property(str) => format!("Property({str})"),
286
0
                Token::ArgRef(str) => format!("ArgRef({str})"),
287
0
                Token::ConceptOrLiteral(str) => format!("Literal({str})"),
288
0
                Token::Number(str) => format!("Number({str})"),
289
0
                Token::None => "None".to_string(),
290
            }
291
        );
292
3
    }
293
}
294
295
impl Token<'_> {
296
3.64k
    fn is_terminal(&self, terminal: &str) -> bool {
297
3.64k
        if let Token::Terminal(
value1.02k
) = *self {
298
1.02k
            return value == terminal;
299
        } else {
300
2.61k
            return false;
301
        }
302
3.64k
    }
303
304
5.21k
    fn as_str(&self) -> &str {
305
5.21k
        return match self {
306
0
            Token::Terminal(str) => str,
307
4.79k
            Token::Property(str) => str,
308
226
            Token::ArgRef(str) => str,
309
161
            Token::ConceptOrLiteral(str) => str,
310
29
            Token::Number(str) => str,
311
0
            Token::None => "",
312
        }
313
5.21k
    }
314
}
315
316
struct LexState<'i> {
317
    token: Token<'i>,
318
    remaining_str: &'i str,     // always trimmed
319
}
320
321
impl fmt::Display for LexState<'_> {
322
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
323
0
        return writeln!(f, "token: {}, remaining: '{}'", self.token, self.remaining_str);
324
0
    }
325
}
326
327
impl<'i> LexState<'i> {
328
2.50k
    fn init(str: &'i str) -> Result<LexState<'i>> {
329
2.50k
        let mut lex_state = LexState {  token: Token::None, remaining_str: str.trim() };
330
2.50k
        lex_state.get_next()
?0
;
331
2.50k
        return Ok(lex_state);
332
2.50k
    }
333
334
    // helper function for LexState -- do not call outside of the impl
335
2.82k
    fn set_token(&mut self, str: &'i str) -> Result<()> {
336
        // Note: 'str' is already trimmed
337
2.82k
        if str.is_empty() {
338
0
            self.token = Token::None;
339
2.82k
        } else if TERMINALS_AS_U8.contains(&str.as_bytes()[0]) {
340
0
            self.token = Token::Terminal(str);
341
2.82k
        } else if let Some(
matched_property2.40k
) = PROPERTY.find(str) {
342
2.40k
            self.token = Token::Property(matched_property.as_str());
343
2.40k
        } else if let Some(
matched_arg_ref226
) =
ARG_REF416
.find(str) {
344
226
            self.token = Token::ArgRef(matched_arg_ref.as_str());
345
226
        } else if  let Some(
matched_literal161
) =
CONCEPT_OR_LITERAL190
.find(str) {
346
161
            self.token = Token::ConceptOrLiteral(matched_literal.as_str());
347
161
        } else if  let Some(
matched_number29
) =
NUMBER29
.find(str) {
348
29
            self.token = Token::Number(matched_number.as_str());
349
29
        } else {
350
0
            bail!("Illegal 'intent' syntax: {}", str);
351
        }
352
2.82k
        return Ok( () );
353
2.82k
    }
354
355
5.69k
    fn get_next(&mut self) -> Result<&Token<'_>> {
356
5.69k
        if self.remaining_str.is_empty() {
357
2.48k
            self.token = Token::None;
358
3.21k
        } else if TERMINALS_AS_U8.contains(&self.remaining_str.as_bytes()[0]) {
359
391
            self.token = Token::Terminal(&self.remaining_str[..1]);
360
391
            self.remaining_str = self.remaining_str[1..].trim_start();
361
391
        } else {
362
2.82k
            self.set_token(self.remaining_str)
?0
;
363
2.82k
            self.remaining_str = self.remaining_str[self.token.as_str().len()..].trim_start();
364
}    
365
5.69k
        return Ok(&self.token);
366
5.69k
    }
367
368
3.64k
    fn is_terminal(&self, terminal: &str) -> bool {
369
3.64k
        return self.token.is_terminal(terminal);
370
3.64k
    }
371
}
372
373
2.74k
fn build_intent<'b, 'r, 'c, 's:'c, 'm:'c>(rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
374
2.74k
                                         lex_state: &mut LexState<'b>,
375
2.74k
                                         mathml: Element<'c>,
376
2.74k
                                         intent_offset: &mut u32) -> Result<Element<'m>> {
377
    // intent             := self-property-list | expression
378
    // self-property-list := property+ S    
379
    // expression         := S ( term property* | application ) S 
380
    // term               := concept-or-literal | number | reference 
381
    // concept-or-literal := NCName
382
    // number             := '-'? \d+ ( '.' \d+ )?
383
    // reference          := '$' NCName
384
    // application        := expression '(' arguments? S ')'
385
    //
386
    // When we flatten intent we have this implementation looking for Tokens or '(' [for application]
387
    // Essentially, the grammar we deal with here is:
388
    // intent := property+ | (concept-or-literal | number | reference) property* '('?
389
    // debug!("  start build_intent: state: {}", lex_state);
390
2.74k
    let doc = rules_with_context.get_document();
391
    let mut intent;
392
2.74k
    debug!("    build_intent: start mathml name={}, intent_offset={}", 
name0
(
mathml0
), intent_offset);
393
2.74k
    match lex_state.token {
394
        Token::Property(_) => {
395
            // We only have a property -- we want to keep this tag/element
396
            // There are two paths:
397
            // 1. If there is a function call, then the children are dealt with there
398
            // 2. If there is *no* function call, then the children are kept, which means we return to pattern matching
399
            //    Note: to avoid infinite loop, we need to remove the 'intent' so we don't end up back here; we put it back later
400
2.33k
            let properties = get_properties(lex_state)
?0
; // advance state to see if funcall
401
2.33k
            if lex_state.is_terminal("(") {
402
2
                intent = create_mathml_element(&doc, name(mathml));
403
2
                intent.set_attribute_value(INTENT_PROPERTY, &properties);
404
2
                intent.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
405
2
                intent.set_attribute_value("id", mathml.attribute_value("id")
406
2
                      .ok_or_else(|| 
anyhow!0
("no id on intent function name"))
?0
);
407
            } else {
408
2.32k
                let saved_intent = mathml.attribute_value(INTENT_ATTR).unwrap();
409
2.32k
                mathml.remove_attribute(INTENT_ATTR);
410
2.32k
                mathml.set_attribute_value(INTENT_PROPERTY, &properties);   // needs to be set before the pattern match
411
2.32k
                intent = rules_with_context.match_pattern::<Element<'m>>(mathml)
?0
;
412
                // debug!("Intent after pattern match:\n{}", mml_to_string(intent));
413
2.32k
                mathml.set_attribute_value(INTENT_ATTR, saved_intent);
414
            }
415
2.33k
            add_fixity(intent);
416
2.33k
            return Ok(intent);      // if we start with properties, then there can only be properties
417
        },
418
161
        Token::ConceptOrLiteral(word) | Token::Number(
word28
) => {
419
189
            let leaf_name = if let Token::Number(_) = lex_state.token {
"mn"28
} else {
"mi"161
};
420
189
            intent = create_mathml_element(&doc, leaf_name);
421
            // if the str is part of a larger intent and not the head (e.g., "a" in "f($x, a)", but not the "f" in it), then it is "made up"
422
            // debug!("    Token::ConceptOrLiteral, word={}, leaf_name={}", word, leaf_name);
423
189
            intent.set_attribute_value(MATHML_FROM_NAME_ATTR, 
424
189
                if word == mathml.attribute_value(INTENT_ATTR).unwrap_or_default() {
name(mathml)30
} else {
leaf_name159
});
425
189
            intent.set_text(word);       // '-' and '_' get removed by the rules.
426
189
            if let Some(
id136
) = mathml.attribute_value("id") {
427
136
                intent.set_attribute_value("id", &format!("{}-literal-{}", id, intent_offset));
428
136
                *intent_offset += 1;
429
136
            
}53
430
189
            lex_state.get_next()
?0
;
431
189
            if let Token::Property(_) = lex_state.token {
432
60
                let properties = get_properties(lex_state)
?0
;
433
60
                intent.set_attribute_value(INTENT_PROPERTY, &properties);
434
129
            }
435
        },
436
223
        Token::ArgRef(word) => {
437
223
            intent = match find_arg(rules_with_context, &word[1..], mathml, intent_offset, true, false)
?1
{
438
221
                Some(e) => {
439
221
                    lex_state.get_next()
?0
;
440
221
                    e
441
                },
442
1
                None => bail!("intent arg '{}' not found", word),
443
            };
444
221
            if let Token::Property(_) = lex_state.token {
445
3
                let properties = get_properties(lex_state)
?0
;
446
3
                intent.set_attribute_value(INTENT_PROPERTY, &properties);
447
218
            }
448
        },
449
3
        _ => bail!("Illegal 'intent' syntax: found {}", lex_state.token),
450
    };
451
410
    if lex_state.is_terminal("(") {
452
136
        intent = build_function(intent, rules_with_context, lex_state, mathml, intent_offset)
?15
;
453
274
    }
454
    // debug!("    end build_intent: state: {}     piece: {}", lex_state, mml_to_string(intent));
455
395
    add_fixity(intent);
456
395
    return Ok(intent);
457
2.74k
}
458
459
pub const INTENT_PROPERTY: &str = "data-intent-property";
460
461
/// Get all the properties, stopping we don't have any more
462
/// Returns the string of the properties terminated with an additional ":"
463
2.39k
fn get_properties(lex_state: &mut LexState) -> Result<String> {
464
    // return the 'hint' leaving the state
465
2.39k
    assert!(matches!(lex_state.token, Token::Property(str) if str.starts_with(':')));
466
2.39k
    let mut properties = String::with_capacity(60);
467
2.39k
    properties.push_str(lex_state.token.as_str());
468
    loop {
469
2.40k
        let token = lex_state.get_next()
?0
;
470
2.40k
        if let Token::Property(
property11
) = token {
471
11
            properties.push_str(property);
472
11
        } else {
473
2.39k
            properties.push(':');
474
            // debug!("      get_properties: returns {}", properties);
475
2.39k
            return Ok(simplify_fixity_properties(&properties));
476
        }
477
    }
478
2.39k
}
479
480
/// Build a function 'f(...)' where '...' can be empty
481
///
482
/// Also handles nested functions like f(...)(...)
483
/// 
484
/// Start state: at '('
485
/// 
486
/// End state: after ')'
487
136
fn build_function<'b, 'r, 'c, 's:'c, 'm:'c>(
488
136
            function_name: Element<'m>,
489
136
            rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
490
136
            lex_state: &mut LexState<'b>,
491
136
            mathml: Element<'c>,
492
136
            intent_offset: &mut u32) -> Result<Element<'m>> {
493
    // debug!("  start build_function: name: {}, state: {}", name(function_name), lex_state);
494
    // application := intent '(' arguments? S ')'  where 'function_name' is 'intent'
495
136
    assert!(lex_state.is_terminal("("));
496
136
    let mut function = function_name;
497
136
    function.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
498
260
    while lex_state.is_terminal("(") {
499
139
        lex_state.get_next()
?0
;
500
139
        if lex_state.is_terminal(")") {
501
            // grammar requires at least one argument
502
9
            bail!("Illegal 'intent' syntax: missing argument for intent name '{}'", name(function_name));
503
130
        }
504
130
        let 
children125
= build_arguments(rules_with_context, lex_state, mathml, intent_offset)
?5
;
505
125
        function = lift_function_name(rules_with_context.get_document(), function, children);
506
507
125
        if !lex_state.is_terminal(")") {
508
1
            bail!("Illegal 'intent' syntax: missing ')' for intent name '{}'", name(function_name));
509
124
        }
510
124
        lex_state.get_next()
?0
;
511
    }
512
513
    // debug!("  end build_function/# children: {}, #state: {}  ..[bfa] function name: {}",
514
        // function.children().len(), lex_state, mml_to_string(function));
515
121
    return Ok(function);
516
136
}
517
518
// process all the args of a function
519
// Start state: after '('
520
// End state: on ')'
521
130
fn build_arguments<'b, 'r, 'c, 's:'c, 'm:'c>(
522
130
            rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
523
130
            lex_state: &mut LexState<'b>,
524
130
            mathml: Element<'c>,
525
130
            intent_offset: &mut u32) -> Result<Vec<Element<'m>>> {
526
    // arguments := intent ( ',' intent )*' 
527
    // debug!("    start build_args state: {}", lex_state);
528
529
    // there is at least one arg
530
130
    let mut children = Vec::with_capacity(lex_state.remaining_str.len()/3 + 1);   // conservative estimate ('3' - "$x,");
531
130
    children.
push127
( build_intent(rules_with_context, lex_state, mathml, intent_offset)
?3
); // arg before ','
532
    // debug!("  build_args: # children {};  state: {}", children.len(), lex_state);
533
534
239
    while lex_state.is_terminal(",") {
535
114
        lex_state.get_next()
?0
;
536
114
        children.
push112
( build_intent(rules_with_context, lex_state, mathml, intent_offset)
?2
); // arg before ','
537
        // debug!("    build_args, # children {};  state: {}", children.len(), lex_state);
538
    }
539
540
    // debug!("    end build_args, # children {};  state: {}", children.len(), lex_state);
541
125
    return Ok(children);
542
130
}
543
544
/// lift the children up to LITERAL_NAME
545
125
fn lift_function_name<'m>(doc: Document<'m>, function_name: Element<'m>, children: Vec<Element<'m>>) -> Element<'m> {
546
    // debug!("    lift_function_name: {}", name(function_name));
547
    // debug!("    lift_function_name: {}", mml_to_string(function_name));
548
125
    if name(function_name) == "mi" || 
name(function_name) == "mn"4
{ // FIX -- really want to test for all leaves, but not "data-from-mathml"
549
        // simple/normal case of f(x,y)
550
        // don't want to say that this is a leaf -- doing so messes up because it potentially has children
551
121
        set_mathml_name(function_name, as_text(function_name));
552
121
        function_name.set_text("");
553
121
        function_name.replace_children(children);
554
129
        if 
name(function_name)121
.
find121
(|ch| ch!='_' &&
ch!='-'108
).
is_none121
() {
555
14
            let properties = function_name.attribute_value(INTENT_PROPERTY).unwrap_or(":").to_owned();
556
14
            function_name.set_attribute_value(INTENT_PROPERTY, &(properties + "silent:"));
557
107
        }
558
121
        return function_name;
559
4
    } else if function_name.children().is_empty() {
560
        // "...  :property(...)" -- no function name
561
0
        function_name.replace_children(children);
562
0
        return function_name;
563
    } else {
564
        // more complicated case of nested name: f(x)(y,z)
565
        // create an apply_function(f(x), y, z)
566
4
        let result = create_mathml_element(&doc, IMPLICIT_FUNCTION_NAME);
567
4
        result.set_attribute_value(MATHML_FROM_NAME_ATTR, "mrow");
568
4
        result.append_child(function_name);
569
4
        result.append_children(children);
570
4
        return result;
571
    }
572
125
}
573
574
575
/// look for @arg=name in mathml
576
/// if 'check_intent', then look at an @intent for this element (typically false for non-recursive calls)
577
946
fn find_arg<'r, 'c, 's:'c, 'm:'c>(
578
946
    rules_with_context: &'r mut SpeechRulesWithContext<'c,'s,'m>,
579
946
    name: &str,
580
946
    mathml: Element<'c>,
581
946
    intent_offset: &mut u32,
582
946
    skip_self: bool,
583
946
    no_check_inside: bool) -> Result<Option<Element<'m>>> {
584
    // debug!("Looking for '{}' in\n{}", name, mml_to_string(mathml));
585
946
    if !skip_self &&
586
723
        let Some(
arg_val411
) = mathml.attribute_value("arg") {
587
            // debug!("looking for '{}', found arg='{}'", name, arg_val);
588
411
            if name == arg_val {
589
                // check to see if this mathml has an intent value -- if so the value is the value of its intent value
590
222
                if let Some(
intent_str28
) = mathml.attribute_value(INTENT_ATTR) {
591
28
                    let mut lex_state = LexState::init(intent_str.trim())
?0
;
592
28
                    return Ok( Some( build_intent(rules_with_context, &mut lex_state, mathml, intent_offset)
?1
) );
593
                } else {
594
194
                    return Ok( Some( rules_with_context.match_pattern::<Element<'m>>(mathml)
?0
) );
595
                }
596
189
            } else if no_check_inside {
597
189
                return Ok(None);       // don't look inside 'arg'
598
0
            }
599
535
        }
600
601
535
    if no_check_inside && 
mathml.attribute_value(INTENT_ATTR)312
.
is_some312
() {
602
2
        return Ok(None);           // don't look inside 'intent'
603
533
    }
604
605
533
    if is_leaf(mathml){
606
121
        return Ok(None);
607
412
    }
608
609
723
    for child in 
mathml412
.
children412
() {
610
723
        let child = as_element(child);
611
723
        if let Some(
element396
) = find_arg(rules_with_context, name, child, intent_offset, false, true)
?1
{
612
396
            return Ok( Some(element) );
613
326
        }
614
    }
615
616
15
    return Ok(None);               // not present
617
946
}
618
619
#[cfg(test)]
620
mod tests {
621
    #[allow(unused_imports)]
622
    use crate::init_logger;
623
    use log::debug;
624
    use sxd_document::parser;
625
626
627
27
    fn test_intent(mathml: &str, target: &str, intent_error_recovery: &str) -> bool {
628
    use crate::interface::*;
629
        use crate::pretty_print::mml_to_string;
630
    // this forces initialization
631
27
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
632
        // crate::speech::SpeechRules::initialize_all_rules().unwrap();
633
27
        set_preference("IntentErrorRecovery", intent_error_recovery).unwrap();
634
27
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();      // avoids possibility of "LiteralSpeak"
635
27
        let package1 = &parser::parse(mathml).expect("Failed to parse test input");
636
27
        let mathml = get_element(package1);
637
27
        trim_element(mathml, false);
638
27
        debug!("test:\n{}", 
mml_to_string0
(
mathml0
));
639
        
640
27
        let package2 = &parser::parse(target).expect("Failed to parse target input");
641
27
        let target = get_element(package2);
642
27
        trim_element(target,true);
643
27
        debug!("target:\n{}", 
mml_to_string0
(
target0
));
644
645
27
        let 
result18
= match crate::speech::intent_from_mathml(mathml, package2.as_document()) {
646
18
            Ok(e) => e,
647
9
            Err(e) => {
648
9
                debug!("{}", 
crate::interface::errors_to_string0
(
&e0
));
649
9
                return false;       // could be intentional failure
650
            }
651
        };
652
18
        debug!("result:\n{}", 
mml_to_string0
(
result0
));
653
18
        match is_same_element(result, target, &[]) {
654
18
      Ok(_) => return true,
655
0
      Err(e) => panic!("{}:\nresult: {}target: {}", e, mml_to_string(result), mml_to_string(target)),
656
    }
657
27
    }
658
659
    #[test]
660
1
    fn infer_binomial() {
661
1
        let mathml = "<mrow intent='binomial($n, $m)'>
662
1
                <mo>(</mo>
663
1
                <mfrac linethickness='0'> <mn arg='n'>7</mn> <mn arg='m'>3</mn> </mfrac>
664
1
                <mo>)</mo>
665
1
            </mrow>";
666
1
        let intent = "<binomial data-from-mathml='mrow' data-intent-property=':infix:'> <mn data-from-mathml='mn' arg='n'>7</mn> <mn data-from-mathml='mn' arg='m'>3</mn>  </binomial>";
667
1
        assert!(test_intent(mathml, intent, "Error"));
668
1
    }
669
670
    #[test]
671
1
    fn infer_binomial_intent_arg() {
672
1
        let mathml = "<msubsup intent='$op($n,$m)'>
673
1
                <mi arg='op' intent='binomial'>C</mi>
674
1
                <mi arg='n'>n</mi>
675
1
                <mi arg='m'>m</mi>
676
1
            </msubsup>";
677
1
        let intent = "<binomial data-from-mathml='msubsup' data-intent-property=':infix:'> <mi data-from-mathml='mi' arg='n'>n</mi> <mi data-from-mathml='mi' arg='m'>m</mi></binomial>";
678
1
        assert!(test_intent(mathml, intent, "Error"));
679
1
    }
680
681
    #[test]
682
1
    fn silent_underscore() {
683
1
        let mathml = "<mrow><mi intent='__-'>silent</mi><mo>+</mo><mi>e</mi></mrow>";
684
1
        let intent = "<mrow data-from-mathml='mrow'>
685
1
                                <mi data-from-mathml='mi'>__-</mi>
686
1
                                <mo data-from-mathml='mo'>+</mo>
687
1
                                <mi data-from-mathml='mi'>e</mi>
688
1
                            </mrow>";
689
1
        assert!(test_intent(mathml, intent, "Error"));
690
1
    }
691
692
693
    #[test]
694
1
    fn silent_underscore_function() {
695
1
        let mathml = "<mrow intent='__-_(speak, this)'></mrow>";
696
1
        let intent = "<__-_ data-from-mathml='mrow' data-intent-property=':silent:'>
697
1
                                <mi data-from-mathml='mi'>speak</mi>
698
1
                                <mi data-from-mathml='mi'>this</mi>
699
1
                            </__-_>";
700
1
        assert!(test_intent(mathml, intent, "Error"));
701
1
    }
702
703
    #[test]
704
1
    fn intent_multiple_properties() {
705
1
        let mathml = "<mrow intent='foo:silent:int(bar:positive-int:int, $a:foo:bar:foo-bar, $b:number)'>
706
1
                <mi arg='a'>a</mi>
707
1
                <mo arg='p' intent='plus'>+</mo>
708
1
                <mi arg='b' intent=':negative-int:int'>b</mi>
709
1
            </mrow>";
710
1
        let intent = "<foo data-intent-property=':int:silent:' data-from-mathml='mrow'>
711
1
                                <mi data-from-mathml='mi' data-intent-property=':positive-int:int:'>bar</mi>
712
1
                                <mi data-from-mathml='mi' arg='a' data-intent-property=':foo:bar:foo-bar:'>a</mi>
713
1
                                <mi data-from-mathml='mi' arg='b' data-intent-property=':number:'>b</mi>
714
1
                            </foo>";
715
1
        assert!(test_intent(mathml, intent, "Error"));
716
1
    }
717
    #[test]
718
1
    fn intent_nest_no_arg_call() {
719
1
        let mathml = "<mrow intent='foo(bar())'>
720
1
                <mi arg='a'>a</mi>
721
1
                <mo arg='p' intent='plus'>+</mo>
722
1
                <mi arg='b'>b</mi>
723
1
                <mo arg='f' intent='factorial'>!</mo>
724
1
            </mrow>";
725
1
        let intent = "<foo><bar></bar></foo>";
726
1
        assert!(!test_intent(mathml, intent, "Error"));
727
1
    }
728
729
    #[test]
730
1
    fn intent_hints() {
731
1
        let mathml = "<mrow intent='foo:silent(bar:postfix(3))'>
732
1
                <mi arg='a'>a</mi>
733
1
                <mo arg='p' intent='plus'>+</mo>
734
1
                <mi arg='b'>b</mi>
735
1
                <mo arg='f' intent='factorial'>!</mo>
736
1
            </mrow>";
737
1
        let intent = "<foo data-intent-property=':silent:' data-from-mathml='mrow'>
738
1
                                <bar data-intent-property=':postfix:' data-from-mathml='mrow'>
739
1
                                    <mn data-from-mathml='mn'>3</mn>
740
1
                                </bar>
741
1
                            </foo>";
742
1
        assert!(test_intent(mathml, intent, "Error"));
743
1
    }
744
    
745
    #[test]
746
1
    fn intent_hints_and_type() {
747
1
        let mathml = "<mrow intent='foo:is-foolish:function($b)'>
748
1
                <mi arg='a'>a</mi>
749
1
                <mo arg='p' intent='plus'>+</mo>
750
1
                <mi intent='b:int' arg='b'>b</mi>
751
1
                <mo arg='f' intent='factorial'>!</mo>
752
1
            </mrow>";
753
1
        let intent = "<foo data-intent-property=':is-foolish:function:' data-from-mathml='mrow'>
754
1
                                <mi data-intent-property=':int:' data-from-mathml='mi'>b</mi>
755
1
                            </foo>";
756
1
        assert!(test_intent(mathml, intent, "Error"));
757
1
    }
758
759
    #[test]
760
1
    fn intent_in_intent_first_arg() {
761
1
        let mathml = "<mrow intent='p(f(b), a)'>
762
1
                <mi arg='a'>a</mi>
763
1
                <mo arg='p' intent='plus'>+</mo>
764
1
                <mi arg='b'>b</mi>
765
1
                <mo arg='f' intent='factorial'>!</mo>
766
1
            </mrow>";
767
1
        let intent = "<p data-from-mathml='mrow'>
768
1
                                <f data-from-mathml='mrow'>
769
1
                                    <mi data-from-mathml='mi'>b</mi>
770
1
                                </f>
771
1
                                <mi data-from-mathml='mi'>a</mi>
772
1
                            </p>";
773
1
        assert!(test_intent(mathml, intent, "Error"));
774
1
    }
775
776
    #[test]
777
1
    fn intent_in_intent_second_arg() {
778
1
        let mathml = "<mrow intent='$p(a,$f(b))'>
779
1
                <mi arg='a'>a</mi>
780
1
                <mo arg='p' intent='plus'>+</mo>
781
1
                <mi arg='b'>b</mi>
782
1
                <mo arg='f' intent='factorial'>!</mo>
783
1
            </mrow>";
784
1
        let intent = "<plus data-from-mathml='mrow' data-intent-property=':infix:'>
785
1
                                <mi data-from-mathml='mi'>a</mi>
786
1
                                <factorial data-from-mathml='mrow'>
787
1
                                    <mi data-from-mathml='mi'>b</mi>
788
1
                                </factorial>
789
1
                            </plus>";
790
1
        assert!(test_intent(mathml, intent, "Error"));
791
1
    }
792
793
    #[test]
794
1
    fn intent_with_whitespace() {
795
1
        let mathml = "<mrow intent='  $arrow    ( $a ,  $b,$c )  '>
796
1
                <mi arg='a'>A</mi>
797
1
                <mover>
798
1
                    <mo movablelimits='false' arg='arrow' intent='map'>⟶</mo>
799
1
                    <mo arg='U2245' intent='congruence'>≅</mo>
800
1
                </mover>
801
1
                <mi arg='b'>B</mi>
802
1
                <mi arg='c'>C</mi>
803
1
            </mrow>";
804
1
        let intent = "<map data-from-mathml='mrow'> <mi data-from-mathml='mi' arg='a'>A</mi> <mi data-from-mathml='mi' arg='b'>B</mi> <mi data-from-mathml='mi' arg='c'>C</mi> </map>";
805
1
        assert!(test_intent(mathml, intent, "Error"));
806
1
    }
807
808
    #[test]
809
1
    fn intent_template_at_toplevel() {
810
1
        let mathml = "<msup intent='$H $n'>
811
1
            <mi arg='H' mathvariant='normal'>H</mi>
812
1
            <mn arg='n'>2</mn>
813
1
            </msup>";
814
1
        let intent = "<mrow><mi arg='H' mathvariant='normal'>H</mi><mn arg='n'>2</mn></mrow>";
815
1
        assert!(!test_intent(mathml, intent, "Error"));
816
1
    }
817
818
    #[test]
819
1
    fn intent_with_nested_indirect_head() {
820
1
        let mathml = "<mrow intent='$op($a,$b)'>
821
1
                <mi arg='a'>A</mi>
822
1
                <mover arg='op' intent='$ra($cong)'>
823
1
                    <mo movablelimits='false' arg='ra' intent='map'>⟶</mo>
824
1
                    <mo arg='cong' intent='congruence'>≅</mo>
825
1
                </mover>
826
1
                <mi arg='b'>B</mi>
827
1
            </mrow>";
828
1
        let intent = "<apply-function data-from-mathml='mrow'>
829
1
                                <map data-from-mathml='mrow'>
830
1
                                    <mi data-from-mathml='mo'>congruence</mi>
831
1
                                </map>
832
1
                                <mi data-from-mathml='mi' arg='a'>A</mi>
833
1
                                <mi data-from-mathml='mi' arg='b'>B</mi>
834
1
                            </apply-function>";
835
1
        assert!(test_intent(mathml, intent, "Error"));
836
1
    }
837
838
    #[test]
839
1
    fn intent_with_literals() {
840
1
        let mathml = "<mrow intent='vector(1, 0.0, 0.1, -23, -0.1234, last)'>
841
1
                <mi>x</mi>
842
1
            </mrow>";
843
1
        let intent = "<vector data-from-mathml='mrow' data-intent-property=':function:'>
844
1
                                <mn data-from-mathml='mn'>1</mn>
845
1
                                <mn data-from-mathml='mn'>0.0</mn>
846
1
                                <mn data-from-mathml='mn'>0.1</mn>
847
1
                                <mn data-from-mathml='mn'>-23</mn>
848
1
                                <mn data-from-mathml='mn'>-0.1234</mn>
849
1
                                <mi data-from-mathml='mi'>last</mi>
850
1
                            </vector>";
851
1
        assert!(test_intent(mathml, intent, "Error"));
852
1
    }
853
854
    #[test]
855
1
    fn intent_with_template_literals() {
856
1
        let mathml = "<mrow intent='1 0.0 0.1 -23 -0.1234 last'>
857
1
                <mi>x</mi>
858
1
            </mrow>";
859
1
        let intent = "<mrow><mn>1</mn><mn>0.</mn><mn>.1</mn><mn>-23</mn><mn>-.1234</mn><mi>last</mi></mrow>";
860
1
        assert!(!test_intent(mathml, intent, "Error"));
861
1
    }
862
863
    #[test]
864
1
    fn intent_with_nested_head() {
865
1
        let mathml = "<mrow intent='$ra($cong)($a,$b)'>
866
1
                <mi arg='a'>A</mi>
867
1
                <mover>
868
1
                    <mo movablelimits='false' arg='ra' intent='map'>⟶</mo>
869
1
                    <mo arg='cong' intent='congruence'>≅</mo>
870
1
                </mover>
871
1
                <mi arg='b'>B</mi>
872
1
            </mrow>";
873
1
        let intent = "<apply-function data-from-mathml='mrow'>
874
1
                                <map data-from-mathml='mrow'>
875
1
                                    <mi data-from-mathml='mo'>congruence</mi>
876
1
                                </map>
877
1
                                <mi data-from-mathml='mi' arg='a'>A</mi>
878
1
                                <mi data-from-mathml='mi' arg='b'>B</mi>
879
1
                            </apply-function>";
880
1
        assert!(test_intent(mathml, intent, "Error"));
881
1
    }
882
883
884
    #[test]
885
1
    fn intent_with_nested_head_and_hints() {
886
1
        let mathml = "<mrow intent='pre:prefix(in:infix($a, x))(post:postfix($b))'>
887
1
                <mi arg='a'>A</mi>
888
1
                <mover>
889
1
                    <mo intent='map'>⟶</mo>
890
1
                    <mo intent='congruence'>≅</mo>
891
1
                </mover>
892
1
                <mi arg='b'>B</mi>
893
1
            </mrow>";
894
1
        let intent = "<apply-function data-from-mathml='mrow'>
895
1
                <pre data-intent-property=':prefix:' data-from-mathml='mrow'>
896
1
                    <in data-intent-property=':infix:' data-from-mathml='mrow'>
897
1
                        <mi data-from-mathml='mi' arg='a'>A</mi>
898
1
                        <mi data-from-mathml='mi'>x</mi>
899
1
                    </in>
900
1
                </pre>
901
1
                <post data-intent-property=':postfix:' data-from-mathml='mrow'>
902
1
                    <mi data-from-mathml='mi' arg='b'>B</mi>
903
1
                </post>
904
1
            </apply-function>";
905
1
        assert!(test_intent(mathml, intent, "Error"));
906
1
    }
907
908
909
    #[test]
910
1
    fn intent_double_indirect_head() {
911
1
        let mathml = "<mrow intent='$m:prefix($c)($a,$b)'>
912
1
                <mi arg='a'>A</mi>
913
1
                <mover>
914
1
                    <mo movablelimits='false' arg='m' intent='map'>⟶</mo>
915
1
                    <mo arg='c' intent='congruence'>≅</mo>
916
1
                </mover>
917
1
                <mi arg='b'>B</mi>
918
1
            </mrow>";
919
1
        let intent = "<apply-function data-from-mathml='mrow'>
920
1
                                <map data-intent-property=':prefix:' data-from-mathml='mrow'>
921
1
                                    <mi data-from-mathml='mo'>congruence</mi>
922
1
                                </map>
923
1
                                <mi data-from-mathml='mi' arg='a'>A</mi>
924
1
                                <mi data-from-mathml='mi' arg='b'>B</mi>
925
1
                            </apply-function>";
926
1
        assert!(test_intent(mathml, intent, "Error"));
927
1
    }
928
929
    #[test]
930
1
    fn intent_missing_open() {
931
1
        let mathml = "<mrow intent='$p $a,$f($b))'>
932
1
                <mi arg='a'>a</mi>
933
1
                <mo arg='p' intent='plus'>+</mo>
934
1
                <mi arg='b'>b</mi>
935
1
                <mo arg='f' intent='factorial'>!</mo>
936
1
            </mrow>";
937
1
        let intent = "<plus> <mi arg='a'>a</mi> <factorial><mi arg='b'>b</mi></factorial> </plus>";
938
1
        assert!(!test_intent(mathml, intent, "Error"));
939
1
    }
940
941
    #[test]
942
1
    fn intent_no_comma() {
943
1
        let mathml = "<mrow intent='$p($a $f($b))'>
944
1
                <mi arg='a'>a</mi>
945
1
                <mo arg='p' intent='plus'>+</mo>
946
1
                <mi arg='b'>b</mi>
947
1
                <mo arg='f' intent='factorial'>!</mo>
948
1
            </mrow>";
949
1
        let intent = "<plus>
950
1
                <mrow>
951
1
                    <mi arg='a'>a</mi>
952
1
                    <factorial> <mi arg='b'>b</mi> </factorial>
953
1
                </mrow>
954
1
            </plus>";
955
1
        assert!(!test_intent(mathml, intent, "Error"));
956
1
    }
957
958
    #[test]
959
1
    fn intent_no_arg() {
960
1
        let mathml = "<mrow intent='factorial()'>
961
1
                <mi arg='a'>a</mi>
962
1
                <mo arg='p' intent='plus'>+</mo>
963
1
                <mi arg='b'>b</mi>
964
1
                <mo arg='f' intent='factorial'>!</mo>
965
1
            </mrow>";
966
1
        let target = "<factorial></factorial>";
967
1
        assert!(!test_intent(mathml, target, "Error"));
968
1
    }
969
970
    #[test]
971
1
    fn intent_illegal_no_arg() {
972
1
        let mathml = "<mrow intent='factorial(()))'>
973
1
                <mi arg='a'>a</mi>
974
1
                <mo arg='p' intent='plus'>+</mo>
975
1
                <mi arg='b'>b</mi>
976
1
                <mo arg='f' intent='factorial'>!</mo>
977
1
            </mrow>";
978
1
        let target = "<factorial></factorial>";
979
1
        assert!(!test_intent(mathml, target, "Error"));
980
1
    }
981
982
    #[test]
983
1
    fn intent_illegal_no_arg_ignore() {
984
1
        let mathml = "<mrow intent='factorial()'>
985
1
                <mi arg='a'>a</mi>
986
1
                <mo arg='p' intent='plus'>+</mo>
987
1
                <mi arg='b'>b</mi>
988
1
                <mo arg='f' intent='factorial'>!</mo>
989
1
            </mrow>";
990
1
        let target = "<mrow data-from-mathml='mrow' intent='factorial()'>
991
1
                                <mi data-from-mathml='mi' arg='a'>a</mi>
992
1
                                <mi data-from-mathml='mo'>plus</mi>
993
1
                                <mi data-from-mathml='mi' arg='b'>b</mi>
994
1
                                <mi data-from-mathml='mo'>factorial</mi>
995
1
                            </mrow>";
996
1
        assert!(test_intent(mathml, target, "IgnoreIntent"));
997
1
    }
998
999
    #[test]
1000
1
    fn intent_illegal_self_ref() {
1001
1
        let mathml = "<mrow intent='foo:is-foolish:function($b)'>
1002
1
                <mi intent='$b:int' arg='b'>b</mi>
1003
1
            </mrow>";
1004
1
        let target = "<foo data-intent-property=':function:' data-intent-type='is-foolish'><mi data-intent-type='int'>b</mi></foo>";
1005
1
        assert!(!test_intent(mathml, target, "Error"));
1006
1
    }
1007
1008
    #[test]
1009
1
    fn infer_missing_second_arg() {
1010
1
        let mathml = "<mrow intent='binomial($n,)'>
1011
1
                <mo>(</mo>
1012
1
                <mfrac linethickness='0'> <mn arg='n'>7</mn> <mn arg='m'>3</mn> </mfrac>
1013
1
                <mo>)</mo>
1014
1
            </mrow>";
1015
1
        let target = "<binomial data-intent-property='binomial($n,)'> \n
1016
1
                             <mn data-from-mathml='mn' arg='n'>7</mn> <mn data-from-mathml='mn' arg='m'>3</mn>  </binomial>";
1017
1
        assert!(!test_intent(mathml, target, "Error"));
1018
1
    }
1019
1020
    #[test]
1021
1
    fn infer_missing_second_arg_ignore() {
1022
1
        let mathml = "<mrow intent='binomial($n,)'>
1023
1
                <mo>(</mo>
1024
1
                <mfrac linethickness='0'> <mn arg='n'>7</mn> <mn arg='m'>3</mn> </mfrac>
1025
1
                <mo>)</mo>
1026
1
            </mrow>";
1027
1
        let target = "<mrow data-from-mathml='mrow' intent='binomial($n,)'>
1028
1
                <mo data-from-mathml='mo'>(</mo>
1029
1
                <fraction data-from-mathml='mfrac' linethickness='0'> <mn data-from-mathml='mn' arg='n'>7</mn> <mn data-from-mathml='mn' arg='m'>3</mn> </fraction>
1030
1
                <mo data-from-mathml='mo'>)</mo>
1031
1
            </mrow>";
1032
1
        assert!(test_intent(mathml, target, "IgnoreIntent"));
1033
1
    }   
1034
1035
    #[test]
1036
1
    fn plane1_char_in_concept_name() {
1037
1
        let mathml = "<math><mrow><mo intent='🐇'>&#x1F407;</mo><mi>X</mi></mrow></math>";
1038
1
        let intent = "<math data-from-mathml='math'>
1039
1
                                <mrow data-from-mathml='mrow'>
1040
1
                                    <mi data-from-mathml='mo'>🐇</mi>
1041
1
                                    <mi data-from-mathml='mi'>X</mi>
1042
1
                                </mrow>
1043
1
                            </math>";
1044
1
        assert!(test_intent(mathml, intent, "Error"));
1045
1
    }   
1046
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/interface.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/interface.rs.html index c836293e..eff97a37 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/interface.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/interface.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/interface.rs
Line
Count
Source
1
//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2
//!
3
#![allow(non_snake_case)]
4
#![allow(clippy::needless_return)]
5
use std::cell::RefCell;
6
use std::sync::LazyLock;
7
8
use crate::canonicalize::{as_text, create_mathml_element};
9
use crate::errors::*;
10
use phf::phf_map;
11
use regex::{Captures, Regex};
12
use sxd_document::dom::{Element, Document, ChildOfRoot, ChildOfElement, Attribute};
13
use sxd_document::parser;
14
use sxd_document::Package;
15
16
use crate::canonicalize::{as_element, name};
17
use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
18
use log::{debug, error};
19
20
use crate::navigate::*;
21
use crate::pretty_print::mml_to_string;
22
use crate::xpath_functions::{is_leaf, IsNode};
23
use std::panic::{catch_unwind, AssertUnwindSafe};
24
25
/// Maximum depth to prevent stack overflow on deeply nested MathML
26
pub const MAX_DEPTH: usize = 512;
27
28
#[cfg(feature = "enable-logs")]
29
use std::sync::Once;
30
#[cfg(feature = "enable-logs")]
31
static INIT: Once = Once::new();
32
33
45.0k
fn enable_logs() {
34
    #[cfg(feature = "enable-logs")]
35
    INIT.call_once(||{
36
        #[cfg(target_os = "android")]
37
        {
38
            use log::*;
39
            use android_logger::*;
40
        
41
            android_logger::init_once(
42
                Config::default()
43
                .with_max_level(LevelFilter::Trace)
44
                .with_tag("MathCat")
45
            );    
46
            trace!("Activated Android logger!");  
47
        }    
48
    });
49
45.0k
}
50
51
// For getting a message from a panic
52
thread_local! {
53
    // Stores (Message, File, Line)
54
    static PANIC_INFO: RefCell<Option<(String, String, u32)>> = const { RefCell::new(None) };
55
}
56
57
/// Initialize the panic handler to catch panics and store the message, file, and line number in `PANIC_INFO`.
58
13.5k
pub fn init_panic_handler() {
59
    use std::panic;
60
61
13.5k
    panic::set_hook(Box::new(|info| 
{1
62
1
        let location = info.location()
63
1
            .map(|l| format!("{}:{}", l.file(), l.line()))
64
1
            .unwrap_or_else(|| 
"unknown"0
.
to_string0
());
65
66
1
        let payload = info.payload();
67
1
        let msg = if let Some(
s0
) = payload.downcast_ref::<&'static str>() {
68
0
            s.to_string()
69
1
        } else if let Some(s) = payload.downcast_ref::<String>() {
70
1
            s.clone()
71
        } else {
72
0
            "Unknown panic payload".to_string()
73
        };
74
75
        // Use try_with/try_borrow_mut to ensure the hook never panics itself
76
1
        let _ = PANIC_INFO.try_with(|cell| {
77
1
            if let Ok(mut slot) = cell.try_borrow_mut() {
78
1
                *slot = Some((msg, location, 0));
79
1
            
}0
80
1
        });
81
1
    }));
82
13.5k
}
83
84
41.1k
pub fn report_any_panic<T>(result: Result<Result<T, Error>, Box<dyn std::any::Any + Send>>) -> Result<T, Error> {
85
41.1k
    match result {
86
41.1k
        Ok(val) => val,
87
        Err(_) => {
88
            // Retrieve the smuggled info
89
1
            let details = PANIC_INFO.with(|cell| cell.borrow_mut().take());
90
            
91
1
            if let Some((msg, file, line)) = details {
92
1
                Err(anyhow::anyhow!(
93
1
                    "MathCAT crash! Please report the following information: '{}' at {}:{}",
94
1
                    msg, file, line
95
1
                ))
96
            } else {
97
0
                Err(anyhow::anyhow!("MathCAT crash! -- please report"))
98
            }
99
        }
100
    }
101
41.1k
} 
102
103
// wrap up some common functionality between the call from 'main' and AT
104
4.91k
fn cleanup_mathml(mathml: Element) -> Result<Element> {
105
4.91k
    trim_element(mathml, false);
106
4.91k
    let 
mathml4.91k
= crate::canonicalize::canonicalize(mathml)
?1
;
107
4.91k
    let mathml = add_ids(mathml);
108
4.91k
    return Ok(mathml);
109
4.91k
}
110
111
thread_local! {
112
    /// The current node being navigated (also spoken and brailled) is stored in `MATHML_INSTANCE`.
113
    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
114
}
115
116
3.92k
fn init_mathml_instance() -> RefCell<Package> {
117
3.92k
    let package = parser::parse("<math></math>")
118
3.92k
        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
119
3.92k
    return RefCell::new(package);
120
3.92k
}
121
122
/// Set the Rules directory
123
/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
124
5.08k
pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
125
5.08k
    enable_logs();
126
5.08k
    init_panic_handler();
127
5.08k
    let dir = dir.as_ref().to_string();
128
5.08k
    let result = catch_unwind(AssertUnwindSafe(|| {
129
        use std::path::PathBuf;
130
5.08k
        let dir_os = if dir.is_empty() {
131
0
            std::env::var_os("MathCATRulesDir").unwrap_or_default()
132
        } else {
133
5.08k
            std::ffi::OsString::from(&dir)
134
        };
135
5.08k
        let pref_manager = crate::prefs::PreferenceManager::get();
136
5.08k
        pref_manager.borrow_mut().initialize(PathBuf::from(dir_os))
137
5.08k
    }));
138
5.08k
    return report_any_panic(result);
139
5.08k
}
140
141
/// Returns the version number (from Cargo.toml) of the build
142
0
pub fn get_version() -> String {
143
0
    enable_logs();
144
    const VERSION: &str = env!("CARGO_PKG_VERSION");
145
0
    return VERSION.to_string();
146
0
}
147
148
/// This will override any previous MathML that was set.
149
/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
150
/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
151
4.88k
pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
152
4.88k
    enable_logs();
153
    // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
154
3
    static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
155
3
    static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
156
157
    // Strip out processing instructions and comments -- these are not MathML and can cause DOS problems in the parser
158
3
    static PROCESSING_INSTRUCTION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"<\?[\s\S]{1,2048}\?>"#).unwrap());
159
3
    static XML_COMMENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(?s)"#).unwrap());
160
161
    // These have some length limits to avoid DOS attacks via long strings
162
3
    static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]{1,32}"#).unwrap());
163
3
    static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]{1,32}:"#).unwrap());
164
3
    static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]{2,10});"#).unwrap());
165
4.88k
    let result = catch_unwind(AssertUnwindSafe(|| {
166
4.88k
        NAVIGATION_STATE.with(|nav_stack| {
167
4.88k
            nav_stack.borrow_mut().reset();
168
4.88k
        });
169
170
        // We need the main definitions files to be read in so canonicalize can work.
171
        // This call reads all of them for the current preferences, but that's ok since they will likely be used
172
4.88k
        crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())
?0
;
173
174
4.88k
        let mathml_str = mathml_str.as_ref();
175
        // Safety guard: Reject strings > 1MB to prevent DoS/Stack issues
176
4.88k
        if mathml_str.len() > 1024 * 1024 {
177
0
            bail!("MathML string of size {} bytes exceeds length limit of 1MB", mathml_str.len());
178
4.88k
        }
179
180
4.88k
        return MATHML_INSTANCE.with(|old_package| {
181
            static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
182
183
4.88k
            let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
184
                                                                     
185
4.88k
            let mathml_str = XML_COMMENT.replace_all(mathml_str, "");
186
4.88k
            let mathml_str = PROCESSING_INSTRUCTION.replace_all(&mathml_str, "");
187
            // FIX: need to deal with character data and convert to something the parser knows
188
4.88k
            let mathml_str = HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match 
HTML_ENTITIES_MAPPING96
.
get96
(&cap[1]) {
189
                    None => {
190
1
                        error_message = format!("No entity named '{}'", &cap[0]);
191
1
                        cap[0].to_string()
192
                    }
193
95
                    Some(&ch) => ch.to_string(),
194
96
                });
195
196
4.88k
            if !error_message.is_empty() {
197
                // Clear stale state so subsequent API calls do not return previous user's data (security issue)
198
1
                old_package.replace(parser::parse("<math></math>").unwrap());
199
1
                bail!(error_message);
200
4.88k
            }
201
4.88k
            let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
202
4.88k
            let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
203
204
            // the speech rules use the xpath "name" function and that includes the prefix
205
            // getting rid of the prefix properly probably involves a recursive replacement in the tree
206
            // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
207
4.88k
            let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
208
4.88k
            let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
209
210
4.88k
            let new_package = parser::parse(&mathml_str);
211
4.88k
            if let Err(
e1
) = new_package {
212
                // Clear stale state so subsequent API calls do not return previous user's data (security issue)
213
1
                old_package.replace(parser::parse("<math></math>").unwrap());
214
1
                bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
215
4.88k
            }
216
217
4.88k
            let new_package = new_package.unwrap();
218
4.88k
            let mathml = get_element(&new_package);
219
4.88k
            let 
mathml4.88k
= cleanup_mathml(mathml)
?1
;
220
4.88k
            let mathml_string = mml_to_string(mathml);
221
4.88k
            old_package.replace(new_package);
222
223
4.88k
            return Ok(mathml_string);
224
4.88k
        });
225
4.88k
    }));
226
227
4.88k
    return report_any_panic(result);
228
4.88k
}
229
230
/// Get the spoken text of the MathML that was set.
231
/// The speech takes into account any AT or user preferences.
232
3.46k
pub fn get_spoken_text() -> Result<String> {
233
3.46k
    enable_logs();
234
3.46k
    let result = catch_unwind(AssertUnwindSafe(|| {
235
3.46k
        MATHML_INSTANCE.with(|package_instance| {
236
3.46k
            let package_instance = package_instance.borrow();
237
3.46k
            let mathml = get_element(&package_instance);
238
3.46k
            let new_package = Package::new();
239
3.46k
            let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())
?0
;
240
3.46k
            debug!("Intent tree:\n{}", 
mml_to_string0
(
intent0
));
241
3.46k
            let speech = crate::speech::speak_mathml(intent, "", 0)
?0
;
242
3.46k
            return Ok(speech);
243
3.46k
        })
244
3.46k
    }));
245
3.46k
    return report_any_panic(result);
246
3.46k
}
247
248
/// Get the spoken text for an overview of the MathML that was set.
249
/// The speech takes into account any AT or user preferences.
250
/// Note: this implementation for is currently minimal and should not be used.
251
0
pub fn get_overview_text() -> Result<String> {
252
0
    enable_logs();
253
0
    let result = catch_unwind(AssertUnwindSafe(|| {
254
0
        MATHML_INSTANCE.with(|package_instance| {
255
0
            let package_instance = package_instance.borrow();
256
0
            let mathml = get_element(&package_instance);
257
0
            let speech = crate::speech::overview_mathml(mathml, "", 0)?;
258
0
            return Ok(speech);
259
0
        })
260
0
    }));
261
0
    return report_any_panic(result);
262
0
}
263
264
/// Get the value of the named preference.
265
/// None is returned if `name` is not a known preference.
266
100
pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
267
100
    enable_logs();
268
100
    let name = name.as_ref().to_string();
269
100
    let result = catch_unwind(AssertUnwindSafe(|| {
270
        use crate::prefs::NO_PREFERENCE;
271
100
        crate::speech::SPEECH_RULES.with(|rules| {
272
100
            let rules = rules.borrow();
273
100
            let pref_manager = rules.pref_manager.borrow();
274
100
            let mut value = pref_manager.pref_to_string(&name);
275
100
            if value == NO_PREFERENCE {
276
1
                value = pref_manager.pref_to_string(&name);
277
99
            }
278
100
            if value == NO_PREFERENCE {
279
1
                bail!("No preference named '{}'", name);
280
            } else {
281
99
                return Ok(value);
282
            }
283
100
        })
284
100
    }));
285
100
    return report_any_panic(result);
286
100
}
287
288
/// Set a MathCAT preference. The preference name should be a known preference name.
289
/// The value should either be a string or a number (depending upon the preference being set)
290
/// The list of known user preferences is in the MathCAT user documentation.
291
/// Here are common preferences set by programs (not settable by the user):
292
/// * TTS -- SSML, SAPI5, None
293
/// * Pitch -- normalized at '1.0'
294
/// * Rate -- words per minute (should match current speech rate).
295
///   There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
296
/// * Volume -- default 100
297
/// * Voice -- set a voice to use (not implemented)
298
/// * Gender -- set pick any voice of the given gender (not implemented)
299
/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
300
///
301
/// Important: both the preference name and value are case-sensitive
302
///
303
/// This function can be called multiple times to set different values.
304
/// The values are persistent and extend beyond calls to [`set_mathml`].
305
/// A value can be overwritten by calling this function again with a different value.
306
///
307
/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
308
17.7k
pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
309
17.7k
    enable_logs();
310
17.7k
    let name = name.as_ref().to_string();
311
17.7k
    let value = value.as_ref().to_string();
312
17.7k
    let result = catch_unwind(AssertUnwindSafe(|| {
313
17.7k
        set_preference_impl(&name, &value)
314
17.7k
    }));
315
17.7k
    return report_any_panic(result);
316
17.7k
}
317
318
17.7k
fn set_preference_impl(name: &str, value: &str) -> Result<()> {
319
17.7k
    let mut value = value.to_string();
320
17.7k
    if name == "Language" || 
name == "LanguageAuto"12.7k
{
321
        // check the format
322
5.02k
        if value != "Auto" {
323
            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
324
5.02k
            let mut lang_country_split = value.split('-');
325
5.02k
            let language = lang_country_split.next().unwrap_or("");
326
5.02k
            let country = lang_country_split.next().unwrap_or("");
327
5.02k
            if language.len() != 2 {
328
0
                bail!(
329
                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
330
                    value
331
                );
332
5.02k
            }
333
5.02k
            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
334
5.02k
            if !country.is_empty() {
335
321
                new_lang_country.push('-');
336
321
                new_lang_country.push_str(country);
337
4.70k
            }
338
5.02k
            value = new_lang_country;
339
0
        }
340
5.02k
        if name == "LanguageAuto" && 
value == "Auto"0
{
341
0
            bail!("'LanguageAuto' can not have the value 'Auto'");
342
5.02k
        }
343
12.7k
    }
344
345
17.7k
    crate::speech::SPEECH_RULES.with(|rules| {
346
17.7k
        let rules = rules.borrow_mut();
347
17.7k
        if let Some(
error_string0
) = rules.get_error() {
348
0
            bail!("{}", error_string);
349
17.7k
        }
350
351
        // we set the value even if it was the same as the old value because this might override a potentially changed future user value
352
17.7k
        let mut pref_manager = rules.pref_manager.borrow_mut();
353
17.7k
        if name == "LanguageAuto" {
354
0
            let language_pref = pref_manager.pref_to_string("Language");
355
0
            if language_pref != "Auto" {
356
0
                bail!(
357
                    "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
358
                    language_pref
359
                );
360
0
            }
361
17.7k
        }
362
17.7k
        let lower_case_value = value.to_lowercase();
363
17.7k
        if lower_case_value == "true" || 
lower_case_value == "false"17.6k
{
364
1.50k
            pref_manager.set_api_boolean_pref(name, value.to_lowercase() == "true");
365
1.50k
        } else {
366
16.2k
            match name {
367
16.2k
                "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
368
0
                    pref_manager.set_api_float_pref(name, to_float(name, &value)?)
369
                }
370
                _ => {
371
16.2k
                    pref_manager.set_string_pref(name, &value)
?0
;
372
                }
373
            }
374
        };
375
17.7k
        return Ok::<(), Error>(());
376
17.7k
    })
?0
;
377
378
17.7k
    return Ok(());
379
17.7k
}
380
381
0
fn to_float(name: &str, value: &str) -> Result<f64> {
382
0
    return match value.parse::<f64>() {
383
0
        Ok(val) => Ok(val),
384
0
        Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
385
    };
386
0
}
387
388
/// Get the braille associated with the MathML that was set by [`set_mathml`].
389
/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
390
/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
391
1.36k
pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
392
1.36k
    enable_logs();
393
1.36k
    let nav_node_id = nav_node_id.as_ref().to_string();
394
1.36k
    let result = catch_unwind(AssertUnwindSafe(|| {
395
1.36k
        MATHML_INSTANCE.with(|package_instance| {
396
1.36k
            let package_instance = package_instance.borrow();
397
1.36k
            let mathml = get_element(&package_instance);
398
1.36k
            let braille = crate::braille::braille_mathml(mathml, &nav_node_id)
?0
.0;
399
1.36k
            return Ok(braille);
400
1.36k
        })
401
1.36k
    }));
402
1.36k
    return report_any_panic(result);
403
1.36k
}
404
405
/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
406
/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
407
/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
408
0
pub fn get_navigation_braille() -> Result<String> {
409
0
    enable_logs();
410
0
    let result = catch_unwind(AssertUnwindSafe(|| {
411
0
        MATHML_INSTANCE.with(|package_instance| {
412
0
            let package_instance = package_instance.borrow();
413
0
            let mathml = get_element(&package_instance);
414
0
            let new_package = Package::new(); // used if we need to create a new tree
415
0
            let new_doc = new_package.as_document();
416
0
            let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
417
0
                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
418
0
                    Err(e) => Err(e),
419
0
                    Ok((found, offset)) => {
420
                        // get the MathML node and wrap it inside of a <math> element
421
                        // if the offset is given, we need to get the character it references
422
0
                        if offset == 0 {
423
0
                            if name(found) == "math" {
424
0
                                Ok(found)
425
                            } else {
426
0
                                let new_mathml = create_mathml_element(&new_doc, "math");
427
0
                                new_mathml.append_child(copy_mathml(found));
428
0
                                new_doc.root().append_child(new_mathml);
429
0
                                Ok(new_mathml)
430
                            }
431
0
                        } else if !is_leaf(found) {
432
0
                            bail!(
433
                                "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
434
                                offset,
435
0
                                name(found)
436
                            );
437
0
                        } else if let Some(ch) = as_text(found).chars().nth(offset) {
438
0
                            let internal_mathml = create_mathml_element(&new_doc, name(found));
439
0
                            internal_mathml.set_text(&ch.to_string());
440
0
                            let new_mathml = create_mathml_element(&new_doc, "math");
441
0
                            new_mathml.append_child(internal_mathml);
442
0
                            new_doc.root().append_child(new_mathml);
443
0
                            Ok(new_mathml)
444
                        } else {
445
0
                            bail!(
446
                                "Internal error: offset '{}' on leaf element '{}' doesn't exist",
447
                                offset,
448
0
                                mml_to_string(found)
449
                            );
450
                        }
451
                    }
452
                };
453
0
            })?;
454
455
0
            let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
456
0
            return Ok(braille);
457
0
        })
458
0
    }));
459
0
    return report_any_panic(result);
460
0
}
461
462
/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
463
/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
464
/// The spoken text for the new current node is returned.
465
0
pub fn do_navigate_keypress(
466
0
    key: usize,
467
0
    shift_key: bool,
468
0
    control_key: bool,
469
0
    alt_key: bool,
470
0
    meta_key: bool,
471
0
) -> Result<String> {
472
0
    enable_logs();
473
0
    let result = catch_unwind(AssertUnwindSafe(|| {
474
0
        MATHML_INSTANCE.with(|package_instance| {
475
0
            let package_instance = package_instance.borrow();
476
0
            let mathml = get_element(&package_instance);
477
0
            return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
478
0
        })
479
0
    }));
480
0
    return report_any_panic(result);
481
0
}
482
483
/// Given a navigation command, the current node is moved accordingly.
484
/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
485
/// The valid commands are:
486
/// * Standard move commands:
487
///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
488
/// * Movement in a table or elementary math:
489
///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
490
/// * Moving into children or out to parents:
491
///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
492
/// * Undo the last movement command:
493
///   `MoveLastLocation`
494
/// * Read commands (standard speech):
495
///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
496
/// * Describe commands (overview):
497
///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
498
/// * Location information:
499
///   `WhereAmI`, `WhereAmIAll`
500
/// * Change navigation modes (circle up/down):
501
///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
502
/// * Speak the current navigation mode
503
///   `ToggleSpeakMode`
504
///
505
/// There are 10 place markers that can be set/read/described or moved to.
506
/// * Setting:
507
///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
508
/// * Reading:
509
///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
510
/// * Describing:
511
///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
512
/// * Moving:
513
///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
514
///
515
/// When done with Navigation, call with `Exit`
516
0
pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
517
0
    enable_logs();
518
0
    let command = command.as_ref().to_string();
519
0
    let result = catch_unwind(AssertUnwindSafe(|| {
520
0
        let cmd = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
521
0
        if cmd.is_none() {
522
0
            bail!("Unknown command in call to DoNavigateCommand()");
523
0
        };
524
0
        let cmd = *cmd.unwrap();
525
0
        MATHML_INSTANCE.with(|package_instance| {
526
0
            let package_instance = package_instance.borrow();
527
0
            let mathml = get_element(&package_instance);
528
0
            return do_navigate_command_string(mathml, cmd);
529
0
        })
530
0
    }));
531
0
    return report_any_panic(result);
532
0
}
533
534
/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
535
/// An error is returned if the 'id' doesn't exist
536
2
pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
537
2
    enable_logs();
538
2
    let id = id.as_ref().to_string();
539
2
    let result = catch_unwind(AssertUnwindSafe(|| {
540
2
        MATHML_INSTANCE.with(|package_instance| {
541
2
            let package_instance = package_instance.borrow();
542
2
            let mathml = get_element(&package_instance);
543
2
            return set_navigation_node_from_id(mathml, &id, offset);
544
2
        })
545
2
    }));
546
2
    return report_any_panic(result);
547
2
}
548
549
/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
550
/// The offset is needed for token elements that have multiple characters.
551
0
pub fn get_navigation_mathml() -> Result<(String, usize)> {
552
0
    enable_logs();
553
0
    let result = catch_unwind(AssertUnwindSafe(|| {
554
0
        MATHML_INSTANCE.with(|package_instance| {
555
0
            let package_instance = package_instance.borrow();
556
0
            let mathml = get_element(&package_instance);
557
0
            return NAVIGATION_STATE.with(|nav_stack| {
558
0
                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
559
0
                    Err(e) => Err(e),
560
0
                    Ok((found, offset)) => Ok((mml_to_string(found), offset)),
561
                };
562
0
            });
563
0
        })
564
0
    }));
565
0
    return report_any_panic(result);
566
0
}
567
568
/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
569
/// `offset` (not yet implemented)
570
/// The offset is needed for token elements that have multiple characters.
571
2
pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
572
2
    enable_logs();
573
2
    let result = catch_unwind(AssertUnwindSafe(|| {
574
2
        MATHML_INSTANCE.with(|package_instance| {
575
2
            let package_instance = package_instance.borrow();
576
2
            let mathml = get_element(&package_instance);
577
2
            return Ok(NAVIGATION_STATE.with(|nav_stack| {
578
2
                return nav_stack.borrow().get_navigation_mathml_id(mathml);
579
2
            }));
580
2
        })
581
2
    }));
582
2
    return report_any_panic(result);
583
2
}
584
585
/// Return the start and end braille character positions associated with the current (navigation) node.
586
2
pub fn get_braille_position() -> Result<(usize, usize)> {
587
2
    enable_logs();
588
2
    let result = catch_unwind(AssertUnwindSafe(|| {
589
2
        MATHML_INSTANCE.with(|package_instance| {
590
2
            let package_instance = package_instance.borrow();
591
2
            let mathml = get_element(&package_instance);
592
2
            let nav_node = get_navigation_mathml_id()
?0
;
593
2
            let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)
?0
;
594
2
            return Ok((start, end));
595
2
        })
596
2
    }));
597
2
    return report_any_panic(result);
598
2
}
599
600
/// Given a 0-based braille position, return the smallest MathML node enclosing it.
601
/// This node might be a leaf with an offset.
602
91
pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
603
91
    enable_logs();
604
91
    let result = catch_unwind(AssertUnwindSafe(|| {
605
91
        MATHML_INSTANCE.with(|package_instance| {
606
91
            let package_instance = package_instance.borrow();
607
91
            let mathml = get_element(&package_instance);
608
91
            return crate::braille::get_navigation_node_from_braille_position(mathml, position);
609
91
        })
610
91
    }));
611
91
    return report_any_panic(result);
612
91
}
613
614
0
pub fn get_supported_braille_codes() -> Result<Vec<String>> {
615
0
    enable_logs();
616
0
    let result = catch_unwind(AssertUnwindSafe(|| {
617
0
        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
618
0
        let braille_dir = rules_dir.join("Braille");
619
0
        let mut braille_code_paths = Vec::new();
620
621
0
        find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
622
0
        let mut braille_code_paths = braille_code_paths.iter()
623
0
                        .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
624
0
                        .filter(|string_path| !string_path.is_empty() )
625
0
                        .collect::<Vec<String>>();
626
0
        braille_code_paths.sort();
627
628
0
        Ok(braille_code_paths)
629
0
    }));
630
0
    return report_any_panic(result);
631
0
 }
632
633
/// Returns a Vec of all supported languages ("en", "es", ...)
634
1
pub fn get_supported_languages() -> Result<Vec<String>> {
635
1
    enable_logs();
636
1
    let result = catch_unwind(AssertUnwindSafe(|| {
637
1
        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
638
1
        let lang_dir = rules_dir.join("Languages");
639
1
        let mut lang_paths = Vec::new();
640
641
1
        find_all_dirs_shim(&lang_dir, &mut lang_paths);
642
1
        let mut language_paths = lang_paths.iter()
643
13
                        .
map1
(|path| path.strip_prefix(&lang_dir).unwrap()
644
13
                                                  .to_string_lossy()
645
13
                                                  .replace(std::path::MAIN_SEPARATOR, "-")
646
13
                                                  .to_string())
647
13
                        .
filter1
(|string_path| !string_path.is_empty() )
648
1
                        .collect::<Vec<String>>();
649
650
        // make sure the 'zz' test dir isn't included (build.rs removes it, but for debugging is there)
651
13
        
language_paths1
.
retain1
(|s| !s.starts_with("zz"));
652
1
        language_paths.sort();
653
1
        Ok(language_paths)
654
1
    }));
655
1
    return report_any_panic(result);
656
1
 }
657
658
0
 pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Result<Vec<String>> {
659
0
    enable_logs();
660
0
    let lang = lang.as_ref().to_string();
661
0
    let result = catch_unwind(AssertUnwindSafe(|| {
662
0
        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
663
0
        let lang_dir = rules_dir.join("Languages").join(&lang);
664
0
        let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
665
0
        for file_name in &mut speech_styles {
666
0
            file_name.truncate(file_name.len() - "_Rules.yaml".len())
667
        }
668
0
        speech_styles.sort();
669
0
        speech_styles.dedup(); // remove duplicates -- shouldn't be any, but just in case
670
0
        Ok(speech_styles)
671
0
    }));
672
0
    return report_any_panic(result);
673
0
 }
674
675
// utility functions
676
677
/// Copy (recursively) the (MathML) element and return the new one.
678
/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
679
/// Convert the returned error from set_mathml, etc., to a useful string for display
680
363
pub fn copy_mathml(mathml: Element) -> Element {
681
363
    return copy_mathml_recursive(mathml, 0);
682
363
}
683
684
4.53k
fn copy_mathml_recursive(mathml: Element, depth: usize) -> Element {
685
    // Safety: Prevent stack overflow on deeply nested MathML
686
4.53k
    if depth > MAX_DEPTH {
687
        // Return the element as a leaf if it's too deep to prevent crash
688
0
        return create_mathml_element(&mathml.document(), name(mathml));
689
4.53k
    }
690
691
    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
692
4.53k
    let children = mathml.children();
693
4.53k
    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
694
9.52k
    
mathml.attributes().iter()4.53k
.
for_each4.53k
(|attr| {
695
9.52k
        new_mathml.set_attribute_value(attr.name(), attr.value());
696
9.52k
    });
697
698
    // can't use is_leaf/as_text because this is also used with the intent tree
699
4.53k
    if children.len() == 1 &&
700
3.26k
       let Some(
text2.59k
) = children[0].text() {
701
2.59k
        new_mathml.set_text(text.text());
702
2.59k
        return new_mathml;
703
1.93k
        }
704
705
1.93k
    let mut new_children = Vec::with_capacity(children.len());
706
4.17k
    for child in 
children1.93k
{
707
4.17k
        let child = as_element(child);
708
4.17k
        let new_child = copy_mathml_recursive(child, depth + 1);
709
4.17k
        new_children.push(new_child);
710
4.17k
    }
711
1.93k
    new_mathml.append_children(new_children);
712
1.93k
    return new_mathml;
713
4.53k
}
714
715
0
pub fn errors_to_string(e: &Error) -> String {
716
0
    enable_logs();
717
0
    let mut result = format!("{e}\n");
718
0
    for cause in e.chain().skip(1) { // skips original error
719
0
        result += &format!("caused by: {cause}\n");
720
0
    }
721
0
    result
722
0
}
723
724
4.91k
fn add_ids(mathml: Element) -> Element {
725
    use std::time::SystemTime;
726
4.91k
    let time = if cfg!(target_family = "wasm") {
727
0
        fastrand::usize(..)
728
    } else {
729
4.91k
        SystemTime::now()
730
4.91k
            .duration_since(SystemTime::UNIX_EPOCH)
731
4.91k
            .unwrap()
732
4.91k
            .as_millis() as usize
733
    };
734
4.91k
    let mut time_part = radix_fmt::radix(time, 36).to_string();
735
4.91k
    if time_part.len() < 3 {
736
0
        time_part.push_str("a2c");      // needs to be at least three chars
737
4.91k
    }
738
4.91k
    let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
739
4.91k
    if random_part.len() < 4 {
740
0
        random_part.push_str("a1b2");      // needs to be at least four chars
741
4.91k
    }
742
4.91k
    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
743
4.91k
    add_ids_to_all(mathml, &prefix, 0);
744
4.91k
    return mathml;
745
746
57.8k
    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
747
57.8k
        let mut count = count;
748
57.8k
        if mathml.attribute("id").is_none() {
749
57.1k
            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
750
57.1k
            mathml.set_attribute_value("data-id-added", "true");
751
57.1k
            count += 1;
752
57.1k
        
}707
;
753
754
57.8k
        if crate::xpath_functions::is_leaf(mathml) {
755
35.8k
            return count;
756
22.0k
        }
757
758
52.9k
        for child in 
mathml22.0k
.
children22.0k
() {
759
52.9k
            let child = as_element(child);
760
52.9k
            count = add_ids_to_all(child, id_prefix, count);
761
52.9k
        }
762
22.0k
        return count;
763
57.8k
    }
764
4.91k
}
765
766
10.3k
pub fn get_element(package: &Package) -> Element<'_> {
767
10.3k
    enable_logs();
768
10.3k
    let doc = package.as_document();
769
10.3k
    let mut result = None;
770
10.3k
    for root_child in doc.root().children() {
771
10.3k
        if let ChildOfRoot::Element(e) = root_child {
772
10.3k
            assert!(result.is_none());
773
10.3k
            result = Some(e);
774
0
        }
775
    }
776
10.3k
    return result.unwrap();
777
10.3k
}
778
779
/// Get the intent after setting the MathML
780
/// Used in testing
781
#[allow(dead_code)]
782
32
pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
783
32
    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
784
32
    let mathml = cleanup_mathml(mathml)
?0
;
785
32
    return crate::speech::intent_from_mathml(mathml, doc);
786
32
}
787
788
#[allow(dead_code)]
789
22
fn trim_doc(doc: &Document) {
790
22
    for root_child in doc.root().children() {
791
22
        if let ChildOfRoot::Element(e) = root_child {
792
22
            trim_element(e, false);
793
22
        } else {
794
0
            doc.root().remove_child(root_child); // comment or processing instruction
795
0
        }
796
    }
797
22
}
798
799
/// Not really meant to be public -- used by tests in some packages
800
55.5k
pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
801
    // "<mtext>this is text</mtext" results in 3 text children
802
    // these are combined into one child as it makes code downstream simpler
803
804
    // space, tab, newline, carriage return all get collapsed to a single space
805
    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
806
3
    static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());
807
808
55.5k
    if is_leaf(e) && (
!allow_structure_in_leaves34.7k
||
IsNode::is_mathml230
(
e230
)) {
809
        // Assume it is HTML inside of the leaf -- turn the HTML into a string
810
34.7k
        make_leaf_element(e);
811
34.7k
        return;
812
20.7k
    }
813
814
20.7k
    let mut single_text = "".to_string();
815
87.6k
    for child in 
e20.7k
.
children20.7k
() {
816
87.6k
        match child {
817
50.1k
            ChildOfElement::Element(c) => {
818
50.1k
                trim_element(c, allow_structure_in_leaves);
819
50.1k
            }
820
37.4k
            ChildOfElement::Text(t) => {
821
37.4k
                single_text += t.text();
822
37.4k
                e.remove_child(child);
823
37.4k
            }
824
21
            _ => {
825
21
                e.remove_child(child);
826
21
            }
827
        }
828
    }
829
830
    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
831
20.7k
    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
832
        // intent-literal comes from testing intent
833
        // FIX: we have a problem -- what should happen???
834
        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
835
14.3k
        if !single_text.trim_matches(WHITESPACE).is_empty() {
836
20
            error!(
837
                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
838
            );
839
14.2k
        }
840
14.3k
        return;
841
6.44k
    }
842
6.44k
    if e.children().is_empty() && 
!single_text.is_empty()276
{
843
0
        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
844
0
        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
845
6.44k
    }
846
847
34.7k
    fn make_leaf_element(mathml_leaf: Element) {
848
        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
849
        // Here, we convert them to leaves by grabbing up all the text and making that the content
850
        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
851
        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
852
34.7k
        let children = mathml_leaf.children();
853
34.7k
        if children.is_empty() {
854
503
            return;
855
34.2k
        }
856
857
34.2k
        if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
858
1
            return;
859
34.2k
        }
860
861
        // gather up the text
862
34.2k
        let mut text = "".to_string();
863
34.6k
        for child in 
children34.2k
{
864
34.6k
            let child_text = match child {
865
8
                ChildOfElement::Element(child) => {
866
8
                    if name(child) == "mglyph" {
867
3
                        child.attribute_value("alt").unwrap_or("").to_string()
868
                    } else {
869
5
                        gather_text(child)
870
                    }
871
                }
872
34.4k
                ChildOfElement::Text(t) => {
873
                    // debug!("ChildOfElement::Text: '{}'", t.text());
874
34.4k
                    t.text().to_string()
875
                }
876
222
                _ => "".to_string(),
877
            };
878
34.6k
            if !child_text.is_empty() {
879
34.4k
                text += &child_text;
880
34.4k
            
}223
881
        }
882
883
        // get rid of the old children and replace with the text we just built
884
34.2k
        mathml_leaf.clear_children();
885
34.2k
        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
886
        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));
887
888
        /// gather up all the contents of the element and return them with a leading space
889
7
        fn gather_text(html: Element) -> String {
890
7
            let mut text = "".to_string(); // since we are throwing out the element tag, add a space between the contents
891
7
            for child in html.children() {
892
7
                match child {
893
2
                    ChildOfElement::Element(child) => {
894
2
                        text += &gather_text(child);
895
2
                    }
896
5
                    ChildOfElement::Text(t) => text += t.text(),
897
0
                    _ => (),
898
                }
899
            }
900
            // debug!("gather_text: '{}'", text);
901
7
            return text;
902
7
        }
903
34.7k
    }
904
905
34.2k
    fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
906
        // first see if it can or needs to be rewritten
907
        // this is likely rare, so we do a check and if true, to a second pass building the result
908
34.2k
        let mut needs_rewrite = false;
909
34.6k
        for child in 
mathml_leaf34.2k
.
children34.2k
() {
910
34.6k
            if let Some(
element8
) = child.element() {
911
8
                if name(element) != "math" {
912
7
                    return false; // something other than MathML as a child -- can't rewrite
913
1
                }
914
1
                needs_rewrite = true;
915
34.6k
            }
916
        };
917
918
34.2k
        if !needs_rewrite {
919
34.2k
            return false;
920
1
        }
921
922
        // now do the rewrite, flatting out the mathml and returning an mrow with the children
923
1
        let leaf_name = name(mathml_leaf);
924
1
        let doc = mathml_leaf.document();
925
1
        let mut new_children = Vec::new();
926
1
        let mut is_last_mtext = false;
927
5
        for child in 
mathml_leaf1
.
children1
() {
928
5
            if let Some(
element1
) = child.element() {
929
1
                trim_element(element, true);
930
1
                new_children.append(&mut element.children());   // don't want 'math' wrapper
931
1
                is_last_mtext = false;
932
4
            } else if let Some(text) = child.text() {
933
                // combine adjacent text nodes into single nodes
934
4
                if is_last_mtext {
935
2
                    let last_child = new_children.last_mut().unwrap().element().unwrap();
936
2
                    let new_text = as_text(last_child).to_string() + text.text();
937
2
                    last_child.set_text(&new_text);
938
2
                } else {
939
2
                    let new_leaf_node = create_mathml_element(&doc, leaf_name);
940
2
                    new_leaf_node.set_text(text.text());
941
2
                    new_children.push(ChildOfElement::Element(new_leaf_node));
942
2
                    is_last_mtext = true;
943
2
                }
944
0
            }
945
        };
946
947
        // clean up whitespace in text nodes
948
3
        for child in 
&mut new_children1
{
949
3
            if let Some(element) = child.element() && is_leaf(element) {
950
2
                let text = as_text(element);
951
2
                let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
952
2
                element.set_text(&cleaned_text);
953
2
            
}1
954
        }
955
        
956
1
        crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
957
1
        mathml_leaf.clear_children();
958
1
        mathml_leaf.append_children(new_children);
959
960
        // debug!("rewrite_and_flatten_embedded_mathml: flattened\n'{}'", mml_to_string(mathml_leaf));
961
1
        return true;
962
34.2k
    }
963
55.5k
}
964
965
// used for testing trim
966
/// returns Ok() if two Documents are equal or some info where they differ in the Err
967
#[allow(dead_code)]
968
11
fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
969
    // assume 'e' doesn't have element children until proven otherwise
970
    // this means we keep Text children until we are proven they aren't needed
971
11
    if doc1.root().children().len() != doc2.root().children().len() {
972
0
        bail!(
973
            "Children of docs have {} != {} children",
974
0
            doc1.root().children().len(),
975
0
            doc2.root().children().len()
976
        );
977
11
    }
978
979
11
    for (i, (c1, c2)) in doc1
980
11
        .root()
981
11
        .children()
982
11
        .iter()
983
11
        .zip(doc2.root().children().iter())
984
11
        .enumerate()
985
    {
986
11
        match c1 {
987
11
            ChildOfRoot::Element(e1) => {
988
11
                if let ChildOfRoot::Element(e2) = c2 {
989
11
                    is_same_element(*e1, *e2, &[])
?1
;
990
                } else {
991
0
                    bail!("child #{}, first is element, second is something else", i);
992
                }
993
            }
994
0
            ChildOfRoot::Comment(com1) => {
995
0
                if let ChildOfRoot::Comment(com2) = c2 {
996
0
                    if com1.text() != com2.text() {
997
0
                        bail!("child #{} -- comment text differs", i);
998
0
                    }
999
                } else {
1000
0
                    bail!("child #{}, first is comment, second is something else", i);
1001
                }
1002
            }
1003
0
            ChildOfRoot::ProcessingInstruction(p1) => {
1004
0
                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
1005
0
                    if p1.target() != p2.target() || p1.value() != p2.value() {
1006
0
                        bail!("child #{} -- processing instruction differs", i);
1007
0
                    }
1008
                } else {
1009
0
                    bail!(
1010
                        "child #{}, first is processing instruction, second is something else",
1011
                        i
1012
                    );
1013
                }
1014
            }
1015
        }
1016
    }
1017
10
    return Ok(());
1018
11
}
1019
1020
/// returns Ok() if two Documents are equal or some info where they differ in the Err
1021
// Not really meant to be public -- used by tests in some packages
1022
#[allow(dead_code)]
1023
1.92k
pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
1024
1.92k
    enable_logs();
1025
1.92k
    if name(e1) != name(e2) {
1026
0
        bail!("Names not the same: {}, {}", name(e1), name(e2));
1027
1.92k
    }
1028
1029
    // assume 'e' doesn't have element children until proven otherwise
1030
    // this means we keep Text children until we are proven they aren't needed
1031
1.92k
    if e1.children().len() != e2.children().len() {
1032
0
        bail!(
1033
            "Children of {} have {} != {} children",
1034
0
            name(e1),
1035
0
            e1.children().len(),
1036
0
            e2.children().len()
1037
        );
1038
1.92k
    }
1039
1040
1.92k
    if let Err(
e0
) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
1041
0
        bail!("In element {}, {}", name(e1), e);
1042
1.92k
    }
1043
1044
2.86k
    for (i, (c1, c2)) in 
e1.children().iter()1.92k
.
zip1.92k
(
e2.children().iter()1.92k
).
enumerate1.92k
() {
1045
2.86k
        match c1 {
1046
1.72k
            ChildOfElement::Element(child1) => {
1047
1.72k
                if let ChildOfElement::Element(child2) = c2 {
1048
1.72k
                    is_same_element(*child1, *child2, ignore_attrs)
?2
;
1049
                } else {
1050
0
                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
1051
                }
1052
            }
1053
0
            ChildOfElement::Comment(com1) => {
1054
0
                if let ChildOfElement::Comment(com2) = c2 {
1055
0
                    if com1.text() != com2.text() {
1056
0
                        bail!("{} child #{} -- comment text differs", name(e1), i);
1057
0
                    }
1058
                } else {
1059
0
                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
1060
                }
1061
            }
1062
0
            ChildOfElement::ProcessingInstruction(p1) => {
1063
0
                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
1064
0
                    if p1.target() != p2.target() || p1.value() != p2.value() {
1065
0
                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
1066
0
                    }
1067
                } else {
1068
0
                    bail!(
1069
                        "{} child #{}, first is processing instruction, second is something else",
1070
0
                        name(e1),
1071
                        i
1072
                    );
1073
                }
1074
            }
1075
1.14k
            ChildOfElement::Text(t1) => {
1076
1.14k
                if let ChildOfElement::Text(t2) = c2 {
1077
1.14k
                    if t1.text() != t2.text() {
1078
1
                        bail!("{} child #{} --  text differs", name(e1), i);
1079
1.14k
                    }
1080
                } else {
1081
0
                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
1082
                }
1083
            }
1084
        }
1085
    }
1086
1.91k
    return Ok(());
1087
1088
    /// compares attributes -- '==' didn't seems to work
1089
1.92k
    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
1090
1.92k
        let attrs1 = attrs1.iter()
1091
1.92k
                .filter(|a| !
ignore1.40k
.
contains1.40k
(
&a.name().local_part()1.40k
)).cloned()
1092
1.92k
                .collect::<Vec<Attribute>>();
1093
1.92k
        let attrs2 = attrs2.iter()
1094
1.92k
                .filter(|a| !
ignore1.40k
.
contains1.40k
(
&a.name().local_part()1.40k
)).cloned()
1095
1.92k
                .collect::<Vec<Attribute>>();
1096
1.92k
        if attrs1.len() != attrs2.len() {
1097
0
            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
1098
1.92k
        }
1099
        // can't guarantee attrs are in the same order
1100
1.92k
        for 
attr11.40k
in attrs1 {
1101
1.40k
            if let Some(found_attr2) = attrs2
1102
1.40k
                .iter()
1103
1.88k
                .
find1.40k
(|&attr2| attr1.name().local_part() == attr2.name().local_part())
1104
            {
1105
1.40k
                if attr1.value() == found_attr2.value() {
1106
1.40k
                    continue;
1107
                } else {
1108
0
                    bail!(
1109
                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
1110
0
                        attr1.name().local_part(),
1111
0
                        attr1.value(),
1112
0
                        found_attr2.value()
1113
                    );
1114
                }
1115
            } else {
1116
0
                bail!(
1117
                    "Attribute name {} not in [{}]",
1118
0
                    print_attr(&attr1),
1119
0
                    print_attrs(&attrs2)
1120
                );
1121
            }
1122
        }
1123
1.92k
        return Ok(());
1124
1125
0
        fn print_attr(attr: &Attribute) -> String {
1126
0
            return format!("@{}='{}'", attr.name().local_part(), attr.value());
1127
0
        }
1128
0
        fn print_attrs(attrs: &[Attribute]) -> String {
1129
0
            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
1130
0
        }
1131
1.92k
    }
1132
1.92k
}
1133
1134
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses `xml`, trims the resulting document in place and returns the owning package.
    fn parse_and_trim(xml: &str) -> Package {
        let package = parser::parse(xml).expect("Failed to parse input");
        trim_doc(&package.as_document());
        package
    }

    /// Parses and trims both MathML strings and compares the resulting documents.
    ///
    /// Returns `true` when `is_same_doc` accepts the pair; panics with its error otherwise.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let test_package = parse_and_trim(test);
        debug!("test:\n{}", mml_to_string(get_element(&test_package)));

        let target_package = parse_and_trim(target);
        debug!("target:\n{}", mml_to_string(get_element(&target_package)));

        if let Err(e) = is_same_doc(&test_package.as_document(), &target_package.as_document()) {
            panic!("{}", e);
        }
        true
    }

    #[test]
    fn trim_same() {
        let input = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(input, input));
    }

    #[test]
    fn trim_whitespace() {
        let already_trimmed = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let with_whitespace = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(already_trimmed, with_whitespace));
    }

    #[test]
    fn no_trim_whitespace_nbsp() {
        let already_trimmed = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
        let with_whitespace = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(already_trimmed, with_whitespace));
    }

    #[test]
    fn trim_comment() {
        let with_whitespace = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let with_comment = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        assert!(are_parsed_strs_equal(with_comment, with_whitespace));
    }

    #[test]
    fn replace_mglyph() {
        let with_mglyph = "<math>
                <mrow>
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                    <mo>+</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                    </mi>
                    <mo>=</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                    </mi>
                </mrow>
            </math>";
        let expected = "<math>
            <mrow>
                <mi>X23braid</mi>
                <mo>+</mo>
                <mi>132braidY</mi>
                <mo>=</mo>
                <mi>13braid</mi>
            </mrow>
        </math>";
        assert!(are_parsed_strs_equal(with_mglyph, expected));
    }

    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        // Compared by hand: a mismatch is the expected outcome here, so it must not panic.
        let first = parse_and_trim(whitespace_str);
        debug!("doc1:\n{}", mml_to_string(get_element(&first)));

        let second = parse_and_trim(different_str);
        debug!("doc2:\n{}", mml_to_string(get_element(&second)));

        assert!(is_same_doc(&first.as_document(), &second.as_document()).is_err());
    }

    #[test]
    fn test_entities() {
        // this forces initialization
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        // the ids differ between calls, so they are stripped before comparing
        static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());

        // (input using named entities, equivalent input without them, message on mismatch)
        let cases = [
            (
                "<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>",
                "<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>",
                "normal entity test failed",
            ),
            (
                "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
                "<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>",
                "special entities quote test failed",
            ),
            (
                "<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>",
                "<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>",
                "special entities <,>,& test failed",
            ),
        ];

        for (with_entities, without_entities, failure_message) in cases {
            let entity_str = set_mathml(with_entities).unwrap();
            let converted_str = set_mathml(without_entities).unwrap();
            let entity_str = ID_MATCH.replace_all(&entity_str, "");
            let converted_str = ID_MATCH.replace_all(&converted_str, "");
            assert_eq!(entity_str, converted_str, "{}", failure_message);
        }
    }

    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        use std::env;
        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test
        // might succeed if we don't override the env var
        // SAFETY: the original author's justification is that this runs single threaded.
        unsafe {
            env::set_var("MathCATRulesDir", "MathCATRulesDir");
        }
        assert!(set_rules_dir("someInvalidRulesDir").is_err());
        assert!(
            set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );
        assert!(set_mathml("<math><mn>1</mn></math>").is_ok());
    }

    #[test]
    fn single_html_in_mtext() {
        let input = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(input, expected));
    }

    #[test]
    fn multiple_html_in_mtext() {
        let input = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(input, expected));
    }

    #[test]
    fn nested_html_in_mtext() {
        let input = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(input, expected));
    }

    #[test]
    fn empty_html_in_mtext() {
        let input = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(input, expected));
    }

    #[test]
    fn mathml_in_mtext() {
        let input = "<math><mtext>if&#xa0;<math> <msup><mi>n</mi><mn>2</mn></msup></math>&#xa0;is real</mtext></math>";
        let expected = "<math><mrow><mtext>if&#xa0;</mtext><msup><mi>n</mi><mn>2</mn></msup><mtext>&#xa0;is real</mtext></mrow></math>";
        assert!(are_parsed_strs_equal(input, expected));
    }

    #[test]
    fn stack_overflow_protection() {
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        // one level deeper than MAX_DEPTH allows
        let levels = MAX_DEPTH + 1;
        let bad_mathml = format!(
            "<math>{}{}</math>",
            "<msqrt><mi>n</mi>".repeat(levels),
            "</msqrt>".repeat(levels)
        );
        assert_eq!(set_mathml(bad_mathml).unwrap_err().to_string(), "MathML is too deeply nested to process");
    }

    #[test]
    fn old_mathml_cleared_on_error() {
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        let good_mathml = "<math><mn>3</mn></math>";
        // first an unknown entity, then input the XML parser rejects
        for bad_mathml in ["<math><mi>&xabc;</mi></math>", "<math>garbage"] {
            set_mathml(good_mathml).unwrap();
            assert!(set_mathml(bad_mathml).is_err());
            assert!(get_spoken_text().unwrap().is_empty());
        }
    }
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/interface.rs
Line
Count
Source
1
//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2
//!
3
#![allow(non_snake_case)]
4
#![allow(clippy::needless_return)]
5
use std::cell::RefCell;
6
use std::sync::LazyLock;
7
8
use crate::canonicalize::{as_text, create_mathml_element};
9
use crate::errors::*;
10
use phf::phf_map;
11
use regex::{Captures, Regex};
12
use sxd_document::dom::{Element, Document, ChildOfRoot, ChildOfElement, Attribute};
13
use sxd_document::parser;
14
use sxd_document::Package;
15
16
use crate::canonicalize::{as_element, name};
17
use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
18
use log::{debug, error};
19
20
use crate::navigate::*;
21
use crate::pretty_print::mml_to_string;
22
use crate::xpath_functions::{is_leaf, IsNode};
23
use std::panic::{catch_unwind, AssertUnwindSafe};
24
25
/// Maximum depth to prevent stack overflow on deeply nested MathML
26
pub const MAX_DEPTH: usize = 512;
27
28
#[cfg(feature = "enable-logs")]
29
use std::sync::Once;
30
#[cfg(feature = "enable-logs")]
31
static INIT: Once = Once::new();
32
33
45.0k
/// Performs one-time logger setup; repeated calls go through `INIT.call_once`.
///
/// The whole body is gated on the `enable-logs` feature, and inside that only the
/// Android branch contains code, so on every other configuration this is a no-op.
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(|| {
        #[cfg(target_os = "android")]
        {
            use android_logger::*;
            use log::*;

            let config = Config::default()
                .with_max_level(LevelFilter::Trace)
                .with_tag("MathCat");
            android_logger::init_once(config);
            trace!("Activated Android logger!");
        }
    });
}
50
51
// For getting a message from a panic
thread_local! {
    // Stores (Message, File, Line) for the most recent panic the hook saw on this thread
    static PANIC_INFO: RefCell<Option<(String, String, u32)>> = const { RefCell::new(None) };
}

/// Initialize the panic handler to catch panics and store the message, file, and line number in `PANIC_INFO`.
///
/// The installed hook replaces the current panic hook. For each panic it records:
/// * the message, taken from a `&'static str` or `String` payload
///   (`"Unknown panic payload"` for any other payload type),
/// * the source file, or `"unknown"` when the panic carries no location,
/// * the line number, or `0` when the panic carries no location.
pub fn init_panic_handler() {
    use std::panic;

    panic::set_hook(Box::new(|info| {
        // Keep file and line as separate fields so they line up with the
        // (Message, File, Line) layout that `PANIC_INFO` declares.
        let (file, line) = info
            .location()
            .map(|l| (l.file().to_string(), l.line()))
            .unwrap_or_else(|| ("unknown".to_string(), 0));

        let payload = info.payload();
        let msg = if let Some(s) = payload.downcast_ref::<&'static str>() {
            s.to_string()
        } else if let Some(s) = payload.downcast_ref::<String>() {
            s.clone()
        } else {
            "Unknown panic payload".to_string()
        };

        // Use try_with/try_borrow_mut to ensure the hook never panics itself
        let _ = PANIC_INFO.try_with(|cell| {
            if let Ok(mut slot) = cell.try_borrow_mut() {
                *slot = Some((msg, file, line));
            }
        });
    }));
}
83
84
41.1k
pub fn report_any_panic<T>(result: Result<Result<T, Error>, Box<dyn std::any::Any + Send>>) -> Result<T, Error> {
85
41.1k
    match result {
86
41.1k
        Ok(val) => val,
87
        Err(_) => {
88
            // Retrieve the smuggled info
89
1
            let details = PANIC_INFO.with(|cell| cell.borrow_mut().take());
90
            
91
1
            if let Some((msg, file, line)) = details {
92
1
                Err(anyhow::anyhow!(
93
1
                    "MathCAT crash! Please report the following information: '{}' at {}:{}",
94
1
                    msg, file, line
95
1
                ))
96
            } else {
97
0
                Err(anyhow::anyhow!("MathCAT crash! -- please report"))
98
            }
99
        }
100
    }
101
41.1k
} 
102
103
// wrap up some common functionality between the call from 'main' and AT
104
4.91k
fn cleanup_mathml(mathml: Element) -> Result<Element> {
105
4.91k
    trim_element(mathml, false);
106
4.91k
    let 
mathml4.91k
= crate::canonicalize::canonicalize(mathml)
?1
;
107
4.91k
    let mathml = add_ids(mathml);
108
4.91k
    return Ok(mathml);
109
4.91k
}
110
111
thread_local! {
112
    /// The current node being navigated (also spoken and brailled) is stored in `MATHML_INSTANCE`.
113
    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
114
}
115
116
3.92k
fn init_mathml_instance() -> RefCell<Package> {
117
3.92k
    let package = parser::parse("<math></math>")
118
3.92k
        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
119
3.92k
    return RefCell::new(package);
120
3.92k
}
121
122
/// Set the Rules directory
123
/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
124
5.08k
pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
125
5.08k
    enable_logs();
126
5.08k
    init_panic_handler();
127
5.08k
    let dir = dir.as_ref().to_string();
128
5.08k
    let result = catch_unwind(AssertUnwindSafe(|| {
129
        use std::path::PathBuf;
130
5.08k
        let dir_os = if dir.is_empty() {
131
0
            std::env::var_os("MathCATRulesDir").unwrap_or_default()
132
        } else {
133
5.08k
            std::ffi::OsString::from(&dir)
134
        };
135
5.08k
        let pref_manager = crate::prefs::PreferenceManager::get();
136
5.08k
        pref_manager.borrow_mut().initialize(PathBuf::from(dir_os))
137
5.08k
    }));
138
5.08k
    return report_any_panic(result);
139
5.08k
}
140
141
/// Returns the version number (from Cargo.toml) of the build
142
0
pub fn get_version() -> String {
143
0
    enable_logs();
144
    const VERSION: &str = env!("CARGO_PKG_VERSION");
145
0
    return VERSION.to_string();
146
0
}
147
148
/// This will override any previous MathML that was set.
149
/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
150
/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
151
4.88k
pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
152
4.88k
    enable_logs();
153
    // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
154
3
    static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
155
3
    static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
156
157
    // Strip out processing instructions and comments -- these are not MathML and can cause DOS problems in the parser
158
3
    static PROCESSING_INSTRUCTION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"<\?[\s\S]{1,2048}\?>"#).unwrap());
159
3
    static XML_COMMENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(?s)"#).unwrap());
160
161
    // These have some length limits to avoid DOS attacks via long strings
162
3
    static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]{1,32}"#).unwrap());
163
3
    static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]{1,32}:"#).unwrap());
164
3
    static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]{2,10});"#).unwrap());
165
4.88k
    let result = catch_unwind(AssertUnwindSafe(|| {
166
4.88k
        NAVIGATION_STATE.with(|nav_stack| {
167
4.88k
            nav_stack.borrow_mut().reset();
168
4.88k
        });
169
170
        // We need the main definitions files to be read in so canonicalize can work.
171
        // This call reads all of them for the current preferences, but that's ok since they will likely be used
172
4.88k
        crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())
?0
;
173
174
4.88k
        let mathml_str = mathml_str.as_ref();
175
        // Safety guard: Reject strings > 1MB to prevent DoS/Stack issues
176
4.88k
        if mathml_str.len() > 1024 * 1024 {
177
0
            bail!("MathML string of size {} bytes exceeds length limit of 1MB", mathml_str.len());
178
4.88k
        }
179
180
4.88k
        return MATHML_INSTANCE.with(|old_package| {
181
            static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
182
183
4.88k
            let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
184
                                                                     
185
4.88k
            let mathml_str = XML_COMMENT.replace_all(mathml_str, "");
186
4.88k
            let mathml_str = PROCESSING_INSTRUCTION.replace_all(&mathml_str, "");
187
            // FIX: need to deal with character data and convert to something the parser knows
188
4.88k
            let mathml_str = HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match 
HTML_ENTITIES_MAPPING96
.
get96
(&cap[1]) {
189
                    None => {
190
1
                        error_message = format!("No entity named '{}'", &cap[0]);
191
1
                        cap[0].to_string()
192
                    }
193
95
                    Some(&ch) => ch.to_string(),
194
96
                });
195
196
4.88k
            if !error_message.is_empty() {
197
                // Clear stale state so subsequent API calls do not return previous user's data (security issue)
198
1
                old_package.replace(parser::parse("<math></math>").unwrap());
199
1
                bail!(error_message);
200
4.88k
            }
201
4.88k
            let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
202
4.88k
            let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
203
204
            // the speech rules use the xpath "name" function and that includes the prefix
205
            // getting rid of the prefix properly probably involves a recursive replacement in the tree
206
            // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
207
4.88k
            let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
208
4.88k
            let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
209
210
4.88k
            let new_package = parser::parse(&mathml_str);
211
4.88k
            if let Err(
e1
) = new_package {
212
                // Clear stale state so subsequent API calls do not return previous user's data (security issue)
213
1
                old_package.replace(parser::parse("<math></math>").unwrap());
214
1
                bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
215
4.88k
            }
216
217
4.88k
            let new_package = new_package.unwrap();
218
4.88k
            let mathml = get_element(&new_package);
219
4.88k
            let 
mathml4.88k
= cleanup_mathml(mathml)
?1
;
220
4.88k
            let mathml_string = mml_to_string(mathml);
221
4.88k
            old_package.replace(new_package);
222
223
4.88k
            return Ok(mathml_string);
224
4.88k
        });
225
4.88k
    }));
226
227
4.88k
    return report_any_panic(result);
228
4.88k
}
229
230
/// Get the spoken text of the MathML that was set.
231
/// The speech takes into account any AT or user preferences.
232
3.46k
pub fn get_spoken_text() -> Result<String> {
233
3.46k
    enable_logs();
234
3.46k
    let result = catch_unwind(AssertUnwindSafe(|| {
235
3.46k
        MATHML_INSTANCE.with(|package_instance| {
236
3.46k
            let package_instance = package_instance.borrow();
237
3.46k
            let mathml = get_element(&package_instance);
238
3.46k
            let new_package = Package::new();
239
3.46k
            let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())
?0
;
240
3.46k
            debug!("Intent tree:\n{}", 
mml_to_string0
(
intent0
));
241
3.46k
            let speech = crate::speech::speak_mathml(intent, "", 0)
?0
;
242
3.46k
            return Ok(speech);
243
3.46k
        })
244
3.46k
    }));
245
3.46k
    return report_any_panic(result);
246
3.46k
}
247
248
/// Get the spoken text for an overview of the MathML that was set.
249
/// The speech takes into account any AT or user preferences.
250
/// Note: this implementation for is currently minimal and should not be used.
251
0
pub fn get_overview_text() -> Result<String> {
252
0
    enable_logs();
253
0
    let result = catch_unwind(AssertUnwindSafe(|| {
254
0
        MATHML_INSTANCE.with(|package_instance| {
255
0
            let package_instance = package_instance.borrow();
256
0
            let mathml = get_element(&package_instance);
257
0
            let speech = crate::speech::overview_mathml(mathml, "", 0)?;
258
0
            return Ok(speech);
259
0
        })
260
0
    }));
261
0
    return report_any_panic(result);
262
0
}
263
264
/// Get the value of the named preference.
265
/// None is returned if `name` is not a known preference.
266
100
pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
267
100
    enable_logs();
268
100
    let name = name.as_ref().to_string();
269
100
    let result = catch_unwind(AssertUnwindSafe(|| {
270
        use crate::prefs::NO_PREFERENCE;
271
100
        crate::speech::SPEECH_RULES.with(|rules| {
272
100
            let rules = rules.borrow();
273
100
            let pref_manager = rules.pref_manager.borrow();
274
100
            let mut value = pref_manager.pref_to_string(&name);
275
100
            if value == NO_PREFERENCE {
276
1
                value = pref_manager.pref_to_string(&name);
277
99
            }
278
100
            if value == NO_PREFERENCE {
279
1
                bail!("No preference named '{}'", name);
280
            } else {
281
99
                return Ok(value);
282
            }
283
100
        })
284
100
    }));
285
100
    return report_any_panic(result);
286
100
}
287
288
/// Set a MathCAT preference. The preference name should be a known preference name.
289
/// The value should either be a string or a number (depending upon the preference being set)
290
/// The list of known user preferences is in the MathCAT user documentation.
291
/// Here are common preferences set by programs (not settable by the user):
292
/// * TTS -- SSML, SAPI5, None
293
/// * Pitch -- normalized at '1.0'
294
/// * Rate -- words per minute (should match current speech rate).
295
///   There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
296
/// * Volume -- default 100
297
/// * Voice -- set a voice to use (not implemented)
298
/// * Gender -- set pick any voice of the given gender (not implemented)
299
/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
300
///
301
/// Important: both the preference name and value are case-sensitive
302
///
303
/// This function can be called multiple times to set different values.
304
/// The values are persistent and extend beyond calls to [`set_mathml`].
305
/// A value can be overwritten by calling this function again with a different value.
306
///
307
/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
308
17.7k
pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
309
17.7k
    enable_logs();
310
17.7k
    let name = name.as_ref().to_string();
311
17.7k
    let value = value.as_ref().to_string();
312
17.7k
    let result = catch_unwind(AssertUnwindSafe(|| {
313
17.7k
        set_preference_impl(&name, &value)
314
17.7k
    }));
315
17.7k
    return report_any_panic(result);
316
17.7k
}
317
318
17.7k
fn set_preference_impl(name: &str, value: &str) -> Result<()> {
319
17.7k
    let mut value = value.to_string();
320
17.7k
    if name == "Language" || 
name == "LanguageAuto"12.7k
{
321
        // check the format
322
5.02k
        if value != "Auto" {
323
            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
324
5.02k
            let mut lang_country_split = value.split('-');
325
5.02k
            let language = lang_country_split.next().unwrap_or("");
326
5.02k
            let country = lang_country_split.next().unwrap_or("");
327
5.02k
            if language.len() != 2 {
328
0
                bail!(
329
                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
330
                    value
331
                );
332
5.02k
            }
333
5.02k
            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
334
5.02k
            if !country.is_empty() {
335
321
                new_lang_country.push('-');
336
321
                new_lang_country.push_str(country);
337
4.70k
            }
338
5.02k
            value = new_lang_country;
339
0
        }
340
5.02k
        if name == "LanguageAuto" && 
value == "Auto"0
{
341
0
            bail!("'LanguageAuto' can not have the value 'Auto'");
342
5.02k
        }
343
12.7k
    }
344
345
17.7k
    crate::speech::SPEECH_RULES.with(|rules| {
346
17.7k
        let rules = rules.borrow_mut();
347
17.7k
        if let Some(
error_string0
) = rules.get_error() {
348
0
            bail!("{}", error_string);
349
17.7k
        }
350
351
        // we set the value even if it was the same as the old value because this might override a potentially changed future user value
352
17.7k
        let mut pref_manager = rules.pref_manager.borrow_mut();
353
17.7k
        if name == "LanguageAuto" {
354
0
            let language_pref = pref_manager.pref_to_string("Language");
355
0
            if language_pref != "Auto" {
356
0
                bail!(
357
                    "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
358
                    language_pref
359
                );
360
0
            }
361
17.7k
        }
362
17.7k
        let lower_case_value = value.to_lowercase();
363
17.7k
        if lower_case_value == "true" || 
lower_case_value == "false"17.6k
{
364
1.50k
            pref_manager.set_api_boolean_pref(name, value.to_lowercase() == "true");
365
1.50k
        } else {
366
16.2k
            match name {
367
16.2k
                "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
368
0
                    pref_manager.set_api_float_pref(name, to_float(name, &value)?)
369
                }
370
                _ => {
371
16.2k
                    pref_manager.set_string_pref(name, &value)
?0
;
372
                }
373
            }
374
        };
375
17.7k
        return Ok::<(), Error>(());
376
17.7k
    })
?0
;
377
378
17.7k
    return Ok(());
379
17.7k
}
380
381
0
fn to_float(name: &str, value: &str) -> Result<f64> {
382
0
    return match value.parse::<f64>() {
383
0
        Ok(val) => Ok(val),
384
0
        Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
385
    };
386
0
}
387
388
/// Get the braille associated with the MathML that was set by [`set_mathml`].
389
/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
390
/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
391
1.36k
pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
392
1.36k
    enable_logs();
393
1.36k
    let nav_node_id = nav_node_id.as_ref().to_string();
394
1.36k
    let result = catch_unwind(AssertUnwindSafe(|| {
395
1.36k
        MATHML_INSTANCE.with(|package_instance| {
396
1.36k
            let package_instance = package_instance.borrow();
397
1.36k
            let mathml = get_element(&package_instance);
398
1.36k
            let braille = crate::braille::braille_mathml(mathml, &nav_node_id)
?0
.0;
399
1.36k
            return Ok(braille);
400
1.36k
        })
401
1.36k
    }));
402
1.36k
    return report_any_panic(result);
403
1.36k
}
404
405
/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
406
/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
407
/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
408
0
pub fn get_navigation_braille() -> Result<String> {
409
0
    enable_logs();
410
0
    let result = catch_unwind(AssertUnwindSafe(|| {
411
0
        MATHML_INSTANCE.with(|package_instance| {
412
0
            let package_instance = package_instance.borrow();
413
0
            let mathml = get_element(&package_instance);
414
0
            let new_package = Package::new(); // used if we need to create a new tree
415
0
            let new_doc = new_package.as_document();
416
0
            let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
417
0
                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
418
0
                    Err(e) => Err(e),
419
0
                    Ok((found, offset)) => {
420
                        // get the MathML node and wrap it inside of a <math> element
421
                        // if the offset is given, we need to get the character it references
422
0
                        if offset == 0 {
423
0
                            if name(found) == "math" {
424
0
                                Ok(found)
425
                            } else {
426
0
                                let new_mathml = create_mathml_element(&new_doc, "math");
427
0
                                new_mathml.append_child(copy_mathml(found));
428
0
                                new_doc.root().append_child(new_mathml);
429
0
                                Ok(new_mathml)
430
                            }
431
0
                        } else if !is_leaf(found) {
432
0
                            bail!(
433
                                "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
434
                                offset,
435
0
                                name(found)
436
                            );
437
0
                        } else if let Some(ch) = as_text(found).chars().nth(offset) {
438
0
                            let internal_mathml = create_mathml_element(&new_doc, name(found));
439
0
                            internal_mathml.set_text(&ch.to_string());
440
0
                            let new_mathml = create_mathml_element(&new_doc, "math");
441
0
                            new_mathml.append_child(internal_mathml);
442
0
                            new_doc.root().append_child(new_mathml);
443
0
                            Ok(new_mathml)
444
                        } else {
445
0
                            bail!(
446
                                "Internal error: offset '{}' on leaf element '{}' doesn't exist",
447
                                offset,
448
0
                                mml_to_string(found)
449
                            );
450
                        }
451
                    }
452
                };
453
0
            })?;
454
455
0
            let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
456
0
            return Ok(braille);
457
0
        })
458
0
    }));
459
0
    return report_any_panic(result);
460
0
}
461
462
/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
463
/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
464
/// The spoken text for the new current node is returned.
465
0
pub fn do_navigate_keypress(
466
0
    key: usize,
467
0
    shift_key: bool,
468
0
    control_key: bool,
469
0
    alt_key: bool,
470
0
    meta_key: bool,
471
0
) -> Result<String> {
472
0
    enable_logs();
473
0
    let result = catch_unwind(AssertUnwindSafe(|| {
474
0
        MATHML_INSTANCE.with(|package_instance| {
475
0
            let package_instance = package_instance.borrow();
476
0
            let mathml = get_element(&package_instance);
477
0
            return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
478
0
        })
479
0
    }));
480
0
    return report_any_panic(result);
481
0
}
482
483
/// Given a navigation command, the current node is moved accordingly.
484
/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
485
/// The valid commands are:
486
/// * Standard move commands:
487
///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
488
/// * Movement in a table or elementary math:
489
///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
490
/// * Moving into children or out to parents:
491
///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
492
/// * Undo the last movement command:
493
///   `MoveLastLocation`
494
/// * Read commands (standard speech):
495
///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
496
/// * Describe commands (overview):
497
///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
498
/// * Location information:
499
///   `WhereAmI`, `WhereAmIAll`
500
/// * Change navigation modes (circle up/down):
501
///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
502
/// * Speak the current navigation mode
503
///   `ToggleSpeakMode`
504
///
505
/// There are 10 place markers that can be set/read/described or moved to.
506
/// * Setting:
507
///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
508
/// * Reading:
509
///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
510
/// * Describing:
511
///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
512
/// * Moving:
513
///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
514
///
515
/// When done with Navigation, call with `Exit`
516
0
pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
517
0
    enable_logs();
518
0
    let command = command.as_ref().to_string();
519
0
    let result = catch_unwind(AssertUnwindSafe(|| {
520
0
        let cmd = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
521
0
        if cmd.is_none() {
522
0
            bail!("Unknown command in call to DoNavigateCommand()");
523
0
        };
524
0
        let cmd = *cmd.unwrap();
525
0
        MATHML_INSTANCE.with(|package_instance| {
526
0
            let package_instance = package_instance.borrow();
527
0
            let mathml = get_element(&package_instance);
528
0
            return do_navigate_command_string(mathml, cmd);
529
0
        })
530
0
    }));
531
0
    return report_any_panic(result);
532
0
}
533
534
/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
535
/// An error is returned if the 'id' doesn't exist
536
2
pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
537
2
    enable_logs();
538
2
    let id = id.as_ref().to_string();
539
2
    let result = catch_unwind(AssertUnwindSafe(|| {
540
2
        MATHML_INSTANCE.with(|package_instance| {
541
2
            let package_instance = package_instance.borrow();
542
2
            let mathml = get_element(&package_instance);
543
2
            return set_navigation_node_from_id(mathml, &id, offset);
544
2
        })
545
2
    }));
546
2
    return report_any_panic(result);
547
2
}
548
549
/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
550
/// The offset is needed for token elements that have multiple characters.
551
0
pub fn get_navigation_mathml() -> Result<(String, usize)> {
552
0
    enable_logs();
553
0
    let result = catch_unwind(AssertUnwindSafe(|| {
554
0
        MATHML_INSTANCE.with(|package_instance| {
555
0
            let package_instance = package_instance.borrow();
556
0
            let mathml = get_element(&package_instance);
557
0
            return NAVIGATION_STATE.with(|nav_stack| {
558
0
                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
559
0
                    Err(e) => Err(e),
560
0
                    Ok((found, offset)) => Ok((mml_to_string(found), offset)),
561
                };
562
0
            });
563
0
        })
564
0
    }));
565
0
    return report_any_panic(result);
566
0
}
567
568
/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
569
/// `offset` (not yet implemented)
570
/// The offset is needed for token elements that have multiple characters.
571
2
pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
572
2
    enable_logs();
573
2
    let result = catch_unwind(AssertUnwindSafe(|| {
574
2
        MATHML_INSTANCE.with(|package_instance| {
575
2
            let package_instance = package_instance.borrow();
576
2
            let mathml = get_element(&package_instance);
577
2
            return Ok(NAVIGATION_STATE.with(|nav_stack| {
578
2
                return nav_stack.borrow().get_navigation_mathml_id(mathml);
579
2
            }));
580
2
        })
581
2
    }));
582
2
    return report_any_panic(result);
583
2
}
584
585
/// Return the start and end braille character positions associated with the current (navigation) node.
586
2
pub fn get_braille_position() -> Result<(usize, usize)> {
587
2
    enable_logs();
588
2
    let result = catch_unwind(AssertUnwindSafe(|| {
589
2
        MATHML_INSTANCE.with(|package_instance| {
590
2
            let package_instance = package_instance.borrow();
591
2
            let mathml = get_element(&package_instance);
592
2
            let nav_node = get_navigation_mathml_id()
?0
;
593
2
            let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)
?0
;
594
2
            return Ok((start, end));
595
2
        })
596
2
    }));
597
2
    return report_any_panic(result);
598
2
}
599
600
/// Given a 0-based braille position, return the smallest MathML node enclosing it.
601
/// This node might be a leaf with an offset.
602
91
pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
603
91
    enable_logs();
604
91
    let result = catch_unwind(AssertUnwindSafe(|| {
605
91
        MATHML_INSTANCE.with(|package_instance| {
606
91
            let package_instance = package_instance.borrow();
607
91
            let mathml = get_element(&package_instance);
608
91
            return crate::braille::get_navigation_node_from_braille_position(mathml, position);
609
91
        })
610
91
    }));
611
91
    return report_any_panic(result);
612
91
}
613
614
0
pub fn get_supported_braille_codes() -> Result<Vec<String>> {
615
0
    enable_logs();
616
0
    let result = catch_unwind(AssertUnwindSafe(|| {
617
0
        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
618
0
        let braille_dir = rules_dir.join("Braille");
619
0
        let mut braille_code_paths = Vec::new();
620
621
0
        find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
622
0
        let mut braille_code_paths = braille_code_paths.iter()
623
0
                        .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
624
0
                        .filter(|string_path| !string_path.is_empty() )
625
0
                        .collect::<Vec<String>>();
626
0
        braille_code_paths.sort();
627
628
0
        Ok(braille_code_paths)
629
0
    }));
630
0
    return report_any_panic(result);
631
0
 }
632
633
/// Returns a Vec of all supported languages ("en", "es", ...)
634
1
pub fn get_supported_languages() -> Result<Vec<String>> {
635
1
    enable_logs();
636
1
    let result = catch_unwind(AssertUnwindSafe(|| {
637
1
        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
638
1
        let lang_dir = rules_dir.join("Languages");
639
1
        let mut lang_paths = Vec::new();
640
641
1
        find_all_dirs_shim(&lang_dir, &mut lang_paths);
642
1
        let mut language_paths = lang_paths.iter()
643
13
                        .
map1
(|path| path.strip_prefix(&lang_dir).unwrap()
644
13
                                                  .to_string_lossy()
645
13
                                                  .replace(std::path::MAIN_SEPARATOR, "-")
646
13
                                                  .to_string())
647
13
                        .
filter1
(|string_path| !string_path.is_empty() )
648
1
                        .collect::<Vec<String>>();
649
650
        // make sure the 'zz' test dir isn't included (build.rs removes it, but for debugging is there)
651
13
        
language_paths1
.
retain1
(|s| !s.starts_with("zz"));
652
1
        language_paths.sort();
653
1
        Ok(language_paths)
654
1
    }));
655
1
    return report_any_panic(result);
656
1
 }
657
658
0
 pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Result<Vec<String>> {
659
0
    enable_logs();
660
0
    let lang = lang.as_ref().to_string();
661
0
    let result = catch_unwind(AssertUnwindSafe(|| {
662
0
        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
663
0
        let lang_dir = rules_dir.join("Languages").join(&lang);
664
0
        let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
665
0
        for file_name in &mut speech_styles {
666
0
            file_name.truncate(file_name.len() - "_Rules.yaml".len())
667
        }
668
0
        speech_styles.sort();
669
0
        speech_styles.dedup(); // remove duplicates -- shouldn't be any, but just in case
670
0
        Ok(speech_styles)
671
0
    }));
672
0
    return report_any_panic(result);
673
0
 }
674
675
// utility functions
676
677
/// Copy (recursively) the (MathML) element and return the new one.
678
/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
679
/// Convert the returned error from set_mathml, etc., to a useful string for display
680
363
pub fn copy_mathml(mathml: Element) -> Element {
681
363
    return copy_mathml_recursive(mathml, 0);
682
363
}
683
684
4.53k
fn copy_mathml_recursive(mathml: Element, depth: usize) -> Element {
685
    // Safety: Prevent stack overflow on deeply nested MathML
686
4.53k
    if depth > MAX_DEPTH {
687
        // Return the element as a leaf if it's too deep to prevent crash
688
0
        return create_mathml_element(&mathml.document(), name(mathml));
689
4.53k
    }
690
691
    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
692
4.53k
    let children = mathml.children();
693
4.53k
    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
694
9.52k
    
mathml.attributes().iter()4.53k
.
for_each4.53k
(|attr| {
695
9.52k
        new_mathml.set_attribute_value(attr.name(), attr.value());
696
9.52k
    });
697
698
    // can't use is_leaf/as_text because this is also used with the intent tree
699
4.53k
    if children.len() == 1 &&
700
3.26k
       let Some(
text2.59k
) = children[0].text() {
701
2.59k
        new_mathml.set_text(text.text());
702
2.59k
        return new_mathml;
703
1.93k
        }
704
705
1.93k
    let mut new_children = Vec::with_capacity(children.len());
706
4.17k
    for child in 
children1.93k
{
707
4.17k
        let child = as_element(child);
708
4.17k
        let new_child = copy_mathml_recursive(child, depth + 1);
709
4.17k
        new_children.push(new_child);
710
4.17k
    }
711
1.93k
    new_mathml.append_children(new_children);
712
1.93k
    return new_mathml;
713
4.53k
}
714
715
0
pub fn errors_to_string(e: &Error) -> String {
716
0
    enable_logs();
717
0
    let mut result = format!("{e}\n");
718
0
    for cause in e.chain().skip(1) { // skips original error
719
0
        result += &format!("caused by: {cause}\n");
720
0
    }
721
0
    result
722
0
}
723
724
4.91k
fn add_ids(mathml: Element) -> Element {
725
    use std::time::SystemTime;
726
4.91k
    let time = if cfg!(target_family = "wasm") {
727
0
        fastrand::usize(..)
728
    } else {
729
4.91k
        SystemTime::now()
730
4.91k
            .duration_since(SystemTime::UNIX_EPOCH)
731
4.91k
            .unwrap()
732
4.91k
            .as_millis() as usize
733
    };
734
4.91k
    let mut time_part = radix_fmt::radix(time, 36).to_string();
735
4.91k
    if time_part.len() < 3 {
736
0
        time_part.push_str("a2c");      // needs to be at least three chars
737
4.91k
    }
738
4.91k
    let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
739
4.91k
    if random_part.len() < 4 {
740
0
        random_part.push_str("a1b2");      // needs to be at least four chars
741
4.91k
    }
742
4.91k
    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
743
4.91k
    add_ids_to_all(mathml, &prefix, 0);
744
4.91k
    return mathml;
745
746
57.8k
    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
747
57.8k
        let mut count = count;
748
57.8k
        if mathml.attribute("id").is_none() {
749
57.1k
            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
750
57.1k
            mathml.set_attribute_value("data-id-added", "true");
751
57.1k
            count += 1;
752
57.1k
        
}707
;
753
754
57.8k
        if crate::xpath_functions::is_leaf(mathml) {
755
35.8k
            return count;
756
22.0k
        }
757
758
52.9k
        for child in 
mathml22.0k
.
children22.0k
() {
759
52.9k
            let child = as_element(child);
760
52.9k
            count = add_ids_to_all(child, id_prefix, count);
761
52.9k
        }
762
22.0k
        return count;
763
57.8k
    }
764
4.91k
}
765
766
10.3k
pub fn get_element(package: &Package) -> Element<'_> {
767
10.3k
    enable_logs();
768
10.3k
    let doc = package.as_document();
769
10.3k
    let mut result = None;
770
10.3k
    for root_child in doc.root().children() {
771
10.3k
        if let ChildOfRoot::Element(e) = root_child {
772
10.3k
            assert!(result.is_none());
773
10.3k
            result = Some(e);
774
0
        }
775
    }
776
10.3k
    return result.unwrap();
777
10.3k
}
778
779
/// Get the intent after setting the MathML
780
/// Used in testing
781
#[allow(dead_code)]
782
32
pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
783
32
    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
784
32
    let mathml = cleanup_mathml(mathml)
?0
;
785
32
    return crate::speech::intent_from_mathml(mathml, doc);
786
32
}
787
788
#[allow(dead_code)]
789
22
fn trim_doc(doc: &Document) {
790
22
    for root_child in doc.root().children() {
791
22
        if let ChildOfRoot::Element(e) = root_child {
792
22
            trim_element(e, false);
793
22
        } else {
794
0
            doc.root().remove_child(root_child); // comment or processing instruction
795
0
        }
796
    }
797
22
}
798
799
/// Normalizes the text content of `e` and, recursively, of its element children.
///
/// * A leaf (per `is_leaf`) is turned into a pure text leaf by `make_leaf_element` when
///   `allow_structure_in_leaves` is false or `IsNode::is_mathml(e)` is true.
/// * Otherwise each element child is trimmed recursively and all other children (text, comments, ...)
///   are removed. The removed text is only put back (with whitespace runs collapsed to one space)
///   when `e` ends up with no children at all.
///
/// Not really meant to be public -- used by tests in some packages
pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
    // "<mtext>this is text</mtext" results in 3 text children
    // these are combined into one child as it makes code downstream simpler

    // space, tab, newline, form feed, carriage return all get collapsed to a single space
    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
    // matches a run of one or more of the WHITESPACE characters
    static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());

    if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
        // Assume it is HTML inside of the leaf -- turn the HTML into a string
        make_leaf_element(e);
        return;
    }

    // accumulates the text of all text children (which are removed as they are visited)
    let mut single_text = "".to_string();
    for child in e.children() {
        match child {
            ChildOfElement::Element(c) => {
                trim_element(c, allow_structure_in_leaves);
            }
            ChildOfElement::Text(t) => {
                single_text += t.text();
                e.remove_child(child);
            }
            _ => {
                // neither an element nor text: just drop it
                e.remove_child(child);
            }
        }
    }

    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
        // intent-literal comes from testing intent
        // FIX: we have a problem -- what should happen???
        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
        if !single_text.trim_matches(WHITESPACE).is_empty() {
            error!(
                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
            );
        }
        return;
    }
    // only text was present: restore it as a single text child with whitespace runs collapsed (not trimmed)
    if e.children().is_empty() && !single_text.is_empty() {
        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
    }

    /// Replaces the children of `mathml_leaf` with a single text child holding all of their text,
    /// with whitespace runs collapsed to one space and leading/trailing whitespace removed.
    /// If the children include embedded `math` elements, the leaf is instead rewritten by
    /// `rewrite_and_flatten_embedded_mathml`.
    fn make_leaf_element(mathml_leaf: Element) {
        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
        // Here, we convert them to leaves by grabbing up all the text and making that the content
        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
        let children = mathml_leaf.children();
        if children.is_empty() {
            return;
        }

        if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
            return;
        }

        // gather up the text
        let mut text = "".to_string();
        for child in children {
            let child_text = match child {
                ChildOfElement::Element(child) => {
                    if name(child) == "mglyph" {
                        // an mglyph contributes its 'alt' attribute (or nothing if it has none)
                        child.attribute_value("alt").unwrap_or("").to_string()
                    } else {
                        gather_text(child)
                    }
                }
                ChildOfElement::Text(t) => {
                    // debug!("ChildOfElement::Text: '{}'", t.text());
                    t.text().to_string()
                }
                _ => "".to_string(),
            };
            if !child_text.is_empty() {
                text += &child_text;
            }
        }

        // get rid of the old children and replace with the text we just built
        mathml_leaf.clear_children();
        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));

        /// gather up (recursively) all the text contents of the element and return them concatenated
        /// NOTE(review): an earlier comment promised a leading space, but no space is added here
        fn gather_text(html: Element) -> String {
            let mut text = "".to_string(); // the element tags are thrown out; only the text is kept (no separator is inserted)
            for child in html.children() {
                match child {
                    ChildOfElement::Element(child) => {
                        text += &gather_text(child);
                    }
                    ChildOfElement::Text(t) => text += t.text(),
                    _ => (),
                }
            }
            // debug!("gather_text: '{}'", text);
            return text;
        }
    }

    /// If the children of `mathml_leaf` include `math` elements (and no other kind of element),
    /// turns the leaf into an `mrow` whose children are the children of those `math` elements,
    /// with each run of adjacent text children wrapped in a new element named like the original leaf.
    /// Returns true if the rewrite happened, false if `mathml_leaf` was left untouched.
    fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
        // first see if it can or needs to be rewritten
        // this is likely rare, so we do a check and if true, do a second pass building the result
        let mut needs_rewrite = false;
        for child in mathml_leaf.children() {
            if let Some(element) = child.element() {
                if name(element) != "math" {
                    return false; // something other than MathML as a child -- can't rewrite
                }
                needs_rewrite = true;
            }
        };

        if !needs_rewrite {
            return false;
        }

        // now do the rewrite, flattening out the mathml and turning the leaf into an mrow with the children
        let leaf_name = name(mathml_leaf);
        let doc = mathml_leaf.document();
        let mut new_children = Vec::new();
        // true when the last entry of new_children is a leaf created below from text
        let mut is_last_mtext = false;
        for child in mathml_leaf.children() {
            if let Some(element) = child.element() {
                trim_element(element, true);
                new_children.append(&mut element.children());   // don't want 'math' wrapper
                is_last_mtext = false;
            } else if let Some(text) = child.text() {
                // combine adjacent text nodes into single nodes
                if is_last_mtext {
                    let last_child = new_children.last_mut().unwrap().element().unwrap();
                    let new_text = as_text(last_child).to_string() + text.text();
                    last_child.set_text(&new_text);
                } else {
                    let new_leaf_node = create_mathml_element(&doc, leaf_name);
                    new_leaf_node.set_text(text.text());
                    new_children.push(ChildOfElement::Element(new_leaf_node));
                    is_last_mtext = true;
                }
            }
        };

        // clean up whitespace in text nodes
        for child in &mut new_children {
            if let Some(element) = child.element() && is_leaf(element) {
                let text = as_text(element);
                let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
                element.set_text(&cleaned_text);
            }
        }
        
        crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
        mathml_leaf.clear_children();
        mathml_leaf.append_children(new_children);

        // debug!("rewrite_and_flatten_embedded_mathml: flattened\n'{}'", mml_to_string(mathml_leaf));
        return true;
    }
}
964
965
// used for testing trim
966
/// returns Ok() if two Documents are equal or some info where they differ in the Err
967
#[allow(dead_code)]
968
11
fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
969
    // assume 'e' doesn't have element children until proven otherwise
970
    // this means we keep Text children until we are proven they aren't needed
971
11
    if doc1.root().children().len() != doc2.root().children().len() {
972
0
        bail!(
973
            "Children of docs have {} != {} children",
974
0
            doc1.root().children().len(),
975
0
            doc2.root().children().len()
976
        );
977
11
    }
978
979
11
    for (i, (c1, c2)) in doc1
980
11
        .root()
981
11
        .children()
982
11
        .iter()
983
11
        .zip(doc2.root().children().iter())
984
11
        .enumerate()
985
    {
986
11
        match c1 {
987
11
            ChildOfRoot::Element(e1) => {
988
11
                if let ChildOfRoot::Element(e2) = c2 {
989
11
                    is_same_element(*e1, *e2, &[])
?1
;
990
                } else {
991
0
                    bail!("child #{}, first is element, second is something else", i);
992
                }
993
            }
994
0
            ChildOfRoot::Comment(com1) => {
995
0
                if let ChildOfRoot::Comment(com2) = c2 {
996
0
                    if com1.text() != com2.text() {
997
0
                        bail!("child #{} -- comment text differs", i);
998
0
                    }
999
                } else {
1000
0
                    bail!("child #{}, first is comment, second is something else", i);
1001
                }
1002
            }
1003
0
            ChildOfRoot::ProcessingInstruction(p1) => {
1004
0
                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
1005
0
                    if p1.target() != p2.target() || p1.value() != p2.value() {
1006
0
                        bail!("child #{} -- processing instruction differs", i);
1007
0
                    }
1008
                } else {
1009
0
                    bail!(
1010
                        "child #{}, first is processing instruction, second is something else",
1011
                        i
1012
                    );
1013
                }
1014
            }
1015
        }
1016
    }
1017
10
    return Ok(());
1018
11
}
1019
1020
/// returns Ok() if two Documents are equal or some info where they differ in the Err
1021
// Not really meant to be public -- used by tests in some packages
1022
#[allow(dead_code)]
1023
1.92k
pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
1024
1.92k
    enable_logs();
1025
1.92k
    if name(e1) != name(e2) {
1026
0
        bail!("Names not the same: {}, {}", name(e1), name(e2));
1027
1.92k
    }
1028
1029
    // assume 'e' doesn't have element children until proven otherwise
1030
    // this means we keep Text children until we are proven they aren't needed
1031
1.92k
    if e1.children().len() != e2.children().len() {
1032
0
        bail!(
1033
            "Children of {} have {} != {} children",
1034
0
            name(e1),
1035
0
            e1.children().len(),
1036
0
            e2.children().len()
1037
        );
1038
1.92k
    }
1039
1040
1.92k
    if let Err(
e0
) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
1041
0
        bail!("In element {}, {}", name(e1), e);
1042
1.92k
    }
1043
1044
2.86k
    for (i, (c1, c2)) in 
e1.children().iter()1.92k
.
zip1.92k
(
e2.children().iter()1.92k
).
enumerate1.92k
() {
1045
2.86k
        match c1 {
1046
1.72k
            ChildOfElement::Element(child1) => {
1047
1.72k
                if let ChildOfElement::Element(child2) = c2 {
1048
1.72k
                    is_same_element(*child1, *child2, ignore_attrs)
?2
;
1049
                } else {
1050
0
                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
1051
                }
1052
            }
1053
0
            ChildOfElement::Comment(com1) => {
1054
0
                if let ChildOfElement::Comment(com2) = c2 {
1055
0
                    if com1.text() != com2.text() {
1056
0
                        bail!("{} child #{} -- comment text differs", name(e1), i);
1057
0
                    }
1058
                } else {
1059
0
                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
1060
                }
1061
            }
1062
0
            ChildOfElement::ProcessingInstruction(p1) => {
1063
0
                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
1064
0
                    if p1.target() != p2.target() || p1.value() != p2.value() {
1065
0
                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
1066
0
                    }
1067
                } else {
1068
0
                    bail!(
1069
                        "{} child #{}, first is processing instruction, second is something else",
1070
0
                        name(e1),
1071
                        i
1072
                    );
1073
                }
1074
            }
1075
1.14k
            ChildOfElement::Text(t1) => {
1076
1.14k
                if let ChildOfElement::Text(t2) = c2 {
1077
1.14k
                    if t1.text() != t2.text() {
1078
1
                        bail!("{} child #{} --  text differs", name(e1), i);
1079
1.14k
                    }
1080
                } else {
1081
0
                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
1082
                }
1083
            }
1084
        }
1085
    }
1086
1.91k
    return Ok(());
1087
1088
    /// compares attributes -- '==' didn't seems to work
1089
1.92k
    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
1090
1.92k
        let attrs1 = attrs1.iter()
1091
1.92k
                .filter(|a| !
ignore1.40k
.
contains1.40k
(
&a.name().local_part()1.40k
)).cloned()
1092
1.92k
                .collect::<Vec<Attribute>>();
1093
1.92k
        let attrs2 = attrs2.iter()
1094
1.92k
                .filter(|a| !
ignore1.40k
.
contains1.40k
(
&a.name().local_part()1.40k
)).cloned()
1095
1.92k
                .collect::<Vec<Attribute>>();
1096
1.92k
        if attrs1.len() != attrs2.len() {
1097
0
            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
1098
1.92k
        }
1099
        // can't guarantee attrs are in the same order
1100
1.92k
        for 
attr11.40k
in attrs1 {
1101
1.40k
            if let Some(found_attr2) = attrs2
1102
1.40k
                .iter()
1103
1.88k
                .
find1.40k
(|&attr2| attr1.name().local_part() == attr2.name().local_part())
1104
            {
1105
1.40k
                if attr1.value() == found_attr2.value() {
1106
1.40k
                    continue;
1107
                } else {
1108
0
                    bail!(
1109
                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
1110
0
                        attr1.name().local_part(),
1111
0
                        attr1.value(),
1112
0
                        found_attr2.value()
1113
                    );
1114
                }
1115
            } else {
1116
0
                bail!(
1117
                    "Attribute name {} not in [{}]",
1118
0
                    print_attr(&attr1),
1119
0
                    print_attrs(&attrs2)
1120
                );
1121
            }
1122
        }
1123
1.92k
        return Ok(());
1124
1125
0
        fn print_attr(attr: &Attribute) -> String {
1126
0
            return format!("@{}='{}'", attr.name().local_part(), attr.value());
1127
0
        }
1128
0
        fn print_attrs(attrs: &[Attribute]) -> String {
1129
0
            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
1130
0
        }
1131
1.92k
    }
1132
1.92k
}
1133
1134
#[cfg(test)]
1135
mod tests {
1136
    #[allow(unused_imports)]
1137
    use super::super::init_logger;
1138
    use super::*;
1139
1140
10
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
1141
10
        let test_package = &parser::parse(test).expect("Failed to parse input");
1142
10
        let test_doc = test_package.as_document();
1143
10
        trim_doc(&test_doc);
1144
10
        debug!("test:\n{}", 
mml_to_string0
(
get_element0
(
test_package0
)));
1145
1146
10
        let target_package = &parser::parse(target).expect("Failed to parse input");
1147
10
        let target_doc = target_package.as_document();
1148
10
        trim_doc(&target_doc);
1149
10
        debug!("target:\n{}", 
mml_to_string0
(
get_element0
(
target_package0
)));
1150
1151
10
        match is_same_doc(&test_doc, &target_doc) {
1152
10
            Ok(_) => return true,
1153
0
            Err(e) => panic!("{}", e),
1154
        }
1155
10
    }
1156
1157
    #[test]
1158
1
    fn trim_same() {
1159
1
        let trimmed_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
1160
1
        assert!(are_parsed_strs_equal(trimmed_str, trimmed_str));
1161
1
    }
1162
1163
    #[test]
1164
1
    fn trim_whitespace() {
1165
1
        let trimmed_str = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
1166
1
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1167
1
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1168
1
    }
1169
1170
    #[test]
1171
1
    fn no_trim_whitespace_nbsp() {
1172
1
        let trimmed_str = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
1173
1
        let whitespace_str = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
1174
1
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1175
1
    }
1176
1177
    #[test]
1178
1
    fn trim_comment() {
1179
1
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1180
1
        let comment_str = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
1181
1
        assert!(are_parsed_strs_equal(comment_str, whitespace_str));
1182
1
    }
1183
1184
    #[test]
1185
1
    fn replace_mglyph() {
1186
1
        let mglyph_str = "<math>
1187
1
                <mrow>
1188
1
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
1189
1
                    <mo>+</mo>
1190
1
                    <mi>
1191
1
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
1192
1
                    </mi>
1193
1
                    <mo>=</mo>
1194
1
                    <mi>
1195
1
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
1196
1
                    </mi>
1197
1
                </mrow>
1198
1
            </math>";
1199
1
        let result_str = "<math>
1200
1
            <mrow>
1201
1
                <mi>X23braid</mi>
1202
1
                <mo>+</mo>
1203
1
                <mi>132braidY</mi>
1204
1
                <mo>=</mo>
1205
1
                <mi>13braid</mi>
1206
1
            </mrow>
1207
1
        </math>";
1208
1
        assert!(are_parsed_strs_equal(mglyph_str, result_str));
1209
1
    }
1210
1211
    #[test]
1212
1
    fn trim_differs() {
1213
1
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1214
1
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";
1215
1216
        // need to manually do this since failure shouldn't be a panic
1217
1
        let package1 = &parser::parse(whitespace_str).expect("Failed to parse input");
1218
1
        let doc1 = package1.as_document();
1219
1
        trim_doc(&doc1);
1220
1
        debug!("doc1:\n{}", 
mml_to_string0
(
get_element0
(
package10
)));
1221
1222
1
        let package2 = parser::parse(different_str).expect("Failed to parse input");
1223
1
        let doc2 = package2.as_document();
1224
1
        trim_doc(&doc2);
1225
1
        debug!("doc2:\n{}", 
mml_to_string0
(
get_element0
(
&package20
)));
1226
1227
1
        assert!(is_same_doc(&doc1, &doc2).is_err());
1228
1
    }
1229
1230
    #[test]
1231
1
    fn test_entities() {
1232
        // this forces initialization
1233
1
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1234
1235
1
        let entity_str = set_mathml("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>").unwrap();
1236
1
        let converted_str =
1237
1
            set_mathml("<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>").unwrap();
1238
1239
        // need to remove unique ids
1240
1
        static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());
1241
1
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
1242
1
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
1243
1
        assert_eq!(entity_str, converted_str, "normal entity test failed");
1244
1245
1
        let entity_str = set_mathml(
1246
            "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
1247
        )
1248
1
        .unwrap();
1249
1
        let converted_str =
1250
1
            set_mathml("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>").unwrap();
1251
1
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
1252
1
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
1253
1
        assert_eq!(entity_str, converted_str, "special entities quote test failed");
1254
1255
1
        let entity_str =
1256
1
            set_mathml("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>").unwrap();
1257
1
        let converted_str =
1258
1
            set_mathml("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>")
1259
1
                .unwrap();
1260
1
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
1261
1
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
1262
1
        assert_eq!(entity_str, converted_str, "special entities <,>,& test failed");
1263
1
    }
1264
1265
    #[test]
1266
1
    fn can_recover_from_invalid_set_rules_dir() {
1267
        use std::env;
1268
        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var
1269
1
        unsafe { env::set_var("MathCATRulesDir", "MathCATRulesDir"); }   // safe because we are single threaded
1270
1
        assert!(set_rules_dir("someInvalidRulesDir").is_err());
1271
1
        assert!(
1272
1
            set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
1273
            "\nset_rules_dir to '{}' failed",
1274
0
            super::super::abs_rules_dir_path()
1275
        );
1276
1
        assert!(set_mathml("<math><mn>1</mn></math>").is_ok());
1277
1
    }
1278
1279
    #[test]
1280
1
    fn single_html_in_mtext() {
1281
1
        let test = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
1282
1
        let target = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
1283
1
        assert!(are_parsed_strs_equal(test, target));
1284
1
    }
1285
1286
    #[test]
1287
1
    fn multiple_html_in_mtext() {
1288
1
        let test = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
1289
1
        let target = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
1290
1
        assert!(are_parsed_strs_equal(test, target));
1291
1
    }
1292
1293
    #[test]
1294
1
    fn nested_html_in_mtext() {
1295
1
        let test = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
1296
1
        let target = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
1297
1
        assert!(are_parsed_strs_equal(test, target));
1298
1
    }
1299
1300
    #[test]
1301
1
    fn empty_html_in_mtext() {
1302
1
        let test = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
1303
1
        let target = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
1304
1
        assert!(are_parsed_strs_equal(test, target));
1305
1
    }
1306
1307
    #[test]
1308
1
    fn mathml_in_mtext() {
1309
1
        let test = "<math><mtext>if&#xa0;<math> <msup><mi>n</mi><mn>2</mn></msup></math>&#xa0;is real</mtext></math>";
1310
1
        let target = "<math><mrow><mtext>if&#xa0;</mtext><msup><mi>n</mi><mn>2</mn></msup><mtext>&#xa0;is real</mtext></mrow></math>";
1311
1
        assert!(are_parsed_strs_equal(test, target));
1312
1
    }
1313
1314
    #[test]
1315
1
    fn stack_overflow_protection() {
1316
1
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1317
1
        let mut bad_mathml = String::from("<math>");
1318
513
        for _ in 
0..MAX_DEPTH+11
{
1319
513
            bad_mathml.push_str("<msqrt><mi>n</mi>");
1320
513
        }
1321
513
        for _ in 
0..MAX_DEPTH+11
{
1322
513
            bad_mathml.push_str("</msqrt>");
1323
513
        }
1324
1
        bad_mathml.push_str("</math>");
1325
1
        assert_eq!(set_mathml(bad_mathml).unwrap_err().to_string(), "MathML is too deeply nested to process");
1326
1
    }
1327
1328
    #[test]
1329
1
    fn old_mathml_cleared_on_error() {
1330
1
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1331
1
        let good_mathml = "<math><mn>3</mn></math>";
1332
1
        set_mathml(good_mathml).unwrap();
1333
1
        let bad_mathml = "<math><mi>&xabc;</mi></math>";
1334
1
        assert!(set_mathml(bad_mathml).is_err());
1335
1
        assert!(get_spoken_text().unwrap() == "");
1336
1
        set_mathml(good_mathml).unwrap();
1337
1
        let bad_mathml = "<math>garbage";
1338
1
        assert!(set_mathml(bad_mathml).is_err());
1339
1
        assert!(get_spoken_text().unwrap() == "");
1340
1
    }
1341
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/lib.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/lib.rs.html index 6ed55545..2f146d63 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/lib.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/lib.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/lib.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
3
//! A library for generating speech and braille from MathML
4
//! 
5
//! Typical usage is:
6
//! 1. Set the rules directory [`set_rules_dir`]
7
//! 2. Set whatever preferences are need with repeated calls to [`set_preference`].
8
//! 3. Set MathML via [`set_mathml`]
9
//!    A string representing the cleaned up MathML along with `id`s on each node is returned for highlighting if desired
10
//! 4. Get the speech [`get_spoken_text`] or (Unicode) braille [`get_braille`].
11
//!
12
//! The expression can be navigated also.
13
//! This is done in one of two ways:
14
//! 1. Pass key strokes to allow a user to navigate the MathML by calling [`do_navigate_keypress`]; the speech is returned.
15
//! 2. Pass the MathCAT navigation command directory by called [`do_navigate_command`]; the speech is return returned.
16
//! 
17
//! To get the MathML associated with the current navigation node, call [`get_navigation_mathml`].
18
//! To just get the `id` and offset from the id of the current navigation node, call [`get_navigation_mathml_id`].
19
///
20
/// This module re-exports anyhow types. Use `bail!` for early returns and
21
/// `context()`/`with_context()` on Result to add context (replacing old `chain_err()`).
22
pub mod errors {
23
    pub use anyhow::{anyhow, bail, Error, Result, Context};
24
}
25
26
pub mod interface;
27
#[cfg(feature = "include-zip")]
28
pub use shim_filesystem::ZIPPED_RULE_FILES;
29
30
mod canonicalize;
31
mod infer_intent;
32
pub mod speech;
33
mod braille;
34
mod navigate;
35
mod prefs;
36
mod tts;
37
mod xpath_functions;
38
mod definitions;
39
pub mod pretty_print;
40
mod chemistry;
41
42
pub mod shim_filesystem; // really just for override_file_for_debugging_rules, but the config seems to throw it off
43
pub use interface::*;
44
use crate::errors::{bail, Result};
45
46
#[cfg(test)]
47
0
pub fn init_logger() {
48
0
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("debug"))
49
0
        .is_test(true)
50
0
        .format_timestamp(None)
51
0
        .format_module_path(false)
52
0
        .format_indent(None)
53
0
        .format_level(false)
54
0
        .init();
55
0
}
56
57
/// Build Absolute path to rules dir for testing
58
250
pub fn abs_rules_dir_path() -> String {
59
    cfg_if::cfg_if! {
60
    if #[cfg(feature = "include-zip")] {
61
          return "Rules".to_string();
62
    } else {
63
        // Package root (see tests/common/mod.rs `abs_rules_dir_path` for rationale).
64
250
        return std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
65
250
            .join("Rules")
66
250
            .to_str()
67
250
            .expect("CARGO_MANIFEST_DIR and Rules path must be UTF-8")
68
250
            .to_string();
69
        }
70
    }
71
250
}
72
73
141
pub fn are_strs_canonically_equal_with_locale(test: &str, target: &str, ignore_attrs: &[&str], block_separators: &str, decimal_separators: &str) -> Result<()> {
74
    use crate::{interface::*, pretty_print::mml_to_string};
75
    use sxd_document::parser;
76
    use crate::canonicalize::canonicalize;
77
    use std::panic::{catch_unwind, AssertUnwindSafe};
78
79
141
    crate::interface::init_panic_handler();
80
141
    let result = catch_unwind(AssertUnwindSafe(|| {
81
        // this forces initialization
82
141
        crate::interface::set_rules_dir(abs_rules_dir_path()).unwrap();
83
141
        crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
84
141
        set_preference("Language", "en").unwrap();
85
141
        set_preference("BlockSeparators", block_separators).unwrap();
86
141
        set_preference("DecimalSeparators", decimal_separators).unwrap();
87
88
141
        let package1 = &parser::parse(test).expect("Failed to parse test input");
89
141
        let mathml = get_element(package1);
90
141
        trim_element(mathml, false);
91
141
        let mathml_test = canonicalize(mathml).unwrap();
92
93
141
        let package2 = &parser::parse(target).expect("Failed to parse target input");
94
141
        let mathml_target = get_element(package2);
95
141
        trim_element(mathml_target, false);
96
97
141
        match is_same_element(mathml_test, mathml_target, ignore_attrs) {
98
141
            Ok(_) => Ok( () ),
99
0
            Err(e) => {
100
0
                bail!("{}\nResult:\n{}\nTarget:\n{}", e, mml_to_string(mathml_test), mml_to_string(mathml_target));
101
            },
102
        }
103
141
    }));
104
141
    match crate::interface::report_any_panic(result) {
105
140
        Ok(()) => Ok(()),
106
1
        Err(e) => {
107
1
            eprintln!("{}", e);
108
1
            Err(e)
109
        }
110
    }
111
141
}
112
113
/// sets locale to be US standard
114
30
pub fn are_strs_canonically_equal(test: &str, target: &str, ignore_attrs: &[&str]) -> bool {
115
30
    are_strs_canonically_equal_with_locale(test, target, ignore_attrs, ", \u{00A0}\u{202F}", ".").is_ok()
116
30
}
117
118
/// Like `are_strs_canonically_equal` but returns `Result` for use in `#[test]` functions that return `Result<()>`.
119
102
pub fn are_strs_canonically_equal_result(test: &str, target: &str, ignore_attrs: &[&str]) -> Result<()> {
120
102
    are_strs_canonically_equal_with_locale(test, target, ignore_attrs, ", \u{00A0}\u{202F}", ".")
121
102
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/lib.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
3
//! A library for generating speech and braille from MathML
4
//! 
5
//! Typical usage is:
6
//! 1. Set the rules directory [`set_rules_dir`]
7
//! 2. Set whatever preferences are need with repeated calls to [`set_preference`].
8
//! 3. Set MathML via [`set_mathml`]
9
//!    A string representing the cleaned up MathML along with `id`s on each node is returned for highlighting if desired
10
//! 4. Get the speech [`get_spoken_text`] or (Unicode) braille [`get_braille`].
11
//!
12
//! The expression can be navigated also.
13
//! This is done in one of two ways:
14
//! 1. Pass key strokes to allow a user to navigate the MathML by calling [`do_navigate_keypress`]; the speech is returned.
15
//! 2. Pass the MathCAT navigation command directory by called [`do_navigate_command`]; the speech is return returned.
16
//! 
17
//! To get the MathML associated with the current navigation node, call [`get_navigation_mathml`].
18
//! To just get the `id` and offset from the id of the current navigation node, call [`get_navigation_mathml_id`].
19
///
20
/// This module re-exports anyhow types. Use `bail!` for early returns and
21
/// `context()`/`with_context()` on Result to add context (replacing old `chain_err()`).
22
pub mod errors {
23
    pub use anyhow::{anyhow, bail, Error, Result, Context};
24
}
25
26
pub mod interface;
27
#[cfg(feature = "include-zip")]
28
pub use shim_filesystem::ZIPPED_RULE_FILES;
29
30
mod canonicalize;
31
mod infer_intent;
32
pub mod speech;
33
mod braille;
34
mod navigate;
35
mod prefs;
36
mod tts;
37
mod xpath_functions;
38
mod definitions;
39
pub mod pretty_print;
40
mod chemistry;
41
42
pub mod shim_filesystem; // really just for override_file_for_debugging_rules, but the config seems to throw it off
43
pub use interface::*;
44
use crate::errors::{bail, Result};
45
46
#[cfg(test)]
47
0
pub fn init_logger() {
48
0
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("debug"))
49
0
        .is_test(true)
50
0
        .format_timestamp(None)
51
0
        .format_module_path(false)
52
0
        .format_indent(None)
53
0
        .format_level(false)
54
0
        .init();
55
0
}
56
57
/// Build Absolute path to rules dir for testing
58
250
pub fn abs_rules_dir_path() -> String {
59
    cfg_if::cfg_if! {
60
    if #[cfg(feature = "include-zip")] {
61
          return "Rules".to_string();
62
    } else {
63
        // Package root (see tests/common/mod.rs `abs_rules_dir_path` for rationale).
64
250
        return std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
65
250
            .join("Rules")
66
250
            .to_str()
67
250
            .expect("CARGO_MANIFEST_DIR and Rules path must be UTF-8")
68
250
            .to_string();
69
        }
70
    }
71
250
}
72
73
141
pub fn are_strs_canonically_equal_with_locale(test: &str, target: &str, ignore_attrs: &[&str], block_separators: &str, decimal_separators: &str) -> Result<()> {
74
    use crate::{interface::*, pretty_print::mml_to_string};
75
    use sxd_document::parser;
76
    use crate::canonicalize::canonicalize;
77
    use std::panic::{catch_unwind, AssertUnwindSafe};
78
79
141
    crate::interface::init_panic_handler();
80
141
    let result = catch_unwind(AssertUnwindSafe(|| {
81
        // this forces initialization
82
141
        crate::interface::set_rules_dir(abs_rules_dir_path()).unwrap();
83
141
        crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
84
141
        set_preference("Language", "en").unwrap();
85
141
        set_preference("BlockSeparators", block_separators).unwrap();
86
141
        set_preference("DecimalSeparators", decimal_separators).unwrap();
87
88
141
        let package1 = &parser::parse(test).expect("Failed to parse test input");
89
141
        let mathml = get_element(package1);
90
141
        trim_element(mathml, false);
91
141
        let mathml_test = canonicalize(mathml).unwrap();
92
93
141
        let package2 = &parser::parse(target).expect("Failed to parse target input");
94
141
        let mathml_target = get_element(package2);
95
141
        trim_element(mathml_target, false);
96
97
141
        match is_same_element(mathml_test, mathml_target, ignore_attrs) {
98
141
            Ok(_) => Ok( () ),
99
0
            Err(e) => {
100
0
                bail!("{}\nResult:\n{}\nTarget:\n{}", e, mml_to_string(mathml_test), mml_to_string(mathml_target));
101
            },
102
        }
103
141
    }));
104
141
    match crate::interface::report_any_panic(result) {
105
140
        Ok(()) => Ok(()),
106
1
        Err(e) => {
107
1
            eprintln!("{}", e);
108
1
            Err(e)
109
        }
110
    }
111
141
}
112
113
/// sets locale to be US standard
114
30
pub fn are_strs_canonically_equal(test: &str, target: &str, ignore_attrs: &[&str]) -> bool {
115
30
    are_strs_canonically_equal_with_locale(test, target, ignore_attrs, ", \u{00A0}\u{202F}", ".").is_ok()
116
30
}
117
118
/// Like `are_strs_canonically_equal` but returns `Result` for use in `#[test]` functions that return `Result<()>`.
119
102
pub fn are_strs_canonically_equal_result(test: &str, target: &str, ignore_attrs: &[&str]) -> Result<()> {
120
102
    are_strs_canonically_equal_with_locale(test, target, ignore_attrs, ", \u{00A0}\u{202F}", ".")
121
102
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/main.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/main.rs.html index 11dc34a8..6f822a5b 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/main.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/main.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/main.rs
Line
Count
Source
1
// *** MathCAT doesn't normally want to build a binary ***
2
// *** This file is here because it is useful for trying out things ***
3
#![allow(clippy::needless_return)]
4
5
use libmathcat::interface::*;
6
use log::{debug, info};
7
use std::time::Instant;
8
use std::process::exit;
9
10
11
// Maybe also have this speak to test the TTS generation.
12
// There is a rust winapi crate that mirrors the WinPAI and has "Speak(...)" in it
13
14
// env RUST_LOG=DEBUG cargo run --features "include-zip"
15
cfg_if::cfg_if! {
16
    if #[cfg(feature = "include-zip")] {
17
        fn get_rules_dir() -> String {
18
          return "Rules".to_string();
19
        }
20
    } else {
21
0
        fn get_rules_dir() -> String {
22
          // for testing with zipped rules dir
23
          // let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../../MathCATForPython/addon/globalPlugins/MathCAT/Rules");
24
0
          let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../Rules");
25
0
          return rules_path.as_os_str().to_str().unwrap().to_string();
26
0
        }
27
    }
28
}
29
30
0
fn main() {
31
0
  env_logger::builder()
32
0
      .format_timestamp(None)
33
0
      .format_module_path(false)
34
0
      .format_indent(Some(2))
35
0
      .format_level(false)
36
0
      .init();
37
38
//    let expr = r#"
39
//    <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
40
//    <mrow>
41
//      <msup>
42
//        <mi>e</mi>
43
//        <mrow>
44
//          <mo>&#x2212;</mo>
45
//          <mfrac>
46
//            <mn>1</mn>
47
//            <mn>2</mn>
48
//          </mfrac>
49
//          <msup>
50
//            <mrow>
51
//              <mrow>
52
//                <mo>(</mo>
53
//                <mrow>
54
//                  <mfrac>
55
//                    <mrow>
56
//                      <mi>x</mi>
57
//                      <mo>&#x2212;</mo>
58
//                      <mi>&#x03BC;</mi>
59
//                    </mrow>
60
//                    <mi>&#x03C3;</mi>
61
//                  </mfrac>
62
//                </mrow>
63
//                <mo>)</mo>
64
//              </mrow>
65
//            </mrow>
66
//            <mn>2</mn>
67
//          </msup>
68
//        </mrow>
69
//      </msup>
70
//    </mrow>
71
//  </math>
72
// "#;
73
  // let expr = "<math display='inline' xmlns='http://www.w3.org/1998/Math/MathML'>
74
  //       <msup intent='power($base(2, $base),silly($exp,-1.))'>
75
  //       <mi arg='base'>x</mi>
76
  //       <mi arg='exp'>n</mi>
77
  //     </msup>
78
  //       </math>
79
  //     ";
80
  // let expr = "<mrow intent='pre@prefix(in@infix($a, x))(post@postfix($b))'>
81
  //     <mi arg='a'>A</mi>
82
  //     <mover>
83
  //         <mo intent='map'>⟶</mo>
84
  //         <mo intent='congruence'>≅</mo>
85
  //     </mover>
86
  //     <mi arg='b'>B</mi>
87
  //   </mrow>";
88
  // let expr = "<math><mi>Na</mi><mi>S</mi><mo>(</mo><mi>l</mi><mo>)</mo></math>";
89
90
91
  // let expr = "<math xmlns='http://www.w3.org/1998/Math/MathML' display='block'>
92
  //     <mrow>
93
  //       <mo stretchy='false'>[</mo>
94
  //       <mrow>
95
  //         <mi>Co</mi>
96
  //       </mrow>
97
  //       <mo stretchy='false'>(</mo>
98
  //       <mrow>
99
  //         <mi>NH</mi>
100
  //       </mrow>
101
  //       <msub>
102
  //         <mrow>
103
  //           <mrow>
104
  //             <mpadded width='0'>
105
  //               <mphantom>
106
  //                 <mi>A</mi>
107
  //               </mphantom>
108
  //             </mpadded>
109
  //           </mrow>
110
  //         </mrow>
111
  //         <mrow>
112
  //           <mrow>
113
  //             <mpadded height='0'>
114
  //               <mn>3</mn>
115
  //             </mpadded>
116
  //           </mrow>
117
  //         </mrow>
118
  //       </msub>
119
  //       <mo stretchy='false'>)</mo>
120
  //       <msub>
121
  //         <mrow>
122
  //           <mrow>
123
  //             <mpadded width='0'>
124
  //               <mphantom>
125
  //                 <mi>A</mi>
126
  //               </mphantom>
127
  //             </mpadded>
128
  //           </mrow>
129
  //         </mrow>
130
  //         <mrow>
131
  //           <mrow>
132
  //             <mpadded height='0'>
133
  //               <mn>6</mn>
134
  //             </mpadded>
135
  //           </mrow>
136
  //         </mrow>
137
  //       </msub>
138
  //       <mo stretchy='false'>]</mo>
139
  //       <msup>
140
  //         <mrow>
141
  //           <mrow>
142
  //             <mpadded width='0'>
143
  //               <mphantom>
144
  //                 <mi>A</mi>
145
  //               </mphantom>
146
  //             </mpadded>
147
  //           </mrow>
148
  //         </mrow>
149
  //         <mrow>
150
  //           <mn>3</mn>
151
  //           <mo>+</mo>
152
  //         </mrow>
153
  //       </msup>
154
  //       <mtext>&#xA0;</mtext>
155
  //       <mo stretchy='false'>(</mo>
156
  //       <mrow>
157
  //         <mi>Cl</mi>
158
  //       </mrow>
159
  //       <msub>
160
  //         <mrow>
161
  //           <mrow>
162
  //             <mpadded width='0'>
163
  //               <mphantom>
164
  //                 <mi>A</mi>
165
  //               </mphantom>
166
  //             </mpadded>
167
  //           </mrow>
168
  //         </mrow>
169
  //         <mrow>
170
  //           <mrow>
171
  //             <mpadded height='0'>
172
  //               <mn>3</mn>
173
  //             </mpadded>
174
  //           </mrow>
175
  //         </mrow>
176
  //       </msub>
177
  //       <mo stretchy='false'>)</mo>
178
  //       <msup>
179
  //         <mrow>
180
  //           <mrow>
181
  //             <mpadded width='0'>
182
  //               <mphantom>
183
  //                 <mi>A</mi>
184
  //               </mphantom>
185
  //             </mpadded>
186
  //           </mrow>
187
  //         </mrow>
188
  //         <mrow>
189
  //           <mo>&#x2212;</mo>, 
190
  //         </mrow>
191
  //       </msup>
192
  //     </mrow>
193
  //   </math>";
194
195
0
  let expr = r#"
196
0
<math>
197
0
        <msub><mi mathvariant="normal">N</mi><mn>2</mn></msub>
198
0
        <munderover><mo>&#x2192;</mo><mtext>Haber&#xA0;process</mtext><msub><mi mathvariant="normal">H</mi><mn>2</mn></msub></munderover>
199
0
        <mi mathvariant="normal">N</mi>
200
0
        <msub><mi mathvariant="normal">H</mi><mn>3</mn></msub>
201
0
    </math>
202
0
         "#;
203
  // let instant = Instant::now();
204
205
  // let rules_dir = "".to_string();    // Use MathCATRulesDir, potentially pointing to a zipped version
206
0
  if let Err(e) = set_rules_dir(get_rules_dir()) {
207
0
    eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);
208
0
  }
209
0
  debug!("Languages: {}", libmathcat::interface::get_supported_languages().unwrap_or_default().join(", "));
210
211
  #[cfg(feature = "include-zip")]
212
  info!("***********include-zip is present**********");
213
0
  info!("Version = '{}' using Rules dir {}", get_version(), get_rules_dir());
214
0
  set_preference("Language", "en").unwrap();
215
0
  set_preference("DecimalSeparator", "Auto").unwrap();
216
0
  set_preference("BrailleCode", "UEB").unwrap();
217
0
  set_preference("BrailleNavHighlight", "On").unwrap();
218
0
  set_preference("TTS", "None").unwrap();
219
0
  set_preference("Verbosity", "Verbose").unwrap();
220
0
  set_preference("NavVerbosity", "Verbose").unwrap();
221
0
  set_preference("NavMode", "Enhanced").unwrap();
222
0
  set_preference("Impairment", "Blindness").unwrap();
223
0
  set_preference("SpeechOverrides_CapitalLetters", "").unwrap();
224
0
  set_preference("MathRate", "80").unwrap();
225
  // set_preference("CapitalLetters_UseWord", "true").unwrap();
226
  // set_preference("CapitalLetters_Pitch", "30").unwrap();
227
0
  set_preference("CapitalLetters_Beep", "true").unwrap();
228
0
  set_preference("IntentErrorRecovery", "Error").unwrap();
229
  // set_preference("MathRate", "77").unwrap();
230
231
0
  set_preference("Bookmark", "false").unwrap();
232
0
  set_preference("SpeechStyle", "ClearSpeak").unwrap();
233
0
  info!("Languages: {}", libmathcat::interface::get_supported_languages().unwrap_or_default().join(", "));
234
0
  info!("Speech styles: {}", libmathcat::interface::get_supported_speech_styles("ClearSpeak").unwrap_or_default().join(", "));
235
0
  info!("BrailleCodes: {}", libmathcat::interface::get_supported_braille_codes().unwrap_or_default().join(", "));
236
  // set_preference("DecimalSeparators", ",").unwrap();
237
  // set_preference("BlockSeparators", ". ").unwrap();
238
0
  if let Err(e) = set_mathml(expr) {
239
0
    eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1); 
240
0
  };
241
242
  // match do_navigate_command("ZoomIn".to_string())  {
243
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
244
  //   Ok(speech) => info!("\nZoomIn speech: '{speech}'"),
245
  // }
246
  // match do_navigate_command("ToggleZoomLockUp".to_string()) {
247
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
248
  //   Ok(speech) => info!("ToggleZoomLockUp speech: '{speech}'"),
249
  // }
250
  // match do_navigate_command("MovePrevious".to_string()) {
251
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
252
  //   Ok(speech) => info!("MovePrevious speech: '{speech}'"),
253
  // }
254
  // match do_navigate_command("MovePrevious".to_string()) {
255
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
256
  //   Ok(speech) => info!("MovePrevious speech: '{}'", speech),
257
  // }
258
  // match do_navigate_command("MovePrevious".to_string()) {
259
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
260
  //   Ok(speech) => info!("MovePrevious speech: '{}'", speech),
261
  // }
262
  // match do_navigate_command("MoveNext".to_string()) {
263
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
264
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
265
  // }
266
  // match do_navigate_command("MoveNext".to_string()) {
267
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
268
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
269
  // }
270
  // match do_navigate_command("MoveNext".to_string()) {
271
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
272
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
273
  // }
274
  // match do_navigate_command("MoveNext".to_string()) {
275
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
276
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
277
  // }
278
  // match get_spoken_text() {
279
  //   Ok(speech) => info!("Computed speech string:\n   '{speech}'"),
280
  //   Err(e) => eprintln!("{}", errors_to_string(&e)); exit(1);,
281
  // }
282
0
  debug!("Speech language is {}", get_preference("Language").unwrap());
283
0
  debug!("DecimalSeparator: {:?}", get_preference("DecimalSeparator").unwrap());
284
0
  debug!("DecimalSeparators: {:?}, BlockSeparators: {:?}", get_preference("DecimalSeparators").unwrap(), get_preference("BlockSeparators").unwrap());
285
0
  debug!("SpeechStyle: {:?}", get_preference("SpeechStyle").unwrap());
286
0
  debug!("Verbosity: {:?}", get_preference("Verbosity").unwrap());
287
 
288
  // info!("Time taken for loading+speech+braille: {}ms", instant.elapsed().as_millis());
289
  // let instant = Instant::now();
290
0
  match get_spoken_text() {
291
0
    Ok(speech) => info!("Computed speech string:\n   '{}'", speech),
292
0
    Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
293
  }
294
  // info!("Time taken (second time for speech): {}ms", instant.elapsed().as_millis());
295
  // info!("SpeechStyle: {:?}", get_preference("SpeechStyle"));
296
297
0
  match get_braille("") {
298
0
    Ok(braille) => info!("Computed braille string:\n   '{braille}'"),
299
0
    Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
300
  }
301
0
  debug!("...using BrailleCode: {:?}", get_preference("BrailleCode").unwrap());
302
  // let xpath_counts = libmathcat::speech::xpath_count();
303
  // info!("#xpath = {}; duplicates = {}", xpath_counts.0, xpath_counts.1);
304
  // info!("Time taken (second time for speech + braille): {}ms", instant.elapsed().as_millis());
305
  // debug!("Hashmap sizes:\n{}", libmathcat::speech::SpeechRules::print_sizes());
306
0
  timing_test(expr, 000);
307
308
0
}
309
310
0
fn timing_test(expr: &str, n_loops: usize) {
311
0
  if n_loops == 0 {
312
0
    return;
313
0
  }
314
  
315
0
  let n_loops_float = n_loops as f64;
316
0
  let instant = Instant::now();
317
0
  for _ in 0..n_loops {
318
0
    if let Err(e) = set_mathml(expr) {
319
0
      eprintln!("Error: exiting -- {}", errors_to_string(&e));
320
0
    };
321
0
    match get_spoken_text() {
322
0
      Ok(_) =>( ),
323
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
324
    }
325
0
    match get_braille("") {
326
0
      Ok(_) => (),
327
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
328
    }
329
  }
330
0
  info!("Time taken (time for set, speech, {} braille averaged over {} loops): {}ms", get_preference("BrailleCode").unwrap(), n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
331
332
0
  let instant = Instant::now();
333
0
  for _ in 0..n_loops {
334
0
    if let Err(e) = set_mathml(expr) {
335
0
      eprintln!("Error: exiting -- {}", errors_to_string(&e));
336
0
    };
337
  }
338
0
  info!("Time taken (time for set averaged over {} loops): {}ms", n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
339
340
0
  let instant = Instant::now();
341
0
  for _ in 0..n_loops {
342
0
    match get_spoken_text() {
343
0
      Ok(_) =>( ),
344
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
345
    }
346
  }
347
0
  info!("Time taken (time for get_spoken_text() averaged over {} loops): {}ms", n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
348
349
0
  set_preference("BrailleCode", "UEB").unwrap();
350
0
  get_braille("").unwrap();
351
0
  let instant = Instant::now();
352
0
  for _ in 0..n_loops {
353
0
    match get_braille("") {
354
0
      Ok(_) => (),
355
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
356
    }
357
  }
358
0
  info!("Time taken (time for {} braille averaged over {} loops): {}ms", get_preference("BrailleCode").unwrap(), n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
359
360
0
    if let Err(e) = set_mathml(expr) {
361
0
      eprintln!("Error: exiting -- {}", errors_to_string(&e));
362
0
    };
363
0
  set_preference("BrailleCode", "Nemeth").unwrap();
364
0
  get_braille("").unwrap();
365
0
  let instant = Instant::now();
366
0
  for _ in 0..n_loops {
367
0
    match get_braille("") {
368
0
      Ok(_) => (),
369
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);} ,
370
    }
371
  }
372
0
  info!("Time taken (time for {} braille averaged over {} loops): {}ms", get_preference("BrailleCode").unwrap(), n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
373
0
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/main.rs
Line
Count
Source
1
// *** MathCAT doesn't normally want to build a binary ***
2
// *** This file is here because it is useful for trying out things ***
3
#![allow(clippy::needless_return)]
4
5
use libmathcat::interface::*;
6
use log::{debug, info};
7
use std::time::Instant;
8
use std::process::exit;
9
10
11
// Maybe also have this speak to test the TTS generation.
12
// There is a rust winapi crate that mirrors the WinPAI and has "Speak(...)" in it
13
14
// env RUST_LOG=DEBUG cargo run --features "include-zip"
15
cfg_if::cfg_if! {
16
    if #[cfg(feature = "include-zip")] {
17
        fn get_rules_dir() -> String {
18
          return "Rules".to_string();
19
        }
20
    } else {
21
0
        fn get_rules_dir() -> String {
22
          // for testing with zipped rules dir
23
          // let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../../MathCATForPython/addon/globalPlugins/MathCAT/Rules");
24
0
          let rules_path = std::env::current_exe().unwrap().parent().unwrap().join("../../Rules");
25
0
          return rules_path.as_os_str().to_str().unwrap().to_string();
26
0
        }
27
    }
28
}
29
30
0
fn main() {
31
0
  env_logger::builder()
32
0
      .format_timestamp(None)
33
0
      .format_module_path(false)
34
0
      .format_indent(Some(2))
35
0
      .format_level(false)
36
0
      .init();
37
38
//    let expr = r#"
39
//    <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
40
//    <mrow>
41
//      <msup>
42
//        <mi>e</mi>
43
//        <mrow>
44
//          <mo>&#x2212;</mo>
45
//          <mfrac>
46
//            <mn>1</mn>
47
//            <mn>2</mn>
48
//          </mfrac>
49
//          <msup>
50
//            <mrow>
51
//              <mrow>
52
//                <mo>(</mo>
53
//                <mrow>
54
//                  <mfrac>
55
//                    <mrow>
56
//                      <mi>x</mi>
57
//                      <mo>&#x2212;</mo>
58
//                      <mi>&#x03BC;</mi>
59
//                    </mrow>
60
//                    <mi>&#x03C3;</mi>
61
//                  </mfrac>
62
//                </mrow>
63
//                <mo>)</mo>
64
//              </mrow>
65
//            </mrow>
66
//            <mn>2</mn>
67
//          </msup>
68
//        </mrow>
69
//      </msup>
70
//    </mrow>
71
//  </math>
72
// "#;
73
  // let expr = "<math display='inline' xmlns='http://www.w3.org/1998/Math/MathML'>
74
  //       <msup intent='power($base(2, $base),silly($exp,-1.))'>
75
  //       <mi arg='base'>x</mi>
76
  //       <mi arg='exp'>n</mi>
77
  //     </msup>
78
  //       </math>
79
  //     ";
80
  // let expr = "<mrow intent='pre@prefix(in@infix($a, x))(post@postfix($b))'>
81
  //     <mi arg='a'>A</mi>
82
  //     <mover>
83
  //         <mo intent='map'>⟶</mo>
84
  //         <mo intent='congruence'>≅</mo>
85
  //     </mover>
86
  //     <mi arg='b'>B</mi>
87
  //   </mrow>";
88
  // let expr = "<math><mi>Na</mi><mi>S</mi><mo>(</mo><mi>l</mi><mo>)</mo></math>";
89
90
91
  // let expr = "<math xmlns='http://www.w3.org/1998/Math/MathML' display='block'>
92
  //     <mrow>
93
  //       <mo stretchy='false'>[</mo>
94
  //       <mrow>
95
  //         <mi>Co</mi>
96
  //       </mrow>
97
  //       <mo stretchy='false'>(</mo>
98
  //       <mrow>
99
  //         <mi>NH</mi>
100
  //       </mrow>
101
  //       <msub>
102
  //         <mrow>
103
  //           <mrow>
104
  //             <mpadded width='0'>
105
  //               <mphantom>
106
  //                 <mi>A</mi>
107
  //               </mphantom>
108
  //             </mpadded>
109
  //           </mrow>
110
  //         </mrow>
111
  //         <mrow>
112
  //           <mrow>
113
  //             <mpadded height='0'>
114
  //               <mn>3</mn>
115
  //             </mpadded>
116
  //           </mrow>
117
  //         </mrow>
118
  //       </msub>
119
  //       <mo stretchy='false'>)</mo>
120
  //       <msub>
121
  //         <mrow>
122
  //           <mrow>
123
  //             <mpadded width='0'>
124
  //               <mphantom>
125
  //                 <mi>A</mi>
126
  //               </mphantom>
127
  //             </mpadded>
128
  //           </mrow>
129
  //         </mrow>
130
  //         <mrow>
131
  //           <mrow>
132
  //             <mpadded height='0'>
133
  //               <mn>6</mn>
134
  //             </mpadded>
135
  //           </mrow>
136
  //         </mrow>
137
  //       </msub>
138
  //       <mo stretchy='false'>]</mo>
139
  //       <msup>
140
  //         <mrow>
141
  //           <mrow>
142
  //             <mpadded width='0'>
143
  //               <mphantom>
144
  //                 <mi>A</mi>
145
  //               </mphantom>
146
  //             </mpadded>
147
  //           </mrow>
148
  //         </mrow>
149
  //         <mrow>
150
  //           <mn>3</mn>
151
  //           <mo>+</mo>
152
  //         </mrow>
153
  //       </msup>
154
  //       <mtext>&#xA0;</mtext>
155
  //       <mo stretchy='false'>(</mo>
156
  //       <mrow>
157
  //         <mi>Cl</mi>
158
  //       </mrow>
159
  //       <msub>
160
  //         <mrow>
161
  //           <mrow>
162
  //             <mpadded width='0'>
163
  //               <mphantom>
164
  //                 <mi>A</mi>
165
  //               </mphantom>
166
  //             </mpadded>
167
  //           </mrow>
168
  //         </mrow>
169
  //         <mrow>
170
  //           <mrow>
171
  //             <mpadded height='0'>
172
  //               <mn>3</mn>
173
  //             </mpadded>
174
  //           </mrow>
175
  //         </mrow>
176
  //       </msub>
177
  //       <mo stretchy='false'>)</mo>
178
  //       <msup>
179
  //         <mrow>
180
  //           <mrow>
181
  //             <mpadded width='0'>
182
  //               <mphantom>
183
  //                 <mi>A</mi>
184
  //               </mphantom>
185
  //             </mpadded>
186
  //           </mrow>
187
  //         </mrow>
188
  //         <mrow>
189
  //           <mo>&#x2212;</mo>, 
190
  //         </mrow>
191
  //       </msup>
192
  //     </mrow>
193
  //   </math>";
194
195
0
  let expr = r#"
196
0
<math>
197
0
        <msub><mi mathvariant="normal">N</mi><mn>2</mn></msub>
198
0
        <munderover><mo>&#x2192;</mo><mtext>Haber&#xA0;process</mtext><msub><mi mathvariant="normal">H</mi><mn>2</mn></msub></munderover>
199
0
        <mi mathvariant="normal">N</mi>
200
0
        <msub><mi mathvariant="normal">H</mi><mn>3</mn></msub>
201
0
    </math>
202
0
         "#;
203
  // let instant = Instant::now();
204
205
  // let rules_dir = "".to_string();    // Use MathCATRulesDir, potentially pointing to a zipped version
206
0
  if let Err(e) = set_rules_dir(get_rules_dir()) {
207
0
    eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);
208
0
  }
209
0
  debug!("Languages: {}", libmathcat::interface::get_supported_languages().unwrap_or_default().join(", "));
210
211
  #[cfg(feature = "include-zip")]
212
  info!("***********include-zip is present**********");
213
0
  info!("Version = '{}' using Rules dir {}", get_version(), get_rules_dir());
214
0
  set_preference("Language", "en").unwrap();
215
0
  set_preference("DecimalSeparator", "Auto").unwrap();
216
0
  set_preference("BrailleCode", "UEB").unwrap();
217
0
  set_preference("BrailleNavHighlight", "On").unwrap();
218
0
  set_preference("TTS", "None").unwrap();
219
0
  set_preference("Verbosity", "Verbose").unwrap();
220
0
  set_preference("NavVerbosity", "Verbose").unwrap();
221
0
  set_preference("NavMode", "Enhanced").unwrap();
222
0
  set_preference("Impairment", "Blindness").unwrap();
223
0
  set_preference("SpeechOverrides_CapitalLetters", "").unwrap();
224
0
  set_preference("MathRate", "80").unwrap();
225
  // set_preference("CapitalLetters_UseWord", "true").unwrap();
226
  // set_preference("CapitalLetters_Pitch", "30").unwrap();
227
0
  set_preference("CapitalLetters_Beep", "true").unwrap();
228
0
  set_preference("IntentErrorRecovery", "Error").unwrap();
229
  // set_preference("MathRate", "77").unwrap();
230
231
0
  set_preference("Bookmark", "false").unwrap();
232
0
  set_preference("SpeechStyle", "ClearSpeak").unwrap();
233
0
  info!("Languages: {}", libmathcat::interface::get_supported_languages().unwrap_or_default().join(", "));
234
0
  info!("Speech styles: {}", libmathcat::interface::get_supported_speech_styles("ClearSpeak").unwrap_or_default().join(", "));
235
0
  info!("BrailleCodes: {}", libmathcat::interface::get_supported_braille_codes().unwrap_or_default().join(", "));
236
  // set_preference("DecimalSeparators", ",").unwrap();
237
  // set_preference("BlockSeparators", ". ").unwrap();
238
0
  if let Err(e) = set_mathml(expr) {
239
0
    eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1); 
240
0
  };
241
242
  // match do_navigate_command("ZoomIn".to_string())  {
243
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
244
  //   Ok(speech) => info!("\nZoomIn speech: '{speech}'"),
245
  // }
246
  // match do_navigate_command("ToggleZoomLockUp".to_string()) {
247
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
248
  //   Ok(speech) => info!("ToggleZoomLockUp speech: '{speech}'"),
249
  // }
250
  // match do_navigate_command("MovePrevious".to_string()) {
251
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
252
  //   Ok(speech) => info!("MovePrevious speech: '{speech}'"),
253
  // }
254
  // match do_navigate_command("MovePrevious".to_string()) {
255
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
256
  //   Ok(speech) => info!("MovePrevious speech: '{}'", speech),
257
  // }
258
  // match do_navigate_command("MovePrevious".to_string()) {
259
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
260
  //   Ok(speech) => info!("MovePrevious speech: '{}'", speech),
261
  // }
262
  // match do_navigate_command("MoveNext".to_string()) {
263
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
264
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
265
  // }
266
  // match do_navigate_command("MoveNext".to_string()) {
267
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
268
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
269
  // }
270
  // match do_navigate_command("MoveNext".to_string()) {
271
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
272
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
273
  // }
274
  // match do_navigate_command("MoveNext".to_string()) {
275
  //   Err(e) => eprintln!("Error: exiting -- {}", errors_to_string(&e)); exit(1);,
276
  //   Ok(speech) => info!("MoveNext speech: '{}'", speech),
277
  // }
278
  // match get_spoken_text() {
279
  //   Ok(speech) => info!("Computed speech string:\n   '{speech}'"),
280
  //   Err(e) => eprintln!("{}", errors_to_string(&e)); exit(1);,
281
  // }
282
0
  debug!("Speech language is {}", get_preference("Language").unwrap());
283
0
  debug!("DecimalSeparator: {:?}", get_preference("DecimalSeparator").unwrap());
284
0
  debug!("DecimalSeparators: {:?}, BlockSeparators: {:?}", get_preference("DecimalSeparators").unwrap(), get_preference("BlockSeparators").unwrap());
285
0
  debug!("SpeechStyle: {:?}", get_preference("SpeechStyle").unwrap());
286
0
  debug!("Verbosity: {:?}", get_preference("Verbosity").unwrap());
287
 
288
  // info!("Time taken for loading+speech+braille: {}ms", instant.elapsed().as_millis());
289
  // let instant = Instant::now();
290
0
  match get_spoken_text() {
291
0
    Ok(speech) => info!("Computed speech string:\n   '{}'", speech),
292
0
    Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
293
  }
294
  // info!("Time taken (second time for speech): {}ms", instant.elapsed().as_millis());
295
  // info!("SpeechStyle: {:?}", get_preference("SpeechStyle"));
296
297
0
  match get_braille("") {
298
0
    Ok(braille) => info!("Computed braille string:\n   '{braille}'"),
299
0
    Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
300
  }
301
0
  debug!("...using BrailleCode: {:?}", get_preference("BrailleCode").unwrap());
302
  // let xpath_counts = libmathcat::speech::xpath_count();
303
  // info!("#xpath = {}; duplicates = {}", xpath_counts.0, xpath_counts.1);
304
  // info!("Time taken (second time for speech + braille): {}ms", instant.elapsed().as_millis());
305
  // debug!("Hashmap sizes:\n{}", libmathcat::speech::SpeechRules::print_sizes());
306
0
  timing_test(expr, 000);
307
308
0
}
309
310
0
fn timing_test(expr: &str, n_loops: usize) {
311
0
  if n_loops == 0 {
312
0
    return;
313
0
  }
314
  
315
0
  let n_loops_float = n_loops as f64;
316
0
  let instant = Instant::now();
317
0
  for _ in 0..n_loops {
318
0
    if let Err(e) = set_mathml(expr) {
319
0
      eprintln!("Error: exiting -- {}", errors_to_string(&e));
320
0
    };
321
0
    match get_spoken_text() {
322
0
      Ok(_) =>( ),
323
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
324
    }
325
0
    match get_braille("") {
326
0
      Ok(_) => (),
327
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
328
    }
329
  }
330
0
  info!("Time taken (time for set, speech, {} braille averaged over {} loops): {}ms", get_preference("BrailleCode").unwrap(), n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
331
332
0
  let instant = Instant::now();
333
0
  for _ in 0..n_loops {
334
0
    if let Err(e) = set_mathml(expr) {
335
0
      eprintln!("Error: exiting -- {}", errors_to_string(&e));
336
0
    };
337
  }
338
0
  info!("Time taken (time for set averaged over {} loops): {}ms", n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
339
340
0
  let instant = Instant::now();
341
0
  for _ in 0..n_loops {
342
0
    match get_spoken_text() {
343
0
      Ok(_) =>( ),
344
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
345
    }
346
  }
347
0
  info!("Time taken (time for get_spoken_text() averaged over {} loops): {}ms", n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
348
349
0
  set_preference("BrailleCode", "UEB").unwrap();
350
0
  get_braille("").unwrap();
351
0
  let instant = Instant::now();
352
0
  for _ in 0..n_loops {
353
0
    match get_braille("") {
354
0
      Ok(_) => (),
355
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);},
356
    }
357
  }
358
0
  info!("Time taken (time for {} braille averaged over {} loops): {}ms", get_preference("BrailleCode").unwrap(), n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
359
360
0
    if let Err(e) = set_mathml(expr) {
361
0
      eprintln!("Error: exiting -- {}", errors_to_string(&e));
362
0
    };
363
0
  set_preference("BrailleCode", "Nemeth").unwrap();
364
0
  get_braille("").unwrap();
365
0
  let instant = Instant::now();
366
0
  for _ in 0..n_loops {
367
0
    match get_braille("") {
368
0
      Ok(_) => (),
369
0
      Err(e) => {eprintln!("{}", errors_to_string(&e)); exit(1);} ,
370
    }
371
  }
372
0
  info!("Time taken (time for {} braille averaged over {} loops): {}ms", get_preference("BrailleCode").unwrap(), n_loops, instant.elapsed().as_millis() as f64/n_loops_float);
373
0
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/navigate.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/navigate.rs.html index aa1c5a40..d2bbf1fc 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/navigate.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/navigate.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/navigate.rs
Line
Count
Source
1
//! Navigation is controlled by a `Navigation_Rules.yaml` file in conjunction with preferences.
2
//! See preference documentation for more info on navigation preferences.
3
#![allow(clippy::needless_return)]
4
5
use std::cell::{Ref, RefCell, RefMut};
6
use sxd_xpath::context::Evaluation;
7
use sxd_xpath::Value;
8
use sxd_document::dom::Element;
9
use sxd_document::Package;
10
11
use std::fmt;
12
use crate::canonicalize::{name, get_parent};
13
use crate::pretty_print::mml_to_string;
14
use crate::speech::{NAVIGATION_RULES, CONCAT_INDICATOR, CONCAT_STRING, SpeechRules, SpeechRulesWithContext};
15
use crate::infer_intent::add_fixity_children;
16
use crate::interface::copy_mathml;
17
#[cfg(not(target_family = "wasm"))]
18
use std::time::Instant;
19
use crate::errors::*;
20
use phf::phf_set;
21
use log::{debug};
22
23
pub const ID_OFFSET: &str = "data-id-offset";
24
25
const MAX_PLACE_MARKERS: usize = 10;
26
27
thread_local!{
28
    /// The current set of navigation rules
29
    pub static NAVIGATION_STATE: RefCell<NavigationState> =
30
            RefCell::new( NavigationState::new() );
31
}
32
33
pub static NAV_COMMANDS: phf::Set<&str> = phf_set! {
34
    "MovePrevious", "MoveNext", "MoveStart", "MoveEnd", "MoveLineStart", "MoveLineEnd", 
35
    "MoveCellPrevious", "MoveCellNext", "MoveCellUp", "MoveCellDown", "MoveColumnStart", "MoveColumnEnd", 
36
    "ZoomIn", "ZoomOut", "ZoomOutAll", "ZoomInAll", 
37
    "MoveLastLocation", 
38
    "ReadPrevious", "ReadNext", "ReadCurrent", "ReadCellCurrent", "ReadStart", "ReadEnd", "ReadLineStart", "ReadLineEnd", 
39
    "DescribePrevious", "DescribeNext", "DescribeCurrent", 
40
    "WhereAmI", "WhereAmIAll", 
41
    "ToggleZoomLockUp", "ToggleZoomLockDown", "ToggleSpeakMode", 
42
    "Exit", 
43
    "MoveTo0","MoveTo1","MoveTo2","MoveTo3","MoveTo4","MoveTo5","MoveTo6","MoveTo7","MoveTo8","MoveTo9",
44
    "Read0","Read1","Read2","Read3","Read4","Read5","Read6","Read7","Read8","Read9",
45
    "Describe0","Describe1","Describe2","Describe3","Describe4","Describe5","Describe6","Describe7","Describe8","Describe9",
46
    "SetPlacemarker0","SetPlacemarker1","SetPlacemarker2","SetPlacemarker3","SetPlacemarker4","SetPlacemarker5","SetPlacemarker6","SetPlacemarker7","SetPlacemarker8","SetPlacemarker9",
47
};
48
49
/// A location in the expression being navigated: a node id plus an offset.
#[derive(Clone, PartialEq, Debug)]
struct NavigationPosition {
    /// id of current node
    current_node: String,
    /// for leaves, char offset in leaf (default = 0), otherwise id for artificial intent node
    current_node_offset: usize,
}

impl fmt::Display for NavigationPosition {
    /// Formats the position as `<node id>[+<offset>]`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        return write!(f, "{}[+{}]", self.current_node, self.current_node_offset);
    }
}

/// An illegal 'id' value; it marks a position that has not been set.
const ILLEGAL_NODE_ID: &str = "!not set";

impl Default for NavigationPosition {
    /// The "not set" position: `ILLEGAL_NODE_ID` with offset 0.
    fn default() -> Self {
        NavigationPosition {
            current_node: ILLEGAL_NODE_ID.to_string(),
            current_node_offset: 0,
        }
    }
}
70
71
72
#[derive(Debug, Clone)]
73
pub struct NavigationState {
74
    // it might be better to use a linked for the stacks, with the first node being the top
75
    // these two stacks should be kept in sync.
76
    position_stack: Vec<NavigationPosition>,    // all positions, so we can go back to them
77
    command_stack: Vec<&'static str>,           // all commands, so we can undo them
78
    place_markers: [NavigationPosition; MAX_PLACE_MARKERS],
79
    where_am_i: NavigationPosition,             // current 'where am i' location
80
81
    #[cfg(target_family = "wasm")]
82
    where_am_i_start_time: usize,               // FIX: for web
83
    #[cfg(not(target_family = "wasm"))]
84
    where_am_i_start_time: Instant,
85
    mode: String,                               // one of "Character", "Simple", or "Enhanced"
86
    speak_overview: bool,                       // true => describe after move; false => (standard) speech rules
87
}
88
89
impl fmt::Display for NavigationState {
90
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
91
0
        writeln!(f, "NavigationState{{")?;
92
0
        write!(f, "  Position Stack: ")?;
93
0
        for (i, nav_state) in self.position_stack.iter().enumerate() {
94
0
            write!(f, "{}{}", if i==0 {""} else {", "}, nav_state)?;
95
        }
96
0
        writeln!(f)?;
97
0
        write!(f, "  Command Stack: ")?;
98
0
        for (i, nav_state) in self.command_stack.iter().enumerate() {
99
0
            write!(f, "{}{}", if i==0 {""} else {", "}, *nav_state)?;
100
        }
101
0
        writeln!(f)?;
102
0
        writeln!(f, "  where_am_i: {}, start_time: {:?}", self.where_am_i, self.where_am_i_start_time)?;
103
0
        writeln!(f, "  mode: {}, speak_overview: {}", self.mode, self.speak_overview)?;
104
0
        writeln!(f, "}}")?;
105
0
        return Ok( () );
106
0
    }
107
}
108
109
impl NavigationState {
110
3.92k
    fn new() -> NavigationState {
111
3.92k
        return NavigationState {
112
3.92k
            position_stack: Vec::with_capacity(1024),
113
3.92k
            command_stack: Vec::with_capacity(1024),
114
3.92k
            place_markers: Default::default(),
115
3.92k
            where_am_i: NavigationPosition::default(),
116
3.92k
            // FIX: figure this out for the web
117
3.92k
            #[cfg(target_family = "wasm")]
118
3.92k
            where_am_i_start_time: 0,           // FIX: for web
119
3.92k
            #[cfg(not(target_family = "wasm"))]
120
3.92k
            where_am_i_start_time: Instant::now(),      // need to give it some value, and "default()" isn't an option
121
3.92k
            mode: "".to_string(),                       // set latter when we have some context
122
3.92k
            speak_overview: false,                      // set latter when we have some context
123
3.92k
        };
124
3.92k
    }
125
126
4.88k
    pub fn reset(&mut self) {
127
4.88k
        self.position_stack.clear();
128
4.88k
        self.command_stack.clear();
129
4.88k
        self.where_am_i = NavigationPosition::default();
130
4.88k
        self.reset_start_time()
131
4.88k
    }
132
133
134
    // defining reset_start_time because of the following message if done inline
135
    // attributes on expressions are experimental
136
    // see issue #15701 <https://github.com/rust-lang/rust/issues/15701> for more information
137
    #[cfg(target_family = "wasm")]
138
    fn reset_start_time(&mut self) {
139
         self.where_am_i_start_time = 0;
140
    }
141
142
    #[cfg(not(target_family = "wasm"))]
143
4.88k
    fn reset_start_time(&mut self) {
144
4.88k
         self.where_am_i_start_time = Instant::now();      // need to give it some value, and "default()" isn't an option
145
4.88k
    }
146
147
148
563
    fn push(&mut self, position: NavigationPosition, command: &'static str) {
149
563
        self.position_stack.push(position);
150
563
        self.command_stack.push(command);
151
563
    }
152
153
46
    fn pop(&mut self) -> Option<(NavigationPosition, &'static str)> {
154
46
        assert_eq!(self.position_stack.len(), self.command_stack.len());
155
46
        if self.position_stack.is_empty() {
156
0
            return None;
157
        } else {
158
46
            return Some( (self.position_stack.pop().unwrap(), self.command_stack.pop().unwrap()) );
159
        }
160
46
    }
161
162
2.75k
    fn top(&self) -> Option<(&NavigationPosition, &'static str)> {
163
2.75k
        if self.position_stack.is_empty() {
164
0
            return None;
165
2.75k
        }
166
2.75k
        let last = self.position_stack.len()-1;
167
2.75k
        return Some( (&self.position_stack[last], self.command_stack[last]) );
168
2.75k
    }
169
170
0
    pub fn get_navigation_mathml<'a>(&self, mathml: Element<'a>) -> Result<(Element<'a>, usize)> {
171
0
        if self.position_stack.is_empty() {
172
0
            return Ok( (mathml, 0) );
173
        } else {
174
0
            let (position, _) = self.top().unwrap();
175
0
            return match get_node_by_id(mathml, position) {
176
0
                None => bail!("internal error: id '{}' was not found in mathml:\n{}",
177
0
                                position.current_node, mml_to_string(mathml)),
178
0
                Some(found) => Ok( (found, position.current_node_offset) )
179
            };
180
        }
181
0
    }
182
183
1.09k
    pub fn get_navigation_mathml_id(&self, mathml: Element) -> (String, usize) {
184
1.09k
        if self.position_stack.is_empty() {
185
47
            return (mathml.attribute_value("id").unwrap().to_string(), 0);
186
        } else {
187
1.05k
            let (position, _) = self.top().unwrap();
188
1.05k
            return (position.current_node.clone(), position.current_node_offset);
189
        }
190
1.09k
    }
191
192
549
    fn init_navigation_context(&self, context: &mut sxd_xpath::Context, command: &'static str,
193
549
                               nav_state_top: Option<(&NavigationPosition, &'static str)>) {
194
549
        context.set_variable("NavCommand", command);
195
196
549
        if command == "WhereAmI" && 
self.where_am_i == NavigationPosition::default()0
{
197
0
            context.set_variable("NavNode", self.where_am_i.current_node.as_str());
198
0
            context.set_variable("NavNodeOffset", self.where_am_i.current_node_offset as f64);
199
549
        } else {
200
549
            let position = &self.position_stack[self.position_stack.len()-1];
201
549
            context.set_variable("NavNode", position.current_node.as_str());
202
549
            context.set_variable("NavNodeOffset", position.current_node_offset as f64);
203
549
        }
204
205
        // get the index from command (e.g., '3' in 'SetPlacemarker3 or MoveTo3' and set 'PlaceMarker' to it's position)
206
549
        if command.ends_with(|ch: char| ch.is_ascii_digit()) {
207
6
            let index = convert_last_char_to_number(command);
208
6
            let position = &self.place_markers[index];
209
6
            context.set_variable("PlaceMarkerIndex", index as f64);
210
6
            context.set_variable("PlaceMarker", position.current_node.as_str());
211
6
            context.set_variable("PlaceMarkerOffset", position.current_node_offset as f64);
212
543
        }
213
           
214
549
        context.set_variable("Overview", self.speak_overview);
215
549
        context.set_variable("ReadZoomLevel", (if self.mode == "Enhanced" {
-1200
} else {
1349
}) as f64);
216
549
        context.set_variable("MatchCounter", 0 as f64);
217
218
549
        if command == "MoveLastLocation" {
219
3
            let previous_command = match nav_state_top {
220
0
                None => "None",
221
3
                Some( (_, previous_command) ) => previous_command,
222
            };
223
3
            context.set_variable("PreviousNavCommand", previous_command);
224
546
        }
225
226
        // used by nav rules for speech -- needs an initial value so tests don't fail
227
549
        context.set_variable("SayCommand", "" );
228
549
        context.set_variable("Move2D", "" );
229
549
        context.set_variable("SpeakExpression", true );    // default is to speak the expr after navigation
230
549
        return;
231
232
6
        fn convert_last_char_to_number(str: &str) -> usize {
233
6
            let last_char = str.as_bytes()[str.len()-1];
234
6
            assert!( last_char.is_ascii_digit() );
235
6
            return (last_char - b'0') as usize;
236
6
        }
237
549
    }
238
}
239
240
/// Converts the last character of a command name (e.g., the '3' in "SetPlacemarker3") to an integer.
///
/// # Panics
/// Panics if `str` is empty or its last byte is not an ASCII digit.
fn convert_last_char_to_number(str: &str) -> usize {
    return match str.as_bytes().last() {
        Some(last_char) if last_char.is_ascii_digit() => usize::from(last_char - b'0'),
        _ => panic!("expected '{str}' to end with an ASCII digit"),
    };
}
246
247
/// Get the node associated with a `NavigationPosition`.
248
/// This can be called on an intent tree 
249
9.18k
fn get_node_by_id<'a>(mathml: Element<'a>, pos: &NavigationPosition) -> Option<Element<'a>> {
250
9.18k
    if let Some(
mathml_id9.17k
) = mathml.attribute_value("id") &&
251
9.17k
       mathml_id == pos.current_node.as_str() &&
252
1.46k
        (crate::xpath_functions::is_leaf(mathml) || 
253
537
        mathml.attribute_value(ID_OFFSET).unwrap_or("0") == pos.current_node_offset.to_string()) {
254
1.46k
        return Some(mathml);
255
7.71k
    }
256
257
10.0k
    for child in 
mathml7.71k
.
children7.71k
() {
258
10.0k
        if let Some(
child7.71k
) = child.element() &&
259
7.71k
           let Some(
found4.41k
) = get_node_by_id(child, pos) {
260
4.41k
                return Some(found);
261
5.60k
            }
262
    }
263
3.29k
    return None;
264
9.18k
}
265
266
/// Search the mathml for the id and set the navigation node to that id
267
/// Resets the navigation stack
268
2
pub fn set_navigation_node_from_id(mathml: Element, id: &str, offset: usize) -> Result<()> {
269
2
    let current_node = id.to_string();
270
2
    let pos = NavigationPosition { current_node: current_node.clone(), current_node_offset: offset };
271
2
    let node = get_node_by_id(mathml, &pos);
272
2
    if node.is_some() {
273
2
        return NAVIGATION_STATE.with(|nav_state| {
274
2
            let mut nav_state = nav_state.borrow_mut();
275
2
            nav_state.reset();
276
2
            nav_state.push(NavigationPosition{
277
2
                current_node,
278
2
                current_node_offset: offset
279
2
            }, "None");
280
2
            return Ok( () );
281
2
        })
282
    } else {
283
0
        bail!("Id {} not found in MathML {}", id, mml_to_string(mathml));
284
    }
285
2
}
286
287
/// Get's the Nav Node from the context, with some exceptions such as Toggle commands where it isn't set.
288
/// Note: mathml can be any node. It isn't really used but some Element needs to be part of Evaluate().
289
571
pub fn get_nav_node<'c>(context: &sxd_xpath::Context<'c>, var_name: &str, mathml: Element<'c>, start_node: Element<'c>, command: &str, nav_mode: &str) -> Result<String> {
290
571
    let start_id = start_node.attribute_value("id").unwrap_or_default();
291
571
    if command.starts_with("Toggle") {
292
1
        return Ok( start_id.to_string() );
293
    } else {
294
570
        return context_get_variable(context, var_name, mathml)
295
570
                .with_context(|| 
format!0
("When trying to {} starting at id={} in {} mode",
296
0
                                                command, start_node.attribute_value("id").unwrap_or_default(), nav_mode));
297
    }
298
571
}
299
300
// FIX: think of a better place to put this, and maybe a better interface
301
/// Note: mathml can be any node. It isn't really used but some Element needs to be part of Evaluate().
302
/// If the context variable has String, Number, or Boolean xpath value, return it as a string. Otherwise it is an error
303
4.55k
pub fn context_get_variable<'c>(context: &sxd_xpath::Context<'c>, var_name: &str, mathml: Element<'c>) -> Result<String> {
304
    // This is slightly roundabout because Context doesn't expose a way to get the values.
305
    // Instead, we create an "Evaluation", which is just one level of indirection.
306
    use sxd_xpath::nodeset::Node;
307
4.55k
    let evaluation = Evaluation::new(context, Node::Element(mathml));
308
4.55k
    return match evaluation.value_of(var_name.into()) {
309
4.55k
        Some(value) => match value {
310
1.74k
            Value::String(s) => Ok(s.clone()),
311
1.20k
            Value::Number(f) => Ok(f.to_string()),
312
1.09k
            Value::Boolean(b) => Ok(format!("{b}")),    // "true" or "false"
313
509
            Value::Nodeset(nodes) => {
314
509
                if nodes.size() == 1 &&
315
509
                   let Some(attr) = nodes.document_order_first().unwrap().attribute() {
316
509
                        return Ok(attr.value().to_string());
317
0
                    };
318
0
                let mut error_message = format!("Variable '{var_name}' set somewhere in navigate.yaml is nodeset and not an attribute: ");
319
0
                if nodes.size() == 0 {
320
0
                    error_message += &format!("0 nodes (false) -- {} set to non-existent node in\n{}",
321
0
                                              var_name, mml_to_string(mathml));
322
0
                } else {
323
0
                    let singular = nodes.size()==1;
324
0
                    error_message += &format!("{} node{}. {}:",
325
0
                            nodes.size(),
326
0
                            if singular {""} else {"s"},
327
0
                            if singular {"Node is"} else {"Nodes are"});
328
0
                    nodes.document_order()
329
0
                        .iter()
330
0
                        .enumerate()
331
0
                        .for_each(|(i, node)| {
332
0
                            match node {
333
0
                                sxd_xpath::nodeset::Node::Element(mathml) =>
334
0
                                    error_message += &format!("#{}:\n{}",i, mml_to_string(*mathml)),
335
0
                                _ => error_message += &format!("'{node:?}'"),
336
                            }   
337
0
                        })    
338
                };
339
0
                bail!(error_message);
340
            },
341
        },
342
0
        None => bail!("Could not find value for navigation variable '{}'", var_name),
343
    }
344
4.55k
}
345
346
/// Wrapper around context_get_variable to get an integer variable
347
1.70k
fn context_get_int_variable<'c>(context: &sxd_xpath::Context<'c>, var_name: &str, mathml: Element<'c>) -> Result<usize> {
348
1.70k
    let value = context_get_variable(context, var_name, mathml)
?0
;
349
1.70k
    return match value.parse::<usize>() {
350
1.70k
        Ok(i) => Ok(i),
351
0
        Err(e) => bail!("Could not parse navigation variable '{}' with value '{}' as integer: {}", var_name, value, e),
352
    }
353
1.70k
}
354
355
/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).]
356
/// The spoken text for the new current node is returned.
357
0
pub fn do_mathml_navigate_key_press(mathml: Element,
358
0
            key: usize, shift_key: bool, control_key: bool, alt_key: bool, meta_key: bool) -> Result<String> {
359
0
    let (command, param) = key_press_to_command_and_param(key, shift_key, control_key, alt_key, meta_key)?;
360
0
    return do_navigate_command_and_param(mathml, command, param);
361
0
}
362
363
2
fn do_navigate_command_and_param(mathml: Element, command: NavigationCommand, param: NavigationParam) -> Result<String> {
364
2
    return do_navigate_command_string(mathml, navigation_command_string(command, param));
365
2
}
366
367
549
pub fn do_navigate_command_string(mathml: Element, nav_command: &'static str) -> Result<String> {   
368
    // first check to see if nav file has been changed -- don't bother checking in loop below
369
549
    NAVIGATION_RULES.with(|rules| {
370
549
        rules.borrow_mut().read_files()
371
549
    })
?0
;
372
373
549
    if mathml.children().is_empty() {
374
0
        bail!("MathML has not been set -- can't navigate");
375
549
    };
376
377
549
    return NAVIGATION_STATE.with(|nav_state| {
378
549
        let mut nav_state = nav_state.borrow_mut();
379
        // debug!("MathML: {}", mml_to_string(mathml));
380
549
        if nav_state.position_stack.is_empty() {
381
            // initialize to root node
382
47
            nav_state.push(NavigationPosition{
383
47
                current_node: mathml.attribute_value("id").unwrap().to_string(),
384
47
                current_node_offset: 0
385
47
            }, "None")
386
502
        };
387
388
549
        return NAVIGATION_RULES.with(|rules| {
389
549
            let rules = rules.borrow();
390
549
            let new_package = Package::new();
391
549
            let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), "", 0);
392
            
393
549
            nav_state.mode = rules.pref_manager.as_ref().borrow().pref_to_string("NavMode");
394
549
            nav_state.speak_overview = rules.pref_manager.as_ref().borrow().pref_to_string("Overview") == "true";
395
396
549
            nav_state.init_navigation_context(rules_with_context.get_context(), nav_command, nav_state.top());
397
            
398
            // start navigation off at the right node
399
549
            if nav_command == "MoveLastLocation" {
400
3
                nav_state.pop();
401
546
            }
402
403
            // If no speech happened for some calls, we try the call again (e.g, no speech for invisible times).
404
            // To prevent to infinite loop, we limit the number of tries
405
            const LOOP_LIMIT: usize = 3;
406
549
            let mut cumulative_speech = String::with_capacity(120);
407
569
            for loop_count in 
0..LOOP_LIMIT549
{
408
569
                match apply_navigation_rules(mathml, nav_command, &rules, &mut rules_with_context, &mut nav_state, loop_count) {
409
569
                    Ok( (speech, done)) => {
410
569
                        cumulative_speech = cumulative_speech + if loop_count==0 {
""549
} else {
" "20
} + speech.trim();
411
569
                        if done {
412
549
                            let (tts, rate) = {
413
549
                                let prefs = rules.pref_manager.borrow();
414
549
                                (prefs.pref_to_string("TTS"), prefs.pref_to_string("MathRate"))
415
549
                            };
416
549
                            if rate != "100" {
417
0
                                match tts.as_str() {
418
0
                                    "SSML"
419
0
                                        if !cumulative_speech.starts_with("<prosody rate") => {
420
0
                                            cumulative_speech = format!("<prosody rate='{}%'>{}</prosody>", &rate, &cumulative_speech);
421
0
                                        }
422
0
                                    "SAPI5"
423
0
                                        if !cumulative_speech.starts_with("<rate speed") => {
424
0
                                            cumulative_speech = format!(
425
0
                                                "<rate speed='{:.1}'>{}</rate>",
426
0
                                                10.0 * (0.01 * rate.parse::<f32>().unwrap_or(100.0)).log(3.0),
427
0
                                                cumulative_speech
428
0
                                            );
429
0
                                        }
430
0
                                    _ => (),  // do nothing
431
                                }
432
549
                            }
433
549
                                                return Ok( rules.pref_manager.borrow().get_tts()
434
549
                                            .merge_pauses(crate::speech::remove_optional_indicators(
435
549
                                                &cumulative_speech.replace(CONCAT_STRING, "")
436
549
                                                                    .replace(CONCAT_INDICATOR, "")                            
437
549
                                                            )
438
549
                                            .trim_start().trim_end_matches([' ', ',', ';'])) );
439
20
                        }
440
                    },
441
0
                    Err(e) => {
442
0
                        return Err(e);
443
                    }
444
                }
445
            }
446
0
            bail!("Internal error: Navigation exceeded limit of number of times no speech generated
447
                   when attempting to {} in {} mode start at id={} in this MathML:\n{}.",
448
0
                   nav_command, nav_state.mode, nav_state.top().unwrap().0.current_node, mml_to_string(mathml));
449
549
        });
450
549
    });
451
452
570
    fn get_start_node<'m>(mathml: Element<'m>, nav_state: &RefMut<NavigationState>) -> Result<Element<'m>>  {
453
570
        let element = match nav_state.top() {
454
            None => {
455
0
                let nav_position = NavigationPosition { current_node: mathml.attribute_value("id").unwrap().to_string(), current_node_offset: 0 };
456
0
                get_node_by_id(mathml, &nav_position)
457
            },
458
570
            Some( (position, _) ) => get_node_by_id(mathml, position),
459
        };
460
461
570
        return match element {
462
569
            Some(node) => Ok(node),
463
            None => {
464
1
                bail!("Internal Error: didn't find id/offset '{:?}' while attempting to start navigation. MathML is\n{}",
465
1
                      nav_state.top().map(|t| t.0), mml_to_string(mathml));
466
            }
467
        };
468
570
    }
469
470
471
472
569
    fn apply_navigation_rules<'c, 'm:'c>(mathml: Element<'m>, nav_command: &'static str,
473
569
            rules: &Ref<SpeechRules>, rules_with_context: &mut SpeechRulesWithContext<'c, '_, 'm>, nav_state: &mut RefMut<NavigationState>,
474
569
            loop_count: usize) -> Result<(String, bool)> {
475
        {
476
569
            let context = rules_with_context.get_context();
477
569
            context.set_variable("MatchCounter", loop_count as f64);
478
569
            nav_state.mode = context_get_variable(context, "NavMode", mathml)
?0
;
479
        }
480
481
569
        let mut add_literal = nav_state.mode == "Character";
482
569
        let (intent, nav_intent) = if add_literal {
483
206
            (mathml, mathml)
484
        } else {
485
363
            let intent = crate::speech::intent_from_mathml(mathml, rules_with_context.get_document())
?0
;
486
363
            (intent, add_fixity_children(copy_mathml(intent)))
487
        };
488
489
569
        let mut properties = "";
490
569
        if add_literal {
491
206
            properties  = mathml.attribute_value("data-intent-property").unwrap_or_default();
492
206
            if properties.contains(":literal:") {
493
0
                add_literal = false;
494
206
            } else {
495
206
                mathml.set_attribute_value("data-intent-property", (":literal:".to_string() + properties).as_str());
496
206
            };
497
363
        }
498
        // we should always find the start node.
499
        // however, if we were navigating by character, then switched the NavMode, the intent tree might not have that node in it
500
569
        let start_node = match get_start_node(nav_intent, nav_state) {
501
568
            Ok(node) => node,
502
            Err(_) => {
503
                // find the node in the other tree (probably mathml) and walk up to find a parent that has an id in both
504
1
                debug!("Could not find start_node in nav_intent -- trying other_tree");
505
1
                let other_tree = if nav_state.mode == "Character" {
nav_intent0
} else {mathml};
506
1
                let mut found_node = get_start_node(other_tree, nav_state)
?0
;
507
2
                while name(found_node) != "math" {
508
2
                    found_node = get_parent(found_node);
509
                    // debug!("found_node:\n{}", mml_to_string(found_node));
510
2
                    let temp_pos = NavigationPosition {
511
2
                        current_node: found_node.attribute_value("id").unwrap_or_default().to_string().clone(),
512
2
                        current_node_offset: found_node.attribute_value(ID_OFFSET).unwrap_or_default().parse::<usize>().unwrap_or_default(),
513
2
                    };
514
2
                    if let Some(
intent_node1
) = get_node_by_id(nav_intent, &temp_pos) {
515
1
                        found_node = intent_node;
516
1
                        break;
517
1
                    }
518
                }
519
1
                found_node
520
            }
521
        };
522
523
        // debug!("intent=\n{}", mml_to_string(intent));
524
        // debug!("nav intent=\n{}", mml_to_string(nav_intent));
525
        // debug!("start_node id={}\n{}", nav_state.top().unwrap().0.current_node.as_str(), mml_to_string(start_node));
526
        // if name(start_node) != "math" {
527
        //     let mut parent= get_parent(start_node);
528
        //     if name(parent) != "math" {
529
        //         parent = get_parent(parent);
530
        //     }
531
        //     debug!("parent or grandparent of start_node:\n{}", mml_to_string(parent));
532
        // }
533
569
        let offset = context_get_int_variable(rules_with_context.get_context(), "NavNodeOffset", intent)
?0
;
534
569
        rules_with_context.set_nav_node_offset(offset);
535
569
        debug!("starting nav_position: {}, start node ={}", 
nav_state.top()0
.
unwrap0
().0,
name0
(
start_node0
));
536
537
569
        let raw_speech_string = rules_with_context.match_pattern::<String>(start_node)
538
569
                    .context("Pattern match/replacement failure during math navigation!")
?0
;
539
569
        let speech = rules.pref_manager.borrow().get_tts()
540
569
                    .merge_pauses(crate::speech::remove_optional_indicators(
541
569
                        &raw_speech_string.replace(CONCAT_STRING, "")
542
569
                                                .replace(CONCAT_INDICATOR, "")                            
543
569
                                    )
544
569
                    .trim());
545
        // debug!("Nav Speech: {}", speech);
546
547
        // FIX: add things that need to do a speech replacement based on some marker for "where am i" and others that loop ([Speak: id])???
548
        // what else needs to be done/set???
549
550
        // transfer some values that might have been set into the prefs
551
569
        let offset = context_get_int_variable(rules_with_context.get_context(), "NavNodeOffset", intent)
?0
;
552
569
        rules_with_context.set_nav_node_offset(offset);
553
569
        let context = rules_with_context.get_context();
554
569
        nav_state.speak_overview = context_get_variable(context, "Overview", intent)
?0
== "true";
555
569
        nav_state.mode = context_get_variable(context, "NavMode", intent)
?0
;
556
569
        rules.pref_manager.as_ref().borrow_mut().set_user_prefs("NavMode", &nav_state.mode)
?0
;
557
558
569
        debug!("context value of NavNodeOffset: {:?}", 
context_get_variable0
(
context0
,
"NavNodeOffset"0
,
intent0
)
?0
);
559
569
        let nav_position = NavigationPosition {
560
569
                current_node: get_nav_node(context, "NavNode", intent, start_node, nav_command, &nav_state.mode)
?0
,
561
569
                current_node_offset: context_get_int_variable(context, "NavNodeOffset", intent)
?0
,
562
            };
563
564
        // after a command, we either read or describe the new location (part of state)
565
        // also some commands are DescribeXXX/ReadXXX, so we need to look at the commands also
566
569
        let use_read_rules = if nav_command.starts_with("Read") {
567
5
            true
568
564
        } else if nav_command.starts_with("Describe") {
569
3
            false
570
        } else {
571
561
            !nav_state.speak_overview
572
        };
573
574
569
        debug!("after match nav_position: {}", nav_position);
575
        // push the new location on the stack
576
569
        if nav_position != NavigationPosition::default() && &nav_position != nav_state.top().unwrap().0 {
577
483
            nav_state.push(nav_position.clone(), nav_command);
578
483
        
}86
579
580
569
        if nav_command.starts_with("SetPlacemarker") {
581
2
            let new_node_id = get_nav_node(context, "NavNode", intent, start_node, nav_command, &nav_state.mode)
?0
;
582
2
            nav_state.place_markers[convert_last_char_to_number(nav_command)] = NavigationPosition{
583
2
                current_node: new_node_id,
584
2
                current_node_offset: context_get_int_variable(context, "NavNodeOffset", intent)
?0
,
585
            }
586
567
        }
587
588
569
        let nav_mathml = get_node_by_id(intent, &nav_position);
589
569
        if nav_mathml.is_some() && context_get_variable(context, "SpeakExpression", intent)
?0
== "true" {
590
            // Speak/Overview of where we landed (if we are supposed to speak it) -- use intent, not nav_intent
591
            // Note: NavMode might have changed, so we need to recheck the mode to see if we use LiteralSpeak
592
519
            let literal_speak = nav_state.mode == "Character";
593
519
            let node_speech_result = speak(mathml, intent, &nav_position, literal_speak, use_read_rules);
594
519
            remove_literal_property(mathml, add_literal, properties);
595
519
            let node_speech = match node_speech_result {
596
519
                Ok(speech) => speech,
597
0
                Err(e) => {
598
0
                    if e.to_string() == crate::speech::NAV_NODE_SPEECH_NOT_FOUND {
599
0
                        bail!("Internal error: With {}/{} in {} mode, can't {} from expression with id '{}' inside:\n{}",
600
0
                              rules.pref_manager.as_ref().borrow().pref_to_string("Language"),
601
0
                              rules.pref_manager.as_ref().borrow().pref_to_string("SpeechStyle"),
602
0
                              &nav_state.mode, nav_command, &nav_position.current_node, mml_to_string(if literal_speak {mathml} else {intent}));
603
0
                    }
604
0
                    return Err(e);
605
                }
606
            };
607
608
            // debug!("node_speech: '{}', speech: '{}'\n", node_speech, speech);
609
519
            if node_speech.is_empty() {
610
                // try again in loop
611
20
                return Ok( (speech, false));
612
            } else {
613
499
                pop_stack(nav_state, loop_count, nav_command);
614
                // debug!("returning: '{}'", speech.clone() + " " + &node_speech);
615
499
                return Ok( (speech + " " + &node_speech, true) );
616
            }
617
        } else {
618
50
            remove_literal_property(mathml, add_literal, properties);
619
50
            pop_stack(nav_state, loop_count, nav_command);
620
50
            return Ok( (speech, true) );
621
        };
622
623
569
        fn remove_literal_property(mathml: Element, add_literal: bool, properties: &str) {
624
569
            if add_literal {
625
206
                if properties.is_empty() {
626
206
                    mathml.remove_attribute("data-intent-property");
627
206
                } else {
628
0
                    mathml.set_attribute_value("data-intent-property", properties);
629
0
                }
630
363
            }
631
569
        }
632
633
569
    }
634
635
636
549
    fn pop_stack(nav_state: &mut NavigationState, count: usize, nav_command: &'static str) {
637
        // save the final state and pop the intermediate states that did nothing
638
549
        let push_command_on_stack = (nav_command.starts_with("Move") && 
nav_command != "MoveLastLocation"355
) ||
nav_command197
.
starts_with197
("Zoom");
639
        // debug!("pop_stack: nav_command={}, count={}, push? {} stack=\n{}", nav_command, count, push_command_on_stack, nav_state);
640
549
        if count == 0 {
641
529
            if !push_command_on_stack && 
nav_command13
==
nav_state13
.top().unwrap().1 {
642
3
                nav_state.pop();    // remove ReadXXX, SetPlacemarker, etc. commands that don't change the state
643
526
            }
644
529
            return;
645
20
        }
646
20
        let (top_position, top_command) = nav_state.pop().unwrap();
647
20
        let mut count = count - 1;
648
        loop {
649
            // debug!("  ... loop count={}", count);
650
20
            nav_state.pop();
651
20
            if count == 0 {
652
20
                break;
653
0
            };
654
0
            count -= 1;
655
        };
656
20
        if push_command_on_stack {
657
19
            nav_state.push(top_position, top_command);
658
19
        
}1
659
        // debug!("END pop_stack: stack=\n{}", nav_state);
660
549
    }
661
549
}
662
663
/// Speak the intent tree at the nav_node_id if that id exists in the intent tree; otherwise use the mathml tree.
664
/// If full_read is true, we speak the tree, otherwise we use the overview rules.
665
/// If literal_speak is true, we use the literal speak rules (and use the mathml tree).
666
519
fn speak(mathml: Element, intent: Element, nav_position: &NavigationPosition, literal_speak: bool, full_read: bool) -> Result<String> {
667
519
    if full_read {
668
        // In something like x^3, we might be looking for the '3', but it will be "cubed", so we don't find it.
669
        // Or we might be on a "(" surrounding a matrix and that isn't part of the intent
670
        // We are probably safer in terms of getting the same speech if we retry intent starting at the nav node,
671
        //  but the node to speak is almost certainly trivial.
672
        // By speaking the non-intent tree, we are certain to speak on the next try
673
505
        if !literal_speak && 
get_node_by_id327
(intent, nav_position).
is_some327
() {
674
                // debug!("speak: nav_node_id={}, intent=\n{}", nav_node_id, mml_to_string(intent));
675
327
            match crate::speech::speak_mathml(intent, &nav_position.current_node, nav_position.current_node_offset) {
676
326
                Ok(speech) => return Ok(speech),
677
1
                Err(e) => {
678
1
                    if e.to_string() != crate::speech::NAV_NODE_SPEECH_NOT_FOUND {
679
0
                        return Err(e);
680
1
                    }
681
                    // else could be something like '3' in 'x^3' ("cubed")
682
                },
683
            }
684
178
        }
685
        // debug!("speak (literal): nav_node_id={}, mathml=\n{}", nav_node_id, mml_to_string(mathml));
686
179
        let speech = crate::speech::speak_mathml(mathml,
687
179
                &nav_position.current_node, nav_position.current_node_offset);
688
        // debug!("speech from speak: {:?}", speech);
689
179
        return speech;
690
    } else {
691
14
        return crate::speech::overview_mathml(mathml, &nav_position.current_node, nav_position.current_node_offset);
692
    }
693
519
}
694
695
696
// MathPlayer's interface mentions these, so we keep them.
697
// These (KeyboardEvent.keyCode) are consistent across platforms (mostly?) but are deprecated.
698
//   KeyboardEvent.code is recommended instead (a string)
699
/// Key code for the left arrow key.
const VK_LEFT: usize = 0x25;
/// Key code for the right arrow key.
const VK_RIGHT: usize = 0x27;
/// Key code for the up arrow key.
const VK_UP: usize = 0x26;
/// Key code for the down arrow key.
const VK_DOWN: usize = 0x28;
/// Key code for the return/enter key.
const VK_RETURN: usize = 0x0D;
/// Key code for the space bar.
const VK_SPACE: usize = 0x20;
/// Key code for the home key.
const VK_HOME: usize = 0x24;
/// Key code for the end key.
const VK_END: usize = 0x23;
/// Key code for the backspace key.
const VK_BACK: usize = 0x08;
/// Key code for the escape key.
const VK_ESCAPE: usize = 0x1B;
709
710
// Utilities that return one of four commands/params based on the shift/control key combination
711
712
/// The kind of navigation action requested; paired with a `NavigationParam` and
/// turned into a command name by `navigation_command_string`.
enum NavigationCommand {
    /// "Move..." commands (e.g. "MovePrevious", "MoveTo0").
    Move,
    /// "ZoomIn", "ZoomOut", "ZoomInAll", "ZoomOutAll".
    Zoom,
    /// "MoveLastLocation".
    MoveLastLocation,
    /// "Read..." commands (e.g. "ReadCurrent", "Read0").
    Read,
    /// "Describe..." commands (e.g. "DescribeCurrent", "Describe0").
    Describe,
    /// Not implemented yet: `navigation_command_string` hits `todo!` for this variant.
    ReadTo,
    /// "WhereAmI" / "WhereAmIAll".
    Locate,
    /// "ToggleZoomLockUp" / "ToggleZoomLockDown".
    ChangeNavMode,
    /// "ToggleSpeakMode".
    ToggleSpeakMode,
    /// "SetPlacemarker0" ... "SetPlacemarker9".
    SetPlacemarker,
    /// "Exit".
    Exit,
    /// Placeholder for "no command"; `navigation_command_string` maps it to "Error".
    Last,
}
726
727
/// The target/direction that qualifies a `NavigationCommand`.
///
/// Variant order matters: `Placemarker0` ..= `Placemarker9` must stay contiguous and in
/// numeric order because `navigation_command_string` range-checks them with `<`/`>` and
/// indexes tables with `(param as usize) - (NavigationParam::Placemarker0 as usize)`.
#[derive(PartialEq, PartialOrd, Clone, Copy)]
enum NavigationParam {
    Placemarker0,
    Placemarker1,
    Placemarker2,
    Placemarker3,
    Placemarker4,
    Placemarker5,
    Placemarker6,
    Placemarker7,
    Placemarker8,
    Placemarker9,
    Previous,
    Current,
    Next,
    Start,
    End,
    LineStart,
    LineEnd,
    CellPrevious,
    CellCurrent,
    CellNext,
    ColStart,
    ColEnd,
    CellUp,
    CellDown,
    /// Placeholder used when the command takes no meaningful parameter.
    Last,
}
755
756
757
0
fn choose_command(
758
0
  shift_key: bool,
759
0
  control_key: bool,
760
0
  none: NavigationCommand,
761
0
  shift: NavigationCommand,
762
0
  control: NavigationCommand,
763
0
  shift_control: NavigationCommand
764
0
) -> NavigationCommand {
765
0
     if shift_key && control_key {
766
0
    return shift_control;
767
0
    } else if control_key {
768
0
        return control;
769
0
    } else if shift_key {
770
0
    return shift;
771
  } else {
772
0
    return none;
773
    }
774
0
}
775
776
0
fn choose_param(
777
0
  shift_key: bool,
778
0
  control_key: bool,
779
0
  none: NavigationParam,
780
0
  shift: NavigationParam,
781
0
  control: NavigationParam,
782
0
  shift_control: NavigationParam
783
0
) -> NavigationParam {
784
0
    if shift_key && control_key {
785
0
    return shift_control;
786
0
    } else if control_key {
787
0
        return control;
788
0
    } else if shift_key {
789
0
    return shift;
790
  } else {
791
0
    return none;
792
    }
793
0
}
794
795
0
fn key_press_to_command_and_param(
796
0
    key: usize,
797
0
  shift_key: bool,
798
0
  control_key: bool,
799
0
  alt_key: bool,
800
0
  meta_key: bool,
801
0
) -> Result<(NavigationCommand, NavigationParam)> {
802
  // key press mapping should probably be stored externally (registry) with an app that allows changes
803
  // for now, we build in the defaults
804
805
    // this is a hack to map alt+ctl+arrow to ctl+arrow to change table mappings (github.com/NSoiffer/MathCAT/issues/105)
806
    // if this change sticks, choose_command() needs to be changed and this hack should go away
807
0
    let mut alt_key = alt_key;
808
0
    if alt_key && control_key && [VK_LEFT, VK_RIGHT, VK_UP, VK_DOWN].contains(&key) {
809
0
        alt_key = false;
810
0
    }
811
0
  if alt_key || meta_key {
812
0
        bail!("Invalid argument to key_press_to_command_and_param");
813
0
    }
814
815
    let command;
816
    let param;
817
0
  match key {
818
0
        VK_LEFT => {
819
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move,   NavigationCommand::Read, NavigationCommand::Move,     NavigationCommand::Describe);
820
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Previous, NavigationParam::Previous, NavigationParam::CellPrevious, NavigationParam::Previous);
821
0
            },
822
0
        VK_RIGHT => {
823
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Read, NavigationCommand::Move,    NavigationCommand::Describe);
824
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Next, NavigationParam::Next, NavigationParam::CellNext, NavigationParam::Next);
825
0
            },
826
0
        VK_UP => {
827
0
            command = choose_command(shift_key, control_key, NavigationCommand::Zoom,      NavigationCommand::ChangeNavMode, NavigationCommand::Move,   NavigationCommand::Zoom);
828
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Previous,  NavigationParam::Previous,      NavigationParam::CellUp, NavigationParam::Start);
829
0
            },
830
0
        VK_DOWN => {
831
0
            command = choose_command(shift_key, control_key, NavigationCommand::Zoom, NavigationCommand::ChangeNavMode, NavigationCommand::Move,     NavigationCommand::Zoom);
832
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Next, NavigationParam::Next,          NavigationParam::CellDown, NavigationParam::End);
833
0
            },
834
0
        VK_RETURN => {
835
0
            command = choose_command(shift_key, control_key, NavigationCommand::Locate,  NavigationCommand::Last, NavigationCommand::Locate, NavigationCommand::Last);
836
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Previous,NavigationParam::Last, NavigationParam::Last,    NavigationParam::Last);
837
0
            },
838
0
        VK_SPACE => {
839
0
            command = choose_command(shift_key, control_key, NavigationCommand::Read,   NavigationCommand::ToggleSpeakMode,    NavigationCommand::Read,        NavigationCommand::Describe);
840
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Current, NavigationParam::Last,                NavigationParam::CellCurrent, NavigationParam::Current);
841
0
            },
842
    
843
0
        VK_HOME => {
844
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Move,    NavigationCommand::Move,      NavigationCommand::ReadTo);
845
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Start,NavigationParam::ColStart, NavigationParam::LineStart, NavigationParam::Start);
846
0
            },
847
0
        VK_END => {
848
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Move,   NavigationCommand::Move,    NavigationCommand::ReadTo);
849
0
            param =   choose_param(  shift_key, control_key, NavigationParam::End,  NavigationParam::ColEnd, NavigationParam::LineEnd, NavigationParam::End);
850
0
            },
851
0
        VK_BACK => {
852
0
            command = NavigationCommand::MoveLastLocation;
853
0
            param = NavigationParam::Last;
854
0
            },
855
0
        VK_ESCAPE => {
856
0
            command = NavigationCommand::Exit;
857
0
            param = NavigationParam::Last;
858
0
            },
859
0
        0x30..=0x39 => {  // '0' ... '9'
860
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Read, NavigationCommand::SetPlacemarker, NavigationCommand::Describe);
861
            static PLACE_MARKER: &[NavigationParam] = &[
862
                NavigationParam::Placemarker0,
863
                NavigationParam::Placemarker1,
864
                NavigationParam::Placemarker2,
865
                NavigationParam::Placemarker3,
866
                NavigationParam::Placemarker4,
867
                NavigationParam::Placemarker5,
868
                NavigationParam::Placemarker6,
869
                NavigationParam::Placemarker7,
870
                NavigationParam::Placemarker8,
871
                NavigationParam::Placemarker9,
872
            ];
873
0
            param = PLACE_MARKER[key-0x30];
874
        },
875
0
        _ => bail!("Unknown key press/command"),
876
    };
877
    
878
0
  return Ok( (command, param) );
879
0
}
880
881
// translate the key presses into commands
882
883
884
2
fn navigation_command_string(command: NavigationCommand, param: NavigationParam) -> &'static str {
885
2
  match command {
886
      NavigationCommand::Move => {
887
1
            return match param {
888
0
                NavigationParam::Previous => "MovePrevious",
889
0
                NavigationParam::Next => "MoveNext",
890
1
                NavigationParam::Start => "MoveStart",
891
0
                NavigationParam::End => "MoveEnd",
892
0
                NavigationParam::LineStart => "MoveLineStart",
893
0
                NavigationParam::LineEnd => "MoveLineEnd",
894
0
                NavigationParam::CellPrevious => "MoveCellPrevious",
895
0
                NavigationParam::CellNext => "MoveCellNext",
896
0
                NavigationParam::CellUp => "MoveCellUp",
897
0
                NavigationParam::CellDown => "MoveCellDown",
898
0
                NavigationParam::ColStart => "MoveColumnStart",
899
0
                NavigationParam::ColEnd => "MoveColumnEnd",
900
                _ => {
901
0
                    if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
902
0
                        panic!("Internal Error: Found illegal value for param of NavigationCommand::Move");
903
0
                    }
904
                    static MOVE_TO: &[&str] = &["MoveTo0","MoveTo1","MoveTo2","MoveTo3","MoveTo4","MoveTo5","MoveTo6","MoveTo7","MoveTo8","MoveTo9"];
905
0
                    return MOVE_TO[(param as usize) - (NavigationParam::Placemarker0 as usize)];
906
                }
907
            }
908
        },
909
        NavigationCommand::Zoom => {
910
1
            return match param {
911
0
                NavigationParam::Next => "ZoomIn",
912
1
                NavigationParam::Previous => "ZoomOut",
913
0
                NavigationParam::Start => "ZoomOutAll",
914
0
                NavigationParam::End => "ZoomInAll",
915
0
                _  => panic!("Illegal param for NavigationCommand::Zoom"),
916
            }
917
        },
918
        NavigationCommand::MoveLastLocation => {
919
0
            return "MoveLastLocation";
920
        },
921
        NavigationCommand::Read => {
922
0
            return match param {
923
0
                NavigationParam::Previous => "ReadPrevious",
924
0
                NavigationParam::Next => "ReadNext",
925
0
                NavigationParam::Current => "ReadCurrent",
926
0
                NavigationParam::CellCurrent => "ReadCellCurrent",
927
0
                NavigationParam::Start => "ReadStart",
928
0
                NavigationParam::End => "ReadEnd",
929
0
                NavigationParam::LineStart => "ReadLineStart",
930
0
                NavigationParam::LineEnd => "ReadLineEnd",
931
                _ => {
932
0
                    if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
933
0
                        panic!("Internal Error: Found illegal value for param of NavigationCommand::Move");
934
0
                    }
935
                    static READ_PLACE_MARKERS: &[&str] = &["Read0","Read1","Read2","Read3","Read4","Read5","Read6","Read7","Read8","Read9"];
936
0
                    return READ_PLACE_MARKERS[(param as usize) - (NavigationParam::Placemarker0 as usize)];
937
                },
938
            }
939
        },
940
        NavigationCommand::Describe => {
941
0
            return match param {
942
0
                NavigationParam::Previous => "DescribePrevious",
943
0
                NavigationParam::Next => "DescribeNext",
944
0
                NavigationParam::Current => "DescribeCurrent",
945
                _ => {
946
0
                    if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
947
0
                        panic!("Internal Error: Found illegal value for param of NavigationCommand::Describe");
948
0
                    }
949
                    static DESCRIBE_PLACE_MARKERS: &[&str] = &["Describe0","Describe1","Describe2","Describe3","Describe4","Describe5","Describe6","Describe7","Describe8","Describe9"];
950
0
                    return DESCRIBE_PLACE_MARKERS[(param as usize) - (NavigationParam::Placemarker0 as usize)];
951
                }
952
            }
953
        },
954
        NavigationCommand::ReadTo => {
955
0
            todo!("ReadTo navigation command")
956
        },
957
        NavigationCommand::Locate => {
958
0
            if param ==NavigationParam::Previous {
959
0
                return "WhereAmI";
960
0
            } else if param ==NavigationParam::Last {
961
0
                return "WhereAmIAll";
962
0
            }
963
        },
964
        NavigationCommand::ChangeNavMode => {
965
0
            if param ==NavigationParam::Previous {
966
0
                return "ToggleZoomLockUp";
967
0
            } else if param ==NavigationParam::Next {
968
0
                return "ToggleZoomLockDown";
969
0
            }
970
        },
971
        NavigationCommand::ToggleSpeakMode => {
972
0
            return "ToggleSpeakMode";
973
        },
974
        NavigationCommand::SetPlacemarker => {
975
0
            if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
976
0
                panic!("Internal Error: Found illegal value for param of NavigationCommand::SetPlacemarker");
977
0
            }
978
            static SET_PLACE_MARKER: &[&str] = &["SetPlacemarker0","SetPlacemarker1","SetPlacemarker2","SetPlacemarker3","SetPlacemarker4","SetPlacemarker5","SetPlacemarker6","SetPlacemarker7","SetPlacemarker8","SetPlacemarker9"];
979
0
            return SET_PLACE_MARKER[(param as usize) - (NavigationParam::Placemarker0 as usize)];
980
        },
981
        NavigationCommand::Exit => {
982
0
            return "Exit";
983
        },
984
        NavigationCommand::Last => {
985
0
            return "Error";
986
        }
987
    };
988
0
    return "Error";
989
2
}
990
991
#[cfg(test)]
992
mod tests {
993
    use super::*;
994
    #[allow(unused_imports)]
995
    use crate::init_logger;
996
    use crate::interface::*;
997
998
    #[cfg(test)]
999
    /// Assert if result_id != '' and it doesn't match the id of the result of the move
1000
    /// Returns the speech from the command
1001
547
    fn test_command(command: &'static str, mathml: Element, result_id: &str) -> String {
1002
        // debug!("\nCommand: {}", command);
1003
547
        NAVIGATION_STATE.with(|nav_stack| {
1004
547
            let (start_id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
1005
547
            match do_navigate_command_string(mathml, command) {
1006
0
                Err(e) => {
1007
0
                    panic!("\nStarting at '{}', '{} failed.\n{}",
1008
0
                                        start_id, command, &crate::interface::errors_to_string(&e))
1009
                },
1010
547
                Ok(nav_speech) => {
1011
547
                    let nav_speech = nav_speech.trim_end_matches(&[' ', ',', ';']);
1012
                    // debug!("Full speech: {}", nav_speech);
1013
547
                    if !result_id.is_empty() {
1014
547
                        let (id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
1015
547
                        assert_eq!(result_id, id, "\nStarting at '{}', '{} failed.", start_id, command);
1016
0
                    }
1017
547
                    return nav_speech.to_string();
1018
                }
1019
            };
1020
547
        })
1021
547
    }
1022
1023
56
    fn init_default_prefs(mathml: &str, nav_mode_default: &str) {
1024
56
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1025
56
        set_preference("NavMode", nav_mode_default).unwrap();
1026
56
        set_preference("NavVerbosity", "Verbose").unwrap();
1027
56
        set_preference("AutoZoomOut", "True").unwrap();
1028
56
        set_preference("Language", "en").unwrap();
1029
56
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
1030
56
        set_preference("Verbosity", "Medium").unwrap();
1031
56
        set_preference("Overview", "False").unwrap();
1032
56
        set_mathml(mathml).unwrap();
1033
56
    }
1034
1035
    /// "ZoomIn" descends to the first child until it reaches a leaf, where it stays put.
    #[test]
    fn zoom_in() -> Result<()> {
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mi id='denom'>d</mi>
            </mfrac></math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            test_command("ZoomIn", mathml, "msup");
            test_command("ZoomIn", mathml, "base");
            test_command("ZoomIn", mathml, "base");
            Ok( () )
        })
    }
1051
1052
    /// "ZoomIn" followed by "MoveNext" from the initial position, in each of the three NavModes.
    #[test]
    fn test_init_navigate_move_right() -> Result<()> {
        // this is how navigation typically starts up
        let mathml_str = " <math display='block' id='id-0'>
            <mrow id='id-1'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mo id='id-3'>=</mo>
                <mrow id='id-4'>
                    <mi id='id-5'>a</mi>
                    <mo id='id-6'>-</mo>
                    <mn id='id-7'>2</mn>
                </mrow>
            </mrow>
        </math>";

        // (NavMode, id expected after "ZoomIn", id expected after the following "MoveNext")
        let expectations = [
            ("Enhanced", "msup", "id-3"),
            ("Simple", "msup", "id-3"),
            ("Character", "base", "exp"),
        ];
        for &(nav_mode, zoom_in_id, move_next_id) in expectations.iter() {
            init_default_prefs(mathml_str, nav_mode);
            debug!("--- {} ---", nav_mode);
            MATHML_INSTANCE.with(|package_instance| {
                let package_instance = package_instance.borrow();
                let mathml = get_element(&package_instance);
                test_command("ZoomIn", mathml, zoom_in_id);
                test_command("MoveNext", mathml, move_next_id);
            });
        }
        Ok( () )
    }
1094
    
1095
    /// "ZoomIn" on parenthesized factors: checks where it lands in "Enhanced" and in "Simple" mode.
    #[test]
    fn zoom_in_parens() -> Result<()> {
        // (a+b)(c+d) + 1
        let mathml_str = " <math display='block' id='id-0'>
            <mrow id='id-1'>
                <mrow id='id-2'>
                    <mrow id='id-3'>
                    <mo stretchy='false' id='id-4'>(</mo>
                    <mrow id='id-5'>
                        <mi id='id-6'>a</mi>
                        <mo id='id-7'>+</mo>
                        <mi id='id-8'>b</mi>
                    </mrow>
                    <mo stretchy='false' id='id-9'>)</mo>
                    </mrow>
                    <mo id='id-10'>&#x2062;</mo>
                    <mrow id='id-11'>
                    <mo stretchy='false' id='id-12'>(</mo>
                    <mrow id='id-13'>
                        <mi id='id-14'>c</mi>
                        <mo id='id-15'>+</mo>
                        <mi id='id-16'>d</mi>
                    </mrow>
                    <mo stretchy='false' id='id-17'>)</mo>
                    </mrow>
                </mrow>
                <mo id='id-18'>+</mo>
                <mn id='id-19'>1</mn>
            </mrow>
        </math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            set_preference("NavMode", "Enhanced")?;
            debug!("\n------EnhancedMode----------");
            test_command("ZoomIn", mathml, "id-2");
            test_command("ZoomIn", mathml, "id-5");
            test_command("ZoomIn", mathml, "id-6");

            // repeat, but this time with "Simple"
            set_preference("NavMode", "Simple")?;
            debug!("\n------SimpleMode----------");
            test_command("ZoomOutAll", mathml, "id-1");
            test_command("ZoomIn", mathml, "id-4");
            test_command("ZoomIn", mathml, "id-4");
            Ok( () )
        })
    }
1144
    
1145
    /// "ZoomInAll" from the initial position goes all the way down to the first leaf.
    #[test]
    fn zoom_in_all() -> Result<()> {
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mi id='denom'>d</mi>
            </mfrac></math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            test_command("ZoomInAll", mathml, "base");
            Ok( () )
        })
    }
1159
1160
    
1161
    /// Starting at 'base', "ZoomOut" moves to 'msup'; a second zoom out (via command + param) moves to 'mfrac'.
    #[test]
    fn zoom_out() -> Result<()> {
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mi id='denom'>d</mi>
            </mfrac></math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack.borrow_mut().push(NavigationPosition{
                    current_node: "base".to_string(),
                    current_node_offset: 0
                }, "None")
            });
            test_command("ZoomOut", mathml, "msup");

            let _nav_speech = do_navigate_command_and_param(mathml, NavigationCommand::Zoom, NavigationParam::Previous)?;
            NAVIGATION_STATE.with(|nav_stack| {
                let (id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
                assert_eq!(id, "mfrac");
            });
            Ok( () )
        })
    }
1187
    
1188
    /// Starting at 'base', "ZoomOutAll" moves to 'mfrac'.
    #[test]
    fn zoom_out_all() -> Result<()> {
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mi id='denom'>d</mi>
            </mfrac></math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack.borrow_mut().push(NavigationPosition{
                    current_node: "base".to_string(),
                    current_node_offset: 0
                }, "None")
            });

            test_command("ZoomOutAll", mathml, "mfrac");
            Ok( () )
        })
    }
1209
    
1210
    /// "MoveStart"/"MoveEnd" starting at 'id-4', checked in "Character", "Simple", and "Enhanced" modes.
    #[test]
    fn move_start_end() -> Result<()> {
        let mathml_str = " <math display='block' id='id-0'>
        <mrow id='id-1'>
          <mi id='id-2'>x</mi>
          <mo id='id-3'>=</mo>
          <mrow id='id-4'>
            <mi id='id-5'>a</mi>
            <mo id='id-6'>-</mo>
            <mn id='id-7'>2</mn>
          </mrow>
        </mrow>
       </math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack.borrow_mut().push(NavigationPosition{
                    current_node: "id-4".to_string(),
                    current_node_offset: 0
                }, "None")
            });

            set_preference("NavMode", "Character")?;
            test_command("MoveStart", mathml, "id-2");
            test_command("MoveEnd", mathml, "id-7");
            set_preference("NavMode", "Simple")?;
            test_command("MoveStart", mathml, "id-2");
            test_command("MoveEnd", mathml, "id-7");
            set_preference("NavMode", "Enhanced")?;
            test_command("MoveStart", mathml, "id-2");
            test_command("MovePrevious", mathml, "id-2");
            test_command("MoveEnd", mathml, "id-4");
            test_command("MoveNext", mathml, "id-4");
            Ok( () )
        })
    }
1248
    
1249
    /// "MoveLineStart"/"MoveLineEnd" starting at 'id-7' (in the denominator), checked in all three NavModes.
    #[test]
    fn move_line_start_end() -> Result<()> {
        let mathml_str = " <math display='block' id='id-0'>
        <mfrac displaystyle='true' id='id-1'>
          <mi id='id-2'>x</mi>
          <mrow id='id-3'>
            <msup id='id-4'>
              <mi id='id-5'>y</mi>
              <mn id='id-6'>2</mn>
            </msup>
            <mo id='id-7'>+</mo>
            <mn id='id-8'>1</mn>
          </mrow>
        </mfrac>
       </math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack.borrow_mut().push(NavigationPosition{
                    current_node: "id-7".to_string(),
                    current_node_offset: 0
                }, "None")
            });

            set_preference("NavMode", "Character")?;
            test_command("MoveLineStart", mathml, "id-5");
            test_command("MoveLineEnd", mathml, "id-8");
            set_preference("NavMode", "Simple")?;
            test_command("MoveLineStart", mathml, "id-4");
            test_command("MoveLineEnd", mathml, "id-8");
            set_preference("NavMode", "Enhanced")?;
            test_command("MoveLineStart", mathml, "id-4");
            test_command("MoveLineEnd", mathml, "id-8");
            test_command("MoveEnd", mathml, "id-3");
            Ok( () )
        })
    }
1288
    
1289
    /// Zooming past the outermost/innermost node must not add a stack entry, so "MoveLastLocation"
    /// goes back to the position before the zoom that actually moved.
    #[test]
    fn text_extremes_and_move_last_location() -> Result<()> {
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mi id='denom'>d</mi>
            </mfrac></math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack.borrow_mut().push(NavigationPosition{
                    current_node: "base".to_string(),
                    current_node_offset: 0
                }, "None")
            });

            test_command("ZoomOutAll", mathml, "mfrac");
            test_command("ZoomOut", mathml, "mfrac");
            test_command("MoveLastLocation", mathml, "base");       // second zoom out should do nothing

            test_command("ZoomOut", mathml, "msup");
            test_command("ZoomInAll", mathml, "base");
            test_command("ZoomIn", mathml, "base");
            test_command("MoveLastLocation", mathml, "msup");       // second zoom in should do nothing

            Ok( () )
        })
    }
1318
    
1319
    /// "MoveLineStart" from the denominator and from inside the numerator, then "MoveStart" via command + param.
    #[test]
    fn move_to_start() -> Result<()> {
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
                <mrow id='num'><msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup><mo id='factorial'>!</mo></mrow>
                <mi id='denom'>d</mi>
            </mfrac></math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack.borrow_mut().push(NavigationPosition{
                    current_node: "denom".to_string(),
                    current_node_offset: 0
                }, "None")
            });
            test_command("MoveLineStart", mathml, "denom");

            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack.borrow_mut().push(NavigationPosition{
                    current_node: "factorial".to_string(),
                    current_node_offset: 0
                }, "None")
            });
            test_command("MoveLineStart", mathml, "msup");

            let _nav_speech = do_navigate_command_and_param(mathml, NavigationCommand::Move, NavigationParam::Start)?;
            NAVIGATION_STATE.with(|nav_stack| {
                let (id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
                assert_eq!(id, "num");
            });
            Ok( () )
        })
    }
1353
    
1354
    #[test]
1355
1
    fn move_right_sup() -> Result<()> {
1356
1
        let mathml_str = "<math display='block' id='id-0'>
1357
1
        <mrow id='id-1'>
1358
1
          <msup id='id-2'>
1359
1
            <mn id='id-3'>2</mn>
1360
1
            <mi id='id-4'>q</mi>
1361
1
          </msup>
1362
1
          <mo id='id-5'>-</mo>
1363
1
          <mi id='id-6'>x</mi>
1364
1
        </mrow>
1365
1
        </math>";
1366
1
        init_default_prefs(mathml_str, "Enhanced");
1367
1
        return MATHML_INSTANCE.with(|package_instance| {
1368
1
            let package_instance = package_instance.borrow();
1369
1
            let mathml = get_element(&package_instance);
1370
1
            NAVIGATION_STATE.with(|nav_stack| {
1371
1
                nav_stack.borrow_mut().push(NavigationPosition{
1372
1
                    current_node: "id-2".to_string(),
1373
1
                    current_node_offset: 0
1374
1
                }, "None")
1375
1
            });
1376
1
            set_preference("NavMode", "Enhanced")
?0
;
1377
1
            test_command("MoveNext", mathml, "id-5");
1378
1379
            // reset start and test Simple
1380
1
            NAVIGATION_STATE.with(|nav_stack| {
1381
1
                nav_stack.borrow_mut().push(NavigationPosition{
1382
1
                    current_node: "id-2".to_string(),
1383
1
                    current_node_offset: 0
1384
1
                }, "None")
1385
1
            });
1386
1
            set_preference("NavMode", "Simple")
?0
;
1387
1
            test_command("MoveNext", mathml, "id-5");
1388
1389
            // reset start and test Character
1390
1
            NAVIGATION_STATE.with(|nav_stack| {
1391
1
                nav_stack.borrow_mut().push(NavigationPosition{
1392
1
                    current_node: "id-3".to_string(),
1393
1
                    current_node_offset: 0
1394
1
                }, "None")
1395
1
            });
1396
1
            set_preference("NavMode", "Character")
?0
;
1397
1
            test_command("MoveNext", mathml, "id-4");
1398
1
            test_command("MoveNext", mathml, "id-5");
1399
1
            return Ok( () );
1400
1
        });
1401
1
    }
1402
1403
        
1404
    #[test]
1405
1
    fn move_msubsup_char() -> Result<()> {
1406
1
        let mathml_str = "<math display='block' id='id-0'>
1407
1
        <mrow id='id-1'>
1408
1
          <mn id='id-2'>1</mn>
1409
1
          <mo id='id-3'>+</mo>
1410
1
          <msubsup id='id-4'>
1411
1
            <mi id='id-5'>x</mi>
1412
1
            <mn id='id-6'>2</mn>
1413
1
            <mn id='id-7'>3</mn>
1414
1
          </msubsup>
1415
1
          <mo id='id-8'>+</mo>
1416
1
          <mn id='id-9'>4</mn>
1417
1
        </mrow>
1418
1
       </math>";
1419
1
        init_default_prefs(mathml_str, "Character");
1420
1
        return MATHML_INSTANCE.with(|package_instance| {
1421
1
            let package_instance = package_instance.borrow();
1422
1
            let mathml = get_element(&package_instance);
1423
1
            assert_eq!("zoomed in all of the way; 1", test_command("ZoomInAll", mathml, "id-2"));
1424
1
            assert_eq!("move right; plus", test_command("MoveNext", mathml, "id-3"));
1425
1
            assert_eq!("move right; in base; x", test_command("MoveNext", mathml, "id-5"));
1426
1
            assert_eq!("move right; in subscript; 2", test_command("MoveNext", mathml, "id-6"));
1427
1
            assert_eq!("move right; in superscript; 3", test_command("MoveNext", mathml, "id-7"));
1428
1
            assert_eq!("move right; out of superscript; plus", test_command("MoveNext", mathml, "id-8"));
1429
1
            assert_eq!("move left; in superscript; 3", test_command("MovePrevious", mathml, "id-7"));
1430
1
            assert_eq!("move left; in subscript; 2", test_command("MovePrevious", mathml, "id-6"));
1431
1
            assert_eq!("move left; in base; x", test_command("MovePrevious", mathml, "id-5"));
1432
1
            assert_eq!("move left; out of base; plus", test_command("MovePrevious", mathml, "id-3"));
1433
1434
1
            return Ok( () );
1435
1
        });
1436
1
    }
1437
        
1438
    #[test]
1439
1
    fn zoom_logbase() -> Result<()> {
1440
1
        let mathml_str = "<math display='block' id='id-0'>
1441
1
            <mrow displaystyle='true' id='id-1'>
1442
1
                <msub id='id-2'>
1443
1
                    <mi id='id-3'>log</mi>
1444
1
                    <mn id='id-4'>2</mn>
1445
1
                </msub>
1446
1
                <mo data-changed='added' id='id-5'>&#x2061;</mo>
1447
1
                <mi id='id-6'>x</mi>a
1448
1
            </mrow>
1449
1
            </math>";
1450
1
        init_default_prefs(mathml_str, "Enhanced");
1451
1
        return MATHML_INSTANCE.with(|package_instance| {
1452
1
            let package_instance = package_instance.borrow();
1453
1
            let mathml = get_element(&package_instance);
1454
1
            assert_eq!("zoom in; the log base 2", test_command("ZoomIn", mathml, "id-2"));
1455
1
            assert_eq!("zoom in; in base; 2", test_command("ZoomIn", mathml, "id-4"));
1456
1
            assert_eq!("zoomed in all of the way; 2", test_command("ZoomIn", mathml, "id-4"));
1457
1
            debug!("Now zooming out");
1458
1
            assert_eq!("zoom out; out of base; the log base 2", test_command("ZoomOut", mathml, "id-2"));
1459
1
            assert_eq!("zoom out; the log base 2, of x", test_command("ZoomOut", mathml, "id-1"));
1460
1
            assert_eq!("zoomed out all of the way; the log base 2, of x", test_command("ZoomOut", mathml, "id-1"));
1461
1
            return Ok( () );
1462
1
        });
1463
1
    }
1464
        
1465
    #[test]
1466
1
    fn zoom_logbase_power() -> Result<()> {
1467
1
        let mathml_str = "<math display='block' id='id-0'>
1468
1
            <mrow displaystyle='true' id='id-1'>
1469
1
                <msubsup id='id-2'>
1470
1
                    <mi id='id-3'>log</mi>
1471
1
                    <mn id='id-4'>2</mn>
1472
1
                    <mn id='id-5'>3</mn>
1473
1
                </msubsup>
1474
1
                <mo data-changed='added' id='id-6'>&#x2061;</mo>
1475
1
                <mi id='id-7'>x</mi>
1476
1
            </mrow>
1477
1
            </math>";
1478
1
        init_default_prefs(mathml_str, "Enhanced");
1479
1
        return MATHML_INSTANCE.with(|package_instance| {
1480
1
            let package_instance = package_instance.borrow();
1481
1
            let mathml = get_element(&package_instance);
1482
1
            assert_eq!("zoom in; the log base 2, cubed", test_command("ZoomIn", mathml, "id-2"));
1483
1
            assert_eq!("zoom in; in base; the log base 2", test_command("ZoomIn", mathml, "id-2-log-base"));
1484
1
            assert_eq!("zoom in; in base; 2", test_command("ZoomIn", mathml, "id-4"));
1485
1
            assert_eq!("zoomed in all of the way; 2", test_command("ZoomIn", mathml, "id-4"));
1486
1
            debug!("Now zooming out");
1487
1
            assert_eq!("zoom out; out of base; the log base 2", test_command("ZoomOut", mathml, "id-2-log-base"));
1488
1
            assert_eq!("zoom out; out of base; the log base 2, cubed", test_command("ZoomOut", mathml, "id-2"));
1489
1
            assert_eq!("zoom out; the log base 2, cubed of x", test_command("ZoomOut", mathml, "id-1"));
1490
1
            assert_eq!("zoomed out all of the way; the log base 2, cubed of x", test_command("ZoomOut", mathml, "id-1"));
1491
1
            return Ok( () );
1492
1
        });
1493
1
    }
1494
        
1495
    #[test]
1496
1
    fn zoom_msubsup() -> Result<()> {
1497
        // msubsup is trickier because it creates an intent within an intent, so offsets need to be handled properly
1498
1
        let mathml_str = "<math id='math'><msubsup id='msubsup'><mi id='base'>𝑥</mi><mn id='sub'>1</mn><mn id='sup'>2</mn></msubsup></math>";
1499
1
        init_default_prefs(mathml_str, "Enhanced");
1500
1
        return MATHML_INSTANCE.with(|package_instance| {
1501
1
            let package_instance = package_instance.borrow();
1502
1
            let mathml = get_element(&package_instance);
1503
1
            set_preference("NavMode", "Enhanced").unwrap();
1504
1
            debug!("Enhanced mode");
1505
1
            do_commands(mathml)
?0
;
1506
1
            set_preference("NavMode", "Simple").unwrap();
1507
1
            debug!("Simple mode");
1508
1
            do_commands(mathml)
?0
;
1509
1
            set_preference("NavMode", "Character").unwrap();
1510
1
            debug!("Character mode");
1511
1
            assert_eq!("zoom in; in base; x", test_command("ZoomIn", mathml, "base"));
1512
1
            assert_eq!("zoom out; out of base; x sub 1 super 2 end super", test_command("ZoomOut", mathml, "msubsup"));
1513
1
            return Ok( () );
1514
1515
        /// Enhanced and Simple mode should behave the same
1516
2
        fn do_commands(mathml: Element) -> Result<()> {
1517
2
            assert_eq!("zoom in; in base; x sub 1", test_command("ZoomIn", mathml, "msubsup-indexed-by"));
1518
2
            assert_eq!("zoom in; in base; x", test_command("ZoomIn", mathml, "base"));
1519
2
            assert_eq!("zoomed in all of the way; x", test_command("ZoomIn", mathml, "base"));
1520
2
            debug!("Now zooming out");
1521
2
            assert_eq!("zoom out; out of base; x sub 1", test_command("ZoomOut", mathml, "msubsup-indexed-by"));
1522
2
            assert_eq!("zoom out; out of base; x sub 1, squared", test_command("ZoomOut", mathml, "msubsup"));
1523
2
            assert_eq!("zoomed out all of the way; x sub 1, squared", test_command("ZoomOut", mathml, "msubsup"));
1524
2
            return Ok( () );
1525
2
        }
1526
1
        });
1527
1
    }
1528
        
1529
    #[test]
1530
1
    fn move_mmultiscripts_char() -> Result<()> {
1531
1
        let mathml_str = "<math display='block' id='id-0'>
1532
1
            <mmultiscripts data-mjx-texclass='ORD' data-chem-formula='5' id='id-1'>
1533
1
                <mrow data-chem-formula='3' id='id-2'>
1534
1
                    <mo stretchy='false' id='id-3'>[</mo>
1535
1
                    <mmultiscripts data-chem-formula='3' id='id-4'>
1536
1
                        <mi data-chem-element='3' id='id-5'>Co</mi>
1537
1
                        <mn id='id-6'>6</mn>
1538
1
                        <none id='id-7'></none>
1539
1
                    </mmultiscripts>
1540
1
                    <mo stretchy='false' id='id-8'>]</mo>
1541
1
                </mrow>
1542
1
                <none id='id-9'></none>
1543
1
                <mrow id='id-10'>
1544
1
                    <mn id='id-11'>3</mn>
1545
1
                    <mo id='id-12'>+</mo>
1546
1
                </mrow>
1547
1
            </mmultiscripts>
1548
1
            </math>";
1549
1
            init_default_prefs(mathml_str, "Character");
1550
1
            return MATHML_INSTANCE.with(|package_instance| {
1551
1
            let package_instance = package_instance.borrow();
1552
1
            let mathml = get_element(&package_instance);
1553
1
            assert_eq!("zoomed in all of the way; in base; open bracket", test_command("ZoomInAll", mathml, "id-3"));
1554
1
            assert_eq!("move right; in base; cap c o", test_command("MoveNext", mathml, "id-5"));
1555
1
            assert_eq!("move right; in subscript; 6", test_command("MoveNext", mathml, "id-6"));
1556
1
            assert_eq!("move right; out of subscript; close bracket", test_command("MoveNext", mathml, "id-8"));
1557
1
            assert_eq!("move right; in superscript; 3", test_command("MoveNext", mathml, "id-11"));
1558
1
            assert_eq!("move right; plus", test_command("MoveNext", mathml, "id-12"));
1559
1
            assert_eq!("cannot move right, end of math", test_command("MoveNext", mathml, "id-12"));
1560
1
            assert_eq!("move left; 3", test_command("MovePrevious", mathml, "id-11"));
1561
1
            assert_eq!("move left; in base; close bracket", test_command("MovePrevious", mathml, "id-8"));
1562
1
            assert_eq!("move left; in subscript; 6", test_command("MovePrevious", mathml, "id-6"));
1563
1
            assert_eq!("move left; in base; cap c o", test_command("MovePrevious", mathml, "id-5"));
1564
1
            assert_eq!("move left; out of base; open bracket", test_command("MovePrevious", mathml, "id-3"));
1565
1566
1
            return Ok( () );
1567
1
        });
1568
1
    }
1569
1570
    #[test]
1571
1
    fn move_right_char() -> Result<()> {
1572
1
        let mathml_str = "<math id='id-0'>
1573
1
        <mrow displaystyle='true' id='id-1'>
1574
1
          <mi id='id-2'>x</mi>
1575
1
          <mo id='id-3'>=</mo>
1576
1
          <mrow id='id-4'>
1577
1
            <mfrac id='id-5'>
1578
1
              <mn id='id-6'>1</mn>
1579
1
              <mrow id='id-7'>
1580
1
                <mi id='id-8'>a</mi>
1581
1
                <mo id='id-9'>+</mo>
1582
1
                <mn id='id-10'>2</mn>
1583
1
              </mrow>
1584
1
            </mfrac>
1585
1
            <mo id='id-11'>+</mo>
1586
1
            <mrow id='id-12'>
1587
1
              <mn id='id-13'>3</mn>
1588
1
              <mo id='id-14'>&#x2062;</mo>
1589
1
              <mi id='id-15'>b</mi>
1590
1
            </mrow>
1591
1
          </mrow>
1592
1
        </mrow>
1593
1
        </math>";
1594
1
        init_default_prefs(mathml_str, "Character");
1595
1
        return MATHML_INSTANCE.with(|package_instance| {
1596
1
            let package_instance = package_instance.borrow();
1597
1
            let mathml = get_element(&package_instance);
1598
1
            test_command("ZoomInAll", mathml, "id-2");
1599
1
            test_command("MoveNext", mathml, "id-3");
1600
1
            test_command("MoveNext", mathml, "id-6");
1601
1
            test_command("MoveNext", mathml, "id-8");
1602
1
            test_command("MoveNext", mathml, "id-9");
1603
1
            test_command("MoveNext", mathml, "id-10");
1604
1
            test_command("MoveNext", mathml, "id-11");
1605
1
            test_command("MoveNext", mathml, "id-13");
1606
1
            test_command("MoveNext", mathml, "id-15");
1607
1
            test_command("MoveNext", mathml, "id-15");
1608
1609
1
            return Ok( () );
1610
1
        });
1611
1
    }
1612
1613
    #[test]
1614
1
    fn char_mode_paren_test() -> Result<()> {
1615
1
        let mathml_str = "<math display='block' id='id-0'>
1616
1
            <mrow displaystyle='true' id='id-1'>
1617
1
                <mrow id='id-2'>
1618
1
                    <mo id='id-3'>(</mo>
1619
1
                    <mi id='id-4'>a</mi>
1620
1
                    <mo id='id-5'>)</mo>
1621
1
                </mrow>
1622
1
                <mo id='id-6'>&#x2062;</mo>
1623
1
                <mrow id='id-7'>
1624
1
                    <mo id='id-8'>(</mo>
1625
1
                    <mi id='id-9'>b</mi>
1626
1
                    <mo id='id-10'>)</mo>
1627
1
                </mrow>
1628
1
            </mrow>
1629
1
        </math>";
1630
1
        init_default_prefs(mathml_str, "Character");
1631
1
        return MATHML_INSTANCE.with(|package_instance| {
1632
1
            let package_instance = package_instance.borrow();
1633
1
            let mathml = get_element(&package_instance);
1634
1
            debug!("Character mode");
1635
1
            do_commands(mathml)
?0
;
1636
1
            set_preference("NavMode", "Simple").unwrap();
1637
1
            debug!("Simple mode");
1638
1
            test_command("ZoomIn", mathml, "id-3");  // zooms to the first parenthesis
1639
1
            do_commands(mathml)
?0
;
1640
1
            set_preference("NavMode", "Enhanced").unwrap();
1641
1
            debug!("Enhanced mode");
1642
1
            test_command("ZoomIn", mathml, "id-4");
1643
1
            test_command("MoveNext", mathml, "id-6");
1644
1
            test_command("MoveNext", mathml, "id-9");
1645
1
            test_command("MovePrevious", mathml, "id-6");
1646
1
            test_command("MovePrevious", mathml, "id-4");
1647
1648
1
            return Ok( () );
1649
1
        });
1650
1651
        /// Simple and Character mode should behave the same
1652
2
        fn do_commands(mathml: Element) -> Result<()> {
1653
2
            test_command("ZoomIn", mathml, "id-3");
1654
2
            test_command("MoveNext", mathml, "id-4");
1655
2
            test_command("MoveNext", mathml, "id-5");
1656
2
            test_command("MoveNext", mathml, "id-8");
1657
2
            test_command("MoveNext", mathml, "id-9");
1658
2
            test_command("MoveNext", mathml, "id-10");
1659
2
            test_command("MovePrevious", mathml, "id-9");
1660
2
            test_command("MovePrevious", mathml, "id-8");
1661
2
            test_command("MovePrevious", mathml, "id-5");
1662
2
            test_command("ZoomOutAll", mathml, "id-1");
1663
2
            return Ok( () );
1664
2
        }
1665
1
    }
1666
1667
    #[test]
1668
1
    fn char_mode_trig_test() -> Result<()> {
1669
1
        let mathml_str = "<math id='id-0'>
1670
1
            <mrow id='id-1'>
1671
1
            <mi id='id-2'>sin</mi>
1672
1
            <mo id='id-3'>&#x2061;</mo>
1673
1
            <mrow id='id-4'>
1674
1
                <mo id='id-5'>(</mo>
1675
1
                <mi id='id-6'>x</mi>
1676
1
                <mo id='id-7'>)</mo>
1677
1
            </mrow>
1678
1
            </mrow>
1679
1
        </math>";
1680
1
        init_default_prefs(mathml_str, "Simple");
1681
1
        return MATHML_INSTANCE.with(|package_instance| {
1682
1
            let package_instance = package_instance.borrow();
1683
1
            let mathml = get_element(&package_instance);
1684
1
            do_commands(mathml)
?0
;
1685
1
            set_preference("NavMode", "Simple").unwrap();
1686
1
            do_commands(mathml)
?0
;
1687
1
            set_preference("NavMode", "Enhanced").unwrap();
1688
1
            test_command("ZoomIn", mathml, "id-2");
1689
1
            test_command("MoveNext", mathml, "id-6");
1690
1
            test_command("MovePrevious", mathml, "id-2");
1691
1692
1
            return Ok( () );
1693
1
        });
1694
1695
        
1696
        /// Simple and Character mode should behave the same
1697
2
        fn do_commands(mathml: Element) -> Result<()> {
1698
2
            test_command("ZoomIn", mathml, "id-2");
1699
2
            test_command("MoveNext", mathml, "id-5");
1700
2
            test_command("MoveNext", mathml, "id-6");
1701
2
            test_command("MoveNext", mathml, "id-7");
1702
2
            test_command("MovePrevious", mathml, "id-6");
1703
2
            test_command("MovePrevious", mathml, "id-5");
1704
2
            test_command("MovePrevious", mathml, "id-2");
1705
2
            test_command("ZoomOutAll", mathml, "id-1");
1706
2
            return Ok( () );
1707
2
        }
1708
1
    }
1709
    
1710
    #[test]
1711
1
    fn move_char_speech() -> Result<()> {
1712
1
        let mathml_str = "<math display='block' id='id-0'>
1713
1
                <mrow id='id-1'>
1714
1
                <mfrac id='id-2'>
1715
1
                    <mi id='id-3'>x</mi>
1716
1
                    <mi id='id-4'>y</mi>
1717
1
                </mfrac>
1718
1
                <mo id='id-5'>&#x2062;</mo>
1719
1
                <mi id='id-6'>z</mi>
1720
1
                </mrow>
1721
1
            </math>";
1722
1
            init_default_prefs(mathml_str, "Character");
1723
1
            return MATHML_INSTANCE.with(|package_instance| {
1724
1
            let package_instance = package_instance.borrow();
1725
1
            let mathml = get_element(&package_instance);
1726
1
            test_command("ZoomInAll", mathml, "id-3");
1727
1
            assert_eq!("move right; in denominator; y", test_command("MoveNext", mathml, "id-4"));
1728
1
            assert_eq!("move right; out of denominator; z", test_command("MoveNext", mathml, "id-6"));
1729
1
            assert_eq!("move left; in denominator; y", test_command("MovePrevious", mathml, "id-4"));
1730
1
            assert_eq!("move left; in numerator; x", test_command("MovePrevious", mathml, "id-3"));
1731
1732
1
            return Ok( () );
1733
1
        });
1734
1
    }
1735
    
1736
    #[test]
1737
1
    fn move_inside_leaves() -> Result<()> {
1738
1
        let mathml_str = "<math display='block' id='id-0'>
1739
1
                <mrow id='id-1'>
1740
1
                    <mfrac id='id-2'>
1741
1
                        <mi id='id-3'>top</mi>
1742
1
                        <mi id='id-4'>αβγ</mi>
1743
1
                    </mfrac>
1744
1
                </mrow>
1745
1
            </math>";
1746
1
        init_default_prefs(mathml_str, "Character");
1747
1
        return MATHML_INSTANCE.with(|package_instance| {
1748
1
        let package_instance = package_instance.borrow();
1749
1
        let mathml = get_element(&package_instance);
1750
1
        test_command("ZoomInAll", mathml, "id-3");
1751
1
        assert_eq!("zoomed in to first character; t", test_command("ZoomIn", mathml, "id-3"));
1752
1
        assert_eq!("move right; o", test_command("MoveNext", mathml, "id-3"));
1753
1
        assert_eq!("move right; p", test_command("MoveNext", mathml, "id-3"));
1754
1
        assert_eq!("move right; in denominator; αβγ", test_command("MoveNext", mathml, "id-4"));
1755
1
        assert_eq!("zoomed in to first character; alpha", test_command("ZoomIn", mathml, "id-4"));
1756
1
        assert_eq!("move right; beta", test_command("MoveNext", mathml, "id-4"));
1757
1
        assert_eq!("move right; gamma", test_command("MoveNext", mathml, "id-4"));
1758
1
        assert_eq!("cannot move right, end of math", test_command("MoveNext", mathml, "id-4"));
1759
1
        assert_eq!("move left; beta", test_command("MovePrevious", mathml, "id-4"));
1760
1
        assert_eq!("zoom out; αβγ", test_command("ZoomOut", mathml, "id-4"));
1761
1762
1
        return Ok( () );
1763
1
        });
1764
1
    }
1765
    
1766
    #[test]
1767
1
    fn move_enhanced_times() -> Result<()> {
1768
1
        let mathml_str = "<math display='block' id='id-0'>
1769
1
        <mrow displaystyle='true' id='id-1'>
1770
1
          <mn id='id-2'>2</mn>
1771
1
          <mo id='id-3'>&#x2062;</mo>
1772
1
          <mrow id='id-4'>
1773
1
            <mo id='id-5'>(</mo>
1774
1
            <mrow id='id-6'>
1775
1
              <mn id='id-7'>1</mn>
1776
1
              <mo id='id-8'>-</mo>
1777
1
              <mi id='id-9'>x</mi>
1778
1
            </mrow>
1779
1
            <mo id='id-10'>)</mo>
1780
1
          </mrow>
1781
1
        </mrow>
1782
1
       </math>";
1783
1
        init_default_prefs(mathml_str, "Enhanced");
1784
1
        return MATHML_INSTANCE.with(|package_instance| {
1785
1
            let package_instance = package_instance.borrow();
1786
1
            let mathml = get_element(&package_instance);
1787
1
            test_command("ZoomIn", mathml, "id-2");
1788
1
            assert_eq!("move right; times", test_command("MoveNext", mathml, "id-3"));
1789
1
            assert_eq!("move right; 1 minus x", test_command("MoveNext", mathml, "id-6"));
1790
1
            assert_eq!("move left; times", test_command("MovePrevious", mathml, "id-3"));
1791
1
            assert_eq!("move left; 2", test_command("MovePrevious", mathml, "id-2"));
1792
1793
1
            return Ok( () );
1794
1
        });
1795
1
    }
1796
    
1797
    #[test]
1798
1
    fn move_simple_no_times() -> Result<()> {
1799
1
        let mathml_str = "<math display='block' id='id-0'>
1800
1
        <mrow displaystyle='true' id='id-1'>
1801
1
          <mn id='id-2'>2</mn>
1802
1
          <mo id='id-3'>&#x2062;</mo>
1803
1
          <mrow id='id-4'>
1804
1
            <mo id='id-5'>(</mo>
1805
1
            <mrow id='id-6'>
1806
1
              <mn id='id-7'>1</mn>
1807
1
              <mo id='id-8'>-</mo>
1808
1
              <mi id='id-9'>x</mi>
1809
1
            </mrow>
1810
1
            <mo id='id-10'>)</mo>
1811
1
          </mrow>
1812
1
        </mrow>
1813
1
       </math>";
1814
1
        init_default_prefs(mathml_str, "Simple");
1815
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
1816
1
        return MATHML_INSTANCE.with(|package_instance| {
1817
1
            let package_instance = package_instance.borrow();
1818
1
            let mathml = get_element(&package_instance);
1819
1
            test_command("ZoomIn", mathml, "id-2");
1820
1
            assert_eq!("move right; open paren", test_command("MoveNext", mathml, "id-5"));
1821
1
            assert_eq!("move right; 1", test_command("MoveNext", mathml, "id-7"));
1822
1
            assert_eq!("move left; open paren", test_command("MovePrevious", mathml, "id-5"));
1823
1
            assert_eq!("move left; 2", test_command("MovePrevious", mathml, "id-2"));
1824
1825
1
            return Ok( () );
1826
1
        });
1827
1
    }
1828
    
1829
    
1830
    #[test]
1831
1
    fn move_cell() -> Result<()> {
1832
1
        let mathml_str = "<math id='nav-0'>
1833
1
        <mtable id='nav-1'>
1834
1
          <mtr id='nav-2'>
1835
1
            <mtd id='nav-3'> <mn id='nav-4'>1</mn></mtd>
1836
1
            <mtd id='nav-5'> <mn id='nav-6'>2</mn></mtd>
1837
1
            <mtd id='nav-7'><mn id='nav-8'>3</mn> </mtd>
1838
1
          </mtr>
1839
1
          <mtr id='nav-9'>
1840
1
            <mtd id='nav-10'>
1841
1
              <mrow id='nav-11'>
1842
1
                <mi id='nav-12'>x</mi>
1843
1
                <mo id='nav-13'>-</mo>
1844
1
                <mi id='nav-14'>y</mi>
1845
1
              </mrow>
1846
1
            </mtd>
1847
1
            <mtd id='nav-15'>
1848
1
              <mfrac id='nav-16'>
1849
1
                <mn id='nav-17'>1</mn>
1850
1
                <mn id='nav-18'>2</mn>
1851
1
              </mfrac>
1852
1
            </mtd>
1853
1
            <mtd id='nav-19'>
1854
1
              <mi id='nav-20'>z</mi>
1855
1
            </mtd>
1856
1
          </mtr>
1857
1
          <mtr id='nav-21'>
1858
1
            <mtd id='nav-22'><mn id='nav-23'>7</mn> </mtd>
1859
1
            <mtd id='nav-24'><mn id='nav-25'>8</mn> </mtd>
1860
1
            <mtd id='nav-26'> <mn id='nav-27'>9</mn></mtd>
1861
1
          </mtr>
1862
1
          <mtr id='nav-28'>
1863
1
            <mtd id='nav-29'>
1864
1
              <mrow id='nav-30'>
1865
1
                <mi id='nav-31'>sin</mi>
1866
1
                <mo id='nav-32'>&#x2061;</mo>
1867
1
                <mi id='nav-33'>x</mi>
1868
1
              </mrow>
1869
1
            </mtd>
1870
1
            <mtd id='nav-34'>
1871
1
              <msup id='nav-35'>
1872
1
                <mi id='nav-36'>e</mi>
1873
1
                <mi id='nav-37'>x</mi>
1874
1
              </msup>
1875
1
            </mtd>
1876
1
            <mtd id='nav-38'>
1877
1
              <mrow id='nav-39'>
1878
1
                <mn id='nav-40'>2</mn>
1879
1
                <mo id='nav-41'>-</mo>
1880
1
                <mi id='nav-42'>y</mi>
1881
1
              </mrow>
1882
1
            </mtd>
1883
1
          </mtr>
1884
1
        </mtable>
1885
1
       </math>";
1886
1
        init_default_prefs(mathml_str, "Enhanced");
1887
1
        return MATHML_INSTANCE.with(|package_instance| {
1888
1
            let package_instance = package_instance.borrow();
1889
1
            let mathml = get_element(&package_instance);
1890
1
            test_command("ZoomInAll", mathml, "nav-4");
1891
1
            test_command("MoveCellNext", mathml, "nav-6");
1892
1
            test_command("MoveCellNext", mathml, "nav-8");
1893
1
            test_command("MoveCellNext", mathml, "nav-8");
1894
1
            test_command("MoveCellDown", mathml, "nav-20");
1895
1
            test_command("MoveCellDown", mathml, "nav-27");
1896
1
            let speech = test_command("MoveCellDown", mathml, "nav-39");
1897
1
            assert_eq!(speech, "move down, row 4, column 3; 2 minus y");
1898
1
            let speech = test_command("MoveCellDown", mathml, "nav-39");
1899
1
            assert_eq!(speech, "no next row");
1900
1
            test_command("MoveCellPrevious", mathml, "nav-35");
1901
1
            test_command("ZoomIn", mathml, "nav-36");
1902
1
            test_command("MoveCellUp", mathml, "nav-25");
1903
1
            test_command("MoveCellUp", mathml, "nav-16");
1904
1
            test_command("MoveCellUp", mathml, "nav-6");
1905
1
            test_command("MoveCellUp", mathml, "nav-6");
1906
1907
1
            return Ok( () );
1908
1
        });
1909
1
    }
1910
    
1911
    #[test]
1912
1
    fn move_cell_char_mode() -> Result<()> {
1913
1
        let mathml_str = "<math id='nav-0'>
1914
1
        <mtable id='nav-1'>
1915
1
          <mtr id='nav-2'>
1916
1
            <mtd id='nav-3'> <mn id='nav-4'>1</mn></mtd>
1917
1
            <mtd id='nav-5'> <mn id='nav-6'>2</mn></mtd>
1918
1
            <mtd id='nav-7'><mn id='nav-8'>3</mn> </mtd>
1919
1
          </mtr>
1920
1
          <mtr id='nav-9'>
1921
1
            <mtd id='nav-10'>
1922
1
              <mrow id='nav-11'>
1923
1
                <mi id='nav-12'>x</mi>
1924
1
                <mo id='nav-13'>-</mo>
1925
1
                <mi id='nav-14'>y</mi>
1926
1
              </mrow>
1927
1
            </mtd>
1928
1
            <mtd id='nav-15'>
1929
1
              <mfrac id='nav-16'>
1930
1
                <mn id='nav-17'>1</mn>
1931
1
                <mn id='nav-18'>2</mn>
1932
1
              </mfrac>
1933
1
            </mtd>
1934
1
            <mtd id='nav-19'>
1935
1
              <mi id='nav-20'>z</mi>
1936
1
            </mtd>
1937
1
          </mtr>
1938
1
          <mtr id='nav-21'>
1939
1
            <mtd id='nav-22'><mn id='nav-23'>7</mn> </mtd>
1940
1
            <mtd id='nav-24'><mn id='nav-25'>8</mn> </mtd>
1941
1
            <mtd id='nav-26'> <mn id='nav-27'>9</mn></mtd>
1942
1
          </mtr>
1943
1
          <mtr id='nav-28'>
1944
1
            <mtd id='nav-29'>
1945
1
              <mrow id='nav-30'>
1946
1
                <mi id='nav-31'>sin</mi>
1947
1
                <mo id='nav-32'>&#x2061;</mo>
1948
1
                <mi id='nav-33'>x</mi>
1949
1
              </mrow>
1950
1
            </mtd>
1951
1
            <mtd id='nav-34'>
1952
1
              <msup id='nav-35'>
1953
1
                <mi id='nav-36'>e</mi>
1954
1
                <mi id='nav-37'>x</mi>
1955
1
              </msup>
1956
1
            </mtd>
1957
1
            <mtd id='nav-38'>
1958
1
              <mrow id='nav-39'>
1959
1
                <mn id='nav-40'>2</mn>
1960
1
                <mo id='nav-41'>-</mo>
1961
1
                <mi id='nav-42'>y</mi>
1962
1
              </mrow>
1963
1
            </mtd>
1964
1
          </mtr>
1965
1
        </mtable>
1966
1
       </math>";
1967
1
       init_default_prefs(mathml_str, "Character");
1968
1
       return MATHML_INSTANCE.with(|package_instance| {
1969
1
            let package_instance = package_instance.borrow();
1970
1
            let mathml = get_element(&package_instance);
1971
1
            NAVIGATION_STATE.with(|nav_stack| {
1972
1
                nav_stack.borrow_mut().push(NavigationPosition{
1973
1
                    current_node: "nav-8".to_string(),
1974
1
                    current_node_offset: 0
1975
1
                }, "None")
1976
1
            });
1977
1
            test_command("MoveNext", mathml, "nav-12");
1978
1
            test_command("MoveNext", mathml, "nav-13");
1979
1
            test_command("MoveNext", mathml, "nav-14");
1980
1
            test_command("MoveNext", mathml, "nav-17");
1981
1
            test_command("MovePrevious", mathml, "nav-14");
1982
1
            test_command("MoveCellNext", mathml, "nav-17");
1983
1
            test_command("MoveCellPrevious", mathml, "nav-14");
1984
1
            test_command("MovePrevious", mathml, "nav-13");
1985
1
            test_command("MovePrevious", mathml, "nav-12");
1986
1
            test_command("MoveCellPrevious", mathml, "nav-12");
1987
1
            test_command("MovePrevious", mathml, "nav-8");
1988
1
            test_command("MoveCellDown", mathml, "nav-20");
1989
1
            test_command("MoveCellDown", mathml, "nav-27");
1990
1
            test_command("MoveCellDown", mathml, "nav-40");
1991
1
            test_command("MoveCellDown", mathml, "nav-40");
1992
1
            test_command("MoveCellPrevious", mathml, "nav-37");
1993
1
            test_command("MoveCellUp", mathml, "nav-25");
1994
1995
1
            return Ok( () );
1996
1
        });
1997
1
    }
1998
    
1999
    #[test]
2000
1
    fn placemarker() -> Result<()> {
2001
1
        let mathml_str = "<math display='block' id='math'>
2002
1
        <mrow displaystyle='true' id='mrow'>
2003
1
          <mi id='a'>a</mi>
2004
1
          <mo id='plus-1'>+</mo>
2005
1
          <mi id='b'>b</mi>
2006
1
          <mo id='plus-2'>+</mo>
2007
1
          <mi id='c'>c</mi>
2008
1
        </mrow>
2009
1
        </math>";
2010
1
        init_default_prefs(mathml_str, "Character");
2011
1
        return MATHML_INSTANCE.with(|package_instance| {
2012
1
            let package_instance = package_instance.borrow();
2013
1
            let mathml = get_element(&package_instance);
2014
1
            test_command("MoveStart", mathml, "a");
2015
1
            test_command("SetPlacemarker0", mathml, "a");
2016
1
            test_command("MoveEnd", mathml, "c");
2017
1
            test_command("Read0", mathml, "c");
2018
1
            test_command("Describe0", mathml, "c");
2019
1
            test_command("SetPlacemarker1", mathml, "c");
2020
1
            test_command("MoveTo0", mathml, "a");
2021
1
            test_command("MoveTo1", mathml, "c");
2022
1
            test_command("MoveLastLocation", mathml, "a");
2023
            
2024
1
            return Ok( () );
2025
1
        });
2026
1
    }
2027
2028
    #[test]
2029
1
    fn where_am_i_all() -> Result<()> {
2030
1
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
2031
1
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
2032
1
                <mi id='denom'>d</mi>
2033
1
            </mfrac></math>";
2034
1
        init_default_prefs(mathml_str, "Enhanced");
2035
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2036
1
        return MATHML_INSTANCE.with(|package_instance| {
2037
1
            let package_instance = package_instance.borrow();
2038
1
            let mathml = get_element(&package_instance);
2039
1
            NAVIGATION_STATE.with(|nav_stack| {
2040
1
                nav_stack.borrow_mut().push(NavigationPosition{
2041
1
                    current_node: "exp".to_string(),
2042
1
                    current_node_offset: 0
2043
1
                }, "None")
2044
1
            });
2045
            // WhereAmIAll doesn't change the stack
2046
1
            let speech =test_command("WhereAmIAll", mathml, "exp");
2047
            // should be 2 "inside" strings corresponding to steps to the root
2048
1
            assert_eq!(speech, "2; inside; b squared; inside; the fraction with numerator; b squared; and denominator d");
2049
1
            return Ok( () );
2050
1
        });
2051
1
    }
2052
2053
    #[test]
2054
1
    fn auto_zoom_out_mrow() -> Result<()> {
2055
1
        let mathml_str = "<math id='math'>
2056
1
        <mrow id='id-1'>
2057
1
          <mrow id='id-2'>
2058
1
            <mrow id='2ax'>
2059
1
              <mn id='2'>2</mn>
2060
1
              <mo id='id-5'>&#x2062;</mo>
2061
1
              <mi id='a'>a</mi>
2062
1
              <mo id='id-7'>&#x2062;</mo>
2063
1
              <mi id='x'>x</mi>
2064
1
            </mrow>
2065
1
            <mo id='plus'>+</mo>
2066
1
            <mi id='b'>b</mi>
2067
1
          </mrow>
2068
1
          <mo id='equal'>=</mo>
2069
1
          <mn id='10'>10</mn>
2070
1
        </mrow>
2071
1
       </math>";
2072
1
        init_default_prefs(mathml_str, "Enhanced");
2073
1
        set_preference("AutoZoomOut", "False")
?0
;
2074
1
        return MATHML_INSTANCE.with(|package_instance| {
2075
1
            let package_instance = package_instance.borrow();
2076
1
            let mathml = get_element(&package_instance);
2077
1
            test_command("ZoomInAll", mathml, "2");
2078
1
            test_command("MoveNext", mathml, "a");
2079
1
            test_command("MoveNext", mathml, "x");
2080
1
            test_command("MoveNext", mathml, "plus");
2081
1
            test_command("MovePrevious", mathml, "2ax");
2082
1
            return Ok( () );
2083
1
        });
2084
1
    }
2085
2086
    #[test]
2087
1
    fn auto_zoom_out_fraction() -> Result<()> {
2088
1
        let mathml_str = "<math id='math'>
2089
1
            <mrow id='mrow'>
2090
1
                <mfrac id='frac'>
2091
1
                    <mrow id='num'><mi id='a'>a</mi><mo id='plus'>+</mo><mn id='1'>1</mn></mrow>
2092
1
                    <mrow id='denom'><mn id='2'>2</mn><mo id='invisible-times'>&#x2062;</mo><mi id='b'>b</mi></mrow>
2093
1
                </mfrac>
2094
1
                <mo id='minus'>-</mo>
2095
1
                <mn id='3'>3</mn>
2096
1
            </mrow>
2097
1
        </math>";
2098
1
        init_default_prefs(mathml_str, "Enhanced");
2099
1
        set_preference("AutoZoomOut", "False")
?0
;
2100
1
        return MATHML_INSTANCE.with(|package_instance| {
2101
1
            let package_instance = package_instance.borrow();
2102
1
            let mathml = get_element(&package_instance);
2103
1
            test_command("ZoomIn", mathml, "frac");
2104
1
            test_command("ZoomIn", mathml, "num");
2105
1
            test_command("MoveNext", mathml, "denom");
2106
1
            test_command("MoveNext", mathml, "denom");
2107
1
            test_command("MovePrevious", mathml, "num");
2108
1
            test_command("MovePrevious", mathml, "num");
2109
1
            test_command("ZoomOut", mathml, "frac");
2110
1
            test_command("MoveNext", mathml, "minus");
2111
1
            return Ok( () );
2112
1
        });
2113
1
    }
2114
2115
    #[test]
2116
1
    fn zoom_root() -> Result<()> {
2117
1
        let mathml_str = r#"<math display='block' id='id-0'>
2118
1
        <mrow id='id-1'>
2119
1
            <mo id='id-9'>±</mo>
2120
1
            <msqrt id='id-10'>
2121
1
                <mrow id='id-11'>
2122
1
                    <msup id='id-12'> <mi id='id-13'>b</mi> <mn id='id-14'>2</mn> </msup>
2123
1
                    <mo id='id-15'>-</mo>
2124
1
                    <mn id='id-17'>4</mn>
2125
1
                </mrow>
2126
1
            </msqrt>
2127
1
        </mrow>
2128
1
        </math>"#;
2129
2130
1
        test_mode(mathml_str, "Enhanced")
?0
;
2131
1
        test_mode(mathml_str, "Simple")
?0
;
2132
1
        test_mode(mathml_str, "Character")
?0
;
2133
1
        return Ok( () );
2134
2135
3
        fn test_mode(mathml_str: &str, mode: &str) -> Result<()> {
2136
3
            init_default_prefs(mathml_str, mode);
2137
3
            set_preference("AutoZoomOut", "False")
?0
;
2138
3
            return MATHML_INSTANCE.with(|package_instance| {
2139
3
                debug!("--- Testing mode {mode} ---");
2140
3
                let package_instance = package_instance.borrow();
2141
3
                let mathml = get_element(&package_instance);
2142
3
                test_command("ZoomIn", mathml, "id-9");
2143
3
                debug!("\nStart zoom in");
2144
3
                match mode {
2145
3
                    "Enhanced" => {
2146
1
                        test_command("MoveNext", mathml, "id-10");
2147
1
                        let speech = test_command("ZoomIn", mathml, "id-11");
2148
1
                        assert_eq!(speech, "zoom in; in root; b squared minus 4");  // only one arg, so don't say "in root"
2149
1
                        let speech = test_command("ZoomIn", mathml, "id-12");
2150
1
                        assert_eq!(speech, "zoom in; b squared");  // only one arg, so don't say "in root"
2151
1
                        let speech = test_command("ZoomIn", mathml, "id-13");
2152
1
                        assert_eq!(speech, "zoom in; in base; b");
2153
                    },
2154
2
                    "Simple" => {
2155
1
                        test_command("MoveNext", mathml, "id-10");
2156
1
                        let speech = test_command("ZoomIn", mathml, "id-12");
2157
1
                        assert_eq!(speech, "zoom in; in root; b squared");
2158
1
                        let speech = test_command("ZoomIn", mathml, "id-13");
2159
1
                        assert_eq!(speech, "zoom in; in base; b");
2160
                    },
2161
                    _ => { // "Character"
2162
1
                        let speech = test_command("MoveNext", mathml, "id-13");
2163
1
                        assert_eq!(speech, "move right; in root; in base; b");
2164
                    }
2165
                }
2166
3
                let squared_speech = if mode == "Character" {
"b super 2 end super"1
} else {
"b squared"2
};
2167
3
                let sqrt_speech = if mode == "Character" {
"root"1
} else {
"square root"2
};
2168
3
                let speech = test_command("ZoomOut", mathml, "id-12");
2169
3
                assert_eq!(speech, format!("zoom out; out of base; {squared_speech}"));
2170
3
                let speech = test_command("ZoomOut", mathml, "id-11");
2171
3
                assert_eq!(speech, format!("zoom out; {squared_speech} minus 4"));
2172
3
                let speech = test_command("ZoomOut", mathml, "id-10");
2173
3
                assert_eq!(speech, format!("zoom out; out of root; the {sqrt_speech} of {squared_speech} minus 4, end root",));
2174
3
                return Ok( () );
2175
3
            });
2176
3
        }
2177
1
    }
2178
2179
    #[test]
2180
1
    fn matrix_speech() -> Result<()> {
2181
1
        let mathml_str = r#"<math id='math'>
2182
1
            <mrow id='mrow'>
2183
1
            <mo id='open'>[</mo>
2184
1
            <mtable columnspacing='1em' rowspacing='4pt' id='table'>
2185
1
                <mtr id='row-1'>
2186
1
                    <mtd id='1-1'><mn id='id-6'>9</mn></mtd>
2187
1
                    <mtd id='1-2'><mrow id='id-8'><mo id='id-9'>-</mo><mn id='id-10'>13</mn></mrow></mtd>
2188
1
                </mtr>
2189
1
                <mtr id='row-2'>
2190
1
                    <mtd id='2-1'><mn id='id-13'>5</mn></mtd>
2191
1
                    <mtd id='2-2'><mo id='id-16'>-</mo><mn id='id-17'>6</mn></mtd>
2192
1
                </mtr>
2193
1
            </mtable>
2194
1
            <mo id='close'>]</mo>
2195
1
            </mrow>
2196
1
        </math>"#;
2197
1
        init_default_prefs(mathml_str, "Enhanced");
2198
1
        return MATHML_INSTANCE.with(|package_instance| {
2199
1
            let package_instance = package_instance.borrow();
2200
1
            let mathml = get_element(&package_instance);
2201
1
            test_command("ZoomIn", mathml, "row-1");
2202
1
            let speech = test_command("MoveNext", mathml, "row-2");
2203
1
            assert_eq!(speech, "move right; row 2; 5, negative 6");
2204
1
            let speech = test_command("ZoomIn", mathml, "id-13");
2205
1
            assert_eq!(speech, "zoom in; column 1; 5");
2206
1
            let speech = test_command("ZoomOut", mathml, "row-2");
2207
1
            assert_eq!(speech, "zoom out; row 2; 5, negative 6");
2208
1
            let speech = test_command("ZoomOut", mathml, "table");
2209
1
            assert_eq!(speech, "zoom out; the 2 by 2 matrix; row 1; 9, negative 13; row 2; 5, negative 6");
2210
1
        return Ok( () );
2211
1
        });
2212
1
    }
2213
2214
    #[test]
2215
1
    fn chem_speech() -> Result<()> {
2216
        // this comes from bug 218
2217
1
        let mathml_str = "<math display='block' id='id-0'>
2218
1
            <mrow data-chem-formula='5' id='id-1'>
2219
1
                <msub data-chem-formula='1' id='id-2'>
2220
1
                    <mi data-chem-element='1' id='id-3'>H</mi>
2221
1
                    <mn id='id-4'>2</mn>
2222
1
                </msub>
2223
1
                <mo data-chem-formula-op='0' id='id-5'>&#x2063;</mo>
2224
1
                <mi data-chem-element='1' id='id-6'>S</mi>
2225
1
                <mo data-chem-formula-op='0' id='id-7'>&#x2063;</mo>
2226
1
                <msub data-chem-formula='1' id='id-8'>
2227
1
                    <mi data-chem-element='1' id='id-9'>O</mi>
2228
1
                    <mn id='id-10'>4</mn>
2229
1
                </msub>
2230
1
            </mrow>
2231
1
        </math>";
2232
1
        init_default_prefs(mathml_str, "Enhanced");
2233
1
        return MATHML_INSTANCE.with(|package_instance| {
2234
1
            let package_instance = package_instance.borrow();
2235
1
            let mathml = get_element(&package_instance);
2236
1
            test_command("ZoomIn", mathml, "id-2");
2237
1
            let speech = test_command("MoveNext", mathml, "id-6");
2238
            // tables need to check their parent for proper speech
2239
1
            assert_eq!(speech, "move right; cap s");
2240
1
            return Ok( () );
2241
1
        });
2242
1
    }
2243
2244
    #[test]
2245
1
    fn determinant_speech() -> Result<()> {
2246
1
        let mathml_str = "<math id='math'>
2247
1
            <mrow id='mrow'>
2248
1
            <mo id='open'>|</mo>
2249
1
            <mtable columnspacing='1em' rowspacing='4pt' id='table'>
2250
1
                <mtr id='row-1'>
2251
1
                    <mtd id='1-1'><mn id='id-6'>9</mn></mtd>
2252
1
                    <mtd id='1-2'><mrow id='id-8'><mo id='id-9'>-</mo><mn id='id-10'>13</mn></mrow></mtd>
2253
1
                </mtr>
2254
1
                <mtr id='row-2'>
2255
1
                    <mtd id='2-1'><mn id='id-13'>5</mn></mtd>
2256
1
                    <mtd id='2-2'><mrow id='row2-negative'><mo id='id-16'>-</mo><mn id='id-17'>6</mn></mrow></mtd>
2257
1
                </mtr>
2258
1
            </mtable>
2259
1
            <mo id='close'>|</mo>
2260
1
            </mrow>
2261
1
        </math>";
2262
1
        init_default_prefs(mathml_str, "Enhanced");
2263
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2264
1
        return MATHML_INSTANCE.with(|package_instance| {
2265
1
            let package_instance = package_instance.borrow();
2266
1
            let mathml = get_element(&package_instance);
2267
1
            let speech = test_command("ZoomIn", mathml, "row-1");
2268
1
            assert_eq!(speech, "zoom in; row 1; 9, negative 13");
2269
1
            let speech = test_command("MoveNext", mathml, "row-2");
2270
1
            assert_eq!(speech, "move right; row 2; 5, negative 6");
2271
1
            let speech = test_command("MoveNext", mathml, "row-2");
2272
1
            assert_eq!(speech, "cannot move right, end of math");
2273
1
            let speech = test_command("ZoomIn", mathml, "id-13");
2274
1
            assert_eq!(speech, "zoom in; column 1; 5");
2275
1
            let speech = test_command("MoveNext", mathml, "row2-negative");
2276
1
            assert_eq!(speech, "move right; column 2, negative 6");
2277
1
            let speech = test_command("ZoomOutAll", mathml, "table");
2278
1
            assert_eq!(speech, "zoomed out all of the way; the 2 by 2 determinant; row 1; 9, negative 13; row 2; 5, negative 6");
2279
1
            return Ok( () );
2280
1
        });
2281
1
    }
2282
2283
    #[test]
2284
1
    fn cases_speech() -> Result<()> {
2285
1
        let mathml_str = "<math id='id-0'>
2286
1
        <mrow id='id-1'>
2287
1
          <mo id='open'>{</mo>
2288
1
          <mtable columnalign='left left' columnspacing='1em' displaystyle='false' rowspacing='.2em' id='table'>
2289
1
            <mtr id='row-1'>
2290
1
              <mtd id='id-5'><mrow id='id-6'><mrow id='id-7'><mo id='id-8'>-</mo><mi id='id-9'>x</mi></mrow><mo id='id-10'>,</mo></mrow></mtd>
2291
1
              <mtd id='id-11'><mrow id='id-12'><mrow id='id-13'><mtext id='id-14'>if</mtext><mo id='id-15'>&#x2062;</mo><mi id='id-16'>x</mi></mrow><mo id='id-17'>&lt;</mo><mn id='id-18'>0</mn></mrow></mtd>
2292
1
            </mtr>
2293
1
            <mtr id='row-2'>
2294
1
              <mtd id='id-20'><mrow id='id-21'><mrow id='id-22'><mo id='id-23'>+</mo><mi id='id-24'>x</mi></mrow><mo id='id-25'>,</mo></mrow></mtd>
2295
1
              <mtd id='id-26'><mrow id='id-27'><mrow id='id-28'><mtext id='id-29'>if</mtext><mo id='id-30'>&#x2062;</mo><mi id='id-31'>x</mi></mrow><mo id='id-32'>≥</mo><mn id='id-33'>0</mn></mrow></mtd>
2296
1
            </mtr>
2297
1
          </mtable>
2298
1
        </mrow>
2299
1
       </math>";
2300
1
        init_default_prefs(mathml_str, "Enhanced");
2301
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2302
1
        return MATHML_INSTANCE.with(|package_instance| {
2303
1
            let package_instance = package_instance.borrow();
2304
1
            let mathml = get_element(&package_instance);
2305
1
            test_command("ZoomIn", mathml, "row-1");
2306
1
            let speech = test_command("MovePrevious", mathml, "row-1");
2307
1
            assert_eq!(speech, "move left; start of math");
2308
1
            let speech = test_command("MoveNext", mathml, "row-2");
2309
1
            assert_eq!(speech, "move right; case 2; positive x comma; if x, is greater than or equal to 0");
2310
1
            let speech = test_command("ZoomOut", mathml, "table");
2311
1
            assert_eq!(speech, "zoom out; 2 cases; case 1; negative x comma; if x is less than 0; case 2; positive x comma; if x, is greater than or equal to 0");
2312
1
            let speech = test_command("ZoomIn", mathml, "row-1");
2313
1
            assert_eq!(speech, "zoom in; case 1; negative x comma; if x is less than 0");
2314
1
            set_preference("NavMode", "Character").unwrap();
2315
1
            let speech = test_command("MovePrevious", mathml, "open");
2316
1
            assert_eq!(speech, "move left; open brace");
2317
1
            return Ok( () );
2318
1
        });
2319
1
    }
2320
2321
    #[test]
2322
1
    fn base_superscript() -> Result<()> {
2323
        // bug #217 -- zoom into base of parenthesized script 
2324
1
        let mathml_str = "<math display='block' id='id-0'>
2325
1
            <msup id='id-1'>
2326
1
                <mrow id='id-2'>
2327
1
                    <mo stretchy='false' id='id-3'>(</mo>
2328
1
                    <mrow id='id-4'>
2329
1
                        <mn id='id-5'>2</mn>
2330
1
                        <mo id='id-6'>&#x2062;</mo>
2331
1
                        <mi id='id-7'>x</mi>
2332
1
                    </mrow>
2333
1
                    <mo stretchy='false' id='id-8'>)</mo>
2334
1
                </mrow>
2335
1
                <mn id='id-9'>2</mn>
2336
1
            </msup>
2337
1
        </math>";
2338
1
        init_default_prefs(mathml_str, "Enhanced");
2339
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2340
1
        return MATHML_INSTANCE.with(|package_instance| {
2341
1
            let package_instance = package_instance.borrow();
2342
1
            let mathml = get_element(&package_instance);
2343
1
            let speech = test_command("ZoomIn", mathml, "id-4");
2344
1
            assert_eq!(speech, "zoom in; in base; 2 x");
2345
1
            let speech = test_command("MoveNext", mathml, "id-9");
2346
1
            assert_eq!(speech, "move right; in exponent; 2");
2347
1
            return Ok( () );
2348
1
        });
2349
1
    }
2350
2351
    #[test]
2352
1
    fn binomial_intent() -> Result<()> {
2353
1
        let mathml_str = "<math display='block' id='id-0'>
2354
1
                    <mrow intent='binomial($n,$k)' id='id-1'>
2355
1
                        <mo id='id-2'>(</mo>
2356
1
                        <mfrac linethickness='0pt' id='id-3'>
2357
1
                            <mi arg='n' id='id-4'>n</mi>
2358
1
                            <mi arg='k' id='id-5'>k</mi>
2359
1
                        </mfrac>
2360
1
                    <mo id='id-6'>)</mo>
2361
1
                    </mrow>
2362
1
                </math>";
2363
1
        init_default_prefs(mathml_str, "Character");
2364
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2365
1
        return MATHML_INSTANCE.with(|package_instance| {
2366
1
            let package_instance = package_instance.borrow();
2367
1
            let mathml = get_element(&package_instance);
2368
1
            debug!("Character mode");
2369
1
            let speech = test_command("MoveStart", mathml, "id-2");
2370
1
            assert_eq!(speech, "move to start of math; open paren");
2371
1
            let speech = test_command("MoveNext", mathml, "id-4");
2372
            // I'm not keen on the use of numerator/denominator here, but character mode turns off intent
2373
1
            assert_eq!(speech, "move right; in numerator; n");
2374
1
            let speech = test_command("MoveNext", mathml, "id-5");
2375
1
            assert_eq!(speech, "move right; in denominator; k");
2376
1
            debug!("before zoom out");
2377
1
            let speech = test_command("ZoomOut", mathml, "id-3");
2378
1
            assert_eq!(speech, "zoom out; out of denominator; n over k");
2379
            // let speech = test_command("ZoomOut", mathml, "id-1");
2380
            // assert_eq!(speech, "zoom out; open paren n over k, close paren");
2381
2382
1
            set_preference("NavMode", "Simple").unwrap();
2383
1
            debug!("Simple mode");
2384
1
            let speech = test_command("ZoomIn", mathml, "id-4");
2385
1
            assert_eq!(speech, "zoom in; in part 1; n");
2386
1
            let speech = test_command("MoveNext", mathml, "id-5");
2387
1
            assert_eq!(speech, "move right; in part 2; k");
2388
1
            let speech = test_command("MoveNext", mathml, "id-5");
2389
1
            assert_eq!(speech, "cannot move right, end of math");
2390
1
            let speech = test_command("ZoomOut", mathml, "id-1-literal-0");
2391
1
            assert_eq!(speech, "zoom out; out of part 2; n choose k");
2392
2393
1
            set_preference("NavMode", "Enhanced").unwrap();
2394
1
            debug!("Enhanced mode");
2395
1
            let speech = test_command("ZoomIn", mathml, "id-4");
2396
1
            assert_eq!(speech, "zoom in; in part 1; n");
2397
1
            let speech = test_command("MoveNext", mathml, "id-5");
2398
1
            assert_eq!(speech, "move right; in part 2; k");
2399
1
            let speech = test_command("MoveNext", mathml, "id-5");
2400
1
            assert_eq!(speech, "cannot move right, end of math");
2401
1
            let speech = test_command("ZoomOut", mathml, "id-1-literal-0");
2402
1
            assert_eq!(speech, "zoom out; out of part 2; n choose k");
2403
2404
1
            return Ok( () );
2405
1
        });
2406
1
    }
2407
2408
    #[test]
2409
1
    fn matrix_literal_intent() -> Result<()> {
2410
1
        let mathml_str = r#"<math display='block' id='id-0'>
2411
1
            <mrow intent='$m' id='id-1'>
2412
1
                <mo id='id-2'>(</mo>
2413
1
                <mtable arg='m' intent='_diagonal:prefix(1,2,3)' id='id-3'>
2414
1
                <mtr id='id-4'>
2415
1
                    <mtd id='id-5'><mn id='id-6'>1</mn></mtd>
2416
1
                    <mtd id='id-7'><mn id='id-8'>0</mn></mtd>
2417
1
                    <mtd id='id-9'><mn id='id-10'>0</mn></mtd>
2418
1
                </mtr>
2419
1
                <mtr id='id-11'>
2420
1
                    <mtd id='id-12'><mn id='id-13'>0</mn></mtd>
2421
1
                    <mtd id='id-14'><mn id='id-15'>2</mn></mtd>
2422
1
                    <mtd id='id-16'><mn id='id-17'>0</mn></mtd>
2423
1
                </mtr>
2424
1
                <mtr id='id-18'>
2425
1
                    <mtd id='id-19'><mn id='id-20'>0</mn></mtd>
2426
1
                    <mtd id='id-21'><mn id='id-22'>0</mn></mtd>
2427
1
                    <mtd id='id-23'><mn id='id-24'>3</mn></mtd>
2428
1
                </mtr>
2429
1
                </mtable>
2430
1
                <mo id='id-25'>)</mo>
2431
1
            </mrow>
2432
1
        </math>"#;
2433
1
        init_default_prefs(mathml_str, "Simple");
2434
1
        return MATHML_INSTANCE.with(|package_instance| {
2435
1
            let package_instance = package_instance.borrow();
2436
1
            let mathml = get_element(&package_instance);
2437
1
            let speech = test_command("ZoomIn", mathml, "id-3-literal-1");
2438
1
            assert_eq!(speech, "zoom in; 1");
2439
1
            let speech = test_command("MoveNext", mathml, "id-3-literal-2");
2440
1
            assert_eq!(speech, "move right; 2");
2441
1
            let speech = test_command("MoveNext", mathml, "id-3-literal-3");
2442
1
            assert_eq!(speech, "move right; 3");
2443
1
            let speech = test_command("MoveNext", mathml, "id-3-literal-3");
2444
1
            assert_eq!(speech, "cannot move right, end of math");
2445
1
            let speech = test_command("ZoomOut", mathml, "id-3-literal-0");
2446
1
            assert_eq!(speech, "zoom out; diagonal 1 2 3");
2447
2448
1
            return Ok( () );
2449
1
        });
2450
1
    }
2451
2452
    #[test]
2453
1
    fn absolute_value() -> Result<()> {
2454
1
        let mathml_str = "<math id='math'>
2455
1
                <mrow id='expr'>
2456
1
                    <mn id='2'>2</mn>
2457
1
                    <mrow id='abs'>
2458
1
                        <mo id='start'>|</mo>
2459
1
                        <mi id='x'>x</mi>
2460
1
                        <mo id='end'>|</mo>
2461
1
                    </mrow>
2462
1
                </mrow>
2463
1
            </math>";
2464
1
        init_default_prefs(mathml_str, "Enhanced");
2465
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2466
1
        return MATHML_INSTANCE.with(|package_instance| {
2467
1
            let package_instance = package_instance.borrow();
2468
1
            let mathml = get_element(&package_instance);
2469
1
            let speech = test_command("ZoomIn", mathml, "2");
2470
1
            assert_eq!(speech, "zoom in; 2");
2471
1
            let speech = test_command("MoveNext", mathml, "abs");
2472
1
            assert_eq!(speech, "move right; the absolute value of x");
2473
1
            let speech = test_command("ZoomIn", mathml, "x");
2474
1
            assert_eq!(speech, "zoom in; in absolute value; x");
2475
1
            let speech = test_command("MoveNext", mathml, "x");
2476
1
            assert_eq!(speech, "cannot move right, end of math");
2477
1
            set_preference("NavMode", "Character").unwrap();
2478
1
            let speech = test_command("MoveNext", mathml, "end");
2479
1
            assert_eq!(speech, "move right; vertical line");
2480
1
            let speech = test_command("MoveLineStart", mathml, "2");
2481
1
            assert_eq!(speech, "move to start of line; 2");
2482
1
            let speech = test_command("MoveNext", mathml, "start");
2483
1
            assert_eq!(speech, "move right; vertical line");
2484
1
            return Ok( () );
2485
1
        });
2486
1
    }
2487
2488
    #[test]
2489
1
    fn read_and_describe_fraction() -> Result<()> {
2490
1
        let mathml_str = "<math id='math'>
2491
1
            <mrow id='mrow'>
2492
1
                <mfrac id='frac'>
2493
1
                    <mrow id='numerator'><mi>b</mi><mo>+</mo><mn>1</mn></mrow>
2494
1
                <mn id='denom'>3</mn>
2495
1
                </mfrac>
2496
1
                <mo id='minus'>-</mo>
2497
1
                <mn id='3'>3</mn>
2498
1
            </mrow>
2499
1
        </math>";
2500
1
        init_default_prefs(mathml_str, "Enhanced");
2501
1
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
2502
1
        return MATHML_INSTANCE.with(|package_instance| {
2503
1
            let package_instance = package_instance.borrow();
2504
1
            let mathml = get_element(&package_instance);
2505
1
            test_command("ZoomIn", mathml, "frac");
2506
1
            let speech = test_command("ReadCurrent", mathml, "frac");
2507
1
            assert_eq!(speech, "read current; fraction, b plus 1, over 3, end fraction");
2508
1
            let speech = test_command("DescribeCurrent", mathml, "frac");
2509
1
            assert_eq!(speech, "describe current; fraction");
2510
1
            return Ok( () );
2511
1
        });
2512
1
    }
2513
2514
2515
    #[test]
2516
1
    fn read_and_describe_mrow() -> Result<()> {
2517
1
        let mathml_str = "<math id='math'>
2518
1
            <mrow id='mrow'>
2519
1
                <mn>1</mn><mo>+</mo>
2520
1
                <mn>2</mn><mo>+</mo>
2521
1
                <mn>3</mn><mo>+</mo>
2522
1
                <mn>4</mn><mo>+</mo>
2523
1
                <mn>5</mn><mo>+</mo>
2524
1
                <mn>6</mn><mo>+</mo>
2525
1
                <mn>7</mn>
2526
1
            </mrow>
2527
1
        </math>";
2528
1
        init_default_prefs(mathml_str, "Enhanced");
2529
1
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
2530
1
        return MATHML_INSTANCE.with(|package_instance| {
2531
1
            let package_instance = package_instance.borrow();
2532
1
            let mathml = get_element(&package_instance);
2533
1
            let speech = test_command("ZoomOutAll", mathml, "mrow");
2534
1
            assert_eq!(speech, "zoomed out all of the way; 1 plus 2 plus 3 plus 4 plus 5 plus 6 plus 7");
2535
1
            let speech = test_command("ReadCurrent", mathml, "mrow");
2536
1
            assert_eq!(speech, "read current; 1 plus 2 plus 3 plus 4 plus 5 plus 6 plus 7");
2537
1
            let speech = test_command("DescribeCurrent", mathml, "mrow");
2538
1
            assert_eq!(speech, "describe current; 1 plus 2 plus 3 and so on");
2539
1
            return Ok( () );
2540
1
        });
2541
1
    }
2542
2543
2544
    #[test]
2545
1
    fn read_next_invisible_char() -> Result<()> {
2546
1
        let mathml_str = "<math id='id-0'>
2547
1
            <mrow id='id-1'>
2548
1
                <mi id='id-2'>x</mi>
2549
1
                <mo id='id-3'>&#x2062;</mo>
2550
1
                <mi id='id-4'>y</mi>
2551
1
            </mrow>
2552
1
            </math>";
2553
1
        init_default_prefs(mathml_str, "Simple");
2554
1
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
2555
1
        return MATHML_INSTANCE.with(|package_instance| {
2556
1
            let package_instance = package_instance.borrow();
2557
1
            let mathml = get_element(&package_instance);
2558
1
            let speech = test_command("ZoomIn", mathml, "id-2");
2559
1
            assert_eq!(speech, "zoom in; x");
2560
1
            let speech = test_command("ToggleZoomLockUp", mathml, "id-2");
2561
1
            assert_eq!(speech, "enhanced mode; x");
2562
1
            let speech = test_command("ReadNext", mathml, "id-2");
2563
1
            assert_eq!(speech, "read right; y");
2564
1
            return Ok( () );
2565
1
        });
2566
1
    }
2567
2568
    
2569
    #[test]
2570
1
    fn basic_language_test() -> Result<()> {
2571
        // this is basically a sanity check that all the language's navigation.yaml files are at least syntactically correct
2572
        // FIX: should look through the Languages dir and figure this is out
2573
1
        let mathml_str = "<math id='math'>
2574
1
                <mrow id='contents'>
2575
1
                    <mrow id='lhs'>
2576
1
                        <mrow id='term'>
2577
1
                            <mn id='2'>2</mn>
2578
1
                            <mo id='invisible-times'>&#x2062;</mo>
2579
1
                            <msup id='msup'>
2580
1
                                <mi id='x'>x</mi>
2581
1
                                <mn id='3'>3</mn>
2582
1
                            </msup>
2583
1
                        </mrow>
2584
1
                        <mo id='plus'>+</mo>
2585
1
                        <mn id='1'>1</mn>
2586
1
                    </mrow>
2587
1
                <mo id='id-11'>=</mo>
2588
1
                <mi id='id-12'>y</mi>
2589
1
                </mrow>
2590
1
            </math>";
2591
        
2592
1
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
2593
11
        for lang in 
get_supported_languages1
().
unwrap_or_default1
() {
2594
11
            test_language(&lang, mathml_str);
2595
11
        }
2596
1
        return Ok( () );
2597
2598
11
        fn test_language(lang: &str, mathml_str: &str) {
2599
11
            init_default_prefs(mathml_str, "Enhanced");
2600
11
            set_preference("Language", lang).unwrap();
2601
2602
11
            set_preference("NavMode", "Enhanced").unwrap();
2603
11
            MATHML_INSTANCE.with(|package_instance| {
2604
11
                let package_instance = package_instance.borrow();
2605
11
                let mathml = get_element(&package_instance);
2606
11
                test_command("ZoomInAll", mathml, "2");
2607
11
                test_command("MoveNext", mathml, "msup");
2608
11
                test_command("MoveNext", mathml, "plus");
2609
11
                test_command("MovePrevious", mathml, "term");
2610
11
                test_command("MovePrevious", mathml, "term");
2611
11
                test_command("ZoomOutAll", mathml, "contents");
2612
11
            });
2613
2614
11
            set_preference("NavMode", "Simple").unwrap();
2615
11
            MATHML_INSTANCE.with(|package_instance: &RefCell<Package>| {
2616
11
                let package_instance = package_instance.borrow();
2617
11
                let mathml = get_element(&package_instance);
2618
11
                test_command("ZoomInAll", mathml, "2");
2619
11
                test_command("MoveNext", mathml, "msup");
2620
11
                test_command("MoveNext", mathml, "plus");
2621
11
                test_command("MovePrevious", mathml, "msup");
2622
11
                test_command("MovePrevious", mathml, "2");
2623
11
                test_command("MovePrevious", mathml, "2");
2624
11
                test_command("ZoomOutAll", mathml, "contents");
2625
11
            });
2626
2627
11
            set_preference("NavMode", "Character").unwrap();
2628
11
            MATHML_INSTANCE.with(|package_instance| {
2629
11
                let package_instance = package_instance.borrow();
2630
11
                let mathml = get_element(&package_instance);
2631
11
                test_command("ZoomIn", mathml, "2");
2632
11
                test_command("MoveNext", mathml, "x");
2633
11
                test_command("MoveNext", mathml, "3");
2634
11
                test_command("MoveNext", mathml, "plus");
2635
11
                test_command("MovePrevious", mathml, "3");
2636
11
                test_command("MovePrevious", mathml, "x");
2637
11
                test_command("MovePrevious", mathml, "2");
2638
11
                test_command("MovePrevious", mathml, "2");
2639
11
            });
2640
            
2641
            // simple sanity check that "overview.yaml" doesn't have a syntax error
2642
11
            set_preference("Overview", "True").unwrap();
2643
11
            set_preference("NavMode", "Character").unwrap();
2644
11
            MATHML_INSTANCE.with(|package_instance| {
2645
11
                let package_instance = package_instance.borrow();
2646
11
                let mathml = get_element(&package_instance);
2647
11
                test_command("ZoomIn", mathml, "2");
2648
11
            });
2649
11
        }
2650
1
    }
2651
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/navigate.rs
Line
Count
Source
1
//! Navigation is controlled by a `Navigation_Rules.yaml` file in conjunction with preferences.
2
//! See preference documentation for more info on navigation preferences.
3
#![allow(clippy::needless_return)]
4
5
use std::cell::{Ref, RefCell, RefMut};
6
use sxd_xpath::context::Evaluation;
7
use sxd_xpath::Value;
8
use sxd_document::dom::Element;
9
use sxd_document::Package;
10
11
use std::fmt;
12
use crate::canonicalize::{name, get_parent};
13
use crate::pretty_print::mml_to_string;
14
use crate::speech::{NAVIGATION_RULES, CONCAT_INDICATOR, CONCAT_STRING, SpeechRules, SpeechRulesWithContext};
15
use crate::infer_intent::add_fixity_children;
16
use crate::interface::copy_mathml;
17
#[cfg(not(target_family = "wasm"))]
18
use std::time::Instant;
19
use crate::errors::*;
20
use phf::phf_set;
21
use log::{debug};
22
23
pub const ID_OFFSET: &str = "data-id-offset";
24
25
const MAX_PLACE_MARKERS: usize = 10;
26
27
thread_local!{
28
    /// The current set of navigation rules
29
    pub static NAVIGATION_STATE: RefCell<NavigationState> =
30
            RefCell::new( NavigationState::new() );
31
}
32
33
pub static NAV_COMMANDS: phf::Set<&str> = phf_set! {
34
    "MovePrevious", "MoveNext", "MoveStart", "MoveEnd", "MoveLineStart", "MoveLineEnd", 
35
    "MoveCellPrevious", "MoveCellNext", "MoveCellUp", "MoveCellDown", "MoveColumnStart", "MoveColumnEnd", 
36
    "ZoomIn", "ZoomOut", "ZoomOutAll", "ZoomInAll", 
37
    "MoveLastLocation", 
38
    "ReadPrevious", "ReadNext", "ReadCurrent", "ReadCellCurrent", "ReadStart", "ReadEnd", "ReadLineStart", "ReadLineEnd", 
39
    "DescribePrevious", "DescribeNext", "DescribeCurrent", 
40
    "WhereAmI", "WhereAmIAll", 
41
    "ToggleZoomLockUp", "ToggleZoomLockDown", "ToggleSpeakMode", 
42
    "Exit", 
43
    "MoveTo0","MoveTo1","MoveTo2","MoveTo3","MoveTo4","MoveTo5","MoveTo6","MoveTo7","MoveTo8","MoveTo9",
44
    "Read0","Read1","Read2","Read3","Read4","Read5","Read6","Read7","Read8","Read9",
45
    "Describe0","Describe1","Describe2","Describe3","Describe4","Describe5","Describe6","Describe7","Describe8","Describe9",
46
    "SetPlacemarker0","SetPlacemarker1","SetPlacemarker2","SetPlacemarker3","SetPlacemarker4","SetPlacemarker5","SetPlacemarker6","SetPlacemarker7","SetPlacemarker8","SetPlacemarker9",
47
};
48
49
#[derive(Clone, PartialEq, Debug)]
50
struct NavigationPosition {
51
    current_node: String,           // id of current node
52
    current_node_offset: usize,     // for leaves, char offset in leaf (default = 0), otherwise id for artificial intent node
53
}
54
55
impl fmt::Display for NavigationPosition {
56
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
57
0
        return write!(f, "{}[+{}]", self.current_node, self.current_node_offset);
58
0
    }
59
}
60
61
const ILLEGAL_NODE_ID: &str = "!not set";     // an illegal 'id' value
62
impl Default for NavigationPosition {
63
48.6k
    fn default() -> Self {
64
48.6k
        NavigationPosition {
65
48.6k
            current_node: ILLEGAL_NODE_ID.to_string(),
66
48.6k
            current_node_offset: 0
67
48.6k
        }
68
48.6k
     }
69
}
70
71
72
#[derive(Debug, Clone)]
73
pub struct NavigationState {
74
    // it might be better to use a linked for the stacks, with the first node being the top
75
    // these two stacks should be kept in sync.
76
    position_stack: Vec<NavigationPosition>,    // all positions, so we can go back to them
77
    command_stack: Vec<&'static str>,           // all commands, so we can undo them
78
    place_markers: [NavigationPosition; MAX_PLACE_MARKERS],
79
    where_am_i: NavigationPosition,             // current 'where am i' location
80
81
    #[cfg(target_family = "wasm")]
82
    where_am_i_start_time: usize,               // FIX: for web
83
    #[cfg(not(target_family = "wasm"))]
84
    where_am_i_start_time: Instant,
85
    mode: String,                               // one of "Character", "Simple", or "Enhanced"
86
    speak_overview: bool,                       // true => describe after move; false => (standard) speech rules
87
}
88
89
impl fmt::Display for NavigationState {
90
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
91
0
        writeln!(f, "NavigationState{{")?;
92
0
        write!(f, "  Position Stack: ")?;
93
0
        for (i, nav_state) in self.position_stack.iter().enumerate() {
94
0
            write!(f, "{}{}", if i==0 {""} else {", "}, nav_state)?;
95
        }
96
0
        writeln!(f)?;
97
0
        write!(f, "  Command Stack: ")?;
98
0
        for (i, nav_state) in self.command_stack.iter().enumerate() {
99
0
            write!(f, "{}{}", if i==0 {""} else {", "}, *nav_state)?;
100
        }
101
0
        writeln!(f)?;
102
0
        writeln!(f, "  where_am_i: {}, start_time: {:?}", self.where_am_i, self.where_am_i_start_time)?;
103
0
        writeln!(f, "  mode: {}, speak_overview: {}", self.mode, self.speak_overview)?;
104
0
        writeln!(f, "}}")?;
105
0
        return Ok( () );
106
0
    }
107
}
108
109
impl NavigationState {
110
3.92k
    fn new() -> NavigationState {
111
3.92k
        return NavigationState {
112
3.92k
            position_stack: Vec::with_capacity(1024),
113
3.92k
            command_stack: Vec::with_capacity(1024),
114
3.92k
            place_markers: Default::default(),
115
3.92k
            where_am_i: NavigationPosition::default(),
116
3.92k
            // FIX: figure this out for the web
117
3.92k
            #[cfg(target_family = "wasm")]
118
3.92k
            where_am_i_start_time: 0,           // FIX: for web
119
3.92k
            #[cfg(not(target_family = "wasm"))]
120
3.92k
            where_am_i_start_time: Instant::now(),      // need to give it some value, and "default()" isn't an option
121
3.92k
            mode: "".to_string(),                       // set latter when we have some context
122
3.92k
            speak_overview: false,                      // set latter when we have some context
123
3.92k
        };
124
3.92k
    }
125
126
4.88k
    pub fn reset(&mut self) {
127
4.88k
        self.position_stack.clear();
128
4.88k
        self.command_stack.clear();
129
4.88k
        self.where_am_i = NavigationPosition::default();
130
4.88k
        self.reset_start_time()
131
4.88k
    }
132
133
134
    // defining reset_start_time because of the following message if done inline
135
    // attributes on expressions are experimental
136
    // see issue #15701 <https://github.com/rust-lang/rust/issues/15701> for more information
137
    #[cfg(target_family = "wasm")]
138
    fn reset_start_time(&mut self) {
139
         self.where_am_i_start_time = 0;
140
    }
141
142
    #[cfg(not(target_family = "wasm"))]
143
4.88k
    fn reset_start_time(&mut self) {
144
4.88k
         self.where_am_i_start_time = Instant::now();      // need to give it some value, and "default()" isn't an option
145
4.88k
    }
146
147
148
563
    fn push(&mut self, position: NavigationPosition, command: &'static str) {
149
563
        self.position_stack.push(position);
150
563
        self.command_stack.push(command);
151
563
    }
152
153
46
    fn pop(&mut self) -> Option<(NavigationPosition, &'static str)> {
154
46
        assert_eq!(self.position_stack.len(), self.command_stack.len());
155
46
        if self.position_stack.is_empty() {
156
0
            return None;
157
        } else {
158
46
            return Some( (self.position_stack.pop().unwrap(), self.command_stack.pop().unwrap()) );
159
        }
160
46
    }
161
162
2.75k
    fn top(&self) -> Option<(&NavigationPosition, &'static str)> {
163
2.75k
        if self.position_stack.is_empty() {
164
0
            return None;
165
2.75k
        }
166
2.75k
        let last = self.position_stack.len()-1;
167
2.75k
        return Some( (&self.position_stack[last], self.command_stack[last]) );
168
2.75k
    }
169
170
0
    pub fn get_navigation_mathml<'a>(&self, mathml: Element<'a>) -> Result<(Element<'a>, usize)> {
171
0
        if self.position_stack.is_empty() {
172
0
            return Ok( (mathml, 0) );
173
        } else {
174
0
            let (position, _) = self.top().unwrap();
175
0
            return match get_node_by_id(mathml, position) {
176
0
                None => bail!("internal error: id '{}' was not found in mathml:\n{}",
177
0
                                position.current_node, mml_to_string(mathml)),
178
0
                Some(found) => Ok( (found, position.current_node_offset) )
179
            };
180
        }
181
0
    }
182
183
1.09k
    pub fn get_navigation_mathml_id(&self, mathml: Element) -> (String, usize) {
184
1.09k
        if self.position_stack.is_empty() {
185
47
            return (mathml.attribute_value("id").unwrap().to_string(), 0);
186
        } else {
187
1.05k
            let (position, _) = self.top().unwrap();
188
1.05k
            return (position.current_node.clone(), position.current_node_offset);
189
        }
190
1.09k
    }
191
192
549
    fn init_navigation_context(&self, context: &mut sxd_xpath::Context, command: &'static str,
193
549
                               nav_state_top: Option<(&NavigationPosition, &'static str)>) {
194
549
        context.set_variable("NavCommand", command);
195
196
549
        if command == "WhereAmI" && 
self.where_am_i == NavigationPosition::default()0
{
197
0
            context.set_variable("NavNode", self.where_am_i.current_node.as_str());
198
0
            context.set_variable("NavNodeOffset", self.where_am_i.current_node_offset as f64);
199
549
        } else {
200
549
            let position = &self.position_stack[self.position_stack.len()-1];
201
549
            context.set_variable("NavNode", position.current_node.as_str());
202
549
            context.set_variable("NavNodeOffset", position.current_node_offset as f64);
203
549
        }
204
205
        // get the index from command (e.g., '3' in 'SetPlacemarker3 or MoveTo3' and set 'PlaceMarker' to it's position)
206
549
        if command.ends_with(|ch: char| ch.is_ascii_digit()) {
207
6
            let index = convert_last_char_to_number(command);
208
6
            let position = &self.place_markers[index];
209
6
            context.set_variable("PlaceMarkerIndex", index as f64);
210
6
            context.set_variable("PlaceMarker", position.current_node.as_str());
211
6
            context.set_variable("PlaceMarkerOffset", position.current_node_offset as f64);
212
543
        }
213
           
214
549
        context.set_variable("Overview", self.speak_overview);
215
549
        context.set_variable("ReadZoomLevel", (if self.mode == "Enhanced" {
-1200
} else {
1349
}) as f64);
216
549
        context.set_variable("MatchCounter", 0 as f64);
217
218
549
        if command == "MoveLastLocation" {
219
3
            let previous_command = match nav_state_top {
220
0
                None => "None",
221
3
                Some( (_, previous_command) ) => previous_command,
222
            };
223
3
            context.set_variable("PreviousNavCommand", previous_command);
224
546
        }
225
226
        // used by nav rules for speech -- needs an initial value so tests don't fail
227
549
        context.set_variable("SayCommand", "" );
228
549
        context.set_variable("Move2D", "" );
229
549
        context.set_variable("SpeakExpression", true );    // default is to speak the expr after navigation
230
549
        return;
231
232
6
        fn convert_last_char_to_number(str: &str) -> usize {
233
6
            let last_char = str.as_bytes()[str.len()-1];
234
6
            assert!( last_char.is_ascii_digit() );
235
6
            return (last_char - b'0') as usize;
236
6
        }
237
549
    }
238
}
239
240
// convert the last digit of a Placemarker command to an integer
241
2
fn convert_last_char_to_number(str: &str) -> usize {
242
2
    let last_char = str.as_bytes()[str.len()-1];
243
2
    assert!( last_char.is_ascii_digit() );
244
2
    return (last_char - b'0') as usize;
245
2
}
246
247
/// Get the node associated with a `NavigationPosition`.
248
/// This can be called on an intent tree 
249
9.18k
fn get_node_by_id<'a>(mathml: Element<'a>, pos: &NavigationPosition) -> Option<Element<'a>> {
250
9.18k
    if let Some(
mathml_id9.17k
) = mathml.attribute_value("id") &&
251
9.17k
       mathml_id == pos.current_node.as_str() &&
252
1.46k
        (crate::xpath_functions::is_leaf(mathml) || 
253
537
        mathml.attribute_value(ID_OFFSET).unwrap_or("0") == pos.current_node_offset.to_string()) {
254
1.46k
        return Some(mathml);
255
7.71k
    }
256
257
10.0k
    for child in 
mathml7.71k
.
children7.71k
() {
258
10.0k
        if let Some(
child7.71k
) = child.element() &&
259
7.71k
           let Some(
found4.41k
) = get_node_by_id(child, pos) {
260
4.41k
                return Some(found);
261
5.60k
            }
262
    }
263
3.29k
    return None;
264
9.18k
}
265
266
/// Search the mathml for the id and set the navigation node to that id
267
/// Resets the navigation stack
268
2
pub fn set_navigation_node_from_id(mathml: Element, id: &str, offset: usize) -> Result<()> {
269
2
    let current_node = id.to_string();
270
2
    let pos = NavigationPosition { current_node: current_node.clone(), current_node_offset: offset };
271
2
    let node = get_node_by_id(mathml, &pos);
272
2
    if node.is_some() {
273
2
        return NAVIGATION_STATE.with(|nav_state| {
274
2
            let mut nav_state = nav_state.borrow_mut();
275
2
            nav_state.reset();
276
2
            nav_state.push(NavigationPosition{
277
2
                current_node,
278
2
                current_node_offset: offset
279
2
            }, "None");
280
2
            return Ok( () );
281
2
        })
282
    } else {
283
0
        bail!("Id {} not found in MathML {}", id, mml_to_string(mathml));
284
    }
285
2
}
286
287
/// Get's the Nav Node from the context, with some exceptions such as Toggle commands where it isn't set.
288
/// Note: mathml can be any node. It isn't really used but some Element needs to be part of Evaluate().
289
571
pub fn get_nav_node<'c>(context: &sxd_xpath::Context<'c>, var_name: &str, mathml: Element<'c>, start_node: Element<'c>, command: &str, nav_mode: &str) -> Result<String> {
290
571
    let start_id = start_node.attribute_value("id").unwrap_or_default();
291
571
    if command.starts_with("Toggle") {
292
1
        return Ok( start_id.to_string() );
293
    } else {
294
570
        return context_get_variable(context, var_name, mathml)
295
570
                .with_context(|| 
format!0
("When trying to {} starting at id={} in {} mode",
296
0
                                                command, start_node.attribute_value("id").unwrap_or_default(), nav_mode));
297
    }
298
571
}
299
300
// FIX: think of a better place to put this, and maybe a better interface
301
/// Note: mathml can be any node. It isn't really used but some Element needs to be part of Evaluate().
302
/// If the context variable has String, Number, or Boolean xpath value, return it as a string. Otherwise it is an error
303
4.55k
pub fn context_get_variable<'c>(context: &sxd_xpath::Context<'c>, var_name: &str, mathml: Element<'c>) -> Result<String> {
304
    // This is slightly roundabout because Context doesn't expose a way to get the values.
305
    // Instead, we create an "Evaluation", which is just one level of indirection.
306
    use sxd_xpath::nodeset::Node;
307
4.55k
    let evaluation = Evaluation::new(context, Node::Element(mathml));
308
4.55k
    return match evaluation.value_of(var_name.into()) {
309
4.55k
        Some(value) => match value {
310
1.74k
            Value::String(s) => Ok(s.clone()),
311
1.20k
            Value::Number(f) => Ok(f.to_string()),
312
1.09k
            Value::Boolean(b) => Ok(format!("{b}")),    // "true" or "false"
313
509
            Value::Nodeset(nodes) => {
314
509
                if nodes.size() == 1 &&
315
509
                   let Some(attr) = nodes.document_order_first().unwrap().attribute() {
316
509
                        return Ok(attr.value().to_string());
317
0
                    };
318
0
                let mut error_message = format!("Variable '{var_name}' set somewhere in navigate.yaml is nodeset and not an attribute: ");
319
0
                if nodes.size() == 0 {
320
0
                    error_message += &format!("0 nodes (false) -- {} set to non-existent node in\n{}",
321
0
                                              var_name, mml_to_string(mathml));
322
0
                } else {
323
0
                    let singular = nodes.size()==1;
324
0
                    error_message += &format!("{} node{}. {}:",
325
0
                            nodes.size(),
326
0
                            if singular {""} else {"s"},
327
0
                            if singular {"Node is"} else {"Nodes are"});
328
0
                    nodes.document_order()
329
0
                        .iter()
330
0
                        .enumerate()
331
0
                        .for_each(|(i, node)| {
332
0
                            match node {
333
0
                                sxd_xpath::nodeset::Node::Element(mathml) =>
334
0
                                    error_message += &format!("#{}:\n{}",i, mml_to_string(*mathml)),
335
0
                                _ => error_message += &format!("'{node:?}'"),
336
                            }   
337
0
                        })    
338
                };
339
0
                bail!(error_message);
340
            },
341
        },
342
0
        None => bail!("Could not find value for navigation variable '{}'", var_name),
343
    }
344
4.55k
}
345
346
/// Wrapper around context_get_variable to get an integer variable
347
1.70k
fn context_get_int_variable<'c>(context: &sxd_xpath::Context<'c>, var_name: &str, mathml: Element<'c>) -> Result<usize> {
348
1.70k
    let value = context_get_variable(context, var_name, mathml)
?0
;
349
1.70k
    return match value.parse::<usize>() {
350
1.70k
        Ok(i) => Ok(i),
351
0
        Err(e) => bail!("Could not parse navigation variable '{}' with value '{}' as integer: {}", var_name, value, e),
352
    }
353
1.70k
}
354
355
/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).]
356
/// The spoken text for the new current node is returned.
357
0
pub fn do_mathml_navigate_key_press(mathml: Element,
358
0
            key: usize, shift_key: bool, control_key: bool, alt_key: bool, meta_key: bool) -> Result<String> {
359
0
    let (command, param) = key_press_to_command_and_param(key, shift_key, control_key, alt_key, meta_key)?;
360
0
    return do_navigate_command_and_param(mathml, command, param);
361
0
}
362
363
2
fn do_navigate_command_and_param(mathml: Element, command: NavigationCommand, param: NavigationParam) -> Result<String> {
364
2
    return do_navigate_command_string(mathml, navigation_command_string(command, param));
365
2
}
366
367
549
pub fn do_navigate_command_string(mathml: Element, nav_command: &'static str) -> Result<String> {   
368
    // first check to see if nav file has been changed -- don't bother checking in loop below
369
549
    NAVIGATION_RULES.with(|rules| {
370
549
        rules.borrow_mut().read_files()
371
549
    })
?0
;
372
373
549
    if mathml.children().is_empty() {
374
0
        bail!("MathML has not been set -- can't navigate");
375
549
    };
376
377
549
    return NAVIGATION_STATE.with(|nav_state| {
378
549
        let mut nav_state = nav_state.borrow_mut();
379
        // debug!("MathML: {}", mml_to_string(mathml));
380
549
        if nav_state.position_stack.is_empty() {
381
            // initialize to root node
382
47
            nav_state.push(NavigationPosition{
383
47
                current_node: mathml.attribute_value("id").unwrap().to_string(),
384
47
                current_node_offset: 0
385
47
            }, "None")
386
502
        };
387
388
549
        return NAVIGATION_RULES.with(|rules| {
389
549
            let rules = rules.borrow();
390
549
            let new_package = Package::new();
391
549
            let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), "", 0);
392
            
393
549
            nav_state.mode = rules.pref_manager.as_ref().borrow().pref_to_string("NavMode");
394
549
            nav_state.speak_overview = rules.pref_manager.as_ref().borrow().pref_to_string("Overview") == "true";
395
396
549
            nav_state.init_navigation_context(rules_with_context.get_context(), nav_command, nav_state.top());
397
            
398
            // start navigation off at the right node
399
549
            if nav_command == "MoveLastLocation" {
400
3
                nav_state.pop();
401
546
            }
402
403
            // If no speech happened for some calls, we try the call again (e.g, no speech for invisible times).
404
            // To prevent to infinite loop, we limit the number of tries
405
            const LOOP_LIMIT: usize = 3;
406
549
            let mut cumulative_speech = String::with_capacity(120);
407
569
            for loop_count in 
0..LOOP_LIMIT549
{
408
569
                match apply_navigation_rules(mathml, nav_command, &rules, &mut rules_with_context, &mut nav_state, loop_count) {
409
569
                    Ok( (speech, done)) => {
410
569
                        cumulative_speech = cumulative_speech + if loop_count==0 {
""549
} else {
" "20
} + speech.trim();
411
569
                        if done {
412
549
                            let (tts, rate) = {
413
549
                                let prefs = rules.pref_manager.borrow();
414
549
                                (prefs.pref_to_string("TTS"), prefs.pref_to_string("MathRate"))
415
549
                            };
416
549
                            if rate != "100" {
417
0
                                match tts.as_str() {
418
0
                                    "SSML"
419
0
                                        if !cumulative_speech.starts_with("<prosody rate") => {
420
0
                                            cumulative_speech = format!("<prosody rate='{}%'>{}</prosody>", &rate, &cumulative_speech);
421
0
                                        }
422
0
                                    "SAPI5"
423
0
                                        if !cumulative_speech.starts_with("<rate speed") => {
424
0
                                            cumulative_speech = format!(
425
0
                                                "<rate speed='{:.1}'>{}</rate>",
426
0
                                                10.0 * (0.01 * rate.parse::<f32>().unwrap_or(100.0)).log(3.0),
427
0
                                                cumulative_speech
428
0
                                            );
429
0
                                        }
430
0
                                    _ => (),  // do nothing
431
                                }
432
549
                            }
433
549
                                                return Ok( rules.pref_manager.borrow().get_tts()
434
549
                                            .merge_pauses(crate::speech::remove_optional_indicators(
435
549
                                                &cumulative_speech.replace(CONCAT_STRING, "")
436
549
                                                                    .replace(CONCAT_INDICATOR, "")                            
437
549
                                                            )
438
549
                                            .trim_start().trim_end_matches([' ', ',', ';'])) );
439
20
                        }
440
                    },
441
0
                    Err(e) => {
442
0
                        return Err(e);
443
                    }
444
                }
445
            }
446
0
            bail!("Internal error: Navigation exceeded limit of number of times no speech generated
447
                   when attempting to {} in {} mode start at id={} in this MathML:\n{}.",
448
0
                   nav_command, nav_state.mode, nav_state.top().unwrap().0.current_node, mml_to_string(mathml));
449
549
        });
450
549
    });
451
452
570
    fn get_start_node<'m>(mathml: Element<'m>, nav_state: &RefMut<NavigationState>) -> Result<Element<'m>>  {
453
570
        let element = match nav_state.top() {
454
            None => {
455
0
                let nav_position = NavigationPosition { current_node: mathml.attribute_value("id").unwrap().to_string(), current_node_offset: 0 };
456
0
                get_node_by_id(mathml, &nav_position)
457
            },
458
570
            Some( (position, _) ) => get_node_by_id(mathml, position),
459
        };
460
461
570
        return match element {
462
569
            Some(node) => Ok(node),
463
            None => {
464
1
                bail!("Internal Error: didn't find id/offset '{:?}' while attempting to start navigation. MathML is\n{}",
465
1
                      nav_state.top().map(|t| t.0), mml_to_string(mathml));
466
            }
467
        };
468
570
    }
469
470
471
472
569
    fn apply_navigation_rules<'c, 'm:'c>(mathml: Element<'m>, nav_command: &'static str,
473
569
            rules: &Ref<SpeechRules>, rules_with_context: &mut SpeechRulesWithContext<'c, '_, 'm>, nav_state: &mut RefMut<NavigationState>,
474
569
            loop_count: usize) -> Result<(String, bool)> {
475
        {
476
569
            let context = rules_with_context.get_context();
477
569
            context.set_variable("MatchCounter", loop_count as f64);
478
569
            nav_state.mode = context_get_variable(context, "NavMode", mathml)
?0
;
479
        }
480
481
569
        let mut add_literal = nav_state.mode == "Character";
482
569
        let (intent, nav_intent) = if add_literal {
483
206
            (mathml, mathml)
484
        } else {
485
363
            let intent = crate::speech::intent_from_mathml(mathml, rules_with_context.get_document())
?0
;
486
363
            (intent, add_fixity_children(copy_mathml(intent)))
487
        };
488
489
569
        let mut properties = "";
490
569
        if add_literal {
491
206
            properties  = mathml.attribute_value("data-intent-property").unwrap_or_default();
492
206
            if properties.contains(":literal:") {
493
0
                add_literal = false;
494
206
            } else {
495
206
                mathml.set_attribute_value("data-intent-property", (":literal:".to_string() + properties).as_str());
496
206
            };
497
363
        }
498
        // we should always find the start node.
499
        // however, if we were navigating by character, then switched the NavMode, the intent tree might not have that node in it
500
569
        let start_node = match get_start_node(nav_intent, nav_state) {
501
568
            Ok(node) => node,
502
            Err(_) => {
503
                // find the node in the other tree (probably mathml) and walk up to find a parent that has an id in both
504
1
                debug!("Could not find start_node in nav_intent -- trying other_tree");
505
1
                let other_tree = if nav_state.mode == "Character" {
nav_intent0
} else {mathml};
506
1
                let mut found_node = get_start_node(other_tree, nav_state)
?0
;
507
2
                while name(found_node) != "math" {
508
2
                    found_node = get_parent(found_node);
509
                    // debug!("found_node:\n{}", mml_to_string(found_node));
510
2
                    let temp_pos = NavigationPosition {
511
2
                        current_node: found_node.attribute_value("id").unwrap_or_default().to_string().clone(),
512
2
                        current_node_offset: found_node.attribute_value(ID_OFFSET).unwrap_or_default().parse::<usize>().unwrap_or_default(),
513
2
                    };
514
2
                    if let Some(
intent_node1
) = get_node_by_id(nav_intent, &temp_pos) {
515
1
                        found_node = intent_node;
516
1
                        break;
517
1
                    }
518
                }
519
1
                found_node
520
            }
521
        };
522
523
        // debug!("intent=\n{}", mml_to_string(intent));
524
        // debug!("nav intent=\n{}", mml_to_string(nav_intent));
525
        // debug!("start_node id={}\n{}", nav_state.top().unwrap().0.current_node.as_str(), mml_to_string(start_node));
526
        // if name(start_node) != "math" {
527
        //     let mut parent= get_parent(start_node);
528
        //     if name(parent) != "math" {
529
        //         parent = get_parent(parent);
530
        //     }
531
        //     debug!("parent or grandparent of start_node:\n{}", mml_to_string(parent));
532
        // }
533
569
        let offset = context_get_int_variable(rules_with_context.get_context(), "NavNodeOffset", intent)
?0
;
534
569
        rules_with_context.set_nav_node_offset(offset);
535
569
        debug!("starting nav_position: {}, start node ={}", 
nav_state.top()0
.
unwrap0
().0,
name0
(
start_node0
));
536
537
569
        let raw_speech_string = rules_with_context.match_pattern::<String>(start_node)
538
569
                    .context("Pattern match/replacement failure during math navigation!")
?0
;
539
569
        let speech = rules.pref_manager.borrow().get_tts()
540
569
                    .merge_pauses(crate::speech::remove_optional_indicators(
541
569
                        &raw_speech_string.replace(CONCAT_STRING, "")
542
569
                                                .replace(CONCAT_INDICATOR, "")                            
543
569
                                    )
544
569
                    .trim());
545
        // debug!("Nav Speech: {}", speech);
546
547
        // FIX: add things that need to do a speech replacement based on some marker for "where am i" and others that loop ([Speak: id])???
548
        // what else needs to be done/set???
549
550
        // transfer some values that might have been set into the prefs
551
569
        let offset = context_get_int_variable(rules_with_context.get_context(), "NavNodeOffset", intent)
?0
;
552
569
        rules_with_context.set_nav_node_offset(offset);
553
569
        let context = rules_with_context.get_context();
554
569
        nav_state.speak_overview = context_get_variable(context, "Overview", intent)
?0
== "true";
555
569
        nav_state.mode = context_get_variable(context, "NavMode", intent)
?0
;
556
569
        rules.pref_manager.as_ref().borrow_mut().set_user_prefs("NavMode", &nav_state.mode)
?0
;
557
558
569
        debug!("context value of NavNodeOffset: {:?}", 
context_get_variable0
(
context0
,
"NavNodeOffset"0
,
intent0
)
?0
);
559
569
        let nav_position = NavigationPosition {
560
569
                current_node: get_nav_node(context, "NavNode", intent, start_node, nav_command, &nav_state.mode)
?0
,
561
569
                current_node_offset: context_get_int_variable(context, "NavNodeOffset", intent)
?0
,
562
            };
563
564
        // after a command, we either read or describe the new location (part of state)
565
        // also some commands are DescribeXXX/ReadXXX, so we need to look at the commands also
566
569
        let use_read_rules = if nav_command.starts_with("Read") {
567
5
            true
568
564
        } else if nav_command.starts_with("Describe") {
569
3
            false
570
        } else {
571
561
            !nav_state.speak_overview
572
        };
573
574
569
        debug!("after match nav_position: {}", nav_position);
575
        // push the new location on the stack
576
569
        if nav_position != NavigationPosition::default() && &nav_position != nav_state.top().unwrap().0 {
577
483
            nav_state.push(nav_position.clone(), nav_command);
578
483
        
}86
579
580
569
        if nav_command.starts_with("SetPlacemarker") {
581
2
            let new_node_id = get_nav_node(context, "NavNode", intent, start_node, nav_command, &nav_state.mode)
?0
;
582
2
            nav_state.place_markers[convert_last_char_to_number(nav_command)] = NavigationPosition{
583
2
                current_node: new_node_id,
584
2
                current_node_offset: context_get_int_variable(context, "NavNodeOffset", intent)
?0
,
585
            }
586
567
        }
587
588
569
        let nav_mathml = get_node_by_id(intent, &nav_position);
589
569
        if nav_mathml.is_some() && context_get_variable(context, "SpeakExpression", intent)
?0
== "true" {
590
            // Speak/Overview of where we landed (if we are supposed to speak it) -- use intent, not nav_intent
591
            // Note: NavMode might have changed, so we need to recheck the mode to see if we use LiteralSpeak
592
519
            let literal_speak = nav_state.mode == "Character";
593
519
            let node_speech_result = speak(mathml, intent, &nav_position, literal_speak, use_read_rules);
594
519
            remove_literal_property(mathml, add_literal, properties);
595
519
            let node_speech = match node_speech_result {
596
519
                Ok(speech) => speech,
597
0
                Err(e) => {
598
0
                    if e.to_string() == crate::speech::NAV_NODE_SPEECH_NOT_FOUND {
599
0
                        bail!("Internal error: With {}/{} in {} mode, can't {} from expression with id '{}' inside:\n{}",
600
0
                              rules.pref_manager.as_ref().borrow().pref_to_string("Language"),
601
0
                              rules.pref_manager.as_ref().borrow().pref_to_string("SpeechStyle"),
602
0
                              &nav_state.mode, nav_command, &nav_position.current_node, mml_to_string(if literal_speak {mathml} else {intent}));
603
0
                    }
604
0
                    return Err(e);
605
                }
606
            };
607
608
            // debug!("node_speech: '{}', speech: '{}'\n", node_speech, speech);
609
519
            if node_speech.is_empty() {
610
                // try again in loop
611
20
                return Ok( (speech, false));
612
            } else {
613
499
                pop_stack(nav_state, loop_count, nav_command);
614
                // debug!("returning: '{}'", speech.clone() + " " + &node_speech);
615
499
                return Ok( (speech + " " + &node_speech, true) );
616
            }
617
        } else {
618
50
            remove_literal_property(mathml, add_literal, properties);
619
50
            pop_stack(nav_state, loop_count, nav_command);
620
50
            return Ok( (speech, true) );
621
        };
622
623
569
        fn remove_literal_property(mathml: Element, add_literal: bool, properties: &str) {
624
569
            if add_literal {
625
206
                if properties.is_empty() {
626
206
                    mathml.remove_attribute("data-intent-property");
627
206
                } else {
628
0
                    mathml.set_attribute_value("data-intent-property", properties);
629
0
                }
630
363
            }
631
569
        }
632
633
569
    }
634
635
636
549
    fn pop_stack(nav_state: &mut NavigationState, count: usize, nav_command: &'static str) {
637
        // save the final state and pop the intermediate states that did nothing
638
549
        let push_command_on_stack = (nav_command.starts_with("Move") && 
nav_command != "MoveLastLocation"355
) ||
nav_command197
.
starts_with197
("Zoom");
639
        // debug!("pop_stack: nav_command={}, count={}, push? {} stack=\n{}", nav_command, count, push_command_on_stack, nav_state);
640
549
        if count == 0 {
641
529
            if !push_command_on_stack && 
nav_command13
==
nav_state13
.top().unwrap().1 {
642
3
                nav_state.pop();    // remove ReadXXX, SetPlacemarker, etc. commands that don't change the state
643
526
            }
644
529
            return;
645
20
        }
646
20
        let (top_position, top_command) = nav_state.pop().unwrap();
647
20
        let mut count = count - 1;
648
        loop {
649
            // debug!("  ... loop count={}", count);
650
20
            nav_state.pop();
651
20
            if count == 0 {
652
20
                break;
653
0
            };
654
0
            count -= 1;
655
        };
656
20
        if push_command_on_stack {
657
19
            nav_state.push(top_position, top_command);
658
19
        
}1
659
        // debug!("END pop_stack: stack=\n{}", nav_state);
660
549
    }
661
549
}
662
663
/// Speak the intent tree at the nav_node_id if that id exists in the intent tree; otherwise use the mathml tree.
664
/// If full_read is true, we speak the tree, otherwise we use the overview rules.
665
/// If literal_speak is true, we use the literal speak rules (and use the mathml tree).
666
519
fn speak(mathml: Element, intent: Element, nav_position: &NavigationPosition, literal_speak: bool, full_read: bool) -> Result<String> {
667
519
    if full_read {
668
        // In something like x^3, we might be looking for the '3', but it will be "cubed", so we don't find it.
669
        // Or we might be on a "(" surrounding a matrix and that isn't part of the intent
670
        // We are probably safer in terms of getting the same speech if we retry intent starting at the nav node,
671
        //  but the node to speak is almost certainly trivial.
672
        // By speaking the non-intent tree, we are certain to speak on the next try
673
505
        if !literal_speak && 
get_node_by_id327
(intent, nav_position).
is_some327
() {
674
                // debug!("speak: nav_node_id={}, intent=\n{}", nav_node_id, mml_to_string(intent));
675
327
            match crate::speech::speak_mathml(intent, &nav_position.current_node, nav_position.current_node_offset) {
676
326
                Ok(speech) => return Ok(speech),
677
1
                Err(e) => {
678
1
                    if e.to_string() != crate::speech::NAV_NODE_SPEECH_NOT_FOUND {
679
0
                        return Err(e);
680
1
                    }
681
                    // else could be something like '3' in 'x^3' ("cubed")
682
                },
683
            }
684
178
        }
685
        // debug!("speak (literal): nav_node_id={}, mathml=\n{}", nav_node_id, mml_to_string(mathml));
686
179
        let speech = crate::speech::speak_mathml(mathml,
687
179
                &nav_position.current_node, nav_position.current_node_offset);
688
        // debug!("speech from speak: {:?}", speech);
689
179
        return speech;
690
    } else {
691
14
        return crate::speech::overview_mathml(mathml, &nav_position.current_node, nav_position.current_node_offset);
692
    }
693
519
}
694
695
696
// Virtual key codes used by the key-press based navigation interface.
// MathPlayer's interface mentions these, so we keep them.
// These (KeyboardEvent.keyCode) are consistent across platforms (mostly?) but are deprecated.
//   KeyboardEvent.code is recommended instead (a string)

// arrow keys
const VK_LEFT: usize = 0x25;
const VK_UP: usize = 0x26;
const VK_RIGHT: usize = 0x27;
const VK_DOWN: usize = 0x28;

// start/end of expression/line/column
const VK_HOME: usize = 0x24;
const VK_END: usize = 0x23;

// everything else
const VK_BACK: usize = 0x08;
const VK_RETURN: usize = 0x0D;
const VK_ESCAPE: usize = 0x1B;
const VK_SPACE: usize = 0x20;
709
710
// Utilities that return one of four commands/params based on the shift/control key combination
711
712
/// The kind of action a key press asks for.
/// A command is combined with a [`NavigationParam`] to form the name of a navigation command (e.g., "MoveNext").
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NavigationCommand {
    Move,
    Zoom,
    MoveLastLocation,
    Read,
    Describe,
    ReadTo,
    Locate,
    ChangeNavMode,
    ToggleSpeakMode,
    SetPlacemarker,
    Exit,
    /// Placeholder used for key combinations that have no command
    Last,
}
726
727
/// The target/direction that qualifies a [`NavigationCommand`].
///
/// The order of the variants matters: `Placemarker0`..=`Placemarker9` must stay first and contiguous
/// because callers test for a placemarker with `<`/`>` and compute its index with
/// `(param as usize) - (NavigationParam::Placemarker0 as usize)`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy)]
enum NavigationParam {
    Placemarker0,
    Placemarker1,
    Placemarker2,
    Placemarker3,
    Placemarker4,
    Placemarker5,
    Placemarker6,
    Placemarker7,
    Placemarker8,
    Placemarker9,
    Previous,
    Current,
    Next,
    Start,
    End,
    LineStart,
    LineEnd,
    CellPrevious,
    CellCurrent,
    CellNext,
    ColStart,
    ColEnd,
    CellUp,
    CellDown,
    /// Placeholder used when the command takes no meaningful param
    Last,
}
755
756
757
0
fn choose_command(
758
0
  shift_key: bool,
759
0
  control_key: bool,
760
0
  none: NavigationCommand,
761
0
  shift: NavigationCommand,
762
0
  control: NavigationCommand,
763
0
  shift_control: NavigationCommand
764
0
) -> NavigationCommand {
765
0
     if shift_key && control_key {
766
0
    return shift_control;
767
0
    } else if control_key {
768
0
        return control;
769
0
    } else if shift_key {
770
0
    return shift;
771
  } else {
772
0
    return none;
773
    }
774
0
}
775
776
0
fn choose_param(
777
0
  shift_key: bool,
778
0
  control_key: bool,
779
0
  none: NavigationParam,
780
0
  shift: NavigationParam,
781
0
  control: NavigationParam,
782
0
  shift_control: NavigationParam
783
0
) -> NavigationParam {
784
0
    if shift_key && control_key {
785
0
    return shift_control;
786
0
    } else if control_key {
787
0
        return control;
788
0
    } else if shift_key {
789
0
    return shift;
790
  } else {
791
0
    return none;
792
    }
793
0
}
794
795
0
fn key_press_to_command_and_param(
796
0
    key: usize,
797
0
  shift_key: bool,
798
0
  control_key: bool,
799
0
  alt_key: bool,
800
0
  meta_key: bool,
801
0
) -> Result<(NavigationCommand, NavigationParam)> {
802
  // key press mapping should probably be stored externally (registry) with an app that allows changes
803
  // for now, we build in the defaults
804
805
    // this is a hack to map alt+ctl+arrow to ctl+arrow to change table mappings (github.com/NSoiffer/MathCAT/issues/105)
806
    // if this change sticks, choose_command() needs to be changed and this hack should go away
807
0
    let mut alt_key = alt_key;
808
0
    if alt_key && control_key && [VK_LEFT, VK_RIGHT, VK_UP, VK_DOWN].contains(&key) {
809
0
        alt_key = false;
810
0
    }
811
0
  if alt_key || meta_key {
812
0
        bail!("Invalid argument to key_press_to_command_and_param");
813
0
    }
814
815
    let command;
816
    let param;
817
0
  match key {
818
0
        VK_LEFT => {
819
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move,   NavigationCommand::Read, NavigationCommand::Move,     NavigationCommand::Describe);
820
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Previous, NavigationParam::Previous, NavigationParam::CellPrevious, NavigationParam::Previous);
821
0
            },
822
0
        VK_RIGHT => {
823
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Read, NavigationCommand::Move,    NavigationCommand::Describe);
824
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Next, NavigationParam::Next, NavigationParam::CellNext, NavigationParam::Next);
825
0
            },
826
0
        VK_UP => {
827
0
            command = choose_command(shift_key, control_key, NavigationCommand::Zoom,      NavigationCommand::ChangeNavMode, NavigationCommand::Move,   NavigationCommand::Zoom);
828
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Previous,  NavigationParam::Previous,      NavigationParam::CellUp, NavigationParam::Start);
829
0
            },
830
0
        VK_DOWN => {
831
0
            command = choose_command(shift_key, control_key, NavigationCommand::Zoom, NavigationCommand::ChangeNavMode, NavigationCommand::Move,     NavigationCommand::Zoom);
832
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Next, NavigationParam::Next,          NavigationParam::CellDown, NavigationParam::End);
833
0
            },
834
0
        VK_RETURN => {
835
0
            command = choose_command(shift_key, control_key, NavigationCommand::Locate,  NavigationCommand::Last, NavigationCommand::Locate, NavigationCommand::Last);
836
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Previous,NavigationParam::Last, NavigationParam::Last,    NavigationParam::Last);
837
0
            },
838
0
        VK_SPACE => {
839
0
            command = choose_command(shift_key, control_key, NavigationCommand::Read,   NavigationCommand::ToggleSpeakMode,    NavigationCommand::Read,        NavigationCommand::Describe);
840
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Current, NavigationParam::Last,                NavigationParam::CellCurrent, NavigationParam::Current);
841
0
            },
842
    
843
0
        VK_HOME => {
844
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Move,    NavigationCommand::Move,      NavigationCommand::ReadTo);
845
0
            param =   choose_param(  shift_key, control_key, NavigationParam::Start,NavigationParam::ColStart, NavigationParam::LineStart, NavigationParam::Start);
846
0
            },
847
0
        VK_END => {
848
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Move,   NavigationCommand::Move,    NavigationCommand::ReadTo);
849
0
            param =   choose_param(  shift_key, control_key, NavigationParam::End,  NavigationParam::ColEnd, NavigationParam::LineEnd, NavigationParam::End);
850
0
            },
851
0
        VK_BACK => {
852
0
            command = NavigationCommand::MoveLastLocation;
853
0
            param = NavigationParam::Last;
854
0
            },
855
0
        VK_ESCAPE => {
856
0
            command = NavigationCommand::Exit;
857
0
            param = NavigationParam::Last;
858
0
            },
859
0
        0x30..=0x39 => {  // '0' ... '9'
860
0
            command = choose_command(shift_key, control_key, NavigationCommand::Move, NavigationCommand::Read, NavigationCommand::SetPlacemarker, NavigationCommand::Describe);
861
            static PLACE_MARKER: &[NavigationParam] = &[
862
                NavigationParam::Placemarker0,
863
                NavigationParam::Placemarker1,
864
                NavigationParam::Placemarker2,
865
                NavigationParam::Placemarker3,
866
                NavigationParam::Placemarker4,
867
                NavigationParam::Placemarker5,
868
                NavigationParam::Placemarker6,
869
                NavigationParam::Placemarker7,
870
                NavigationParam::Placemarker8,
871
                NavigationParam::Placemarker9,
872
            ];
873
0
            param = PLACE_MARKER[key-0x30];
874
        },
875
0
        _ => bail!("Unknown key press/command"),
876
    };
877
    
878
0
  return Ok( (command, param) );
879
0
}
880
881
// translate the key presses into commands
882
883
884
2
fn navigation_command_string(command: NavigationCommand, param: NavigationParam) -> &'static str {
885
2
  match command {
886
      NavigationCommand::Move => {
887
1
            return match param {
888
0
                NavigationParam::Previous => "MovePrevious",
889
0
                NavigationParam::Next => "MoveNext",
890
1
                NavigationParam::Start => "MoveStart",
891
0
                NavigationParam::End => "MoveEnd",
892
0
                NavigationParam::LineStart => "MoveLineStart",
893
0
                NavigationParam::LineEnd => "MoveLineEnd",
894
0
                NavigationParam::CellPrevious => "MoveCellPrevious",
895
0
                NavigationParam::CellNext => "MoveCellNext",
896
0
                NavigationParam::CellUp => "MoveCellUp",
897
0
                NavigationParam::CellDown => "MoveCellDown",
898
0
                NavigationParam::ColStart => "MoveColumnStart",
899
0
                NavigationParam::ColEnd => "MoveColumnEnd",
900
                _ => {
901
0
                    if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
902
0
                        panic!("Internal Error: Found illegal value for param of NavigationCommand::Move");
903
0
                    }
904
                    static MOVE_TO: &[&str] = &["MoveTo0","MoveTo1","MoveTo2","MoveTo3","MoveTo4","MoveTo5","MoveTo6","MoveTo7","MoveTo8","MoveTo9"];
905
0
                    return MOVE_TO[(param as usize) - (NavigationParam::Placemarker0 as usize)];
906
                }
907
            }
908
        },
909
        NavigationCommand::Zoom => {
910
1
            return match param {
911
0
                NavigationParam::Next => "ZoomIn",
912
1
                NavigationParam::Previous => "ZoomOut",
913
0
                NavigationParam::Start => "ZoomOutAll",
914
0
                NavigationParam::End => "ZoomInAll",
915
0
                _  => panic!("Illegal param for NavigationCommand::Zoom"),
916
            }
917
        },
918
        NavigationCommand::MoveLastLocation => {
919
0
            return "MoveLastLocation";
920
        },
921
        NavigationCommand::Read => {
922
0
            return match param {
923
0
                NavigationParam::Previous => "ReadPrevious",
924
0
                NavigationParam::Next => "ReadNext",
925
0
                NavigationParam::Current => "ReadCurrent",
926
0
                NavigationParam::CellCurrent => "ReadCellCurrent",
927
0
                NavigationParam::Start => "ReadStart",
928
0
                NavigationParam::End => "ReadEnd",
929
0
                NavigationParam::LineStart => "ReadLineStart",
930
0
                NavigationParam::LineEnd => "ReadLineEnd",
931
                _ => {
932
0
                    if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
933
0
                        panic!("Internal Error: Found illegal value for param of NavigationCommand::Move");
934
0
                    }
935
                    static READ_PLACE_MARKERS: &[&str] = &["Read0","Read1","Read2","Read3","Read4","Read5","Read6","Read7","Read8","Read9"];
936
0
                    return READ_PLACE_MARKERS[(param as usize) - (NavigationParam::Placemarker0 as usize)];
937
                },
938
            }
939
        },
940
        NavigationCommand::Describe => {
941
0
            return match param {
942
0
                NavigationParam::Previous => "DescribePrevious",
943
0
                NavigationParam::Next => "DescribeNext",
944
0
                NavigationParam::Current => "DescribeCurrent",
945
                _ => {
946
0
                    if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
947
0
                        panic!("Internal Error: Found illegal value for param of NavigationCommand::Describe");
948
0
                    }
949
                    static DESCRIBE_PLACE_MARKERS: &[&str] = &["Describe0","Describe1","Describe2","Describe3","Describe4","Describe5","Describe6","Describe7","Describe8","Describe9"];
950
0
                    return DESCRIBE_PLACE_MARKERS[(param as usize) - (NavigationParam::Placemarker0 as usize)];
951
                }
952
            }
953
        },
954
        NavigationCommand::ReadTo => {
955
0
            todo!("ReadTo navigation command")
956
        },
957
        NavigationCommand::Locate => {
958
0
            if param ==NavigationParam::Previous {
959
0
                return "WhereAmI";
960
0
            } else if param ==NavigationParam::Last {
961
0
                return "WhereAmIAll";
962
0
            }
963
        },
964
        NavigationCommand::ChangeNavMode => {
965
0
            if param ==NavigationParam::Previous {
966
0
                return "ToggleZoomLockUp";
967
0
            } else if param ==NavigationParam::Next {
968
0
                return "ToggleZoomLockDown";
969
0
            }
970
        },
971
        NavigationCommand::ToggleSpeakMode => {
972
0
            return "ToggleSpeakMode";
973
        },
974
        NavigationCommand::SetPlacemarker => {
975
0
            if param < NavigationParam::Placemarker0 || param > NavigationParam::Placemarker9 {
976
0
                panic!("Internal Error: Found illegal value for param of NavigationCommand::SetPlacemarker");
977
0
            }
978
            static SET_PLACE_MARKER: &[&str] = &["SetPlacemarker0","SetPlacemarker1","SetPlacemarker2","SetPlacemarker3","SetPlacemarker4","SetPlacemarker5","SetPlacemarker6","SetPlacemarker7","SetPlacemarker8","SetPlacemarker9"];
979
0
            return SET_PLACE_MARKER[(param as usize) - (NavigationParam::Placemarker0 as usize)];
980
        },
981
        NavigationCommand::Exit => {
982
0
            return "Exit";
983
        },
984
        NavigationCommand::Last => {
985
0
            return "Error";
986
        }
987
    };
988
0
    return "Error";
989
2
}
990
991
#[cfg(test)]
992
mod tests {
993
    use super::*;
994
    #[allow(unused_imports)]
995
    use crate::init_logger;
996
    use crate::interface::*;
997
998
    #[cfg(test)]
    /// Runs the navigation `command` on `mathml` and returns the resulting speech
    /// (with trailing spaces, commas, and semicolons removed).
    ///
    /// Panics if the command fails. If `result_id` is not empty, also asserts that the id of the
    /// navigation node after the command is `result_id`.
    fn test_command(command: &'static str, mathml: Element, result_id: &str) -> String {
        // debug!("\nCommand: {}", command);
        NAVIGATION_STATE.with(|nav_stack| {
            let (start_id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
            let full_speech = match do_navigate_command_string(mathml, command) {
                Ok(speech) => speech,
                Err(e) => panic!("\nStarting at '{}', '{} failed.\n{}",
                                    start_id, command, &crate::interface::errors_to_string(&e)),
            };
            let nav_speech = full_speech.trim_end_matches(&[' ', ',', ';']);
            // debug!("Full speech: {}", nav_speech);
            if !result_id.is_empty() {
                let (id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
                assert_eq!(result_id, id, "\nStarting at '{}', '{} failed.", start_id, command);
            }
            nav_speech.to_string()
        })
    }
1022
1023
56
    fn init_default_prefs(mathml: &str, nav_mode_default: &str) {
1024
56
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1025
56
        set_preference("NavMode", nav_mode_default).unwrap();
1026
56
        set_preference("NavVerbosity", "Verbose").unwrap();
1027
56
        set_preference("AutoZoomOut", "True").unwrap();
1028
56
        set_preference("Language", "en").unwrap();
1029
56
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
1030
56
        set_preference("Verbosity", "Medium").unwrap();
1031
56
        set_preference("Overview", "False").unwrap();
1032
56
        set_mathml(mathml).unwrap();
1033
56
    }
1034
1035
    /// Zooming in on a fraction goes to the numerator, then to its base, and then stays put.
    #[test]
    fn zoom_in() -> Result<()> {
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mi id='denom'>d</mi>
            </mfrac></math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);
            for expected_id in ["msup", "base", "base"] {
                test_command("ZoomIn", mathml, expected_id);
            }
            Ok( () )
        })
    }
1051
1052
    /// Checks "ZoomIn" followed by "MoveNext" from a fresh start in each of the three navigation modes.
    #[test]
    fn test_init_navigate_move_right() -> Result<()> {
        // this is how navigation typically starts up
        let mathml_str = " <math display='block' id='id-0'>
            <mrow id='id-1'>
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
                <mo id='id-3'>=</mo>
                <mrow id='id-4'>
                    <mi id='id-5'>a</mi>
                    <mo id='id-6'>-</mo>
                    <mn id='id-7'>2</mn>
                </mrow>
            </mrow>
        </math>";

        // (NavMode, log banner, id after ZoomIn, id after MoveNext)
        let scenarios = [
            ("Enhanced", "--- Enhanced ---", "msup", "id-3"),
            ("Simple", "--- Simple ---", "msup", "id-3"),
            ("Character", "--- Character ---", "base", "exp"),
        ];
        for (nav_mode, banner, zoom_in_id, move_next_id) in scenarios {
            init_default_prefs(mathml_str, nav_mode);
            debug!("{}", banner);
            MATHML_INSTANCE.with(|package_instance| {
                let package_instance = package_instance.borrow();
                let mathml = get_element(&package_instance);
                test_command("ZoomIn", mathml, zoom_in_id);
                test_command("MoveNext", mathml, move_next_id);
            });
        }
        Ok( () )
    }
1094
    
1095
    /// Checks where "ZoomIn" lands for parenthesized factors in both Enhanced and Simple modes.
    #[test]
    fn zoom_in_parens() -> Result<()> {
        // (a+b)(c+d) + 1
        let mathml_str = " <math display='block' id='id-0'>
            <mrow id='id-1'>
                <mrow id='id-2'>
                    <mrow id='id-3'>
                    <mo stretchy='false' id='id-4'>(</mo>
                    <mrow id='id-5'>
                        <mi id='id-6'>a</mi>
                        <mo id='id-7'>+</mo>
                        <mi id='id-8'>b</mi>
                    </mrow>
                    <mo stretchy='false' id='id-9'>)</mo>
                    </mrow>
                    <mo id='id-10'>&#x2062;</mo>
                    <mrow id='id-11'>
                    <mo stretchy='false' id='id-12'>(</mo>
                    <mrow id='id-13'>
                        <mi id='id-14'>c</mi>
                        <mo id='id-15'>+</mo>
                        <mi id='id-16'>d</mi>
                    </mrow>
                    <mo stretchy='false' id='id-17'>)</mo>
                    </mrow>
                </mrow>
                <mo id='id-18'>+</mo>
                <mn id='id-19'>1</mn>
            </mrow>
        </math>";
        init_default_prefs(mathml_str, "Enhanced");
        MATHML_INSTANCE.with(|package_instance| {
            let package_instance = package_instance.borrow();
            let mathml = get_element(&package_instance);

            set_preference("NavMode", "Enhanced")?;
            debug!("\n------EnhancedMode----------");
            for expected_id in ["id-2", "id-5", "id-6"] {
                test_command("ZoomIn", mathml, expected_id);
            }

            // repeat, but this time with "Simple"
            set_preference("NavMode", "Simple")?;
            debug!("\n------SimpleMode----------");
            test_command("ZoomOutAll", mathml, "id-1");
            for expected_id in ["id-4", "id-4"] {
                test_command("ZoomIn", mathml, expected_id);
            }
            Ok( () )
        })
    }
1144
    
1145
    #[test]
1146
1
    fn zoom_in_all() -> Result<()> {
1147
1
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
1148
1
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
1149
1
                <mi id='denom'>d</mi>
1150
1
            </mfrac></math>";
1151
1
        init_default_prefs(mathml_str, "Enhanced");
1152
1
        return MATHML_INSTANCE.with(|package_instance| {
1153
1
            let package_instance = package_instance.borrow();
1154
1
            let mathml = get_element(&package_instance);
1155
1
            test_command("ZoomInAll", mathml, "base");
1156
1
            return Ok( () );
1157
1
        });
1158
1
    }
1159
1160
    
1161
    #[test]
1162
1
    fn zoom_out() -> Result<()> {
1163
1
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
1164
1
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
1165
1
                <mi id='denom'>d</mi>
1166
1
            </mfrac></math>";
1167
1
            init_default_prefs(mathml_str, "Enhanced");
1168
1
            return MATHML_INSTANCE.with(|package_instance| {
1169
1
            let package_instance = package_instance.borrow();
1170
1
            let mathml = get_element(&package_instance);
1171
1
            NAVIGATION_STATE.with(|nav_stack| {
1172
1
                nav_stack.borrow_mut().push(NavigationPosition{
1173
1
                    current_node: "base".to_string(),
1174
1
                    current_node_offset: 0
1175
1
                }, "None")
1176
1
            });
1177
1
            test_command("ZoomOut", mathml, "msup");
1178
1179
1
            let _nav_speech = do_navigate_command_and_param(mathml, NavigationCommand::Zoom, NavigationParam::Previous)
?0
;
1180
1
            NAVIGATION_STATE.with(|nav_stack| {
1181
1
                let (id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
1182
1
                assert_eq!(id, "mfrac");
1183
1
            });
1184
1
            return Ok( () );
1185
1
        });
1186
1
    }
1187
    
1188
    #[test]
1189
1
    fn zoom_out_all() -> Result<()> {
1190
1
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
1191
1
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
1192
1
                <mi id='denom'>d</mi>
1193
1
            </mfrac></math>";
1194
1
            init_default_prefs(mathml_str, "Enhanced");
1195
1
            return MATHML_INSTANCE.with(|package_instance| {
1196
1
            let package_instance = package_instance.borrow();
1197
1
            let mathml = get_element(&package_instance);
1198
1
            NAVIGATION_STATE.with(|nav_stack| {
1199
1
                nav_stack.borrow_mut().push(NavigationPosition{
1200
1
                    current_node: "base".to_string(),
1201
1
                    current_node_offset: 0
1202
1
                }, "None")
1203
1
            });
1204
1205
1
            test_command("ZoomOutAll", mathml, "mfrac");
1206
1
            return Ok( () );
1207
1
        });
1208
1
    }
1209
    
1210
    #[test]
1211
1
    fn move_start_end() -> Result<()> {
1212
1
        let mathml_str = " <math display='block' id='id-0'>
1213
1
        <mrow id='id-1'>
1214
1
          <mi id='id-2'>x</mi>
1215
1
          <mo id='id-3'>=</mo>
1216
1
          <mrow id='id-4'>
1217
1
            <mi id='id-5'>a</mi>
1218
1
            <mo id='id-6'>-</mo>
1219
1
            <mn id='id-7'>2</mn>
1220
1
          </mrow>
1221
1
        </mrow>
1222
1
       </math>";
1223
1
       init_default_prefs(mathml_str, "Enhanced");
1224
1
       return MATHML_INSTANCE.with(|package_instance| {
1225
1
            let package_instance = package_instance.borrow();
1226
1
            let mathml = get_element(&package_instance);
1227
1
            NAVIGATION_STATE.with(|nav_stack| {
1228
1
                nav_stack.borrow_mut().push(NavigationPosition{
1229
1
                    current_node: "id-4".to_string(),
1230
1
                    current_node_offset: 0
1231
1
                }, "None")
1232
1
            });
1233
1234
1
           set_preference("NavMode", "Character")
?0
;
1235
1
            test_command("MoveStart", mathml, "id-2");
1236
1
            test_command("MoveEnd", mathml, "id-7");
1237
1
           set_preference("NavMode", "Simple")
?0
;
1238
1
            test_command("MoveStart", mathml, "id-2");
1239
1
            test_command("MoveEnd", mathml, "id-7");
1240
1
           set_preference("NavMode", "Enhanced")
?0
;
1241
1
            test_command("MoveStart", mathml, "id-2");
1242
1
            test_command("MovePrevious", mathml, "id-2");
1243
1
            test_command("MoveEnd", mathml, "id-4");
1244
1
            test_command("MoveNext", mathml, "id-4");
1245
1
            return Ok( () );
1246
1
        });
1247
1
    }
1248
    
1249
    #[test]
1250
1
    fn move_line_start_end() -> Result<()> {
1251
1
        let mathml_str = " <math display='block' id='id-0'>
1252
1
        <mfrac displaystyle='true' id='id-1'>
1253
1
          <mi id='id-2'>x</mi>
1254
1
          <mrow id='id-3'>
1255
1
            <msup id='id-4'>
1256
1
              <mi id='id-5'>y</mi>
1257
1
              <mn id='id-6'>2</mn>
1258
1
            </msup>
1259
1
            <mo id='id-7'>+</mo>
1260
1
            <mn id='id-8'>1</mn>
1261
1
          </mrow>
1262
1
        </mfrac>
1263
1
       </math>";
1264
1
       init_default_prefs(mathml_str, "Enhanced");
1265
1
       return MATHML_INSTANCE.with(|package_instance| {
1266
1
            let package_instance = package_instance.borrow();
1267
1
            let mathml = get_element(&package_instance);
1268
1
            NAVIGATION_STATE.with(|nav_stack| {
1269
1
                nav_stack.borrow_mut().push(NavigationPosition{
1270
1
                    current_node: "id-7".to_string(),
1271
1
                    current_node_offset: 0
1272
1
                }, "None")
1273
1
            });
1274
1275
1
           set_preference("NavMode", "Character")
?0
;
1276
1
            test_command("MoveLineStart", mathml, "id-5");
1277
1
            test_command("MoveLineEnd", mathml, "id-8");
1278
1
           set_preference("NavMode", "Simple")
?0
;
1279
1
            test_command("MoveLineStart", mathml, "id-4");
1280
1
            test_command("MoveLineEnd", mathml, "id-8");
1281
1
           set_preference("NavMode", "Enhanced")
?0
;
1282
1
            test_command("MoveLineStart", mathml, "id-4");
1283
1
            test_command("MoveLineEnd", mathml, "id-8");
1284
1
            test_command("MoveEnd", mathml, "id-3");
1285
1
            return Ok( () );
1286
1
        });
1287
1
    }
1288
    
1289
    #[test]
1290
1
    fn text_extremes_and_move_last_location() -> Result<()> {
1291
1
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
1292
1
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
1293
1
                <mi id='denom'>d</mi>
1294
1
            </mfrac></math>";
1295
1
            init_default_prefs(mathml_str, "Enhanced");
1296
1
            return MATHML_INSTANCE.with(|package_instance| {
1297
1
            let package_instance = package_instance.borrow();
1298
1
            let mathml = get_element(&package_instance);
1299
1
            NAVIGATION_STATE.with(|nav_stack| {
1300
1
                nav_stack.borrow_mut().push(NavigationPosition{
1301
1
                    current_node: "base".to_string(),
1302
1
                    current_node_offset: 0
1303
1
                }, "None")
1304
1
            });
1305
1306
1
            test_command("ZoomOutAll", mathml, "mfrac");
1307
1
            test_command("ZoomOut", mathml, "mfrac");
1308
1
            test_command("MoveLastLocation", mathml, "base");       // second zoom out should do nothing
1309
1310
1
            test_command("ZoomOut", mathml, "msup");
1311
1
            test_command("ZoomInAll", mathml, "base");
1312
1
            test_command("ZoomIn", mathml, "base");
1313
1
            test_command("MoveLastLocation", mathml, "msup");       // second zoom in should do nothing
1314
1315
1
            return Ok( () );
1316
1
        });
1317
1
    }
1318
    
1319
    #[test]
1320
1
    fn move_to_start() -> Result<()> {
1321
1
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
1322
1
                <mrow id='num'><msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup><mo id='factorial'>!</mo></mrow>
1323
1
                <mi id='denom'>d</mi>
1324
1
            </mfrac></math>";
1325
1
            init_default_prefs(mathml_str, "Enhanced");
1326
1
            return MATHML_INSTANCE.with(|package_instance| {
1327
1
            let package_instance = package_instance.borrow();
1328
1
            let mathml = get_element(&package_instance);
1329
1
            NAVIGATION_STATE.with(|nav_stack| {
1330
1
                nav_stack.borrow_mut().push(NavigationPosition{
1331
1
                    current_node: "denom".to_string(),
1332
1
                    current_node_offset: 0
1333
1
                }, "None")
1334
1
            });
1335
1
            test_command("MoveLineStart", mathml, "denom");
1336
1337
1
            NAVIGATION_STATE.with(|nav_stack| {
1338
1
                nav_stack.borrow_mut().push(NavigationPosition{
1339
1
                    current_node: "factorial".to_string(),
1340
1
                    current_node_offset: 0
1341
1
                }, "None")
1342
1
            });
1343
1
            test_command("MoveLineStart", mathml, "msup");
1344
1345
1
            let _nav_speech = do_navigate_command_and_param(mathml, NavigationCommand::Move, NavigationParam::Start)
?0
;
1346
1
            NAVIGATION_STATE.with(|nav_stack| {
1347
1
                let (id, _) = nav_stack.borrow().get_navigation_mathml_id(mathml);
1348
1
                assert_eq!(id, "num");
1349
1
            });
1350
1
            return Ok( () );
1351
1
        });
1352
1
    }
1353
    
1354
    #[test]
1355
1
    fn move_right_sup() -> Result<()> {
1356
1
        let mathml_str = "<math display='block' id='id-0'>
1357
1
        <mrow id='id-1'>
1358
1
          <msup id='id-2'>
1359
1
            <mn id='id-3'>2</mn>
1360
1
            <mi id='id-4'>q</mi>
1361
1
          </msup>
1362
1
          <mo id='id-5'>-</mo>
1363
1
          <mi id='id-6'>x</mi>
1364
1
        </mrow>
1365
1
        </math>";
1366
1
        init_default_prefs(mathml_str, "Enhanced");
1367
1
        return MATHML_INSTANCE.with(|package_instance| {
1368
1
            let package_instance = package_instance.borrow();
1369
1
            let mathml = get_element(&package_instance);
1370
1
            NAVIGATION_STATE.with(|nav_stack| {
1371
1
                nav_stack.borrow_mut().push(NavigationPosition{
1372
1
                    current_node: "id-2".to_string(),
1373
1
                    current_node_offset: 0
1374
1
                }, "None")
1375
1
            });
1376
1
            set_preference("NavMode", "Enhanced")
?0
;
1377
1
            test_command("MoveNext", mathml, "id-5");
1378
1379
            // reset start and test Simple
1380
1
            NAVIGATION_STATE.with(|nav_stack| {
1381
1
                nav_stack.borrow_mut().push(NavigationPosition{
1382
1
                    current_node: "id-2".to_string(),
1383
1
                    current_node_offset: 0
1384
1
                }, "None")
1385
1
            });
1386
1
            set_preference("NavMode", "Simple")
?0
;
1387
1
            test_command("MoveNext", mathml, "id-5");
1388
1389
            // reset start and test Character
1390
1
            NAVIGATION_STATE.with(|nav_stack| {
1391
1
                nav_stack.borrow_mut().push(NavigationPosition{
1392
1
                    current_node: "id-3".to_string(),
1393
1
                    current_node_offset: 0
1394
1
                }, "None")
1395
1
            });
1396
1
            set_preference("NavMode", "Character")
?0
;
1397
1
            test_command("MoveNext", mathml, "id-4");
1398
1
            test_command("MoveNext", mathml, "id-5");
1399
1
            return Ok( () );
1400
1
        });
1401
1
    }
1402
1403
        
1404
    #[test]
1405
1
    fn move_msubsup_char() -> Result<()> {
1406
1
        let mathml_str = "<math display='block' id='id-0'>
1407
1
        <mrow id='id-1'>
1408
1
          <mn id='id-2'>1</mn>
1409
1
          <mo id='id-3'>+</mo>
1410
1
          <msubsup id='id-4'>
1411
1
            <mi id='id-5'>x</mi>
1412
1
            <mn id='id-6'>2</mn>
1413
1
            <mn id='id-7'>3</mn>
1414
1
          </msubsup>
1415
1
          <mo id='id-8'>+</mo>
1416
1
          <mn id='id-9'>4</mn>
1417
1
        </mrow>
1418
1
       </math>";
1419
1
        init_default_prefs(mathml_str, "Character");
1420
1
        return MATHML_INSTANCE.with(|package_instance| {
1421
1
            let package_instance = package_instance.borrow();
1422
1
            let mathml = get_element(&package_instance);
1423
1
            assert_eq!("zoomed in all of the way; 1", test_command("ZoomInAll", mathml, "id-2"));
1424
1
            assert_eq!("move right; plus", test_command("MoveNext", mathml, "id-3"));
1425
1
            assert_eq!("move right; in base; x", test_command("MoveNext", mathml, "id-5"));
1426
1
            assert_eq!("move right; in subscript; 2", test_command("MoveNext", mathml, "id-6"));
1427
1
            assert_eq!("move right; in superscript; 3", test_command("MoveNext", mathml, "id-7"));
1428
1
            assert_eq!("move right; out of superscript; plus", test_command("MoveNext", mathml, "id-8"));
1429
1
            assert_eq!("move left; in superscript; 3", test_command("MovePrevious", mathml, "id-7"));
1430
1
            assert_eq!("move left; in subscript; 2", test_command("MovePrevious", mathml, "id-6"));
1431
1
            assert_eq!("move left; in base; x", test_command("MovePrevious", mathml, "id-5"));
1432
1
            assert_eq!("move left; out of base; plus", test_command("MovePrevious", mathml, "id-3"));
1433
1434
1
            return Ok( () );
1435
1
        });
1436
1
    }
1437
        
1438
    #[test]
1439
1
    fn zoom_logbase() -> Result<()> {
1440
1
        let mathml_str = "<math display='block' id='id-0'>
1441
1
            <mrow displaystyle='true' id='id-1'>
1442
1
                <msub id='id-2'>
1443
1
                    <mi id='id-3'>log</mi>
1444
1
                    <mn id='id-4'>2</mn>
1445
1
                </msub>
1446
1
                <mo data-changed='added' id='id-5'>&#x2061;</mo>
1447
1
                <mi id='id-6'>x</mi>a
1448
1
            </mrow>
1449
1
            </math>";
1450
1
        init_default_prefs(mathml_str, "Enhanced");
1451
1
        return MATHML_INSTANCE.with(|package_instance| {
1452
1
            let package_instance = package_instance.borrow();
1453
1
            let mathml = get_element(&package_instance);
1454
1
            assert_eq!("zoom in; the log base 2", test_command("ZoomIn", mathml, "id-2"));
1455
1
            assert_eq!("zoom in; in base; 2", test_command("ZoomIn", mathml, "id-4"));
1456
1
            assert_eq!("zoomed in all of the way; 2", test_command("ZoomIn", mathml, "id-4"));
1457
1
            debug!("Now zooming out");
1458
1
            assert_eq!("zoom out; out of base; the log base 2", test_command("ZoomOut", mathml, "id-2"));
1459
1
            assert_eq!("zoom out; the log base 2, of x", test_command("ZoomOut", mathml, "id-1"));
1460
1
            assert_eq!("zoomed out all of the way; the log base 2, of x", test_command("ZoomOut", mathml, "id-1"));
1461
1
            return Ok( () );
1462
1
        });
1463
1
    }
1464
        
1465
    #[test]
1466
1
    fn zoom_logbase_power() -> Result<()> {
1467
1
        let mathml_str = "<math display='block' id='id-0'>
1468
1
            <mrow displaystyle='true' id='id-1'>
1469
1
                <msubsup id='id-2'>
1470
1
                    <mi id='id-3'>log</mi>
1471
1
                    <mn id='id-4'>2</mn>
1472
1
                    <mn id='id-5'>3</mn>
1473
1
                </msubsup>
1474
1
                <mo data-changed='added' id='id-6'>&#x2061;</mo>
1475
1
                <mi id='id-7'>x</mi>
1476
1
            </mrow>
1477
1
            </math>";
1478
1
        init_default_prefs(mathml_str, "Enhanced");
1479
1
        return MATHML_INSTANCE.with(|package_instance| {
1480
1
            let package_instance = package_instance.borrow();
1481
1
            let mathml = get_element(&package_instance);
1482
1
            assert_eq!("zoom in; the log base 2, cubed", test_command("ZoomIn", mathml, "id-2"));
1483
1
            assert_eq!("zoom in; in base; the log base 2", test_command("ZoomIn", mathml, "id-2-log-base"));
1484
1
            assert_eq!("zoom in; in base; 2", test_command("ZoomIn", mathml, "id-4"));
1485
1
            assert_eq!("zoomed in all of the way; 2", test_command("ZoomIn", mathml, "id-4"));
1486
1
            debug!("Now zooming out");
1487
1
            assert_eq!("zoom out; out of base; the log base 2", test_command("ZoomOut", mathml, "id-2-log-base"));
1488
1
            assert_eq!("zoom out; out of base; the log base 2, cubed", test_command("ZoomOut", mathml, "id-2"));
1489
1
            assert_eq!("zoom out; the log base 2, cubed of x", test_command("ZoomOut", mathml, "id-1"));
1490
1
            assert_eq!("zoomed out all of the way; the log base 2, cubed of x", test_command("ZoomOut", mathml, "id-1"));
1491
1
            return Ok( () );
1492
1
        });
1493
1
    }
1494
        
1495
    #[test]
1496
1
    fn zoom_msubsup() -> Result<()> {
1497
        // msubsup is trickier because it creates an intent within an intent, so offsets need to be handled properly
1498
1
        let mathml_str = "<math id='math'><msubsup id='msubsup'><mi id='base'>𝑥</mi><mn id='sub'>1</mn><mn id='sup'>2</mn></msubsup></math>";
1499
1
        init_default_prefs(mathml_str, "Enhanced");
1500
1
        return MATHML_INSTANCE.with(|package_instance| {
1501
1
            let package_instance = package_instance.borrow();
1502
1
            let mathml = get_element(&package_instance);
1503
1
            set_preference("NavMode", "Enhanced").unwrap();
1504
1
            debug!("Enhanced mode");
1505
1
            do_commands(mathml)
?0
;
1506
1
            set_preference("NavMode", "Simple").unwrap();
1507
1
            debug!("Simple mode");
1508
1
            do_commands(mathml)
?0
;
1509
1
            set_preference("NavMode", "Character").unwrap();
1510
1
            debug!("Character mode");
1511
1
            assert_eq!("zoom in; in base; x", test_command("ZoomIn", mathml, "base"));
1512
1
            assert_eq!("zoom out; out of base; x sub 1 super 2 end super", test_command("ZoomOut", mathml, "msubsup"));
1513
1
            return Ok( () );
1514
1515
        /// Enhanced and Simple mode should behave the same
1516
2
        fn do_commands(mathml: Element) -> Result<()> {
1517
2
            assert_eq!("zoom in; in base; x sub 1", test_command("ZoomIn", mathml, "msubsup-indexed-by"));
1518
2
            assert_eq!("zoom in; in base; x", test_command("ZoomIn", mathml, "base"));
1519
2
            assert_eq!("zoomed in all of the way; x", test_command("ZoomIn", mathml, "base"));
1520
2
            debug!("Now zooming out");
1521
2
            assert_eq!("zoom out; out of base; x sub 1", test_command("ZoomOut", mathml, "msubsup-indexed-by"));
1522
2
            assert_eq!("zoom out; out of base; x sub 1, squared", test_command("ZoomOut", mathml, "msubsup"));
1523
2
            assert_eq!("zoomed out all of the way; x sub 1, squared", test_command("ZoomOut", mathml, "msubsup"));
1524
2
            return Ok( () );
1525
2
        }
1526
1
        });
1527
1
    }
1528
        
1529
    #[test]
1530
1
    fn move_mmultiscripts_char() -> Result<()> {
1531
1
        let mathml_str = "<math display='block' id='id-0'>
1532
1
            <mmultiscripts data-mjx-texclass='ORD' data-chem-formula='5' id='id-1'>
1533
1
                <mrow data-chem-formula='3' id='id-2'>
1534
1
                    <mo stretchy='false' id='id-3'>[</mo>
1535
1
                    <mmultiscripts data-chem-formula='3' id='id-4'>
1536
1
                        <mi data-chem-element='3' id='id-5'>Co</mi>
1537
1
                        <mn id='id-6'>6</mn>
1538
1
                        <none id='id-7'></none>
1539
1
                    </mmultiscripts>
1540
1
                    <mo stretchy='false' id='id-8'>]</mo>
1541
1
                </mrow>
1542
1
                <none id='id-9'></none>
1543
1
                <mrow id='id-10'>
1544
1
                    <mn id='id-11'>3</mn>
1545
1
                    <mo id='id-12'>+</mo>
1546
1
                </mrow>
1547
1
            </mmultiscripts>
1548
1
            </math>";
1549
1
            init_default_prefs(mathml_str, "Character");
1550
1
            return MATHML_INSTANCE.with(|package_instance| {
1551
1
            let package_instance = package_instance.borrow();
1552
1
            let mathml = get_element(&package_instance);
1553
1
            assert_eq!("zoomed in all of the way; in base; open bracket", test_command("ZoomInAll", mathml, "id-3"));
1554
1
            assert_eq!("move right; in base; cap c o", test_command("MoveNext", mathml, "id-5"));
1555
1
            assert_eq!("move right; in subscript; 6", test_command("MoveNext", mathml, "id-6"));
1556
1
            assert_eq!("move right; out of subscript; close bracket", test_command("MoveNext", mathml, "id-8"));
1557
1
            assert_eq!("move right; in superscript; 3", test_command("MoveNext", mathml, "id-11"));
1558
1
            assert_eq!("move right; plus", test_command("MoveNext", mathml, "id-12"));
1559
1
            assert_eq!("cannot move right, end of math", test_command("MoveNext", mathml, "id-12"));
1560
1
            assert_eq!("move left; 3", test_command("MovePrevious", mathml, "id-11"));
1561
1
            assert_eq!("move left; in base; close bracket", test_command("MovePrevious", mathml, "id-8"));
1562
1
            assert_eq!("move left; in subscript; 6", test_command("MovePrevious", mathml, "id-6"));
1563
1
            assert_eq!("move left; in base; cap c o", test_command("MovePrevious", mathml, "id-5"));
1564
1
            assert_eq!("move left; out of base; open bracket", test_command("MovePrevious", mathml, "id-3"));
1565
1566
1
            return Ok( () );
1567
1
        });
1568
1
    }
1569
1570
    #[test]
1571
1
    fn move_right_char() -> Result<()> {
1572
1
        let mathml_str = "<math id='id-0'>
1573
1
        <mrow displaystyle='true' id='id-1'>
1574
1
          <mi id='id-2'>x</mi>
1575
1
          <mo id='id-3'>=</mo>
1576
1
          <mrow id='id-4'>
1577
1
            <mfrac id='id-5'>
1578
1
              <mn id='id-6'>1</mn>
1579
1
              <mrow id='id-7'>
1580
1
                <mi id='id-8'>a</mi>
1581
1
                <mo id='id-9'>+</mo>
1582
1
                <mn id='id-10'>2</mn>
1583
1
              </mrow>
1584
1
            </mfrac>
1585
1
            <mo id='id-11'>+</mo>
1586
1
            <mrow id='id-12'>
1587
1
              <mn id='id-13'>3</mn>
1588
1
              <mo id='id-14'>&#x2062;</mo>
1589
1
              <mi id='id-15'>b</mi>
1590
1
            </mrow>
1591
1
          </mrow>
1592
1
        </mrow>
1593
1
        </math>";
1594
1
        init_default_prefs(mathml_str, "Character");
1595
1
        return MATHML_INSTANCE.with(|package_instance| {
1596
1
            let package_instance = package_instance.borrow();
1597
1
            let mathml = get_element(&package_instance);
1598
1
            test_command("ZoomInAll", mathml, "id-2");
1599
1
            test_command("MoveNext", mathml, "id-3");
1600
1
            test_command("MoveNext", mathml, "id-6");
1601
1
            test_command("MoveNext", mathml, "id-8");
1602
1
            test_command("MoveNext", mathml, "id-9");
1603
1
            test_command("MoveNext", mathml, "id-10");
1604
1
            test_command("MoveNext", mathml, "id-11");
1605
1
            test_command("MoveNext", mathml, "id-13");
1606
1
            test_command("MoveNext", mathml, "id-15");
1607
1
            test_command("MoveNext", mathml, "id-15");
1608
1609
1
            return Ok( () );
1610
1
        });
1611
1
    }
1612
1613
    #[test]
1614
1
    fn char_mode_paren_test() -> Result<()> {
1615
1
        let mathml_str = "<math display='block' id='id-0'>
1616
1
            <mrow displaystyle='true' id='id-1'>
1617
1
                <mrow id='id-2'>
1618
1
                    <mo id='id-3'>(</mo>
1619
1
                    <mi id='id-4'>a</mi>
1620
1
                    <mo id='id-5'>)</mo>
1621
1
                </mrow>
1622
1
                <mo id='id-6'>&#x2062;</mo>
1623
1
                <mrow id='id-7'>
1624
1
                    <mo id='id-8'>(</mo>
1625
1
                    <mi id='id-9'>b</mi>
1626
1
                    <mo id='id-10'>)</mo>
1627
1
                </mrow>
1628
1
            </mrow>
1629
1
        </math>";
1630
1
        init_default_prefs(mathml_str, "Character");
1631
1
        return MATHML_INSTANCE.with(|package_instance| {
1632
1
            let package_instance = package_instance.borrow();
1633
1
            let mathml = get_element(&package_instance);
1634
1
            debug!("Character mode");
1635
1
            do_commands(mathml)
?0
;
1636
1
            set_preference("NavMode", "Simple").unwrap();
1637
1
            debug!("Simple mode");
1638
1
            test_command("ZoomIn", mathml, "id-3");  // zooms to the first parenthesis
1639
1
            do_commands(mathml)
?0
;
1640
1
            set_preference("NavMode", "Enhanced").unwrap();
1641
1
            debug!("Enhanced mode");
1642
1
            test_command("ZoomIn", mathml, "id-4");
1643
1
            test_command("MoveNext", mathml, "id-6");
1644
1
            test_command("MoveNext", mathml, "id-9");
1645
1
            test_command("MovePrevious", mathml, "id-6");
1646
1
            test_command("MovePrevious", mathml, "id-4");
1647
1648
1
            return Ok( () );
1649
1
        });
1650
1651
        /// Simple and Character mode should behave the same
1652
2
        fn do_commands(mathml: Element) -> Result<()> {
1653
2
            test_command("ZoomIn", mathml, "id-3");
1654
2
            test_command("MoveNext", mathml, "id-4");
1655
2
            test_command("MoveNext", mathml, "id-5");
1656
2
            test_command("MoveNext", mathml, "id-8");
1657
2
            test_command("MoveNext", mathml, "id-9");
1658
2
            test_command("MoveNext", mathml, "id-10");
1659
2
            test_command("MovePrevious", mathml, "id-9");
1660
2
            test_command("MovePrevious", mathml, "id-8");
1661
2
            test_command("MovePrevious", mathml, "id-5");
1662
2
            test_command("ZoomOutAll", mathml, "id-1");
1663
2
            return Ok( () );
1664
2
        }
1665
1
    }
1666
1667
    #[test]
1668
1
    fn char_mode_trig_test() -> Result<()> {
1669
1
        let mathml_str = "<math id='id-0'>
1670
1
            <mrow id='id-1'>
1671
1
            <mi id='id-2'>sin</mi>
1672
1
            <mo id='id-3'>&#x2061;</mo>
1673
1
            <mrow id='id-4'>
1674
1
                <mo id='id-5'>(</mo>
1675
1
                <mi id='id-6'>x</mi>
1676
1
                <mo id='id-7'>)</mo>
1677
1
            </mrow>
1678
1
            </mrow>
1679
1
        </math>";
1680
1
        init_default_prefs(mathml_str, "Simple");
1681
1
        return MATHML_INSTANCE.with(|package_instance| {
1682
1
            let package_instance = package_instance.borrow();
1683
1
            let mathml = get_element(&package_instance);
1684
1
            do_commands(mathml)
?0
;
1685
1
            set_preference("NavMode", "Simple").unwrap();
1686
1
            do_commands(mathml)
?0
;
1687
1
            set_preference("NavMode", "Enhanced").unwrap();
1688
1
            test_command("ZoomIn", mathml, "id-2");
1689
1
            test_command("MoveNext", mathml, "id-6");
1690
1
            test_command("MovePrevious", mathml, "id-2");
1691
1692
1
            return Ok( () );
1693
1
        });
1694
1695
        
1696
        /// Simple and Character mode should behave the same
1697
2
        fn do_commands(mathml: Element) -> Result<()> {
1698
2
            test_command("ZoomIn", mathml, "id-2");
1699
2
            test_command("MoveNext", mathml, "id-5");
1700
2
            test_command("MoveNext", mathml, "id-6");
1701
2
            test_command("MoveNext", mathml, "id-7");
1702
2
            test_command("MovePrevious", mathml, "id-6");
1703
2
            test_command("MovePrevious", mathml, "id-5");
1704
2
            test_command("MovePrevious", mathml, "id-2");
1705
2
            test_command("ZoomOutAll", mathml, "id-1");
1706
2
            return Ok( () );
1707
2
        }
1708
1
    }
1709
    
1710
    #[test]
1711
1
    fn move_char_speech() -> Result<()> {
1712
1
        let mathml_str = "<math display='block' id='id-0'>
1713
1
                <mrow id='id-1'>
1714
1
                <mfrac id='id-2'>
1715
1
                    <mi id='id-3'>x</mi>
1716
1
                    <mi id='id-4'>y</mi>
1717
1
                </mfrac>
1718
1
                <mo id='id-5'>&#x2062;</mo>
1719
1
                <mi id='id-6'>z</mi>
1720
1
                </mrow>
1721
1
            </math>";
1722
1
            init_default_prefs(mathml_str, "Character");
1723
1
            return MATHML_INSTANCE.with(|package_instance| {
1724
1
            let package_instance = package_instance.borrow();
1725
1
            let mathml = get_element(&package_instance);
1726
1
            test_command("ZoomInAll", mathml, "id-3");
1727
1
            assert_eq!("move right; in denominator; y", test_command("MoveNext", mathml, "id-4"));
1728
1
            assert_eq!("move right; out of denominator; z", test_command("MoveNext", mathml, "id-6"));
1729
1
            assert_eq!("move left; in denominator; y", test_command("MovePrevious", mathml, "id-4"));
1730
1
            assert_eq!("move left; in numerator; x", test_command("MovePrevious", mathml, "id-3"));
1731
1732
1
            return Ok( () );
1733
1
        });
1734
1
    }
1735
    
1736
    #[test]
1737
1
    fn move_inside_leaves() -> Result<()> {
1738
1
        let mathml_str = "<math display='block' id='id-0'>
1739
1
                <mrow id='id-1'>
1740
1
                    <mfrac id='id-2'>
1741
1
                        <mi id='id-3'>top</mi>
1742
1
                        <mi id='id-4'>αβγ</mi>
1743
1
                    </mfrac>
1744
1
                </mrow>
1745
1
            </math>";
1746
1
        init_default_prefs(mathml_str, "Character");
1747
1
        return MATHML_INSTANCE.with(|package_instance| {
1748
1
        let package_instance = package_instance.borrow();
1749
1
        let mathml = get_element(&package_instance);
1750
1
        test_command("ZoomInAll", mathml, "id-3");
1751
1
        assert_eq!("zoomed in to first character; t", test_command("ZoomIn", mathml, "id-3"));
1752
1
        assert_eq!("move right; o", test_command("MoveNext", mathml, "id-3"));
1753
1
        assert_eq!("move right; p", test_command("MoveNext", mathml, "id-3"));
1754
1
        assert_eq!("move right; in denominator; αβγ", test_command("MoveNext", mathml, "id-4"));
1755
1
        assert_eq!("zoomed in to first character; alpha", test_command("ZoomIn", mathml, "id-4"));
1756
1
        assert_eq!("move right; beta", test_command("MoveNext", mathml, "id-4"));
1757
1
        assert_eq!("move right; gamma", test_command("MoveNext", mathml, "id-4"));
1758
1
        assert_eq!("cannot move right, end of math", test_command("MoveNext", mathml, "id-4"));
1759
1
        assert_eq!("move left; beta", test_command("MovePrevious", mathml, "id-4"));
1760
1
        assert_eq!("zoom out; αβγ", test_command("ZoomOut", mathml, "id-4"));
1761
1762
1
        return Ok( () );
1763
1
        });
1764
1
    }
1765
    
1766
    #[test]
1767
1
    fn move_enhanced_times() -> Result<()> {
1768
1
        let mathml_str = "<math display='block' id='id-0'>
1769
1
        <mrow displaystyle='true' id='id-1'>
1770
1
          <mn id='id-2'>2</mn>
1771
1
          <mo id='id-3'>&#x2062;</mo>
1772
1
          <mrow id='id-4'>
1773
1
            <mo id='id-5'>(</mo>
1774
1
            <mrow id='id-6'>
1775
1
              <mn id='id-7'>1</mn>
1776
1
              <mo id='id-8'>-</mo>
1777
1
              <mi id='id-9'>x</mi>
1778
1
            </mrow>
1779
1
            <mo id='id-10'>)</mo>
1780
1
          </mrow>
1781
1
        </mrow>
1782
1
       </math>";
1783
1
        init_default_prefs(mathml_str, "Enhanced");
1784
1
        return MATHML_INSTANCE.with(|package_instance| {
1785
1
            let package_instance = package_instance.borrow();
1786
1
            let mathml = get_element(&package_instance);
1787
1
            test_command("ZoomIn", mathml, "id-2");
1788
1
            assert_eq!("move right; times", test_command("MoveNext", mathml, "id-3"));
1789
1
            assert_eq!("move right; 1 minus x", test_command("MoveNext", mathml, "id-6"));
1790
1
            assert_eq!("move left; times", test_command("MovePrevious", mathml, "id-3"));
1791
1
            assert_eq!("move left; 2", test_command("MovePrevious", mathml, "id-2"));
1792
1793
1
            return Ok( () );
1794
1
        });
1795
1
    }
1796
    
1797
    #[test]
1798
1
    fn move_simple_no_times() -> Result<()> {
1799
1
        let mathml_str = "<math display='block' id='id-0'>
1800
1
        <mrow displaystyle='true' id='id-1'>
1801
1
          <mn id='id-2'>2</mn>
1802
1
          <mo id='id-3'>&#x2062;</mo>
1803
1
          <mrow id='id-4'>
1804
1
            <mo id='id-5'>(</mo>
1805
1
            <mrow id='id-6'>
1806
1
              <mn id='id-7'>1</mn>
1807
1
              <mo id='id-8'>-</mo>
1808
1
              <mi id='id-9'>x</mi>
1809
1
            </mrow>
1810
1
            <mo id='id-10'>)</mo>
1811
1
          </mrow>
1812
1
        </mrow>
1813
1
       </math>";
1814
1
        init_default_prefs(mathml_str, "Simple");
1815
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
1816
1
        return MATHML_INSTANCE.with(|package_instance| {
1817
1
            let package_instance = package_instance.borrow();
1818
1
            let mathml = get_element(&package_instance);
1819
1
            test_command("ZoomIn", mathml, "id-2");
1820
1
            assert_eq!("move right; open paren", test_command("MoveNext", mathml, "id-5"));
1821
1
            assert_eq!("move right; 1", test_command("MoveNext", mathml, "id-7"));
1822
1
            assert_eq!("move left; open paren", test_command("MovePrevious", mathml, "id-5"));
1823
1
            assert_eq!("move left; 2", test_command("MovePrevious", mathml, "id-2"));
1824
1825
1
            return Ok( () );
1826
1
        });
1827
1
    }
1828
    
1829
    
1830
    #[test]
1831
1
    fn move_cell() -> Result<()> {
1832
1
        let mathml_str = "<math id='nav-0'>
1833
1
        <mtable id='nav-1'>
1834
1
          <mtr id='nav-2'>
1835
1
            <mtd id='nav-3'> <mn id='nav-4'>1</mn></mtd>
1836
1
            <mtd id='nav-5'> <mn id='nav-6'>2</mn></mtd>
1837
1
            <mtd id='nav-7'><mn id='nav-8'>3</mn> </mtd>
1838
1
          </mtr>
1839
1
          <mtr id='nav-9'>
1840
1
            <mtd id='nav-10'>
1841
1
              <mrow id='nav-11'>
1842
1
                <mi id='nav-12'>x</mi>
1843
1
                <mo id='nav-13'>-</mo>
1844
1
                <mi id='nav-14'>y</mi>
1845
1
              </mrow>
1846
1
            </mtd>
1847
1
            <mtd id='nav-15'>
1848
1
              <mfrac id='nav-16'>
1849
1
                <mn id='nav-17'>1</mn>
1850
1
                <mn id='nav-18'>2</mn>
1851
1
              </mfrac>
1852
1
            </mtd>
1853
1
            <mtd id='nav-19'>
1854
1
              <mi id='nav-20'>z</mi>
1855
1
            </mtd>
1856
1
          </mtr>
1857
1
          <mtr id='nav-21'>
1858
1
            <mtd id='nav-22'><mn id='nav-23'>7</mn> </mtd>
1859
1
            <mtd id='nav-24'><mn id='nav-25'>8</mn> </mtd>
1860
1
            <mtd id='nav-26'> <mn id='nav-27'>9</mn></mtd>
1861
1
          </mtr>
1862
1
          <mtr id='nav-28'>
1863
1
            <mtd id='nav-29'>
1864
1
              <mrow id='nav-30'>
1865
1
                <mi id='nav-31'>sin</mi>
1866
1
                <mo id='nav-32'>&#x2061;</mo>
1867
1
                <mi id='nav-33'>x</mi>
1868
1
              </mrow>
1869
1
            </mtd>
1870
1
            <mtd id='nav-34'>
1871
1
              <msup id='nav-35'>
1872
1
                <mi id='nav-36'>e</mi>
1873
1
                <mi id='nav-37'>x</mi>
1874
1
              </msup>
1875
1
            </mtd>
1876
1
            <mtd id='nav-38'>
1877
1
              <mrow id='nav-39'>
1878
1
                <mn id='nav-40'>2</mn>
1879
1
                <mo id='nav-41'>-</mo>
1880
1
                <mi id='nav-42'>y</mi>
1881
1
              </mrow>
1882
1
            </mtd>
1883
1
          </mtr>
1884
1
        </mtable>
1885
1
       </math>";
1886
1
        init_default_prefs(mathml_str, "Enhanced");
1887
1
        return MATHML_INSTANCE.with(|package_instance| {
1888
1
            let package_instance = package_instance.borrow();
1889
1
            let mathml = get_element(&package_instance);
1890
1
            test_command("ZoomInAll", mathml, "nav-4");
1891
1
            test_command("MoveCellNext", mathml, "nav-6");
1892
1
            test_command("MoveCellNext", mathml, "nav-8");
1893
1
            test_command("MoveCellNext", mathml, "nav-8");
1894
1
            test_command("MoveCellDown", mathml, "nav-20");
1895
1
            test_command("MoveCellDown", mathml, "nav-27");
1896
1
            let speech = test_command("MoveCellDown", mathml, "nav-39");
1897
1
            assert_eq!(speech, "move down, row 4, column 3; 2 minus y");
1898
1
            let speech = test_command("MoveCellDown", mathml, "nav-39");
1899
1
            assert_eq!(speech, "no next row");
1900
1
            test_command("MoveCellPrevious", mathml, "nav-35");
1901
1
            test_command("ZoomIn", mathml, "nav-36");
1902
1
            test_command("MoveCellUp", mathml, "nav-25");
1903
1
            test_command("MoveCellUp", mathml, "nav-16");
1904
1
            test_command("MoveCellUp", mathml, "nav-6");
1905
1
            test_command("MoveCellUp", mathml, "nav-6");
1906
1907
1
            return Ok( () );
1908
1
        });
1909
1
    }
1910
    
1911
    #[test]
1912
1
    fn move_cell_char_mode() -> Result<()> {
1913
1
        let mathml_str = "<math id='nav-0'>
1914
1
        <mtable id='nav-1'>
1915
1
          <mtr id='nav-2'>
1916
1
            <mtd id='nav-3'> <mn id='nav-4'>1</mn></mtd>
1917
1
            <mtd id='nav-5'> <mn id='nav-6'>2</mn></mtd>
1918
1
            <mtd id='nav-7'><mn id='nav-8'>3</mn> </mtd>
1919
1
          </mtr>
1920
1
          <mtr id='nav-9'>
1921
1
            <mtd id='nav-10'>
1922
1
              <mrow id='nav-11'>
1923
1
                <mi id='nav-12'>x</mi>
1924
1
                <mo id='nav-13'>-</mo>
1925
1
                <mi id='nav-14'>y</mi>
1926
1
              </mrow>
1927
1
            </mtd>
1928
1
            <mtd id='nav-15'>
1929
1
              <mfrac id='nav-16'>
1930
1
                <mn id='nav-17'>1</mn>
1931
1
                <mn id='nav-18'>2</mn>
1932
1
              </mfrac>
1933
1
            </mtd>
1934
1
            <mtd id='nav-19'>
1935
1
              <mi id='nav-20'>z</mi>
1936
1
            </mtd>
1937
1
          </mtr>
1938
1
          <mtr id='nav-21'>
1939
1
            <mtd id='nav-22'><mn id='nav-23'>7</mn> </mtd>
1940
1
            <mtd id='nav-24'><mn id='nav-25'>8</mn> </mtd>
1941
1
            <mtd id='nav-26'> <mn id='nav-27'>9</mn></mtd>
1942
1
          </mtr>
1943
1
          <mtr id='nav-28'>
1944
1
            <mtd id='nav-29'>
1945
1
              <mrow id='nav-30'>
1946
1
                <mi id='nav-31'>sin</mi>
1947
1
                <mo id='nav-32'>&#x2061;</mo>
1948
1
                <mi id='nav-33'>x</mi>
1949
1
              </mrow>
1950
1
            </mtd>
1951
1
            <mtd id='nav-34'>
1952
1
              <msup id='nav-35'>
1953
1
                <mi id='nav-36'>e</mi>
1954
1
                <mi id='nav-37'>x</mi>
1955
1
              </msup>
1956
1
            </mtd>
1957
1
            <mtd id='nav-38'>
1958
1
              <mrow id='nav-39'>
1959
1
                <mn id='nav-40'>2</mn>
1960
1
                <mo id='nav-41'>-</mo>
1961
1
                <mi id='nav-42'>y</mi>
1962
1
              </mrow>
1963
1
            </mtd>
1964
1
          </mtr>
1965
1
        </mtable>
1966
1
       </math>";
1967
1
       init_default_prefs(mathml_str, "Character");
1968
1
       return MATHML_INSTANCE.with(|package_instance| {
1969
1
            let package_instance = package_instance.borrow();
1970
1
            let mathml = get_element(&package_instance);
1971
1
            NAVIGATION_STATE.with(|nav_stack| {
1972
1
                nav_stack.borrow_mut().push(NavigationPosition{
1973
1
                    current_node: "nav-8".to_string(),
1974
1
                    current_node_offset: 0
1975
1
                }, "None")
1976
1
            });
1977
1
            test_command("MoveNext", mathml, "nav-12");
1978
1
            test_command("MoveNext", mathml, "nav-13");
1979
1
            test_command("MoveNext", mathml, "nav-14");
1980
1
            test_command("MoveNext", mathml, "nav-17");
1981
1
            test_command("MovePrevious", mathml, "nav-14");
1982
1
            test_command("MoveCellNext", mathml, "nav-17");
1983
1
            test_command("MoveCellPrevious", mathml, "nav-14");
1984
1
            test_command("MovePrevious", mathml, "nav-13");
1985
1
            test_command("MovePrevious", mathml, "nav-12");
1986
1
            test_command("MoveCellPrevious", mathml, "nav-12");
1987
1
            test_command("MovePrevious", mathml, "nav-8");
1988
1
            test_command("MoveCellDown", mathml, "nav-20");
1989
1
            test_command("MoveCellDown", mathml, "nav-27");
1990
1
            test_command("MoveCellDown", mathml, "nav-40");
1991
1
            test_command("MoveCellDown", mathml, "nav-40");
1992
1
            test_command("MoveCellPrevious", mathml, "nav-37");
1993
1
            test_command("MoveCellUp", mathml, "nav-25");
1994
1995
1
            return Ok( () );
1996
1
        });
1997
1
    }
1998
    
1999
    #[test]
2000
1
    fn placemarker() -> Result<()> {
2001
1
        let mathml_str = "<math display='block' id='math'>
2002
1
        <mrow displaystyle='true' id='mrow'>
2003
1
          <mi id='a'>a</mi>
2004
1
          <mo id='plus-1'>+</mo>
2005
1
          <mi id='b'>b</mi>
2006
1
          <mo id='plus-2'>+</mo>
2007
1
          <mi id='c'>c</mi>
2008
1
        </mrow>
2009
1
        </math>";
2010
1
        init_default_prefs(mathml_str, "Character");
2011
1
        return MATHML_INSTANCE.with(|package_instance| {
2012
1
            let package_instance = package_instance.borrow();
2013
1
            let mathml = get_element(&package_instance);
2014
1
            test_command("MoveStart", mathml, "a");
2015
1
            test_command("SetPlacemarker0", mathml, "a");
2016
1
            test_command("MoveEnd", mathml, "c");
2017
1
            test_command("Read0", mathml, "c");
2018
1
            test_command("Describe0", mathml, "c");
2019
1
            test_command("SetPlacemarker1", mathml, "c");
2020
1
            test_command("MoveTo0", mathml, "a");
2021
1
            test_command("MoveTo1", mathml, "c");
2022
1
            test_command("MoveLastLocation", mathml, "a");
2023
            
2024
1
            return Ok( () );
2025
1
        });
2026
1
    }
2027
2028
    #[test]
2029
1
    fn where_am_i_all() -> Result<()> {
2030
1
        let mathml_str = "<math id='math'><mfrac id='mfrac'>
2031
1
                <msup id='msup'><mi id='base'>b</mi><mn id='exp'>2</mn></msup>
2032
1
                <mi id='denom'>d</mi>
2033
1
            </mfrac></math>";
2034
1
        init_default_prefs(mathml_str, "Enhanced");
2035
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2036
1
        return MATHML_INSTANCE.with(|package_instance| {
2037
1
            let package_instance = package_instance.borrow();
2038
1
            let mathml = get_element(&package_instance);
2039
1
            NAVIGATION_STATE.with(|nav_stack| {
2040
1
                nav_stack.borrow_mut().push(NavigationPosition{
2041
1
                    current_node: "exp".to_string(),
2042
1
                    current_node_offset: 0
2043
1
                }, "None")
2044
1
            });
2045
            // WhereAmIAll doesn't change the stack
2046
1
            let speech =test_command("WhereAmIAll", mathml, "exp");
2047
            // should be 2 "inside" strings corresponding to steps to the root
2048
1
            assert_eq!(speech, "2; inside; b squared; inside; the fraction with numerator; b squared; and denominator d");
2049
1
            return Ok( () );
2050
1
        });
2051
1
    }
2052
2053
    #[test]
2054
1
    fn auto_zoom_out_mrow() -> Result<()> {
2055
1
        let mathml_str = "<math id='math'>
2056
1
        <mrow id='id-1'>
2057
1
          <mrow id='id-2'>
2058
1
            <mrow id='2ax'>
2059
1
              <mn id='2'>2</mn>
2060
1
              <mo id='id-5'>&#x2062;</mo>
2061
1
              <mi id='a'>a</mi>
2062
1
              <mo id='id-7'>&#x2062;</mo>
2063
1
              <mi id='x'>x</mi>
2064
1
            </mrow>
2065
1
            <mo id='plus'>+</mo>
2066
1
            <mi id='b'>b</mi>
2067
1
          </mrow>
2068
1
          <mo id='equal'>=</mo>
2069
1
          <mn id='10'>10</mn>
2070
1
        </mrow>
2071
1
       </math>";
2072
1
        init_default_prefs(mathml_str, "Enhanced");
2073
1
        set_preference("AutoZoomOut", "False")
?0
;
2074
1
        return MATHML_INSTANCE.with(|package_instance| {
2075
1
            let package_instance = package_instance.borrow();
2076
1
            let mathml = get_element(&package_instance);
2077
1
            test_command("ZoomInAll", mathml, "2");
2078
1
            test_command("MoveNext", mathml, "a");
2079
1
            test_command("MoveNext", mathml, "x");
2080
1
            test_command("MoveNext", mathml, "plus");
2081
1
            test_command("MovePrevious", mathml, "2ax");
2082
1
            return Ok( () );
2083
1
        });
2084
1
    }
2085
2086
    #[test]
2087
1
    fn auto_zoom_out_fraction() -> Result<()> {
2088
1
        let mathml_str = "<math id='math'>
2089
1
            <mrow id='mrow'>
2090
1
                <mfrac id='frac'>
2091
1
                    <mrow id='num'><mi id='a'>a</mi><mo id='plus'>+</mo><mn id='1'>1</mn></mrow>
2092
1
                    <mrow id='denom'><mn id='2'>2</mn><mo id='invisible-times'>&#x2062;</mo><mi id='b'>b</mi></mrow>
2093
1
                </mfrac>
2094
1
                <mo id='minus'>-</mo>
2095
1
                <mn id='3'>3</mn>
2096
1
            </mrow>
2097
1
        </math>";
2098
1
        init_default_prefs(mathml_str, "Enhanced");
2099
1
        set_preference("AutoZoomOut", "False")
?0
;
2100
1
        return MATHML_INSTANCE.with(|package_instance| {
2101
1
            let package_instance = package_instance.borrow();
2102
1
            let mathml = get_element(&package_instance);
2103
1
            test_command("ZoomIn", mathml, "frac");
2104
1
            test_command("ZoomIn", mathml, "num");
2105
1
            test_command("MoveNext", mathml, "denom");
2106
1
            test_command("MoveNext", mathml, "denom");
2107
1
            test_command("MovePrevious", mathml, "num");
2108
1
            test_command("MovePrevious", mathml, "num");
2109
1
            test_command("ZoomOut", mathml, "frac");
2110
1
            test_command("MoveNext", mathml, "minus");
2111
1
            return Ok( () );
2112
1
        });
2113
1
    }
2114
2115
    #[test]
2116
1
    fn zoom_root() -> Result<()> {
2117
1
        let mathml_str = r#"<math display='block' id='id-0'>
2118
1
        <mrow id='id-1'>
2119
1
            <mo id='id-9'>±</mo>
2120
1
            <msqrt id='id-10'>
2121
1
                <mrow id='id-11'>
2122
1
                    <msup id='id-12'> <mi id='id-13'>b</mi> <mn id='id-14'>2</mn> </msup>
2123
1
                    <mo id='id-15'>-</mo>
2124
1
                    <mn id='id-17'>4</mn>
2125
1
                </mrow>
2126
1
            </msqrt>
2127
1
        </mrow>
2128
1
        </math>"#;
2129
2130
1
        test_mode(mathml_str, "Enhanced")
?0
;
2131
1
        test_mode(mathml_str, "Simple")
?0
;
2132
1
        test_mode(mathml_str, "Character")
?0
;
2133
1
        return Ok( () );
2134
2135
3
        fn test_mode(mathml_str: &str, mode: &str) -> Result<()> {
2136
3
            init_default_prefs(mathml_str, mode);
2137
3
            set_preference("AutoZoomOut", "False")
?0
;
2138
3
            return MATHML_INSTANCE.with(|package_instance| {
2139
3
                debug!("--- Testing mode {mode} ---");
2140
3
                let package_instance = package_instance.borrow();
2141
3
                let mathml = get_element(&package_instance);
2142
3
                test_command("ZoomIn", mathml, "id-9");
2143
3
                debug!("\nStart zoom in");
2144
3
                match mode {
2145
3
                    "Enhanced" => {
2146
1
                        test_command("MoveNext", mathml, "id-10");
2147
1
                        let speech = test_command("ZoomIn", mathml, "id-11");
2148
1
                        assert_eq!(speech, "zoom in; in root; b squared minus 4");  // only one arg, so don't say "in root"
2149
1
                        let speech = test_command("ZoomIn", mathml, "id-12");
2150
1
                        assert_eq!(speech, "zoom in; b squared");  // only one arg, so don't say "in root"
2151
1
                        let speech = test_command("ZoomIn", mathml, "id-13");
2152
1
                        assert_eq!(speech, "zoom in; in base; b");
2153
                    },
2154
2
                    "Simple" => {
2155
1
                        test_command("MoveNext", mathml, "id-10");
2156
1
                        let speech = test_command("ZoomIn", mathml, "id-12");
2157
1
                        assert_eq!(speech, "zoom in; in root; b squared");
2158
1
                        let speech = test_command("ZoomIn", mathml, "id-13");
2159
1
                        assert_eq!(speech, "zoom in; in base; b");
2160
                    },
2161
                    _ => { // "Character"
2162
1
                        let speech = test_command("MoveNext", mathml, "id-13");
2163
1
                        assert_eq!(speech, "move right; in root; in base; b");
2164
                    }
2165
                }
2166
3
                let squared_speech = if mode == "Character" {
"b super 2 end super"1
} else {
"b squared"2
};
2167
3
                let sqrt_speech = if mode == "Character" {
"root"1
} else {
"square root"2
};
2168
3
                let speech = test_command("ZoomOut", mathml, "id-12");
2169
3
                assert_eq!(speech, format!("zoom out; out of base; {squared_speech}"));
2170
3
                let speech = test_command("ZoomOut", mathml, "id-11");
2171
3
                assert_eq!(speech, format!("zoom out; {squared_speech} minus 4"));
2172
3
                let speech = test_command("ZoomOut", mathml, "id-10");
2173
3
                assert_eq!(speech, format!("zoom out; out of root; the {sqrt_speech} of {squared_speech} minus 4, end root",));
2174
3
                return Ok( () );
2175
3
            });
2176
3
        }
2177
1
    }
2178
2179
    #[test]
2180
1
    fn matrix_speech() -> Result<()> {
2181
1
        let mathml_str = r#"<math id='math'>
2182
1
            <mrow id='mrow'>
2183
1
            <mo id='open'>[</mo>
2184
1
            <mtable columnspacing='1em' rowspacing='4pt' id='table'>
2185
1
                <mtr id='row-1'>
2186
1
                    <mtd id='1-1'><mn id='id-6'>9</mn></mtd>
2187
1
                    <mtd id='1-2'><mrow id='id-8'><mo id='id-9'>-</mo><mn id='id-10'>13</mn></mrow></mtd>
2188
1
                </mtr>
2189
1
                <mtr id='row-2'>
2190
1
                    <mtd id='2-1'><mn id='id-13'>5</mn></mtd>
2191
1
                    <mtd id='2-2'><mo id='id-16'>-</mo><mn id='id-17'>6</mn></mtd>
2192
1
                </mtr>
2193
1
            </mtable>
2194
1
            <mo id='close'>]</mo>
2195
1
            </mrow>
2196
1
        </math>"#;
2197
1
        init_default_prefs(mathml_str, "Enhanced");
2198
1
        return MATHML_INSTANCE.with(|package_instance| {
2199
1
            let package_instance = package_instance.borrow();
2200
1
            let mathml = get_element(&package_instance);
2201
1
            test_command("ZoomIn", mathml, "row-1");
2202
1
            let speech = test_command("MoveNext", mathml, "row-2");
2203
1
            assert_eq!(speech, "move right; row 2; 5, negative 6");
2204
1
            let speech = test_command("ZoomIn", mathml, "id-13");
2205
1
            assert_eq!(speech, "zoom in; column 1; 5");
2206
1
            let speech = test_command("ZoomOut", mathml, "row-2");
2207
1
            assert_eq!(speech, "zoom out; row 2; 5, negative 6");
2208
1
            let speech = test_command("ZoomOut", mathml, "table");
2209
1
            assert_eq!(speech, "zoom out; the 2 by 2 matrix; row 1; 9, negative 13; row 2; 5, negative 6");
2210
1
        return Ok( () );
2211
1
        });
2212
1
    }
2213
2214
    #[test]
2215
1
    fn chem_speech() -> Result<()> {
2216
        // this comes from bug 218
2217
1
        let mathml_str = "<math display='block' id='id-0'>
2218
1
            <mrow data-chem-formula='5' id='id-1'>
2219
1
                <msub data-chem-formula='1' id='id-2'>
2220
1
                    <mi data-chem-element='1' id='id-3'>H</mi>
2221
1
                    <mn id='id-4'>2</mn>
2222
1
                </msub>
2223
1
                <mo data-chem-formula-op='0' id='id-5'>&#x2063;</mo>
2224
1
                <mi data-chem-element='1' id='id-6'>S</mi>
2225
1
                <mo data-chem-formula-op='0' id='id-7'>&#x2063;</mo>
2226
1
                <msub data-chem-formula='1' id='id-8'>
2227
1
                    <mi data-chem-element='1' id='id-9'>O</mi>
2228
1
                    <mn id='id-10'>4</mn>
2229
1
                </msub>
2230
1
            </mrow>
2231
1
        </math>";
2232
1
        init_default_prefs(mathml_str, "Enhanced");
2233
1
        return MATHML_INSTANCE.with(|package_instance| {
2234
1
            let package_instance = package_instance.borrow();
2235
1
            let mathml = get_element(&package_instance);
2236
1
            test_command("ZoomIn", mathml, "id-2");
2237
1
            let speech = test_command("MoveNext", mathml, "id-6");
2238
            // tables need to check their parent for proper speech
2239
1
            assert_eq!(speech, "move right; cap s");
2240
1
            return Ok( () );
2241
1
        });
2242
1
    }
2243
2244
    #[test]
2245
1
    fn determinant_speech() -> Result<()> {
2246
1
        let mathml_str = "<math id='math'>
2247
1
            <mrow id='mrow'>
2248
1
            <mo id='open'>|</mo>
2249
1
            <mtable columnspacing='1em' rowspacing='4pt' id='table'>
2250
1
                <mtr id='row-1'>
2251
1
                    <mtd id='1-1'><mn id='id-6'>9</mn></mtd>
2252
1
                    <mtd id='1-2'><mrow id='id-8'><mo id='id-9'>-</mo><mn id='id-10'>13</mn></mrow></mtd>
2253
1
                </mtr>
2254
1
                <mtr id='row-2'>
2255
1
                    <mtd id='2-1'><mn id='id-13'>5</mn></mtd>
2256
1
                    <mtd id='2-2'><mrow id='row2-negative'><mo id='id-16'>-</mo><mn id='id-17'>6</mn></mrow></mtd>
2257
1
                </mtr>
2258
1
            </mtable>
2259
1
            <mo id='close'>|</mo>
2260
1
            </mrow>
2261
1
        </math>";
2262
1
        init_default_prefs(mathml_str, "Enhanced");
2263
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2264
1
        return MATHML_INSTANCE.with(|package_instance| {
2265
1
            let package_instance = package_instance.borrow();
2266
1
            let mathml = get_element(&package_instance);
2267
1
            let speech = test_command("ZoomIn", mathml, "row-1");
2268
1
            assert_eq!(speech, "zoom in; row 1; 9, negative 13");
2269
1
            let speech = test_command("MoveNext", mathml, "row-2");
2270
1
            assert_eq!(speech, "move right; row 2; 5, negative 6");
2271
1
            let speech = test_command("MoveNext", mathml, "row-2");
2272
1
            assert_eq!(speech, "cannot move right, end of math");
2273
1
            let speech = test_command("ZoomIn", mathml, "id-13");
2274
1
            assert_eq!(speech, "zoom in; column 1; 5");
2275
1
            let speech = test_command("MoveNext", mathml, "row2-negative");
2276
1
            assert_eq!(speech, "move right; column 2, negative 6");
2277
1
            let speech = test_command("ZoomOutAll", mathml, "table");
2278
1
            assert_eq!(speech, "zoomed out all of the way; the 2 by 2 determinant; row 1; 9, negative 13; row 2; 5, negative 6");
2279
1
            return Ok( () );
2280
1
        });
2281
1
    }
2282
2283
    #[test]
2284
1
    fn cases_speech() -> Result<()> {
2285
1
        let mathml_str = "<math id='id-0'>
2286
1
        <mrow id='id-1'>
2287
1
          <mo id='open'>{</mo>
2288
1
          <mtable columnalign='left left' columnspacing='1em' displaystyle='false' rowspacing='.2em' id='table'>
2289
1
            <mtr id='row-1'>
2290
1
              <mtd id='id-5'><mrow id='id-6'><mrow id='id-7'><mo id='id-8'>-</mo><mi id='id-9'>x</mi></mrow><mo id='id-10'>,</mo></mrow></mtd>
2291
1
              <mtd id='id-11'><mrow id='id-12'><mrow id='id-13'><mtext id='id-14'>if</mtext><mo id='id-15'>&#x2062;</mo><mi id='id-16'>x</mi></mrow><mo id='id-17'>&lt;</mo><mn id='id-18'>0</mn></mrow></mtd>
2292
1
            </mtr>
2293
1
            <mtr id='row-2'>
2294
1
              <mtd id='id-20'><mrow id='id-21'><mrow id='id-22'><mo id='id-23'>+</mo><mi id='id-24'>x</mi></mrow><mo id='id-25'>,</mo></mrow></mtd>
2295
1
              <mtd id='id-26'><mrow id='id-27'><mrow id='id-28'><mtext id='id-29'>if</mtext><mo id='id-30'>&#x2062;</mo><mi id='id-31'>x</mi></mrow><mo id='id-32'>≥</mo><mn id='id-33'>0</mn></mrow></mtd>
2296
1
            </mtr>
2297
1
          </mtable>
2298
1
        </mrow>
2299
1
       </math>";
2300
1
        init_default_prefs(mathml_str, "Enhanced");
2301
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2302
1
        return MATHML_INSTANCE.with(|package_instance| {
2303
1
            let package_instance = package_instance.borrow();
2304
1
            let mathml = get_element(&package_instance);
2305
1
            test_command("ZoomIn", mathml, "row-1");
2306
1
            let speech = test_command("MovePrevious", mathml, "row-1");
2307
1
            assert_eq!(speech, "move left; start of math");
2308
1
            let speech = test_command("MoveNext", mathml, "row-2");
2309
1
            assert_eq!(speech, "move right; case 2; positive x comma; if x, is greater than or equal to 0");
2310
1
            let speech = test_command("ZoomOut", mathml, "table");
2311
1
            assert_eq!(speech, "zoom out; 2 cases; case 1; negative x comma; if x is less than 0; case 2; positive x comma; if x, is greater than or equal to 0");
2312
1
            let speech = test_command("ZoomIn", mathml, "row-1");
2313
1
            assert_eq!(speech, "zoom in; case 1; negative x comma; if x is less than 0");
2314
1
            set_preference("NavMode", "Character").unwrap();
2315
1
            let speech = test_command("MovePrevious", mathml, "open");
2316
1
            assert_eq!(speech, "move left; open brace");
2317
1
            return Ok( () );
2318
1
        });
2319
1
    }
2320
2321
    #[test]
2322
1
    fn base_superscript() -> Result<()> {
2323
        // bug #217 -- zoom into base of parenthesized script 
2324
1
        let mathml_str = "<math display='block' id='id-0'>
2325
1
            <msup id='id-1'>
2326
1
                <mrow id='id-2'>
2327
1
                    <mo stretchy='false' id='id-3'>(</mo>
2328
1
                    <mrow id='id-4'>
2329
1
                        <mn id='id-5'>2</mn>
2330
1
                        <mo id='id-6'>&#x2062;</mo>
2331
1
                        <mi id='id-7'>x</mi>
2332
1
                    </mrow>
2333
1
                    <mo stretchy='false' id='id-8'>)</mo>
2334
1
                </mrow>
2335
1
                <mn id='id-9'>2</mn>
2336
1
            </msup>
2337
1
        </math>";
2338
1
        init_default_prefs(mathml_str, "Enhanced");
2339
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2340
1
        return MATHML_INSTANCE.with(|package_instance| {
2341
1
            let package_instance = package_instance.borrow();
2342
1
            let mathml = get_element(&package_instance);
2343
1
            let speech = test_command("ZoomIn", mathml, "id-4");
2344
1
            assert_eq!(speech, "zoom in; in base; 2 x");
2345
1
            let speech = test_command("MoveNext", mathml, "id-9");
2346
1
            assert_eq!(speech, "move right; in exponent; 2");
2347
1
            return Ok( () );
2348
1
        });
2349
1
    }
2350
2351
    #[test]
2352
1
    fn binomial_intent() -> Result<()> {
2353
1
        let mathml_str = "<math display='block' id='id-0'>
2354
1
                    <mrow intent='binomial($n,$k)' id='id-1'>
2355
1
                        <mo id='id-2'>(</mo>
2356
1
                        <mfrac linethickness='0pt' id='id-3'>
2357
1
                            <mi arg='n' id='id-4'>n</mi>
2358
1
                            <mi arg='k' id='id-5'>k</mi>
2359
1
                        </mfrac>
2360
1
                    <mo id='id-6'>)</mo>
2361
1
                    </mrow>
2362
1
                </math>";
2363
1
        init_default_prefs(mathml_str, "Character");
2364
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2365
1
        return MATHML_INSTANCE.with(|package_instance| {
2366
1
            let package_instance = package_instance.borrow();
2367
1
            let mathml = get_element(&package_instance);
2368
1
            debug!("Character mode");
2369
1
            let speech = test_command("MoveStart", mathml, "id-2");
2370
1
            assert_eq!(speech, "move to start of math; open paren");
2371
1
            let speech = test_command("MoveNext", mathml, "id-4");
2372
            // I'm not keen on the use of numerator/denominator here, but character mode turns off intent
2373
1
            assert_eq!(speech, "move right; in numerator; n");
2374
1
            let speech = test_command("MoveNext", mathml, "id-5");
2375
1
            assert_eq!(speech, "move right; in denominator; k");
2376
1
            debug!("before zoom out");
2377
1
            let speech = test_command("ZoomOut", mathml, "id-3");
2378
1
            assert_eq!(speech, "zoom out; out of denominator; n over k");
2379
            // let speech = test_command("ZoomOut", mathml, "id-1");
2380
            // assert_eq!(speech, "zoom out; open paren n over k, close paren");
2381
2382
1
            set_preference("NavMode", "Simple").unwrap();
2383
1
            debug!("Simple mode");
2384
1
            let speech = test_command("ZoomIn", mathml, "id-4");
2385
1
            assert_eq!(speech, "zoom in; in part 1; n");
2386
1
            let speech = test_command("MoveNext", mathml, "id-5");
2387
1
            assert_eq!(speech, "move right; in part 2; k");
2388
1
            let speech = test_command("MoveNext", mathml, "id-5");
2389
1
            assert_eq!(speech, "cannot move right, end of math");
2390
1
            let speech = test_command("ZoomOut", mathml, "id-1-literal-0");
2391
1
            assert_eq!(speech, "zoom out; out of part 2; n choose k");
2392
2393
1
            set_preference("NavMode", "Enhanced").unwrap();
2394
1
            debug!("Enhanced mode");
2395
1
            let speech = test_command("ZoomIn", mathml, "id-4");
2396
1
            assert_eq!(speech, "zoom in; in part 1; n");
2397
1
            let speech = test_command("MoveNext", mathml, "id-5");
2398
1
            assert_eq!(speech, "move right; in part 2; k");
2399
1
            let speech = test_command("MoveNext", mathml, "id-5");
2400
1
            assert_eq!(speech, "cannot move right, end of math");
2401
1
            let speech = test_command("ZoomOut", mathml, "id-1-literal-0");
2402
1
            assert_eq!(speech, "zoom out; out of part 2; n choose k");
2403
2404
1
            return Ok( () );
2405
1
        });
2406
1
    }
2407
2408
    #[test]
2409
1
    fn matrix_literal_intent() -> Result<()> {
2410
1
        let mathml_str = r#"<math display='block' id='id-0'>
2411
1
            <mrow intent='$m' id='id-1'>
2412
1
                <mo id='id-2'>(</mo>
2413
1
                <mtable arg='m' intent='_diagonal:prefix(1,2,3)' id='id-3'>
2414
1
                <mtr id='id-4'>
2415
1
                    <mtd id='id-5'><mn id='id-6'>1</mn></mtd>
2416
1
                    <mtd id='id-7'><mn id='id-8'>0</mn></mtd>
2417
1
                    <mtd id='id-9'><mn id='id-10'>0</mn></mtd>
2418
1
                </mtr>
2419
1
                <mtr id='id-11'>
2420
1
                    <mtd id='id-12'><mn id='id-13'>0</mn></mtd>
2421
1
                    <mtd id='id-14'><mn id='id-15'>2</mn></mtd>
2422
1
                    <mtd id='id-16'><mn id='id-17'>0</mn></mtd>
2423
1
                </mtr>
2424
1
                <mtr id='id-18'>
2425
1
                    <mtd id='id-19'><mn id='id-20'>0</mn></mtd>
2426
1
                    <mtd id='id-21'><mn id='id-22'>0</mn></mtd>
2427
1
                    <mtd id='id-23'><mn id='id-24'>3</mn></mtd>
2428
1
                </mtr>
2429
1
                </mtable>
2430
1
                <mo id='id-25'>)</mo>
2431
1
            </mrow>
2432
1
        </math>"#;
2433
1
        init_default_prefs(mathml_str, "Simple");
2434
1
        return MATHML_INSTANCE.with(|package_instance| {
2435
1
            let package_instance = package_instance.borrow();
2436
1
            let mathml = get_element(&package_instance);
2437
1
            let speech = test_command("ZoomIn", mathml, "id-3-literal-1");
2438
1
            assert_eq!(speech, "zoom in; 1");
2439
1
            let speech = test_command("MoveNext", mathml, "id-3-literal-2");
2440
1
            assert_eq!(speech, "move right; 2");
2441
1
            let speech = test_command("MoveNext", mathml, "id-3-literal-3");
2442
1
            assert_eq!(speech, "move right; 3");
2443
1
            let speech = test_command("MoveNext", mathml, "id-3-literal-3");
2444
1
            assert_eq!(speech, "cannot move right, end of math");
2445
1
            let speech = test_command("ZoomOut", mathml, "id-3-literal-0");
2446
1
            assert_eq!(speech, "zoom out; diagonal 1 2 3");
2447
2448
1
            return Ok( () );
2449
1
        });
2450
1
    }
2451
2452
    #[test]
2453
1
    fn absolute_value() -> Result<()> {
2454
1
        let mathml_str = "<math id='math'>
2455
1
                <mrow id='expr'>
2456
1
                    <mn id='2'>2</mn>
2457
1
                    <mrow id='abs'>
2458
1
                        <mo id='start'>|</mo>
2459
1
                        <mi id='x'>x</mi>
2460
1
                        <mo id='end'>|</mo>
2461
1
                    </mrow>
2462
1
                </mrow>
2463
1
            </math>";
2464
1
        init_default_prefs(mathml_str, "Enhanced");
2465
1
        set_preference("SpeechStyle", "ClearSpeak").unwrap();
2466
1
        return MATHML_INSTANCE.with(|package_instance| {
2467
1
            let package_instance = package_instance.borrow();
2468
1
            let mathml = get_element(&package_instance);
2469
1
            let speech = test_command("ZoomIn", mathml, "2");
2470
1
            assert_eq!(speech, "zoom in; 2");
2471
1
            let speech = test_command("MoveNext", mathml, "abs");
2472
1
            assert_eq!(speech, "move right; the absolute value of x");
2473
1
            let speech = test_command("ZoomIn", mathml, "x");
2474
1
            assert_eq!(speech, "zoom in; in absolute value; x");
2475
1
            let speech = test_command("MoveNext", mathml, "x");
2476
1
            assert_eq!(speech, "cannot move right, end of math");
2477
1
            set_preference("NavMode", "Character").unwrap();
2478
1
            let speech = test_command("MoveNext", mathml, "end");
2479
1
            assert_eq!(speech, "move right; vertical line");
2480
1
            let speech = test_command("MoveLineStart", mathml, "2");
2481
1
            assert_eq!(speech, "move to start of line; 2");
2482
1
            let speech = test_command("MoveNext", mathml, "start");
2483
1
            assert_eq!(speech, "move right; vertical line");
2484
1
            return Ok( () );
2485
1
        });
2486
1
    }
2487
2488
    #[test]
2489
1
    fn read_and_describe_fraction() -> Result<()> {
2490
1
        let mathml_str = "<math id='math'>
2491
1
            <mrow id='mrow'>
2492
1
                <mfrac id='frac'>
2493
1
                    <mrow id='numerator'><mi>b</mi><mo>+</mo><mn>1</mn></mrow>
2494
1
                <mn id='denom'>3</mn>
2495
1
                </mfrac>
2496
1
                <mo id='minus'>-</mo>
2497
1
                <mn id='3'>3</mn>
2498
1
            </mrow>
2499
1
        </math>";
2500
1
        init_default_prefs(mathml_str, "Enhanced");
2501
1
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
2502
1
        return MATHML_INSTANCE.with(|package_instance| {
2503
1
            let package_instance = package_instance.borrow();
2504
1
            let mathml = get_element(&package_instance);
2505
1
            test_command("ZoomIn", mathml, "frac");
2506
1
            let speech = test_command("ReadCurrent", mathml, "frac");
2507
1
            assert_eq!(speech, "read current; fraction, b plus 1, over 3, end fraction");
2508
1
            let speech = test_command("DescribeCurrent", mathml, "frac");
2509
1
            assert_eq!(speech, "describe current; fraction");
2510
1
            return Ok( () );
2511
1
        });
2512
1
    }
2513
2514
2515
    #[test]
2516
1
    fn read_and_describe_mrow() -> Result<()> {
2517
1
        let mathml_str = "<math id='math'>
2518
1
            <mrow id='mrow'>
2519
1
                <mn>1</mn><mo>+</mo>
2520
1
                <mn>2</mn><mo>+</mo>
2521
1
                <mn>3</mn><mo>+</mo>
2522
1
                <mn>4</mn><mo>+</mo>
2523
1
                <mn>5</mn><mo>+</mo>
2524
1
                <mn>6</mn><mo>+</mo>
2525
1
                <mn>7</mn>
2526
1
            </mrow>
2527
1
        </math>";
2528
1
        init_default_prefs(mathml_str, "Enhanced");
2529
1
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
2530
1
        return MATHML_INSTANCE.with(|package_instance| {
2531
1
            let package_instance = package_instance.borrow();
2532
1
            let mathml = get_element(&package_instance);
2533
1
            let speech = test_command("ZoomOutAll", mathml, "mrow");
2534
1
            assert_eq!(speech, "zoomed out all of the way; 1 plus 2 plus 3 plus 4 plus 5 plus 6 plus 7");
2535
1
            let speech = test_command("ReadCurrent", mathml, "mrow");
2536
1
            assert_eq!(speech, "read current; 1 plus 2 plus 3 plus 4 plus 5 plus 6 plus 7");
2537
1
            let speech = test_command("DescribeCurrent", mathml, "mrow");
2538
1
            assert_eq!(speech, "describe current; 1 plus 2 plus 3 and so on");
2539
1
            return Ok( () );
2540
1
        });
2541
1
    }
2542
2543
2544
    #[test]
2545
1
    fn read_next_invisible_char() -> Result<()> {
2546
1
        let mathml_str = "<math id='id-0'>
2547
1
            <mrow id='id-1'>
2548
1
                <mi id='id-2'>x</mi>
2549
1
                <mo id='id-3'>&#x2062;</mo>
2550
1
                <mi id='id-4'>y</mi>
2551
1
            </mrow>
2552
1
            </math>";
2553
1
        init_default_prefs(mathml_str, "Simple");
2554
1
        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
2555
1
        return MATHML_INSTANCE.with(|package_instance| {
2556
1
            let package_instance = package_instance.borrow();
2557
1
            let mathml = get_element(&package_instance);
2558
1
            let speech = test_command("ZoomIn", mathml, "id-2");
2559
1
            assert_eq!(speech, "zoom in; x");
2560
1
            let speech = test_command("ToggleZoomLockUp", mathml, "id-2");
2561
1
            assert_eq!(speech, "enhanced mode; x");
2562
1
            let speech = test_command("ReadNext", mathml, "id-2");
2563
1
            assert_eq!(speech, "read right; y");
2564
1
            return Ok( () );
2565
1
        });
2566
1
    }
2567
2568
    
2569
    #[test]
2570
1
    fn basic_language_test() -> Result<()> {
2571
        // this is basically a sanity check that all the language's navigation.yaml files are at least syntactically correct
2572
        // FIX: should look through the Languages dir and figure this is out
2573
1
        let mathml_str = "<math id='math'>
2574
1
                <mrow id='contents'>
2575
1
                    <mrow id='lhs'>
2576
1
                        <mrow id='term'>
2577
1
                            <mn id='2'>2</mn>
2578
1
                            <mo id='invisible-times'>&#x2062;</mo>
2579
1
                            <msup id='msup'>
2580
1
                                <mi id='x'>x</mi>
2581
1
                                <mn id='3'>3</mn>
2582
1
                            </msup>
2583
1
                        </mrow>
2584
1
                        <mo id='plus'>+</mo>
2585
1
                        <mn id='1'>1</mn>
2586
1
                    </mrow>
2587
1
                <mo id='id-11'>=</mo>
2588
1
                <mi id='id-12'>y</mi>
2589
1
                </mrow>
2590
1
            </math>";
2591
        
2592
1
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
2593
11
        for lang in 
get_supported_languages1
().
unwrap_or_default1
() {
2594
11
            test_language(&lang, mathml_str);
2595
11
        }
2596
1
        return Ok( () );
2597
2598
11
        fn test_language(lang: &str, mathml_str: &str) {
2599
11
            init_default_prefs(mathml_str, "Enhanced");
2600
11
            set_preference("Language", lang).unwrap();
2601
2602
11
            set_preference("NavMode", "Enhanced").unwrap();
2603
11
            MATHML_INSTANCE.with(|package_instance| {
2604
11
                let package_instance = package_instance.borrow();
2605
11
                let mathml = get_element(&package_instance);
2606
11
                test_command("ZoomInAll", mathml, "2");
2607
11
                test_command("MoveNext", mathml, "msup");
2608
11
                test_command("MoveNext", mathml, "plus");
2609
11
                test_command("MovePrevious", mathml, "term");
2610
11
                test_command("MovePrevious", mathml, "term");
2611
11
                test_command("ZoomOutAll", mathml, "contents");
2612
11
            });
2613
2614
11
            set_preference("NavMode", "Simple").unwrap();
2615
11
            MATHML_INSTANCE.with(|package_instance: &RefCell<Package>| {
2616
11
                let package_instance = package_instance.borrow();
2617
11
                let mathml = get_element(&package_instance);
2618
11
                test_command("ZoomInAll", mathml, "2");
2619
11
                test_command("MoveNext", mathml, "msup");
2620
11
                test_command("MoveNext", mathml, "plus");
2621
11
                test_command("MovePrevious", mathml, "msup");
2622
11
                test_command("MovePrevious", mathml, "2");
2623
11
                test_command("MovePrevious", mathml, "2");
2624
11
                test_command("ZoomOutAll", mathml, "contents");
2625
11
            });
2626
2627
11
            set_preference("NavMode", "Character").unwrap();
2628
11
            MATHML_INSTANCE.with(|package_instance| {
2629
11
                let package_instance = package_instance.borrow();
2630
11
                let mathml = get_element(&package_instance);
2631
11
                test_command("ZoomIn", mathml, "2");
2632
11
                test_command("MoveNext", mathml, "x");
2633
11
                test_command("MoveNext", mathml, "3");
2634
11
                test_command("MoveNext", mathml, "plus");
2635
11
                test_command("MovePrevious", mathml, "3");
2636
11
                test_command("MovePrevious", mathml, "x");
2637
11
                test_command("MovePrevious", mathml, "2");
2638
11
                test_command("MovePrevious", mathml, "2");
2639
11
            });
2640
            
2641
            // simple sanity check that "overview.yaml" doesn't have a syntax error
2642
11
            set_preference("Overview", "True").unwrap();
2643
11
            set_preference("NavMode", "Character").unwrap();
2644
11
            MATHML_INSTANCE.with(|package_instance| {
2645
11
                let package_instance = package_instance.borrow();
2646
11
                let mathml = get_element(&package_instance);
2647
11
                test_command("ZoomIn", mathml, "2");
2648
11
            });
2649
11
        }
2650
1
    }
2651
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/prefs.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/prefs.rs.html index 6b1407ea..89e6beb6 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/prefs.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/prefs.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/prefs.rs
Line
Count
Source
1
//! Preferences come from either the user or are programmatically set by the AT.
2
//! Either source can set any preference, but users and AT typically set different preferences.
3
//!
4
//! User prefs are read in from a YAML file (prefs.yaml). They can be written by hand.
5
//! In the future, there will hopefully be a nice UI that writes out the YAML file.
6
//!
7
//! AT prefs are set via the API given in the [crate::interface] module.
8
//! These in turn call [`PreferenceManager::set_string_pref`] and [`PreferenceManager::set_api_float_pref`].
9
//! Ultimately, user and api prefs are stored in a hashmap.
10
//!
11
//! Preferences can be found in a few places:
12
//! 1. Language-independent prefs found in the Rules dir
13
//! 2. Language-specific prefs
14
//! 3. Language-region-specific prefs
15
//! 
16
//! If there are multiple definitions, the later ones overwrite the former ones.
17
//! This means that region-specific variants will overwrite more general variants.
18
//!
19
//! Note: there are a number of public 'get_xxx' functions that really are meant to be public only to the [crate::speech] module as speech needs access
20
//! to the preferences to generate the speech.
21
#![allow(clippy::needless_return)]
22
use yaml_rust::{Yaml, YamlLoader};
23
use crate::pretty_print::yaml_to_string;
24
use crate::tts::TTS;
25
use std::cell::RefCell;
26
use std::rc::Rc;
27
use log::{debug, error, warn};
28
use std::path::{Path, PathBuf};
29
use std::sync::LazyLock;
30
use crate::speech::{as_str_checked, RulesFor, FileAndTime};
31
use std::collections::{HashMap, HashSet};
32
use phf::phf_set;
33
use crate::shim_filesystem::*;
34
use crate::errors::*;
35
36
/// Use to indicate preference not found with Preference::to_string()
37
pub static NO_PREFERENCE: &str = "\u{FFFF}";
38
39
3
static DEFAULT_LANG: LazyLock<Yaml> = LazyLock::new(|| Yaml::String("en".to_string()));
40
41
42
// Preferences are recorded here
43
/// Preferences are stored in a HashMap. It maps the name of the pref (a String) to its value (stored as YAML string/float)
44
pub type PreferenceHashMap = HashMap<String, Yaml>;
45
#[derive(Debug, Clone, Default)]
46
pub struct Preferences {
47
    prefs: PreferenceHashMap        // FIX: pub so can get at iterator, should add iterator to Preferences instead
48
}
49
50
use std::fmt;
51
impl fmt::Display for Preferences {
52
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53
0
        let mut pref_vec: Vec<(&String, &Yaml)> = self.prefs.iter().collect();
54
0
        pref_vec.sort();
55
0
        for (name, value) in pref_vec {
56
0
            writeln!(f, "    {}: {}", name, yaml_to_string(value, 0))?;
57
        }
58
0
        return Ok(());
59
0
    }
60
}
61
62
impl Preferences{
63
    // default values needed in case nothing else gets set 
64
4.14k
    fn user_defaults() -> Preferences {
65
4.14k
        let mut prefs = PreferenceHashMap::with_capacity(39);
66
4.14k
        prefs.insert("Language".to_string(), Yaml::String("en".to_string()));
67
4.14k
        prefs.insert("LanguageAuto".to_string(), Yaml::String("".to_string()));     // illegal value so change will be recognized
68
4.14k
        prefs.insert("SpeechStyle".to_string(), Yaml::String("ClearSpeak".to_string()));
69
4.14k
        prefs.insert("Verbosity".to_string(), Yaml::String("Medium".to_string()));
70
4.14k
        prefs.insert("SpeechOverrides_CapitalLetters".to_string(), Yaml::String("".to_string())); // important for testing
71
4.14k
        prefs.insert("Blind".to_string(), Yaml::Boolean(true));
72
4.14k
        prefs.insert("MathRate".to_string(), Yaml::Real("100.0".to_string()));
73
4.14k
        prefs.insert("PauseFactor".to_string(), Yaml::Real("100.0".to_string()));
74
4.14k
        prefs.insert("NavMode".to_string(), Yaml::String("Enhanced".to_string()));
75
4.14k
        prefs.insert("Overview".to_string(), Yaml::Boolean(false));
76
4.14k
        prefs.insert("ResetOverView".to_string(), Yaml::Boolean(true));
77
4.14k
        prefs.insert("NavVerbosity".to_string(), Yaml::String("Verbose".to_string()));
78
4.14k
        prefs.insert("AutoZoomOut".to_string(), Yaml::Boolean(true));
79
4.14k
        prefs.insert("BrailleCode".to_string(), Yaml::String("Nemeth".to_string()));
80
4.14k
        prefs.insert("BrailleNavHighlight".to_string(), Yaml::String("EndPoints".to_string()));
81
4.14k
        prefs.insert("UEB_START_MODE".to_string(), Yaml::String("Grade2".to_string()));
82
4.14k
        prefs.insert("DecimalSeparators".to_string(), Yaml::String(".".to_string()));
83
4.14k
        prefs.insert("BlockSeparators".to_string(), Yaml::String(", \u{00A0}\u{202F}".to_string()));
84
    
85
4.14k
        return Preferences{ prefs };
86
4.14k
    }
87
88
    // default values needed in case nothing else gets set 
89
4.14k
    fn api_defaults() -> Preferences {
90
4.14k
        let mut prefs = PreferenceHashMap::with_capacity(19);
91
4.14k
        prefs.insert("TTS".to_string(), Yaml::String("none".to_string()));
92
4.14k
        prefs.insert("Pitch".to_string(), Yaml::Real("0.0".to_string()));
93
4.14k
        prefs.insert("Rate".to_string(), Yaml::Real("180.0".to_string()));
94
4.14k
        prefs.insert("Volume".to_string(), Yaml::Real("100.0".to_string()));
95
4.14k
        prefs.insert("Voice".to_string(), Yaml::String("none".to_string()));
96
4.14k
        prefs.insert("Gender".to_string(), Yaml::String("none".to_string()));
97
4.14k
        prefs.insert("Bookmark".to_string(), Yaml::Boolean(false));
98
4.14k
        prefs.insert("CapitalLetters_UseWord".to_string(), Yaml::Boolean(true));
99
4.14k
        prefs.insert("CapitalLetters_Pitch".to_string(), Yaml::Real("0.0".to_string()));
100
4.14k
        prefs.insert("CapitalLetters_Beep".to_string(), Yaml::Boolean(false));
101
4.14k
        prefs.insert("IntentErrorRecovery".to_string(), Yaml::String("IgnoreIntent".to_string()));    // also Error
102
4.14k
        prefs.insert("CheckRuleFiles".to_string(), Yaml::String(
103
4.14k
                    (if cfg!(target_family = "wasm") {
"None"0
} else {"Prefs"}).to_string())); // avoid checking for rule files being changed (40% speedup!) (All, Prefs, None)
104
4.14k
        return Preferences{ prefs };
105
4.14k
    }
106
107
4.14k
    fn read_prefs_file(file: &Path, mut base_prefs: Preferences) -> Result<Preferences> {
108
4.14k
        let file_name = file.to_str().unwrap();
109
        let docs;
110
4.14k
        match read_to_string_shim(file) {
111
0
            Err(e) => {
112
0
                bail!("Couldn't read file {}\n{}", file_name, e);
113
            },
114
4.14k
            Ok( file_contents) => {
115
4.14k
                match YamlLoader::load_from_str(&file_contents) {
116
0
                    Err(e) => {
117
0
                        bail!("Yaml parse error ('{}') in preference file {}.", e, file_name);
118
                    },
119
4.14k
                    Ok(d) => docs = d,
120
                }
121
122
            }
123
        }
124
4.14k
        if docs.len() != 1 {
125
0
            bail!("MathCAT: error in prefs file '{}'.\nFound {} 'documents' -- should only be 1.", file_name, docs.len());
126
4.14k
        }
127
128
4.14k
        let doc = &docs[0];
129
4.14k
        if cfg!(debug_assertions) {
130
4.14k
            verify_keys(doc, "Speech", file_name)
?0
;
131
4.14k
            verify_keys(doc, "Navigation", file_name)
?0
;
132
4.14k
            verify_keys(doc, "Braille", file_name)
?0
;
133
4.14k
            verify_keys(doc, "Other", file_name)
?0
;
134
0
        }
135
136
4.14k
        let prefs = &mut base_prefs.prefs;
137
4.14k
        add_prefs(prefs, &doc["Speech"], "", file_name);
138
4.14k
        add_prefs(prefs, &doc["Navigation"], "", file_name);
139
4.14k
        add_prefs(prefs, &doc["Braille"], "", file_name);
140
4.14k
        add_prefs(prefs, &doc["Other"], "", file_name);
141
4.14k
        return Ok( Preferences{ prefs: prefs.to_owned() } );
142
143
144
145
16.5k
        fn verify_keys(dict: &Yaml, key: &str, file_name: &str) -> Result<()> {
146
16.5k
            let prefs = &dict[key];
147
16.5k
            if prefs.is_badvalue() {
148
0
                bail!("Yaml error in file {}.\nDidn't find '{}' key.", file_name, key);
149
16.5k
            }
150
16.5k
            if prefs.as_hash().is_none() {
151
0
                bail!("Yaml error in file {}.\n'{}' key is not a dictionary. Value found is {}.",
152
0
                            file_name, key, yaml_to_string(dict, 1));
153
16.5k
            }
154
16.5k
            return Ok(());
155
16.5k
        }
156
157
41.4k
        fn add_prefs(map: &mut PreferenceHashMap, new_prefs: &Yaml, name_prefix: &str, file_name: &str) {
158
41.4k
            if new_prefs.is_badvalue() || new_prefs.is_null() || new_prefs.as_hash().is_none() {
159
0
                return;
160
41.4k
            }
161
41.4k
            let new_prefs = new_prefs.as_hash().unwrap();
162
302k
            for (yaml_name, yaml_value) in 
new_prefs41.4k
{
163
302k
                let name = as_str_checked(yaml_name);
164
302k
                if let Err(
e0
) = name {
165
0
                    error!("{}", e.context(
166
0
                        format!("name '{}' is not a string in file {}", yaml_to_string(yaml_name, 0), file_name)));
167
                } else {
168
302k
                    match yaml_value {
169
24.8k
                        Yaml::Hash(_) => add_prefs(map, yaml_value, &(name.unwrap().to_string() + "_"), file_name),
170
0
                        Yaml::Array(_) => error!("name '{}' has illegal array value {} in file '{}'",
171
0
                                                 yaml_to_string(yaml_name, 0), yaml_to_string(yaml_value, 0), file_name),
172
                        Yaml::String(_) | Yaml::Boolean(_) | Yaml::Integer(_) | Yaml::Real(_) => {
173
277k
                            let trimmed_name = name_prefix.to_string() + name.unwrap().trim();
174
277k
                            let mut yaml_value = yaml_value.to_owned();
175
277k
                            if let Some(
value236k
) = yaml_value.as_str() {
176
236k
                                yaml_value = Yaml::String(value.to_string());
177
236k
                            
}41.4k
178
277k
                            map.insert(trimmed_name, yaml_value);
179
                        },
180
0
                        _ => error!("name '{}' has illegal {:#?} value {} in file '{}'",
181
0
                                    yaml_to_string(yaml_name, 0), yaml_value, yaml_to_string(yaml_value, 0), file_name),
182
                    }
183
                }                  
184
            }
185
41.4k
        }
186
4.14k
    }
187
188
    #[allow(dead_code)]     // used in testing
189
0
    fn set_string_value(&mut self, name: &str, value: &str) {
190
0
        self.prefs.insert(name.to_string(), Yaml::String(value.trim().to_string()));
191
0
    }
192
193
    #[allow(dead_code)]     // used in testing
194
0
    fn set_bool_value(&mut self, name: &str, value: bool) {
195
0
        self.prefs.insert(name.to_string(), Yaml::Boolean(value));
196
0
    }
197
}
198
199
200
thread_local!{
201
    static DEFAULT_USER_PREFERENCES: Preferences = Preferences::user_defaults();
202
    static DEFAULT_API_PREFERENCES: Preferences = Preferences::api_defaults();
203
    static PREF_MANAGER: Rc<RefCell<PreferenceManager>> = 
204
            Rc::new( RefCell::new( PreferenceManager::default() ) );
205
206
}
207
208
/// PreferenceManager keeps track of user and api prefs along with current files
209
///
210
/// If one of the `FileAndTime` files changes while the program is running, the values will auto-update
211
/// Among other things, that means that a UI that changes a user pref will be reflected the next time someone gets speech, braille, etc.
212
//
213
// Note: I experimented with PREF_MANAGER being a Result<PreferenceManager> in the case of no rule files,
214
//   but it ended up being a mess (lots of unwrapping). Having a field is much cleaner.
215
//   Also note that if 'error' is not an empty string, SpeechRules can't work so using those requires a check.
216
#[derive(Debug, Default)]
217
pub struct PreferenceManager {
218
    rules_dir: PathBuf,                   // full path to rules dir
219
    error: String,                        // empty/default string if fields are set, otherwise error message
220
    user_prefs: Preferences,              // prefs that come from reading prefs.yaml (system and user locations)
221
    api_prefs: Preferences,               // prefs set by API calls (along with some defaults not in the user settings such as "pitch")
222
    sys_prefs_file: Option<FileAndTime>,  // the system prefs.yaml file
223
    user_prefs_file: Option<FileAndTime>, // the user prefs.yaml file
224
    intent: PathBuf,                      // the intent rule style file
225
    speech: PathBuf,                      // the speech rule style file
226
    overview: PathBuf,                    // the overview rule file
227
    navigation: PathBuf,                  // the navigation rule file
228
    speech_unicode: PathBuf,              // short unicode.yaml file
229
    speech_unicode_full: PathBuf,         // full unicode.yaml file
230
    speech_defs: PathBuf,                 // the definition.yaml file
231
    braille: PathBuf,                     // the braille rule file
232
    braille_unicode: PathBuf,             // short braille unicode file
233
    braille_unicode_full: PathBuf,        // full braille unicode file
234
    braille_defs: PathBuf,                // the definition.yaml file
235
}
236
237
238
impl fmt::Display for PreferenceManager {
239
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
240
0
        writeln!(f, "PreferenceManager:")?;
241
0
        if self.error.is_empty() {
242
0
            writeln!(f, "  not initialized!!! Error is {}", &self.error)?;
243
        } else {
244
0
            writeln!(f, "  user prefs:\n{}", self.user_prefs)?;
245
0
            writeln!(f, "  api prefs:\n{}", self.api_prefs)?;
246
0
            writeln!(f, "  style files: {:?}", self.speech.as_path())?;
247
0
            writeln!(f, "  unicode files: {:?}", self.speech_unicode.as_path())?;
248
0
            writeln!(f, "  intent files: {:?}", self.intent.as_path())?;
249
0
            writeln!(f, "  speech definition files: {:?}", self.speech_defs)?;
250
0
            writeln!(f, "  braille definition files: {:?}", self.braille_defs)?;
251
        }
252
0
        return Ok(());
253
0
    }
254
}
255
256
impl PreferenceManager {
257
    /// Initialize (the) PreferenceManager (a global var).
258
    /// 'rules_dir' is the path to "Rules" unless the env var MathCATRulesDir is set
259
    /// 
260
    /// If rules_dir is an empty PathBuf, the existing rules_dir is used (an error if it doesn't exist)
261
5.09k
    pub fn initialize(&mut self, rules_dir: PathBuf) -> Result<()> {
262
        #[cfg(not(feature = "include-zip"))]
263
5.09k
        let 
rules_dir5.09k
= match rules_dir.canonicalize() {
264
1
            Err(e) => bail!("set_rules_dir: could not canonicalize path {}: {}", rules_dir.display(), e),
265
5.09k
            Ok(rules_dir) =>  rules_dir,
266
        };
267
268
5.09k
        self.set_rules_dir(&rules_dir)
?0
;
269
5.09k
        self.set_preference_files()
?0
;
270
5.09k
        self.set_all_files(&rules_dir)
?0
;
271
5.09k
        return Ok( () );
272
        
273
5.09k
    }
274
275
60.3k
    pub fn get() -> Rc<RefCell<PreferenceManager>> {
276
60.3k
        return PREF_MANAGER.with( |pm| pm.clone() );
277
60.3k
    }
278
279
0
    pub fn get_error(&self) -> &str {
280
0
        return &self.error;
281
0
    }
282
283
    /// Return a `PreferenceHashMap` that is the merger of the api prefs onto the user prefs.
284
22.7k
    pub fn merge_prefs(&self) -> PreferenceHashMap {
285
22.7k
        let mut merged_prefs = self.user_prefs.prefs.clone();
286
22.7k
        merged_prefs.extend(self.api_prefs.prefs.clone());
287
22.7k
        return merged_prefs;
288
22.7k
    }
289
290
    /// Set the rules dir and return failure if it is a bad directory (non-existent, can't find all files, ...)
291
5.09k
    fn set_rules_dir(&mut self, rules_dir: &Path) -> Result<()> {
292
        // Fix: should make sure all files exists -- fail if not true
293
5.09k
        if !is_dir_shim(rules_dir) {
294
0
            bail!("Unable to find MathCAT Rules directory '{}'", rules_dir.to_string_lossy())
295
5.09k
        }
296
5.09k
        self.rules_dir = rules_dir.to_path_buf();
297
5.09k
        return Ok( () );
298
5.09k
    }
299
300
    /// Set the rules dir and return failure if it is a bad directory (non-existent, can't find all files, ...)
301
1
    pub fn get_rules_dir(&self) -> PathBuf {
302
        // Fix: should make sure rules_dir is set -- fail if not true
303
1
        return self.rules_dir.clone();
304
1
    }
305
306
    /// Read the preferences from the files (if not up to date) and set the preferences and preference files
307
    /// Returns failure if the files don't exist or have errors
308
20.3k
    pub fn set_preference_files(&mut self) -> Result<()> {
309
        // first, read in the preferences -- need to determine which files to read next
310
        // the prefs files are in the rules dir and the user dir; differs from other files
311
20.3k
        if self.api_prefs.prefs.is_empty() {
312
4.14k
            self.api_prefs = Preferences{ prefs: DEFAULT_API_PREFERENCES.with(|defaults| defaults.prefs.clone()) };
313
16.2k
        }
314
315
20.3k
        let should_update_system_prefs = self.sys_prefs_file.is_none() || 
!16.2k
self.sys_prefs_file16.2k
.as_ref().unwrap().is_up_to_date();
316
20.3k
        let should_update_user_prefs = self.user_prefs_file.is_none() || 
!16.2k
self.user_prefs_file16.2k
.as_ref().unwrap().is_up_to_date();
317
20.3k
        if !(should_update_system_prefs || 
should_update_user_prefs16.2k
) {
318
16.2k
            return Ok( () );            // no need to do anything else
319
4.14k
        }
320
321
4.14k
        let mut prefs = Preferences::default();
322
323
4.14k
        let mut system_prefs_file = self.rules_dir.to_path_buf();
324
4.14k
        system_prefs_file.push("prefs.yaml");
325
4.14k
        if is_file_shim(&system_prefs_file) {
326
4.14k
            let defaults = DEFAULT_USER_PREFERENCES.with(|defaults| defaults.clone());
327
4.14k
            prefs = Preferences::read_prefs_file(&system_prefs_file, defaults)
?0
;
328
4.14k
            self.sys_prefs_file = Some( FileAndTime::new_with_time(system_prefs_file.clone()) );
329
        } else {
330
0
            error!("MathCAT couldn't open file system preference file '{}'.\nUsing fallback defaults which may be inappropriate.",
331
0
                        system_prefs_file.to_str().unwrap());
332
        };
333
334
4.14k
        let mut user_prefs_file = dirs::config_dir();
335
4.14k
        if let Some(mut user_prefs_file_path_buf) = user_prefs_file {
336
4.14k
            user_prefs_file_path_buf.push("MathCAT/prefs.yaml");
337
4.14k
            if is_file_shim(&user_prefs_file_path_buf) {
338
0
                prefs = Preferences::read_prefs_file(&user_prefs_file_path_buf, prefs)?;
339
4.14k
            }
340
            // set the time otherwise keeps needing to do updates
341
4.14k
            self.user_prefs_file = Some( FileAndTime::new_with_time(user_prefs_file_path_buf.clone()) );
342
4.14k
            user_prefs_file = Some(user_prefs_file_path_buf);
343
0
        }
344
345
4.14k
        if prefs.prefs.is_empty() {
346
0
            let user_prefs_file_name = match user_prefs_file {
347
0
                None => "No user config directory".to_string(),
348
0
                Some(file) => file.to_string_lossy().to_string(),
349
            };
350
0
            bail!("Didn't find preferences in rule directory ('{}') or user directory ('{}')", &system_prefs_file.to_string_lossy(), user_prefs_file_name);
351
4.14k
        }
352
4.14k
        self.set_files_based_on_changes(&prefs)
?0
;
353
4.14k
        self.user_prefs = prefs;
354
355
        // set computed values for BLOCK_SEPARATORS and DECIMAL_SEPARATORS (a little messy about the language due immutable and mutable borrows)
356
4.14k
        let language = self.user_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone();
357
4.14k
        let language = language.as_str().unwrap();
358
4.14k
        self.set_separators(language)
?0
;
359
        
360
4.14k
        return Ok( () );
361
20.3k
    }
362
363
5.09k
    /// Locate every speech and braille rule file under `rules_dir`.
    ///
    /// Speech files are searched in `<rules_dir>/Languages` for the "Language" pref
    /// ("Auto" is treated as "en"); braille files are searched in `<rules_dir>/Braille`
    /// for the "BrailleCode" pref.
    fn set_all_files(&mut self, rules_dir: &Path) -> Result<()> {
        // the callees go through a series of fallbacks -- they try to maintain the language if possible
        let language_pref = self.pref_to_string("Language");
        let language = match language_pref.as_str() {
            "Auto" => "en",
            explicit => explicit,
        };
        let language_dir = rules_dir.join("Languages");
        self.set_speech_files(&language_dir, language, None)?; // also sets style file

        let braille_code = self.pref_to_string("BrailleCode");
        let braille_dir = rules_dir.join("Braille");
        self.set_braille_files(&braille_dir, &braille_code)
    }
377
378
9.19k
    fn set_speech_files(&mut self, language_dir: &Path, language: &str, new_speech_style: Option<&str>) -> Result<()> {
379
9.19k
        PreferenceManager::unzip_files(language_dir, language, Some("en"))
?0
;
380
9.19k
        self.intent = PreferenceManager::find_file(language_dir, language, Some("en"), "intent.yaml")
?0
;
381
9.19k
        self.overview = PreferenceManager::find_file(language_dir, language, Some("en"), "overview.yaml")
?0
;
382
9.19k
        self.navigation = PreferenceManager::find_file(language_dir, language, Some("en"), "navigate.yaml")
?0
;
383
384
9.19k
        self.speech_unicode = PreferenceManager::find_file(language_dir, language, Some("en"), "unicode.yaml")
?0
;
385
9.19k
        self.speech_unicode_full = PreferenceManager::find_file(language_dir, language, Some("en"), "unicode-full.yaml")
?0
;
386
387
9.19k
        self.speech_defs = PreferenceManager::find_file(language_dir, language, Some("en"), "definitions.yaml")
?0
;
388
389
9.19k
        match new_speech_style {
390
0
            Some(style_name) => self.set_style_file(language_dir, language, style_name)?,
391
            // use the old style name if one isn't given
392
9.19k
            None => self.set_style_file(language_dir, language, &self.pref_to_string("SpeechStyle"))
?0
,
393
        }
394
9.19k
        return Ok( () );
395
9.19k
    }
396
397
10.7k
    fn set_style_file(&mut self, language_dir: &Path, language: &str, style_file_name: &str) -> Result<()> {
398
10.7k
        let style_file_name = style_file_name.to_string() + "_Rules.yaml";
399
10.7k
        self.speech = PreferenceManager::find_file(language_dir, language, Some("en"), &style_file_name)
?0
;
400
        // debug!("set_style_file: language_dir: {}, language: {}, style_file_name: {}, self.speech: {}",
401
        //        language_dir.display(), language, style_file_name, self.speech.display());
402
10.7k
        return Ok( () );
403
10.7k
    }
404
405
5.69k
    fn set_braille_files(&mut self, braille_rules_dir: &Path, braille_code_name: &str) -> Result<()> {
406
        // Fix: Currently the braille code and the directory it lives in have to have the same name
407
5.69k
        PreferenceManager::unzip_files(braille_rules_dir, braille_code_name, Some("UEB"))
?0
;
408
409
5.69k
        let braille_file = braille_code_name.to_string() + "_Rules.yaml";
410
411
5.69k
        self.braille = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), &(braille_file))
?0
;
412
413
5.69k
        self.braille_unicode = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), "unicode.yaml")
?0
;
414
5.69k
        self.braille_unicode_full = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), "unicode-full.yaml")
?0
;
415
416
5.69k
        self.braille_defs = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), "definitions.yaml")
?0
;
417
5.69k
        return Ok( () );
418
5.69k
    }
419
420
    /// If some preferences have changed, we may need to recompute other ones
421
    /// The key prefs are Language, SpeechStyle, and BrailleCode, along with DecimalSeparator
422
4.14k
    fn set_files_based_on_changes(&mut self, new_prefs: &Preferences) -> Result<()> {
423
4.14k
        let old_language = self.user_prefs.prefs.get("Language");       // not set if first time
424
4.14k
        if old_language.is_none() {
425
4.14k
            return Ok( () );            // if "Language" isn't set yet, nothing else is either -- first time through, so no updating needed.
426
0
        }
427
428
0
        let old_language = old_language.unwrap();
429
0
        let new_language = new_prefs.prefs.get("Language").unwrap();
430
0
        debug!("set_files_based_on_changes: old_language={old_language:?}, new_language={new_language:?}");
431
0
        if old_language != new_language {
432
0
            let language_dir = self.rules_dir.to_path_buf().join("Languages");
433
0
            self.set_speech_files(&language_dir, new_language.as_str().unwrap(), None)?;  // also sets style file
434
        } else {
435
0
            let old_speech_style = self.user_prefs.prefs.get("SpeechStyle").unwrap();
436
0
            let new_speech_style = new_prefs.prefs.get("SpeechStyle").unwrap();
437
0
            let language_dir = self.rules_dir.to_path_buf().join("Languages");
438
0
            if old_speech_style != new_speech_style {
439
0
                self.set_speech_files(&language_dir, new_language.as_str().unwrap(), new_speech_style.as_str())?;
440
0
            }
441
        }
442
443
0
        let old_braille_code = self.user_prefs.prefs.get("BrailleCode").unwrap();
444
0
        let new_braille_code = new_prefs.prefs.get("BrailleCode").unwrap();
445
0
        if old_braille_code != new_braille_code {
446
0
            let braille_code_dir = self.rules_dir.to_path_buf().join("Braille");
447
0
            self.set_braille_files(&braille_code_dir, new_braille_code.as_str().unwrap())?;  // also sets style file
448
0
        }
449
450
0
        return Ok( () );
451
4.14k
    }
452
453
    /// Unzip the files if needed
454
    /// Returns true if it unzipped them
455
41.3k
    pub fn unzip_files(path: &Path, lang: &str, default_lang: Option<&str>) -> Result<bool> {
456
        thread_local!{
457
            /// when a language/braille code dir is unzipped, it is recorded here
458
            static UNZIPPED_FILES: RefCell<HashSet<String>> = RefCell::new( HashSet::with_capacity(31));
459
        }
460
        // ignore regional subdirs
461
41.3k
        let dir = PreferenceManager::get_language_dir(path, lang, default_lang)
?0
;
462
41.3k
        let language = if dir.ends_with(lang) {
lang39.5k
} else {
dir.file_name().unwrap()1.76k
.to_str().unwrap()};
463
41.3k
        let zip_file_name = language.to_string() + ".zip";
464
41.3k
        let zip_file_path = dir.join(&zip_file_name);
465
41.3k
        let zip_file_string = zip_file_path.to_string_lossy().to_string();
466
        // debug!("unzip_files: dir: {}, zip_file_name: {}, zip_file_path: {}", dir.display(), zip_file_name, zip_file_string);
467
41.3k
        if UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow().contains(&zip_file_string)) {
468
28.6k
            return Ok(false);
469
12.6k
        }
470
471
12.6k
        let 
result11.2k
= match zip_extract_shim(&dir, &zip_file_name) {
472
1.44k
            Err(e) => {
473
1.44k
                if lang.contains('-') {
474
                    // try again in parent dir of regional language
475
0
                    let language = lang.split_once('-').unwrap_or((lang, "")).0; // get the parent language
476
                    // debug!("unzip_files: trying again in parent language: {}", language);
477
0
                    PreferenceManager::unzip_files(path, language, default_lang)
478
0
                                                .with_context(|| format!("Couldn't open zip file {zip_file_string} in parent {language}: {e}."))?
479
                } else {
480
                    // maybe just regional dialects
481
1.44k
                    let mut regional_dirs = Vec::new();
482
1.44k
                    find_all_dirs_shim(&dir, &mut regional_dirs);
483
1.44k
                    for dir in regional_dirs {
484
                        // debug!("unzip_files: trying again in subdir: {}", dir.display());
485
1.44k
                        let language = format!("{}-{}", lang, dir.file_name().unwrap().to_str().unwrap());
486
1.44k
                        if let Ok(result) =PreferenceManager::unzip_files(path, &language, default_lang) {
487
1.44k
                            return Ok(result);
488
0
                        }
489
                    }
490
0
                    bail!("Couldn't open zip file {}: {}.", zip_file_string, e)
491
                }
492
            },
493
11.2k
            Ok(result) => {
494
11.2k
                result
495
            },
496
        };
497
498
11.2k
        UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow_mut().insert(zip_file_string.clone()) );
499
        // debug!("  unzip_files: unzipped {} files from {}", result, &zip_file_string);
500
        // UNZIPPED_FILES.with( |unzipped_files| {
501
        //     debug!("unzip_files: unzipped_files: {:?}", unzipped_files.borrow());
502
        // });
503
        
504
11.2k
        return Ok(result);
505
41.3k
    }
506
507
    /// Set BlockSeparators and DecimalSeparators
508
    /// FIX: changing these values could change the parse, so we really should reparse the original expr, but that doesn't exist anymore (store the original string???)
509
    ///
510
    /// Note: DecimalSeparator is user-facing (can be Auto), DecimalSeparators is code-facing (always a char)
511
8.24k
    fn set_separators(&mut self, language_country: &str) -> Result<()> {
512
        // This list was generated from https://en.wikipedia.org/wiki/Decimal_separator#Countries_using_decimal_point
513
        // The countries were then mapped to language(s) using https://en.wikipedia.org/wiki/List_of_official_languages_by_country_and_territory
514
        // When a language was used in other countries that used a "," separator, the language+country is listed 
515
        //   Sometimes there are multiple languages used in a country -- they are all listed, sometimes with a country code
516
        // The country code isn't used when the language is used in smaller countries (i.e, when "." is more likely correct)
517
        //   This decision is sometimes a bit arbitrary
518
        //   For example, Swahili (sw) is used in: Democratic Republic of the Congo, Kenya, Rwanda, Tanzania, and Uganda.
519
        //   Of these, Kenya, Tanzania, and Uganda are listed as using "." and I include Swahili in the list below.
520
        static USE_DECIMAL_SEPARATOR: phf::Set<&str> = phf_set! {
521
            "en", "bn", "km", "el-cy", "tr-cy", "zh", "es-do", "ar", "es-sv", "es-gt", "es-hn", "hi", "as", "gu", "kn", "ks",
522
            "ml", "mr", "ne", "or", "pa", "sa", "sd", "ta", "te", "ur", "he", "ja", "sw", "ko", "de-li", "ms", "dv", "mt", "es-mx", "my",
523
            "af-na", "es-ni", "es-pa", "fil", "ms-sg", "si", "th",
524
            "es-419", // latin america
525
        };
526
        
527
8.24k
        let decimal_separator = self.pref_to_string("DecimalSeparator");
528
8.24k
        if !["Auto", ",", "."].contains(&decimal_separator.as_str()) {
529
2
            return Ok( () );
530
8.24k
        }
531
532
8.24k
        if language_country == "Auto" && 
decimal_separator == "Auto"4.14k
{
533
4.14k
            return Ok( () );        // "Auto" doesn't tell us anything -- we will get called again when Language is set
534
4.10k
        }
535
536
4.10k
        let language_country = language_country.to_ascii_lowercase();
537
4.10k
        let language_country = &language_country;
538
4.10k
        let mut lang_country_split = language_country.split('-');
539
4.10k
        let language = lang_country_split.next().unwrap_or("");
540
4.10k
        let country = lang_country_split.next().unwrap_or("");
541
4.10k
        let mut use_period = decimal_separator == ".";
542
4.10k
        if decimal_separator == "Auto" {
543
            // if we don't have a match for the lang-country, then just try lang
544
4.09k
            use_period = USE_DECIMAL_SEPARATOR.contains(language_country) || 
USE_DECIMAL_SEPARATOR2.34k
.
contains2.34k
(
language2.34k
);
545
2
        }
546
        // debug!("set_separators: use_period: {}", use_period);
547
4.10k
        self.user_prefs.prefs.insert("DecimalSeparators".to_string(), Yaml::String((if use_period {
"."1.99k
} else {
","2.10k
}).to_string()));
548
4.10k
        let mut block_separators =  (if use_period {
", \u{00A0}\u{202F}"1.99k
} else {
". \u{00A0}\u{202F}"2.10k
}).to_string();
549
4.10k
        if country == "ch" || country == "li" { // Switzerland and Liechtenstein also use ` as a block separator, at least in some cases
550
0
            block_separators.push('\'');
551
4.10k
        }
552
4.10k
        self.user_prefs.prefs.insert("BlockSeparators".to_string(), Yaml::String(block_separators));
553
4.10k
        return Ok( () );
554
8.24k
    }
555
556
557
    /// Find a file matching `file_name` by starting in the regional directory and looking to the language.
558
    /// If that fails, fall back to looking for the default repeating the same process -- something needs to be found or MathCAT crashes
559
88.6k
    fn find_file(rules_dir: &Path, lang: &str, default_lang: Option<&str>, file_name: &str) -> Result<PathBuf> {
560
        // rules_dir: is the root of the search
561
        //   to that we add the language dir(s)
562
        //   if file_name doesn't exist in the language dir(s), we try to find it in the default dir
563
        //   the exception to this is if it ends with _Rules.yaml, we look for other _Rules.yaml files
564
        // returns the location of the file_name found
565
566
        // start by trying to find a dir that exists
567
88.6k
        let lang_dir = PreferenceManager::get_language_dir(rules_dir, lang, default_lang)
?0
;
568
        // now find the file name in the dirs
569
        // we start with the deepest dir and walk back to towards Rules
570
88.6k
        let mut alternative_style_file = None;      // back up in case we don't find the target style in lang_dir
571
88.6k
        let looking_for_style_file = file_name.ends_with("_Rules.yaml");
572
108k
        for os_path in 
lang_dir.ancestors()88.6k
{ // ancestor returns self and ancestors
573
108k
            let path = PathBuf::from(os_path).join(file_name);
574
            // debug!("find_file: checking file: {}", path.to_string_lossy());
575
108k
            if is_file_shim(&path) {
576
                // we make an exception for definitions.yaml -- there a language specific checks for Hundreds, etc
577
88.4k
                if !(file_name == "definitions.yaml" && 
os_path14.8k
.
ends_with14.8k
("Rules")) {
578
                    // debug!("find_file -- found={}", path.to_string_lossy());
579
88.4k
                    return Ok(path);
580
2
                }
581
19.7k
            };
582
19.7k
            if looking_for_style_file && 
alternative_style_file991
.
is_none991
() &&
583
257
               let Ok(
alt_file_path249
) = find_any_style_file(os_path) {
584
249
                    // debug!("find_file: found alternative style file '{}'", alt_file_path.display());
585
249
                    alternative_style_file = Some(alt_file_path);
586
19.4k
                }
587
19.7k
            if os_path.ends_with("Rules") {
588
                // at root of Rules directory
589
256
                break;
590
19.4k
            }
591
        }
592
593
594
256
        if let Some(
result248
) = alternative_style_file {
595
            // debug!("find_file: found alternative_style_file '{}'", result.to_string_lossy());
596
248
            return Ok(result);     // found an alternative style file in the same lang dir
597
8
        }
598
599
        // try a subdir (regional dialect) of the language dir
600
8
        let mut regional_dirs = Vec::new();
601
8
        find_all_dirs_shim(&lang_dir, &mut regional_dirs);
602
8
        for dir in regional_dirs {
603
            // debug!("find_file: trying again in subdir: {}", dir.display());
604
            // debug!(" ... files found = {:?}", find_files_in_dir_that_ends_with_shim(&dir, file_name));
605
8
            if find_files_in_dir_that_ends_with_shim(&dir, ".yaml").contains(&file_name.to_string()) {
606
0
                let path = dir.join(file_name);
607
0
                if is_file_shim(&path) {
608
0
                    return Ok(path);
609
0
                }
610
8
            }
611
        }
612
613
8
        if let Some(default_lang) = default_lang {
614
            // try again with the default language (we're likely in trouble)
615
8
            return PreferenceManager::find_file(rules_dir, default_lang, None, file_name);
616
0
        }
617
        
618
        // We are done for -- MathCAT can't do anything without the required files!
619
0
        bail!("Wasn't able to find/read MathCAT required file in directory: {}\n\
620
               Initially looked in there for language specific directory: {}\n\
621
               Looking for file: {}",
622
0
            rules_dir.to_str().unwrap(), lang, file_name);
623
624
625
        /// try to find a xxx_Rules.yaml file -- returns an error if none is found ()
626
257
        fn find_any_style_file(path: &Path) -> Result<PathBuf> {    
627
            // try to find a xxx_Rules.yaml file
628
            // we find the first file because this is the deepest (most language specific) speech rule file
629
257
            let rule_files = find_files_in_dir_that_ends_with_shim(path, "_Rules.yaml");
630
257
            if rule_files.is_empty() {
631
8
                bail!{"didn't find file"};
632
            } else {
633
249
                return Ok( path.join(rule_files[0].clone()) );
634
            }
635
257
        }
636
88.6k
    }
637
638
129k
    fn get_language_dir(rules_dir: &Path, lang: &str, default_lang: Option<&str>) -> Result<PathBuf> {
639
        // return 'Rules/Language/fr', 'Rules/Language/en/gb', etc, if they exist.
640
        // fall back to main language, and then to default_dir if language dir doesn't exist
641
129k
        let mut full_path = rules_dir.to_path_buf();
642
129k
        full_path.push(lang.replace('-', std::path::MAIN_SEPARATOR_STR));
643
130k
        for parent in 
full_path.ancestors()129k
{
644
130k
            if parent == rules_dir {
645
0
                break;
646
130k
            } else if is_dir_shim(parent) {
647
129k
                return Ok(parent.to_path_buf());
648
24
            }
649
        }
650
651
        // didn't find the language -- try again with the default language
652
0
        match default_lang {
653
0
            Some(default_lang) => {
654
0
                warn!("Couldn't find rules for language {lang}, ");
655
0
                return PreferenceManager::get_language_dir(rules_dir, default_lang, None);
656
            },
657
            None => {
658
                // We are done for -- MathCAT can't do anything without the required files!
659
0
                bail!("Wasn't able to find/read directory for language {}\n
660
                        Wasn't able to find/read MathCAT default language directory: {}",
661
0
                        lang, rules_dir.join(default_lang.unwrap_or("")).as_os_str().to_str().unwrap());
662
            }
663
        }
664
129k
    }
665
666
    
667
    /// Return the speech rule style file locations.
668
15.3k
    pub fn get_rule_file(&self, name: &RulesFor) -> &Path {
669
15.3k
        if !self.error.is_empty() {
670
0
            panic!("Internal error: get_rule_file called on invalid PreferenceManager -- error message\n{}", &self.error);
671
15.3k
        };
672
673
15.3k
        let files = match name {
674
3.88k
            RulesFor::Intent => &self.intent,
675
9.03k
            RulesFor::Speech => &self.speech,
676
14
            RulesFor::OverView => &self.overview,
677
549
            RulesFor::Navigation => &self.navigation,
678
1.83k
            RulesFor::Braille => &self.braille,
679
        };
680
15.3k
        return files.as_path();
681
15.3k
    }
682
683
    /// Return the unicode.yaml file locations.
684
18.9k
    pub fn get_speech_unicode_file(&self) ->(&Path, &Path) {
685
18.9k
        if !self.error.is_empty() {
686
0
            panic!("Internal error: get_speech_unicode_file called on invalid PreferenceManager -- error message\n{}", &self.error);
687
18.9k
        };
688
18.9k
        return (self.speech_unicode.as_path(), self.speech_unicode_full.as_path());
689
18.9k
    }
690
691
    /// Return the unicode.yaml file locations.
692
3.92k
    pub fn get_braille_unicode_file(&self) -> (&Path, &Path) {
693
3.92k
        if !self.error.is_empty() {
694
0
            panic!("Internal error: get_braille_unicode_file called on invalid PreferenceManager -- error message\n{}", &self.error);
695
3.92k
        };
696
697
3.92k
        return (self.braille_unicode.as_path(), self.braille_unicode_full.as_path());
698
3.92k
    }
699
700
    /// Return the definitions.yaml file locations.
701
15.3k
    pub fn get_definitions_file(&self, use_speech_defs: bool) -> &Path {
702
15.3k
        if !self.error.is_empty() {
703
0
            panic!("Internal error: get_definitions_file called on invalid PreferenceManager -- error message\n{}", &self.error);
704
15.3k
        };
705
706
15.3k
        let defs_file = if use_speech_defs {
&self.speech_defs13.4k
} else {
&self.braille_defs1.83k
};
707
15.3k
        return defs_file;
708
15.3k
    }
709
710
    /// Return the TTS engine currently in use.
711
85.4k
    pub fn get_tts(&self) -> TTS {
712
85.4k
        if !self.error.is_empty() {
713
0
            panic!("Internal error: get_tts called on invalid PreferenceManager -- error message\n{}", &self.error);
714
85.4k
        };
715
716
85.4k
        return match self.pref_to_string("TTS").as_str().to_ascii_lowercase().as_str() {
717
85.4k
            "none" => TTS::None,
718
0
            "ssml" => TTS::SSML,
719
0
            "sapi5" => TTS::SAPI5,
720
            _ => {
721
0
                warn!("found unknown value for TTS: '{}'", self.pref_to_string("TTS").as_str());
722
0
                TTS::None
723
            }
724
        }
725
85.4k
    }
726
727
    /// Set the string-valued preference.
728
    /// 
729
    /// Note: changing the language, speech style, or braille code might fail if the files don't exist.
730
    ///   If this happens, the preference is not set and an error is returned.
731
    /// If "LanguageAuto" is set, we assume "Language" has already be checked to be "Auto"
732
16.2k
    pub fn set_string_pref(&mut self, key: &str, value: &str) -> Result<()> {
733
16.2k
        if !self.error.is_empty() {
734
0
            panic!("Internal error: set_string_pref called on invalid PreferenceManager -- error message\n{}", &self.error);
735
16.2k
        };
736
737
        // verify language, braille, and SpeechStyle because these are used as access into the file system
738
        // should be an ascii string with only letters, dashes, and underscores
739
16.2k
        if 
matches!9.97k
(key, "Language" |
"BrailleCode"11.2k
|
"SpeechStyle"9.85k
) &&
740
55.9k
           !
value.chars()9.97k
.
all9.97k
(|c| matches!(c,
'a'..='z'45.9k
|
'A'..='Z'9.63k
| '_' | '-')) {
741
3
            bail!("{} is an invalid value! Must contains only ascii letters, '_', or'-'", key);
742
16.2k
        }
743
        
744
        // don't do an update if the value hasn't changed
745
16.2k
        let mut is_user_pref = true;
746
16.2k
        if let Some(
pref_value61
) = self.api_prefs.prefs.get(key) {
747
61
            if pref_value.as_str().unwrap() != value {
748
59
                is_user_pref = false;
749
59
                self.reset_files_from_preference_change(key, value)
?0
;
750
2
            }
751
16.1k
        } else if let Some(pref_value) = self.user_prefs.prefs.get(key) {
752
16.1k
            if pref_value.as_str().unwrap() != value {
753
8.77k
                self.reset_files_from_preference_change(key, value)
?0
;
754
7.40k
            }
755
        } else {
756
0
            bail!("{} is an unknown MathCAT preference!", key);
757
        }
758
759
        // debug!("Setting ({}) {} to '{}'", if is_user_pref {"user"} else {"sys"}, key, value);
760
16.2k
        if is_user_pref {
761
            // a little messy about the DecimalSeparator due immutable and mutable borrows
762
16.1k
            let current_decimal_separator = self.user_prefs.prefs.get("DecimalSeparator").unwrap().clone();
763
16.1k
            let current_decimal_separator = current_decimal_separator.as_str().unwrap();
764
16.1k
            let is_decimal_separators_changed = key == "DecimalSeparator" && 
current_decimal_separator != value1.35k
;
765
16.1k
            let is_language_changed = key == "Language" && 
self.user_prefs.prefs5.02k
.
get5.02k
("Language").unwrap().as_str().unwrap() != value;
766
16.1k
            self.user_prefs.prefs.insert(key.to_string(), Yaml::String(value.to_string()));
767
16.1k
            if is_decimal_separators_changed || (current_decimal_separator == "Auto" && is_language_changed) {
768
                // a little messy about the language due immutable and mutable borrows)
769
4.08k
                let language = self.user_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone();
770
4.08k
                let language = language.as_str().unwrap();
771
4.08k
                self.set_separators(language)
?0
;
772
12.0k
            }
773
59
        } else {
774
59
            self.api_prefs.prefs.insert(key.to_string(), Yaml::String(value.to_string()));
775
59
        }
776
16.2k
        return Ok( () );
777
16.2k
    }
778
779
30.2k
    fn reset_files_from_preference_change(&mut self, changed_pref: &str, changed_value: &str) -> Result<()> {       
780
30.2k
        if changed_pref == "Language" && 
changed_value == "Auto"4.09k
{
781
            // Language must have had a non-Auto value -- set LanguageAuto to old value so (probable) next change to LanguageAuto works well
782
0
            self.api_prefs.prefs.insert("LanguageAuto".to_string(),
783
0
                                self.api_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone() );
784
0
            return Ok( () );
785
30.2k
        }
786
787
30.2k
        let changed_pref = if changed_pref == "LanguageAuto" {
"Language"0
} else {changed_pref};
788
30.2k
        let language_dir = self.rules_dir.to_path_buf().join("Languages");
789
30.2k
        match changed_pref {
790
30.2k
            "Language" => {
791
4.09k
                self.set_speech_files(&language_dir, changed_value, None)
?0
792
            },
793
26.1k
            "SpeechStyle" => {
794
1.51k
                let language = self.pref_to_string("Language");
795
1.51k
                let language = if language.as_str() == "Auto" {
"en"62
} else {
language.as_str()1.45k
}; // avoid 'temp value dropped while borrowed' error
796
1.51k
                self.set_style_file(&language_dir, language, changed_value)
?0
797
            },
798
24.6k
            "BrailleCode" => {
799
601
                let braille_dir = self.rules_dir.to_path_buf().join("Braille");
800
601
                self.set_braille_files(&braille_dir, changed_value)
?0
801
            },
802
24.0k
            _ => (),
803
        }
804
30.2k
        return Ok( () );
805
30.2k
    }
806
807
    /// Set the number-valued preference.
808
    /// All number-valued preferences are stored with type `f64`.
809
0
    pub fn set_api_float_pref(&mut self, key: &str, value: f64) {
810
0
        if !self.error.is_empty() {
811
0
            panic!("Internal error: set_api_float_pref called on invalid PreferenceManager -- error message\n{}", &self.error);
812
0
        };
813
814
0
        self.api_prefs.prefs.insert(key.to_string(), Yaml::Real(value.to_string()));
815
0
    }
816
817
1.50k
    pub fn set_api_boolean_pref(&mut self, key: &str, value: bool) {
818
1.50k
        if !self.error.is_empty() {
819
0
            panic!("Internal error: set_api_boolean_pref called on invalid PreferenceManager -- error message\n{}", &self.error);
820
1.50k
        };
821
822
1.50k
        self.api_prefs.prefs.insert(key.to_string(), Yaml::Boolean(value));
823
1.50k
    }
824
825
    /// Return the current speech rate.
826
0
    pub fn get_rate(&self) -> f64 {
827
0
        if !self.error.is_empty() {
828
0
            panic!("Internal error: get_rate called on invalid PreferenceManager -- error message\n{}", &self.error);
829
0
        };
830
831
0
        return match &self.pref_to_string("Rate").parse::<f64>() {
832
0
            Ok(val) => *val,
833
            Err(_) => {
834
0
                warn!("Rate ('{}') can't be converted to a floating point number", &self.pref_to_string("Rate"));
835
0
                DEFAULT_API_PREFERENCES.with(|defaults| defaults.prefs["Rate"].as_f64().unwrap())
836
            }
837
        };
838
0
    }
839
840
0
    pub fn get_api_prefs(&self) -> &Preferences {
841
0
        return &self.api_prefs;
842
0
    }
843
844
    /// returns value associated with 'name' or string NO_PREFERENCE
845
    /// 
846
    /// Note: Option/Result not used because most of the time we know the preference exists, so no unwrapping is needed for 95% of calls
847
268k
    pub fn pref_to_string(&self, name: &str) -> String {
848
268k
        let mut value = self.api_prefs.prefs.get(name);
849
268k
        if value.is_none() {
850
122k
            value = self.user_prefs.prefs.get(name);
851
145k
        }
852
268k
        return match value {
853
11
            None => NO_PREFERENCE.to_string(),
854
268k
            Some(v) => match v {
855
236k
                Yaml::String(s) => s.clone(),
856
27.3k
                Yaml::Boolean(b)   => b.to_string(),
857
4.23k
                Yaml::Integer(i)    => i.to_string(),
858
0
                Yaml::Real(s) => s.clone(),
859
0
                _  => NO_PREFERENCE.to_string(),       // shouldn't happen
860
            }
861
        }
862
268k
    }
863
864
    // occasionally useful to check a pref value when debugging
865
    // fn get_pref(&self, pref_name: &str) -> String {
866
    //     return yaml_to_string(self.user_prefs.prefs.get(pref_name).unwrap(), 1);
867
    // }
868
869
    /// Warning!!! This is meant for testing only -- it overwrites any values from a user pref file and will be overwritten if the file is reread.
870
    ///  set_preference() is the function that should be called.
871
    /// This differs from set_preference in that the user preferences are changed, not the api ones
872
21.4k
    pub fn set_user_prefs(&mut self, key: &str, value: &str) -> Result<()> {
873
21.4k
        if !self.error.is_empty() {
874
0
            panic!("Internal error: set_user_prefs called on invalid PreferenceManager -- error message\n{}", &self.error);
875
21.4k
        };
876
        
877
21.4k
        self.reset_files_from_preference_change(key, value)
?0
;
878
21.4k
        let is_decimal_separators_changed = key == "DecimalSeparator" && 
self.user_prefs.prefs3.46k
.
get3.46k
("DecimalSeparator").unwrap().as_str().unwrap() != value;
879
21.4k
        let is_language_changed = key == "Language" && 
self.user_prefs.prefs13
.
get13
("Language").unwrap().as_str().unwrap() != value;
880
21.4k
        self.user_prefs.prefs.insert(key.to_string(), Yaml::String(value.to_string()));
881
21.4k
        if is_decimal_separators_changed || 
is_language_changed21.4k
{
882
            // set computed values for BLOCK_SEPARATORS and DECIMAL_SEPARATORS (a little messy about the language due immutable and mutable borrows)
883
17
            let language = self.user_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone();
884
17
            let language = language.as_str().unwrap();
885
17
            self.set_separators(language)
?0
;
886
21.3k
        }
887
888
21.4k
        return Ok(());
889
21.4k
    }
890
}
891
892
893
#[cfg(test)]
894
mod tests {
895
    #[allow(unused_imports)]
896
    use crate::init_logger;
897
898
    // For these tests, it is assumed that there are Rules subdirs zz and zz/aa dir; there is no zz/ab
899
    // definitions.yaml is in Rules, zz, aa dirs
900
    // unicode.yaml is in zz
901
    // ClearSpeak_Rules.yaml is in zz
902
    // These files are NOT in the zipped up version -- hence the config
903
    use super::*;
904
905
    /// Version of abs_rules_dir_path that returns a PathBuf
906
11
    fn abs_rules_dir_path() -> PathBuf {
907
11
        return PathBuf::from(super::super::abs_rules_dir_path());
908
11
    }
909
    /// Return a relative path to Rules dir (ie, .../Rules/zz... returns zz/...)
910
    /// strip .../Rules from file path
911
33
    fn rel_path<'a>(rules_dir: &'a Path, path: &'a Path) -> &'a Path {
912
33
        let stripped_path = path.strip_prefix(rules_dir).unwrap();
913
33
        return stripped_path
914
33
    }
915
916
    #[test]
917
1
    fn separators() {
918
1
        PREF_MANAGER.with(|pref_manager| {
919
1
            let mut pref_manager = pref_manager.borrow_mut();
920
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
921
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
922
1
            pref_manager.set_user_prefs("DecimalSeparator", "Auto").unwrap();
923
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
924
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
925
926
1
            pref_manager.set_user_prefs("Language", "sv").unwrap();
927
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
928
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ". \u{00A0}\u{202F}");
929
930
            // test potentially ambiguous language (defaults to comma decimal separator)
931
1
            pref_manager.set_user_prefs("Language", "es").unwrap();
932
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
933
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ". \u{00A0}\u{202F}");
934
935
            // test country override
936
1
            pref_manager.set_user_prefs("Language", "es-mx").unwrap();
937
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
938
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
939
940
1
            pref_manager.set_user_prefs("DecimalSeparator", ",").unwrap();
941
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
942
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ". \u{00A0}\u{202F}");
943
944
1
            pref_manager.set_user_prefs("DecimalSeparator", ".").unwrap();
945
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
946
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
947
948
            // set to illegal value -- should leave values as before
949
1
            pref_manager.set_user_prefs("DecimalSeparator", ";").unwrap();
950
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
951
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
952
953
            // manual
954
1
            pref_manager.set_user_prefs("DecimalSeparators", ",").unwrap();
955
1
            pref_manager.set_user_prefs("BlockSeparators", " ").unwrap();
956
1
            pref_manager.set_user_prefs("DecimalSeparator", "None").unwrap();
957
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
958
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), " ");
959
1
        });
960
1
    }
961
962
    #[test]
963
1
    fn find_simple_style() {
964
1
        PREF_MANAGER.with(|pref_manager| {
965
1
            let mut pref_manager = pref_manager.borrow_mut();
966
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
967
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
968
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
969
1
            assert_eq!(&pref_manager.pref_to_string("Language"), "en");
970
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "ClearSpeak");
971
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/en/ClearSpeak_Rules.yaml"));
972
1
        });
973
1
    }
974
975
cfg_if::cfg_if! {if #[cfg(not(feature = "include-zip"))] {  
976
    #[test]
977
1
    fn find_style_other_language() {
978
        // zz dir should have both ClearSpeak and SimpleSpeak styles
979
        // zz-aa dir should have only ClearSpeak style and unicode.yaml that includes the zz unicode but overrides "+"
980
1
        PREF_MANAGER.with(|pref_manager| {
981
1
            let mut pref_manager = pref_manager.borrow_mut();
982
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
983
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
984
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
985
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/en/SimpleSpeak_Rules.yaml"));
986
987
1
            pref_manager.set_user_prefs("Language", "zz").unwrap();
988
1
            assert_eq!(&pref_manager.pref_to_string("Language"), "zz");
989
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "SimpleSpeak");
990
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
991
992
            // make sure language stays the same
993
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
994
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "ClearSpeak");
995
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
996
997
            // make sure language stays the same
998
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
999
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "SimpleSpeak");
1000
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
1001
1
        });
1002
1
    }
1003
1004
    #[test]
1005
1
    fn find_regional_overrides() {
1006
        // zz dir should have both ClearSpeak and SimpleSpeak styles
1007
        // zz-aa dir should have ClearSpeak style and unicode.yaml that includes the zz unicode but overrides "+"
1008
1
        PREF_MANAGER.with(|pref_manager| {
1009
1
            let mut pref_manager = pref_manager.borrow_mut();
1010
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1011
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1012
1
            pref_manager.set_user_prefs("Language", "zz-aa").unwrap();
1013
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/aa/ClearSpeak_Rules.yaml"));
1014
1015
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
1016
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
1017
1
        });
1018
1
    }
1019
1020
    #[test]
1021
1
    fn find_style_no_sublanguage() {
1022
1
        PREF_MANAGER.with(|pref_manager| {
1023
1
            let mut pref_manager = pref_manager.borrow_mut();
1024
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1025
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1026
1
            pref_manager.set_user_prefs("Language", "zz-ab").unwrap();
1027
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1028
1
        });
1029
1
    }
1030
1031
    #[test]
1032
1
    fn found_all_files() {
1033
1
        PREF_MANAGER.with(|pref_manager| {
1034
1
            let mut pref_manager = pref_manager.borrow_mut();
1035
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1036
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1037
1
            pref_manager.set_user_prefs("Language", "zz-aa").unwrap();
1038
1
            pref_manager.set_user_prefs("BrailleCode", "UEB").unwrap();
1039
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.intent.as_path()), PathBuf::from("intent.yaml"));
1040
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.overview.as_path()), PathBuf::from("Languages/zz/overview.yaml"));
1041
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_defs.as_path()), PathBuf::from("Languages/zz/aa/definitions.yaml"));
1042
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/aa/ClearSpeak_Rules.yaml"));
1043
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode.as_path()), PathBuf::from("Languages/zz/aa/unicode.yaml"));
1044
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode_full.as_path()), PathBuf::from("Languages/zz/unicode-full.yaml"));
1045
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille.as_path()), PathBuf::from("Braille/UEB/UEB_Rules.yaml"));
1046
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode.as_path()), PathBuf::from("Braille/UEB/unicode.yaml"));
1047
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode_full.as_path()), PathBuf::from("Braille/UEB/unicode-full.yaml"));
1048
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_defs.as_path()), PathBuf::from("Braille/UEB/definitions.yaml"));
1049
    
1050
1
            pref_manager.set_user_prefs("Language", "zz-ab").unwrap();
1051
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.intent.as_path()), PathBuf::from("intent.yaml"));
1052
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.overview.as_path()), PathBuf::from("Languages/zz/overview.yaml"));
1053
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_defs.as_path()), PathBuf::from("Languages/zz/definitions.yaml"));
1054
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1055
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode.as_path()), PathBuf::from("Languages/zz/unicode.yaml"));
1056
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode_full.as_path()), PathBuf::from("Languages/zz/unicode-full.yaml"));
1057
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille.as_path()), PathBuf::from("Braille/UEB/UEB_Rules.yaml"));
1058
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode.as_path()), PathBuf::from("Braille/UEB/unicode.yaml"));
1059
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode_full.as_path()), PathBuf::from("Braille/UEB/unicode-full.yaml"));
1060
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_defs.as_path()), PathBuf::from("Braille/UEB/definitions.yaml"));
1061
1
        })
1062
1
    }
1063
1064
    #[test]
1065
1
    fn test_prefs() {
1066
1
        PREF_MANAGER.with(|pref_manager| {
1067
            // first test with internal settings
1068
            {
1069
1
                let mut pref_manager = pref_manager.borrow_mut();
1070
1
                pref_manager.initialize(abs_rules_dir_path()).unwrap();
1071
    
1072
1
                pref_manager.set_user_prefs("Language", "en").unwrap();
1073
1
                pref_manager.set_user_prefs("ClearSpeak_AbsoluteValue", "Determinant").unwrap();
1074
1
                pref_manager.set_user_prefs("ResetNavMode", "true").unwrap();
1075
1
                pref_manager.set_user_prefs("BrailleCode", "Nemeth").unwrap();
1076
1
                assert_eq!(pref_manager.pref_to_string("Language").as_str(), "en");
1077
1
                assert_eq!(pref_manager.pref_to_string("SubjectArea").as_str(), "General");
1078
1
                assert_eq!(pref_manager.pref_to_string("ClearSpeak_AbsoluteValue").as_str(), "Determinant");
1079
1
                assert_eq!(pref_manager.pref_to_string("ResetNavMode").as_str(), "true");
1080
1
                assert_eq!(pref_manager.pref_to_string("BrailleCode").as_str(), "Nemeth");
1081
1
                assert_eq!(pref_manager.pref_to_string("X_Y_Z").as_str(), NO_PREFERENCE);
1082
            }
1083
1084
            // now test with the interface
1085
            {
1086
                use crate::interface::{set_preference, get_preference};
1087
1
                set_preference("Language", "zz").unwrap();
1088
1
                set_preference("ClearSpeak_AbsoluteValue", "Cardinality").unwrap();
1089
1
                set_preference("Overview", "true").unwrap();
1090
1
                set_preference("BrailleCode", "UEB").unwrap();
1091
1
                assert_eq!(&get_preference("Language").unwrap(), "zz");
1092
1
                assert_eq!(&get_preference("ClearSpeak_AbsoluteValue").unwrap(), "Cardinality");
1093
1
                assert_eq!(&get_preference("Overview").unwrap(), "true");
1094
1
                assert_eq!(&get_preference("BrailleCode").unwrap(), "UEB");
1095
1
                assert!(&get_preference("X_Y_Z").is_err());
1096
1097
            }
1098
1
        });
1099
1
    }
1100
1101
    #[test]
1102
1
    fn test_language_change() {
1103
        // set_preference borrows the pref manager, so the previous borrow's lifetime needs to be ended before using it
1104
1
        PREF_MANAGER.with(|pref_manager| {
1105
1
            let mut pref_manager = pref_manager.borrow_mut();
1106
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1107
1
        });
1108
1
        crate::interface::set_preference("Language", "en").unwrap();
1109
1
        crate::interface::set_preference("SpeechStyle", "ClearSpeak").unwrap();
1110
1
        PREF_MANAGER.with(|pref_manager| {
1111
1
            let pref_manager = pref_manager.borrow_mut();
1112
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/en/ClearSpeak_Rules.yaml"));
1113
1
        });
1114
1115
1
        crate::interface::set_preference("Language", "zz").unwrap();
1116
1
        PREF_MANAGER.with(|pref_manager| {
1117
1
            let pref_manager = pref_manager.borrow_mut();
1118
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1119
1
        });
1120
1
    }
1121
    
1122
    #[test]
1123
1
    fn test_speech_style_change() {
1124
1
        PREF_MANAGER.with(|pref_manager| {
1125
1
            let mut pref_manager = pref_manager.borrow_mut();
1126
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1127
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
1128
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1129
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/en/ClearSpeak_Rules.yaml"));
1130
1131
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
1132
            
1133
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/en/SimpleSpeak_Rules.yaml"));
1134
1
        });
1135
1
    }
1136
1137
    #[test]
1138
1
    fn test_some_changes() {
1139
1
        PREF_MANAGER.with(|pref_manager| {
1140
1
            let mut pref_manager = pref_manager.borrow_mut();
1141
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1142
1
            pref_manager.set_user_prefs("Verbosity", "Terse").unwrap();
1143
1144
1
            assert_eq!(&pref_manager.pref_to_string("Verbosity"), "Terse");
1145
1146
1
            pref_manager.set_user_prefs("BrailleCode", "UEB").unwrap();
1147
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Braille)), PathBuf::from("Braille/UEB/UEB_Rules.yaml"));
1148
1149
            // make sure they show up when building context for speech generation
1150
1
            let merged_prefs = pref_manager.merge_prefs();
1151
1
            assert_eq!(merged_prefs.get("Verbosity").unwrap().as_str().unwrap(), "Terse");
1152
1
        });
1153
1154
1
        crate::interface::set_preference("NavVerbosity", "Terse").unwrap();
1155
1
        PREF_MANAGER.with(|pref_manager| {
1156
1
            let pref_manager = pref_manager.borrow_mut();
1157
1
            let merged_prefs = pref_manager.merge_prefs();
1158
1
            assert_eq!(merged_prefs.get("NavVerbosity").unwrap().as_str().unwrap(), "Terse");
1159
1
        });
1160
1
    }
1161
1162
    #[test]
1163
1
    fn test_illegal_pref_values() {
1164
1
        PREF_MANAGER.with(|pref_manager| {
1165
1
            let mut pref_manager = pref_manager.borrow_mut();
1166
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1167
1
            assert!(pref_manager.set_string_pref("Language", "../../../my/path").is_err());
1168
1
            assert!(pref_manager.set_string_pref("BrailleCode", "C:\\my\\path").is_err());
1169
1
            assert!(pref_manager.set_string_pref("SpeechStyle", "/my/path").is_err());
1170
1
        });
1171
1
    }
1172
1173
    #[test]
1174
    #[ignore]   // this is an ugly test for #262 -- it changes the prefs file and so is a bad thing in general
1175
0
    fn test_up_to_date() {
1176
        use std::fs;
1177
        use std::thread::sleep;
1178
        use std::time::Duration;
1179
        use crate::interface;
1180
0
        PREF_MANAGER.with(|pref_manager| {
1181
0
            let mut pref_manager = pref_manager.borrow_mut();
1182
0
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1183
0
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "ClearSpeak");
1184
0
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1185
0
        });
1186
0
        interface::set_mathml("<math><mo>+</mo><mn>10</mn></math>").unwrap();
1187
0
        assert_eq!(interface::get_spoken_text().unwrap(), "ClearSpeak positive from zz 10");
1188
        
1189
0
        let mut file_path = PathBuf::default();
1190
0
        let mut contents = vec![];
1191
0
        PREF_MANAGER.with(|pref_manager| {
1192
0
            let pref_manager = pref_manager.borrow();
1193
0
            if let Some(file_name) = pref_manager.user_prefs_file.as_ref().unwrap().debug_get_file() {
1194
0
                file_path = PathBuf::from(file_name);
1195
0
                contents = fs::read(&file_path).expect(&format!("Failed to write file {} during test", file_name));
1196
0
                let changed_contents = String::from_utf8(contents.clone()).unwrap()
1197
0
                                .replace("SpeechStyle: ClearSpeak", "SpeechStyle: SimpleSpeak");
1198
0
                fs::write(&file_path, changed_contents).unwrap();
1199
0
                sleep(Duration::from_millis(5));  // make sure the time changes enough to be recognized
1200
0
            }
1201
0
        });
1202
0
        assert_eq!(interface::get_spoken_text().unwrap(), "SimpleSpeak positive from zz 10");
1203
0
        fs::write(&file_path, contents).unwrap();
1204
1205
                // assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "SimpleSpeak");
1206
                // assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
1207
0
    }
1208
1209
}}
1210
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/prefs.rs
Line
Count
Source
1
//! Preferences come from either the user or are programmatically set by the AT.
2
//! Either source can set any preference, but users and AT typically set different preferences.
3
//!
4
//! User prefs are read in from a YAML file (prefs.yaml). They can be written by hand.
5
//! In the future, there will hopefully be a nice UI that writes out the YAML file.
6
//!
7
//! AT prefs are set via the API given in the [crate::interface] module.
8
//! These in turn call [`PreferenceManager::set_string_pref`] and [`PreferenceManager::set_api_float_pref`].
9
//! Ultimately, user and api prefs are stored in a hashmap.
10
//!
11
//! Preferences can be found in a few places:
12
//! 1. Language-independent prefs found in the Rules dir
13
//! 2. Language-specific prefs
14
//! 3. Language-region-specific prefs
15
//! 
16
//! If there are multiple definitions, the later ones overwrite the former ones.
17
//! This means that region-specific variants will overwrite more general variants.
18
//!
19
//! Note: there are a number of public 'get_xxx' functions that really are meant to be public only to the [crate::speech] module as speech needs access
20
//! to the preferences to generate the speech.
21
#![allow(clippy::needless_return)]
22
use yaml_rust::{Yaml, YamlLoader};
23
use crate::pretty_print::yaml_to_string;
24
use crate::tts::TTS;
25
use std::cell::RefCell;
26
use std::rc::Rc;
27
use log::{debug, error, warn};
28
use std::path::{Path, PathBuf};
29
use std::sync::LazyLock;
30
use crate::speech::{as_str_checked, RulesFor, FileAndTime};
31
use std::collections::{HashMap, HashSet};
32
use phf::phf_set;
33
use crate::shim_filesystem::*;
34
use crate::errors::*;
35
36
/// Use to indicate preference not found with Preference::to_string()
37
pub static NO_PREFERENCE: &str = "\u{FFFF}";
38
39
3
static DEFAULT_LANG: LazyLock<Yaml> = LazyLock::new(|| Yaml::String("en".to_string()));
40
41
42
// Preferences are recorded here
43
/// Preferences are stored in a HashMap. It maps the name of the pref (a String) to its value (stored as YAML string/float)
44
pub type PreferenceHashMap = HashMap<String, Yaml>;
45
#[derive(Debug, Clone, Default)]
46
pub struct Preferences {
47
    prefs: PreferenceHashMap        // FIX: pub so can get at iterator, should add iterator to Preferences instead
48
}
49
50
use std::fmt;
51
impl fmt::Display for Preferences {
52
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
53
0
        let mut pref_vec: Vec<(&String, &Yaml)> = self.prefs.iter().collect();
54
0
        pref_vec.sort();
55
0
        for (name, value) in pref_vec {
56
0
            writeln!(f, "    {}: {}", name, yaml_to_string(value, 0))?;
57
        }
58
0
        return Ok(());
59
0
    }
60
}
61
62
impl Preferences{
63
    // default values needed in case nothing else gets set 
64
4.14k
    fn user_defaults() -> Preferences {
65
4.14k
        let mut prefs = PreferenceHashMap::with_capacity(39);
66
4.14k
        prefs.insert("Language".to_string(), Yaml::String("en".to_string()));
67
4.14k
        prefs.insert("LanguageAuto".to_string(), Yaml::String("".to_string()));     // illegal value so change will be recognized
68
4.14k
        prefs.insert("SpeechStyle".to_string(), Yaml::String("ClearSpeak".to_string()));
69
4.14k
        prefs.insert("Verbosity".to_string(), Yaml::String("Medium".to_string()));
70
4.14k
        prefs.insert("SpeechOverrides_CapitalLetters".to_string(), Yaml::String("".to_string())); // important for testing
71
4.14k
        prefs.insert("Blind".to_string(), Yaml::Boolean(true));
72
4.14k
        prefs.insert("MathRate".to_string(), Yaml::Real("100.0".to_string()));
73
4.14k
        prefs.insert("PauseFactor".to_string(), Yaml::Real("100.0".to_string()));
74
4.14k
        prefs.insert("NavMode".to_string(), Yaml::String("Enhanced".to_string()));
75
4.14k
        prefs.insert("Overview".to_string(), Yaml::Boolean(false));
76
4.14k
        prefs.insert("ResetOverView".to_string(), Yaml::Boolean(true));
77
4.14k
        prefs.insert("NavVerbosity".to_string(), Yaml::String("Verbose".to_string()));
78
4.14k
        prefs.insert("AutoZoomOut".to_string(), Yaml::Boolean(true));
79
4.14k
        prefs.insert("BrailleCode".to_string(), Yaml::String("Nemeth".to_string()));
80
4.14k
        prefs.insert("BrailleNavHighlight".to_string(), Yaml::String("EndPoints".to_string()));
81
4.14k
        prefs.insert("UEB_START_MODE".to_string(), Yaml::String("Grade2".to_string()));
82
4.14k
        prefs.insert("DecimalSeparators".to_string(), Yaml::String(".".to_string()));
83
4.14k
        prefs.insert("BlockSeparators".to_string(), Yaml::String(", \u{00A0}\u{202F}".to_string()));
84
    
85
4.14k
        return Preferences{ prefs };
86
4.14k
    }
87
88
    // default values needed in case nothing else gets set 
89
4.14k
    fn api_defaults() -> Preferences {
90
4.14k
        let mut prefs = PreferenceHashMap::with_capacity(19);
91
4.14k
        prefs.insert("TTS".to_string(), Yaml::String("none".to_string()));
92
4.14k
        prefs.insert("Pitch".to_string(), Yaml::Real("0.0".to_string()));
93
4.14k
        prefs.insert("Rate".to_string(), Yaml::Real("180.0".to_string()));
94
4.14k
        prefs.insert("Volume".to_string(), Yaml::Real("100.0".to_string()));
95
4.14k
        prefs.insert("Voice".to_string(), Yaml::String("none".to_string()));
96
4.14k
        prefs.insert("Gender".to_string(), Yaml::String("none".to_string()));
97
4.14k
        prefs.insert("Bookmark".to_string(), Yaml::Boolean(false));
98
4.14k
        prefs.insert("CapitalLetters_UseWord".to_string(), Yaml::Boolean(true));
99
4.14k
        prefs.insert("CapitalLetters_Pitch".to_string(), Yaml::Real("0.0".to_string()));
100
4.14k
        prefs.insert("CapitalLetters_Beep".to_string(), Yaml::Boolean(false));
101
4.14k
        prefs.insert("IntentErrorRecovery".to_string(), Yaml::String("IgnoreIntent".to_string()));    // also Error
102
4.14k
        prefs.insert("CheckRuleFiles".to_string(), Yaml::String(
103
4.14k
                    (if cfg!(target_family = "wasm") {
"None"0
} else {"Prefs"}).to_string())); // avoid checking for rule files being changed (40% speedup!) (All, Prefs, None)
104
4.14k
        return Preferences{ prefs };
105
4.14k
    }
106
107
4.14k
    fn read_prefs_file(file: &Path, mut base_prefs: Preferences) -> Result<Preferences> {
108
4.14k
        let file_name = file.to_str().unwrap();
109
        let docs;
110
4.14k
        match read_to_string_shim(file) {
111
0
            Err(e) => {
112
0
                bail!("Couldn't read file {}\n{}", file_name, e);
113
            },
114
4.14k
            Ok( file_contents) => {
115
4.14k
                match YamlLoader::load_from_str(&file_contents) {
116
0
                    Err(e) => {
117
0
                        bail!("Yaml parse error ('{}') in preference file {}.", e, file_name);
118
                    },
119
4.14k
                    Ok(d) => docs = d,
120
                }
121
122
            }
123
        }
124
4.14k
        if docs.len() != 1 {
125
0
            bail!("MathCAT: error in prefs file '{}'.\nFound {} 'documents' -- should only be 1.", file_name, docs.len());
126
4.14k
        }
127
128
4.14k
        let doc = &docs[0];
129
4.14k
        if cfg!(debug_assertions) {
130
4.14k
            verify_keys(doc, "Speech", file_name)
?0
;
131
4.14k
            verify_keys(doc, "Navigation", file_name)
?0
;
132
4.14k
            verify_keys(doc, "Braille", file_name)
?0
;
133
4.14k
            verify_keys(doc, "Other", file_name)
?0
;
134
0
        }
135
136
4.14k
        let prefs = &mut base_prefs.prefs;
137
4.14k
        add_prefs(prefs, &doc["Speech"], "", file_name);
138
4.14k
        add_prefs(prefs, &doc["Navigation"], "", file_name);
139
4.14k
        add_prefs(prefs, &doc["Braille"], "", file_name);
140
4.14k
        add_prefs(prefs, &doc["Other"], "", file_name);
141
4.14k
        return Ok( Preferences{ prefs: prefs.to_owned() } );
142
143
144
145
16.5k
        fn verify_keys(dict: &Yaml, key: &str, file_name: &str) -> Result<()> {
146
16.5k
            let prefs = &dict[key];
147
16.5k
            if prefs.is_badvalue() {
148
0
                bail!("Yaml error in file {}.\nDidn't find '{}' key.", file_name, key);
149
16.5k
            }
150
16.5k
            if prefs.as_hash().is_none() {
151
0
                bail!("Yaml error in file {}.\n'{}' key is not a dictionary. Value found is {}.",
152
0
                            file_name, key, yaml_to_string(dict, 1));
153
16.5k
            }
154
16.5k
            return Ok(());
155
16.5k
        }
156
157
41.4k
        fn add_prefs(map: &mut PreferenceHashMap, new_prefs: &Yaml, name_prefix: &str, file_name: &str) {
158
41.4k
            if new_prefs.is_badvalue() || new_prefs.is_null() || new_prefs.as_hash().is_none() {
159
0
                return;
160
41.4k
            }
161
41.4k
            let new_prefs = new_prefs.as_hash().unwrap();
162
302k
            for (yaml_name, yaml_value) in 
new_prefs41.4k
{
163
302k
                let name = as_str_checked(yaml_name);
164
302k
                if let Err(
e0
) = name {
165
0
                    error!("{}", e.context(
166
0
                        format!("name '{}' is not a string in file {}", yaml_to_string(yaml_name, 0), file_name)));
167
                } else {
168
302k
                    match yaml_value {
169
24.8k
                        Yaml::Hash(_) => add_prefs(map, yaml_value, &(name.unwrap().to_string() + "_"), file_name),
170
0
                        Yaml::Array(_) => error!("name '{}' has illegal array value {} in file '{}'",
171
0
                                                 yaml_to_string(yaml_name, 0), yaml_to_string(yaml_value, 0), file_name),
172
                        Yaml::String(_) | Yaml::Boolean(_) | Yaml::Integer(_) | Yaml::Real(_) => {
173
277k
                            let trimmed_name = name_prefix.to_string() + name.unwrap().trim();
174
277k
                            let mut yaml_value = yaml_value.to_owned();
175
277k
                            if let Some(
value236k
) = yaml_value.as_str() {
176
236k
                                yaml_value = Yaml::String(value.to_string());
177
236k
                            
}41.4k
178
277k
                            map.insert(trimmed_name, yaml_value);
179
                        },
180
0
                        _ => error!("name '{}' has illegal {:#?} value {} in file '{}'",
181
0
                                    yaml_to_string(yaml_name, 0), yaml_value, yaml_to_string(yaml_value, 0), file_name),
182
                    }
183
                }                  
184
            }
185
41.4k
        }
186
4.14k
    }
187
188
    #[allow(dead_code)]     // used in testing
189
0
    fn set_string_value(&mut self, name: &str, value: &str) {
190
0
        self.prefs.insert(name.to_string(), Yaml::String(value.trim().to_string()));
191
0
    }
192
193
    #[allow(dead_code)]     // used in testing
194
0
    fn set_bool_value(&mut self, name: &str, value: bool) {
195
0
        self.prefs.insert(name.to_string(), Yaml::Boolean(value));
196
0
    }
197
}
198
199
200
thread_local!{
201
    static DEFAULT_USER_PREFERENCES: Preferences = Preferences::user_defaults();
202
    static DEFAULT_API_PREFERENCES: Preferences = Preferences::api_defaults();
203
    static PREF_MANAGER: Rc<RefCell<PreferenceManager>> = 
204
            Rc::new( RefCell::new( PreferenceManager::default() ) );
205
206
}
207
208
/// PreferenceManager keeps track of user and api prefs along with current files
209
///
210
/// If one of the `FileAndTime` files changes while the program is running, the values will auto-update
211
/// Among other things, that means that a UI that changes a user pref will be reflected the next time someone gets speech, braille, etc.
212
//
213
// Note: I experimented with PREF_MANAGER being a Result<PreferenceManager> in the case of no rule files,
214
//   but it ended up being a mess (lots of unwrapping). Having a field is much cleaner.
215
//   Also note that if 'error' is not an empty string, SpeechRules can't work so using those requires a check.
216
#[derive(Debug, Default)]
217
pub struct PreferenceManager {
218
    rules_dir: PathBuf,                   // full path to rules dir
219
    error: String,                        // empty/default string if fields are set, otherwise error message
220
    user_prefs: Preferences,              // prefs that come from reading prefs.yaml (system and user locations)
221
    api_prefs: Preferences,               // prefs set by API calls (along with some defaults not in the user settings such as "pitch")
222
    sys_prefs_file: Option<FileAndTime>,  // the system prefs.yaml file
223
    user_prefs_file: Option<FileAndTime>, // the user prefs.yaml file
224
    intent: PathBuf,                      // the intent rule style file
225
    speech: PathBuf,                      // the speech rule style file
226
    overview: PathBuf,                    // the overview rule file
227
    navigation: PathBuf,                  // the navigation rule file
228
    speech_unicode: PathBuf,              // short unicode.yaml file
229
    speech_unicode_full: PathBuf,         // full unicode.yaml file
230
    speech_defs: PathBuf,                 // the definition.yaml file
231
    braille: PathBuf,                     // the braille rule file
232
    braille_unicode: PathBuf,             // short braille unicode file
233
    braille_unicode_full: PathBuf,        // full braille unicode file
234
    braille_defs: PathBuf,                // the definition.yaml file
235
}
236
237
238
impl fmt::Display for PreferenceManager {
239
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
240
0
        writeln!(f, "PreferenceManager:")?;
241
0
        if self.error.is_empty() {
242
0
            writeln!(f, "  not initialized!!! Error is {}", &self.error)?;
243
        } else {
244
0
            writeln!(f, "  user prefs:\n{}", self.user_prefs)?;
245
0
            writeln!(f, "  api prefs:\n{}", self.api_prefs)?;
246
0
            writeln!(f, "  style files: {:?}", self.speech.as_path())?;
247
0
            writeln!(f, "  unicode files: {:?}", self.speech_unicode.as_path())?;
248
0
            writeln!(f, "  intent files: {:?}", self.intent.as_path())?;
249
0
            writeln!(f, "  speech definition files: {:?}", self.speech_defs)?;
250
0
            writeln!(f, "  braille definition files: {:?}", self.braille_defs)?;
251
        }
252
0
        return Ok(());
253
0
    }
254
}
255
256
impl PreferenceManager {
257
    /// Initialize (the) PreferenceManager (a global var).
258
    /// 'rules_dir' is the path to "Rules" unless the env var MathCATRulesDir is set
259
    /// 
260
    /// If rules_dir is an empty PathBuf, the existing rules_dir is used (an error if it doesn't exist)
261
5.09k
    pub fn initialize(&mut self, rules_dir: PathBuf) -> Result<()> {
262
        // Resolve the rules directory to an absolute, canonical path.
263
        // If canonicalize() fails (e.g., ACCESS_DENIED in containers), fall back to:
264
        //   - returning the path as-is if it is already absolute,
265
        //   - prepending the current working directory if it is relative.
266
        // Note: if current_dir() also fails, unwrap_or_default yields an empty PathBuf,
267
        //       and the result may remain relative.
268
        #[cfg(not(feature = "include-zip"))]
269
5.09k
        let rules_dir = match rules_dir.canonicalize() {
270
1
            Err(_e) => {
271
1
                if rules_dir.is_absolute() {
272
0
                    rules_dir
273
                } else {
274
1
                    std::env::current_dir()
275
1
                        .unwrap_or_default()
276
1
                        .join(&rules_dir)
277
                }
278
            },
279
5.09k
            Ok(rules_dir) =>  rules_dir,
280
        };
281
282
5.09k
        self.set_rules_dir(&rules_dir)
?1
;
283
5.09k
        self.set_preference_files()
?0
;
284
5.09k
        self.set_all_files(&rules_dir)
?0
;
285
5.09k
        return Ok( () );
286
        
287
5.09k
    }
288
289
60.3k
    pub fn get() -> Rc<RefCell<PreferenceManager>> {
290
60.3k
        return PREF_MANAGER.with( |pm| pm.clone() );
291
60.3k
    }
292
293
0
    pub fn get_error(&self) -> &str {
294
0
        return &self.error;
295
0
    }
296
297
    /// Return a `PreferenceHashMap` that is the merger of the api prefs onto the user prefs.
298
22.7k
    pub fn merge_prefs(&self) -> PreferenceHashMap {
299
22.7k
        let mut merged_prefs = self.user_prefs.prefs.clone();
300
22.7k
        merged_prefs.extend(self.api_prefs.prefs.clone());
301
22.7k
        return merged_prefs;
302
22.7k
    }
303
304
    /// Set the rules dir and return failure if it is a bad directory (non-existent, can't find all files, ...)
305
5.09k
    fn set_rules_dir(&mut self, rules_dir: &Path) -> Result<()> {
306
        // Fix: should make sure all files exists -- fail if not true
307
5.09k
        if !is_dir_shim(rules_dir) {
308
1
            bail!("Unable to find MathCAT Rules directory '{}'", rules_dir.to_string_lossy())
309
5.09k
        }
310
5.09k
        self.rules_dir = rules_dir.to_path_buf();
311
5.09k
        return Ok( () );
312
5.09k
    }
313
314
    /// Set the rules dir and return failure if it is a bad directory (non-existent, can't find all files, ...)
315
1
    pub fn get_rules_dir(&self) -> PathBuf {
316
        // Fix: should make sure rules_dir is set -- fail if not true
317
1
        return self.rules_dir.clone();
318
1
    }
319
320
    /// Read the preferences from the files (if not up to date) and set the preferences and preference files
321
    /// Returns failure if the files don't exist or have errors
322
20.3k
    pub fn set_preference_files(&mut self) -> Result<()> {
323
        // first, read in the preferences -- need to determine which files to read next
324
        // the prefs files are in the rules dir and the user dir; differs from other files
325
20.3k
        if self.api_prefs.prefs.is_empty() {
326
4.14k
            self.api_prefs = Preferences{ prefs: DEFAULT_API_PREFERENCES.with(|defaults| defaults.prefs.clone()) };
327
16.2k
        }
328
329
20.3k
        let should_update_system_prefs = self.sys_prefs_file.is_none() || 
!16.2k
self.sys_prefs_file16.2k
.as_ref().unwrap().is_up_to_date();
330
20.3k
        let should_update_user_prefs = self.user_prefs_file.is_none() || 
!16.2k
self.user_prefs_file16.2k
.as_ref().unwrap().is_up_to_date();
331
20.3k
        if !(should_update_system_prefs || 
should_update_user_prefs16.2k
) {
332
16.2k
            return Ok( () );            // no need to do anything else
333
4.14k
        }
334
335
4.14k
        let mut prefs = Preferences::default();
336
337
4.14k
        let mut system_prefs_file = self.rules_dir.to_path_buf();
338
4.14k
        system_prefs_file.push("prefs.yaml");
339
4.14k
        if is_file_shim(&system_prefs_file) {
340
4.14k
            let defaults = DEFAULT_USER_PREFERENCES.with(|defaults| defaults.clone());
341
4.14k
            prefs = Preferences::read_prefs_file(&system_prefs_file, defaults)
?0
;
342
4.14k
            self.sys_prefs_file = Some( FileAndTime::new_with_time(system_prefs_file.clone()) );
343
        } else {
344
0
            error!("MathCAT couldn't open file system preference file '{}'.\nUsing fallback defaults which may be inappropriate.",
345
0
                        system_prefs_file.to_str().unwrap());
346
        };
347
348
4.14k
        let mut user_prefs_file = dirs::config_dir();
349
4.14k
        if let Some(mut user_prefs_file_path_buf) = user_prefs_file {
350
4.14k
            user_prefs_file_path_buf.push("MathCAT/prefs.yaml");
351
4.14k
            if is_file_shim(&user_prefs_file_path_buf) {
352
0
                prefs = Preferences::read_prefs_file(&user_prefs_file_path_buf, prefs)?;
353
4.14k
            }
354
            // set the time otherwise keeps needing to do updates
355
4.14k
            self.user_prefs_file = Some( FileAndTime::new_with_time(user_prefs_file_path_buf.clone()) );
356
4.14k
            user_prefs_file = Some(user_prefs_file_path_buf);
357
0
        }
358
359
4.14k
        if prefs.prefs.is_empty() {
360
0
            let user_prefs_file_name = match user_prefs_file {
361
0
                None => "No user config directory".to_string(),
362
0
                Some(file) => file.to_string_lossy().to_string(),
363
            };
364
0
            bail!("Didn't find preferences in rule directory ('{}') or user directory ('{}')", &system_prefs_file.to_string_lossy(), user_prefs_file_name);
365
4.14k
        }
366
4.14k
        self.set_files_based_on_changes(&prefs)
?0
;
367
4.14k
        self.user_prefs = prefs;
368
369
        // set computed values for BLOCK_SEPARATORS and DECIMAL_SEPARATORS (a little messy about the language due immutable and mutable borrows)
370
4.14k
        let language = self.user_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone();
371
4.14k
        let language = language.as_str().unwrap();
372
4.14k
        self.set_separators(language)
?0
;
373
        
374
4.14k
        return Ok( () );
375
20.3k
    }
376
377
5.09k
    /// Locates the speech files (under `rules_dir/Languages`) and the braille files (under `rules_dir/Braille`)
    /// for the current "Language" and "BrailleCode" preferences.
    fn set_all_files(&mut self, rules_dir: &Path) -> Result<()> {
        // the lookups below go through a series of fallbacks -- we try to maintain the language if possible
        let language_pref = self.pref_to_string("Language");
        let language = match language_pref.as_str() {
            "Auto" => "en",
            other => other,
        };
        self.set_speech_files(&rules_dir.join("Languages"), language, None)?; // also sets style file

        let braille_code = self.pref_to_string("BrailleCode");
        self.set_braille_files(&rules_dir.join("Braille"), &braille_code)?;
        return Ok(());
    }
391
392
9.19k
    fn set_speech_files(&mut self, language_dir: &Path, language: &str, new_speech_style: Option<&str>) -> Result<()> {
393
9.19k
        PreferenceManager::unzip_files(language_dir, language, Some("en"))
?0
;
394
9.19k
        self.intent = PreferenceManager::find_file(language_dir, language, Some("en"), "intent.yaml")
?0
;
395
9.19k
        self.overview = PreferenceManager::find_file(language_dir, language, Some("en"), "overview.yaml")
?0
;
396
9.19k
        self.navigation = PreferenceManager::find_file(language_dir, language, Some("en"), "navigate.yaml")
?0
;
397
398
9.19k
        self.speech_unicode = PreferenceManager::find_file(language_dir, language, Some("en"), "unicode.yaml")
?0
;
399
9.19k
        self.speech_unicode_full = PreferenceManager::find_file(language_dir, language, Some("en"), "unicode-full.yaml")
?0
;
400
401
9.19k
        self.speech_defs = PreferenceManager::find_file(language_dir, language, Some("en"), "definitions.yaml")
?0
;
402
403
9.19k
        match new_speech_style {
404
0
            Some(style_name) => self.set_style_file(language_dir, language, style_name)?,
405
            // use the old style name if one isn't given
406
9.19k
            None => self.set_style_file(language_dir, language, &self.pref_to_string("SpeechStyle"))
?0
,
407
        }
408
9.19k
        return Ok( () );
409
9.19k
    }
410
411
10.7k
    fn set_style_file(&mut self, language_dir: &Path, language: &str, style_file_name: &str) -> Result<()> {
412
10.7k
        let style_file_name = style_file_name.to_string() + "_Rules.yaml";
413
10.7k
        self.speech = PreferenceManager::find_file(language_dir, language, Some("en"), &style_file_name)
?0
;
414
        // debug!("set_style_file: language_dir: {}, language: {}, style_file_name: {}, self.speech: {}",
415
        //        language_dir.display(), language, style_file_name, self.speech.display());
416
10.7k
        return Ok( () );
417
10.7k
    }
418
419
5.69k
    fn set_braille_files(&mut self, braille_rules_dir: &Path, braille_code_name: &str) -> Result<()> {
420
        // Fix: Currently the braille code and the directory it lives in have to have the same name
421
5.69k
        PreferenceManager::unzip_files(braille_rules_dir, braille_code_name, Some("UEB"))
?0
;
422
423
5.69k
        let braille_file = braille_code_name.to_string() + "_Rules.yaml";
424
425
5.69k
        self.braille = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), &(braille_file))
?0
;
426
427
5.69k
        self.braille_unicode = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), "unicode.yaml")
?0
;
428
5.69k
        self.braille_unicode_full = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), "unicode-full.yaml")
?0
;
429
430
5.69k
        self.braille_defs = PreferenceManager::find_file(braille_rules_dir, braille_code_name, Some("UEB"), "definitions.yaml")
?0
;
431
5.69k
        return Ok( () );
432
5.69k
    }
433
434
    /// If some preferences have changed, we may need to recompute other ones
435
    /// The key prefs are Language, SpeechStyle, and BrailleCode, along with DecimalSeparator
436
4.14k
    fn set_files_based_on_changes(&mut self, new_prefs: &Preferences) -> Result<()> {
437
4.14k
        let old_language = self.user_prefs.prefs.get("Language");       // not set if first time
438
4.14k
        if old_language.is_none() {
439
4.14k
            return Ok( () );            // if "Language" isn't set yet, nothing else is either -- first time through, so no updating needed.
440
0
        }
441
442
0
        let old_language = old_language.unwrap();
443
0
        let new_language = new_prefs.prefs.get("Language").unwrap();
444
0
        debug!("set_files_based_on_changes: old_language={old_language:?}, new_language={new_language:?}");
445
0
        if old_language != new_language {
446
0
            let language_dir = self.rules_dir.to_path_buf().join("Languages");
447
0
            self.set_speech_files(&language_dir, new_language.as_str().unwrap(), None)?;  // also sets style file
448
        } else {
449
0
            let old_speech_style = self.user_prefs.prefs.get("SpeechStyle").unwrap();
450
0
            let new_speech_style = new_prefs.prefs.get("SpeechStyle").unwrap();
451
0
            let language_dir = self.rules_dir.to_path_buf().join("Languages");
452
0
            if old_speech_style != new_speech_style {
453
0
                self.set_speech_files(&language_dir, new_language.as_str().unwrap(), new_speech_style.as_str())?;
454
0
            }
455
        }
456
457
0
        let old_braille_code = self.user_prefs.prefs.get("BrailleCode").unwrap();
458
0
        let new_braille_code = new_prefs.prefs.get("BrailleCode").unwrap();
459
0
        if old_braille_code != new_braille_code {
460
0
            let braille_code_dir = self.rules_dir.to_path_buf().join("Braille");
461
0
            self.set_braille_files(&braille_code_dir, new_braille_code.as_str().unwrap())?;  // also sets style file
462
0
        }
463
464
0
        return Ok( () );
465
4.14k
    }
466
467
    /// Unzip the files if needed
468
    /// Returns true if it unzipped them
469
41.3k
    pub fn unzip_files(path: &Path, lang: &str, default_lang: Option<&str>) -> Result<bool> {
470
        thread_local!{
471
            /// when a language/braille code dir is unzipped, it is recorded here
472
            static UNZIPPED_FILES: RefCell<HashSet<String>> = RefCell::new( HashSet::with_capacity(31));
473
        }
474
        // ignore regional subdirs
475
41.3k
        let dir = PreferenceManager::get_language_dir(path, lang, default_lang)
?0
;
476
41.3k
        let language = if dir.ends_with(lang) {
lang39.5k
} else {
dir.file_name().unwrap()1.76k
.to_str().unwrap()};
477
41.3k
        let zip_file_name = language.to_string() + ".zip";
478
41.3k
        let zip_file_path = dir.join(&zip_file_name);
479
41.3k
        let zip_file_string = zip_file_path.to_string_lossy().to_string();
480
        // debug!("unzip_files: dir: {}, zip_file_name: {}, zip_file_path: {}", dir.display(), zip_file_name, zip_file_string);
481
41.3k
        if UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow().contains(&zip_file_string)) {
482
28.6k
            return Ok(false);
483
12.6k
        }
484
485
12.6k
        let 
result11.2k
= match zip_extract_shim(&dir, &zip_file_name) {
486
1.44k
            Err(e) => {
487
1.44k
                if lang.contains('-') {
488
                    // try again in parent dir of regional language
489
0
                    let language = lang.split_once('-').unwrap_or((lang, "")).0; // get the parent language
490
                    // debug!("unzip_files: trying again in parent language: {}", language);
491
0
                    PreferenceManager::unzip_files(path, language, default_lang)
492
0
                                                .with_context(|| format!("Couldn't open zip file {zip_file_string} in parent {language}: {e}."))?
493
                } else {
494
                    // maybe just regional dialects
495
1.44k
                    let mut regional_dirs = Vec::new();
496
1.44k
                    find_all_dirs_shim(&dir, &mut regional_dirs);
497
1.44k
                    for dir in regional_dirs {
498
                        // debug!("unzip_files: trying again in subdir: {}", dir.display());
499
1.44k
                        let language = format!("{}-{}", lang, dir.file_name().unwrap().to_str().unwrap());
500
1.44k
                        if let Ok(result) =PreferenceManager::unzip_files(path, &language, default_lang) {
501
1.44k
                            return Ok(result);
502
0
                        }
503
                    }
504
0
                    bail!("Couldn't open zip file {}: {}.", zip_file_string, e)
505
                }
506
            },
507
11.2k
            Ok(result) => {
508
11.2k
                result
509
            },
510
        };
511
512
11.2k
        UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow_mut().insert(zip_file_string.clone()) );
513
        // debug!("  unzip_files: unzipped {} files from {}", result, &zip_file_string);
514
        // UNZIPPED_FILES.with( |unzipped_files| {
515
        //     debug!("unzip_files: unzipped_files: {:?}", unzipped_files.borrow());
516
        // });
517
        
518
11.2k
        return Ok(result);
519
41.3k
    }
520
521
    /// Set BlockSeparators and DecimalSeparators
522
    /// FIX: changing these values could change the parse, so we really should reparse the original expr, but that doesn't exist anymore (store the original string???)
523
    ///
524
    /// Note: DecimalSeparator is user-facing (can be Auto), DecimalSeparators is code-facing (always a char)
525
8.24k
    fn set_separators(&mut self, language_country: &str) -> Result<()> {
526
        // This list was generated from https://en.wikipedia.org/wiki/Decimal_separator#Countries_using_decimal_point
527
        // The countries were then mapped to language(s) using https://en.wikipedia.org/wiki/List_of_official_languages_by_country_and_territory
528
        // When a language was used in other countries that used a "," separator, the language+country is listed 
529
        //   Sometimes there are multiple languages used in a country -- they are all listed, sometimes with a country code
530
        // The country code isn't used when the language is used in smaller countries (i.e, when "." is more likely correct)
531
        //   This decision is sometimes a bit arbitrary
532
        //   For example, Swahili (sw) is used in: Democratic Republic of the Congo, Kenya, Rwanda, Tanzania, and Uganda.
533
        //   Of these, Kenya, Tanzania, and Uganda are listed as using "." and I include Swahili in the list below.
534
        static USE_DECIMAL_SEPARATOR: phf::Set<&str> = phf_set! {
535
            "en", "bn", "km", "el-cy", "tr-cy", "zh", "es-do", "ar", "es-sv", "es-gt", "es-hn", "hi", "as", "gu", "kn", "ks",
536
            "ml", "mr", "ne", "or", "pa", "sa", "sd", "ta", "te", "ur", "he", "ja", "sw", "ko", "de-li", "ms", "dv", "mt", "es-mx", "my",
537
            "af-na", "es-ni", "es-pa", "fil", "ms-sg", "si", "th",
538
            "es-419", // latin america
539
        };
540
        
541
8.24k
        let decimal_separator = self.pref_to_string("DecimalSeparator");
542
8.24k
        if !["Auto", ",", "."].contains(&decimal_separator.as_str()) {
543
2
            return Ok( () );
544
8.24k
        }
545
546
8.24k
        if language_country == "Auto" && 
decimal_separator == "Auto"4.14k
{
547
4.14k
            return Ok( () );        // "Auto" doesn't tell us anything -- we will get called again when Language is set
548
4.10k
        }
549
550
4.10k
        let language_country = language_country.to_ascii_lowercase();
551
4.10k
        let language_country = &language_country;
552
4.10k
        let mut lang_country_split = language_country.split('-');
553
4.10k
        let language = lang_country_split.next().unwrap_or("");
554
4.10k
        let country = lang_country_split.next().unwrap_or("");
555
4.10k
        let mut use_period = decimal_separator == ".";
556
4.10k
        if decimal_separator == "Auto" {
557
            // if we don't have a match for the lang-country, then just try lang
558
4.09k
            use_period = USE_DECIMAL_SEPARATOR.contains(language_country) || 
USE_DECIMAL_SEPARATOR2.34k
.
contains2.34k
(
language2.34k
);
559
2
        }
560
        // debug!("set_separators: use_period: {}", use_period);
561
4.10k
        self.user_prefs.prefs.insert("DecimalSeparators".to_string(), Yaml::String((if use_period {
"."1.99k
} else {
","2.10k
}).to_string()));
562
4.10k
        let mut block_separators =  (if use_period {
", \u{00A0}\u{202F}"1.99k
} else {
". \u{00A0}\u{202F}"2.10k
}).to_string();
563
4.10k
        if country == "ch" || country == "li" { // Switzerland and Liechtenstein also use ` as a block separator, at least in some cases
564
0
            block_separators.push('\'');
565
4.10k
        }
566
4.10k
        self.user_prefs.prefs.insert("BlockSeparators".to_string(), Yaml::String(block_separators));
567
4.10k
        return Ok( () );
568
8.24k
    }
569
570
571
    /// Find a file matching `file_name` by starting in the regional directory and looking to the language.
572
    /// If that fails, fall back to looking for the default repeating the same process -- something needs to be found or MathCAT crashes
573
88.6k
    fn find_file(rules_dir: &Path, lang: &str, default_lang: Option<&str>, file_name: &str) -> Result<PathBuf> {
574
        // rules_dir: is the root of the search
575
        //   to that we add the language dir(s)
576
        //   if file_name doesn't exist in the language dir(s), we try to find it in the default dir
577
        //   the exception to this is if it ends with _Rules.yaml, we look for other _Rules.yaml files
578
        // returns the location of the file_name found
579
580
        // start by trying to find a dir that exists
581
88.6k
        let lang_dir = PreferenceManager::get_language_dir(rules_dir, lang, default_lang)
?0
;
582
        // now find the file name in the dirs
583
        // we start with the deepest dir and walk back to towards Rules
584
88.6k
        let mut alternative_style_file = None;      // back up in case we don't find the target style in lang_dir
585
88.6k
        let looking_for_style_file = file_name.ends_with("_Rules.yaml");
586
108k
        for os_path in 
lang_dir.ancestors()88.6k
{ // ancestor returns self and ancestors
587
108k
            let path = PathBuf::from(os_path).join(file_name);
588
            // debug!("find_file: checking file: {}", path.to_string_lossy());
589
108k
            if is_file_shim(&path) {
590
                // we make an exception for definitions.yaml -- there a language specific checks for Hundreds, etc
591
88.4k
                if !(file_name == "definitions.yaml" && 
os_path14.8k
.
ends_with14.8k
("Rules")) {
592
                    // debug!("find_file -- found={}", path.to_string_lossy());
593
88.4k
                    return Ok(path);
594
2
                }
595
19.7k
            };
596
19.7k
            if looking_for_style_file && 
alternative_style_file991
.
is_none991
() &&
597
257
               let Ok(
alt_file_path249
) = find_any_style_file(os_path) {
598
249
                    // debug!("find_file: found alternative style file '{}'", alt_file_path.display());
599
249
                    alternative_style_file = Some(alt_file_path);
600
19.4k
                }
601
19.7k
            if os_path.ends_with("Rules") {
602
                // at root of Rules directory
603
256
                break;
604
19.4k
            }
605
        }
606
607
608
256
        if let Some(
result248
) = alternative_style_file {
609
            // debug!("find_file: found alternative_style_file '{}'", result.to_string_lossy());
610
248
            return Ok(result);     // found an alternative style file in the same lang dir
611
8
        }
612
613
        // try a subdir (regional dialect) of the language dir
614
8
        let mut regional_dirs = Vec::new();
615
8
        find_all_dirs_shim(&lang_dir, &mut regional_dirs);
616
8
        for dir in regional_dirs {
617
            // debug!("find_file: trying again in subdir: {}", dir.display());
618
            // debug!(" ... files found = {:?}", find_files_in_dir_that_ends_with_shim(&dir, file_name));
619
8
            if find_files_in_dir_that_ends_with_shim(&dir, ".yaml").contains(&file_name.to_string()) {
620
0
                let path = dir.join(file_name);
621
0
                if is_file_shim(&path) {
622
0
                    return Ok(path);
623
0
                }
624
8
            }
625
        }
626
627
8
        if let Some(default_lang) = default_lang {
628
            // try again with the default language (we're likely in trouble)
629
8
            return PreferenceManager::find_file(rules_dir, default_lang, None, file_name);
630
0
        }
631
        
632
        // We are done for -- MathCAT can't do anything without the required files!
633
0
        bail!("Wasn't able to find/read MathCAT required file in directory: {}\n\
634
               Initially looked in there for language specific directory: {}\n\
635
               Looking for file: {}",
636
0
            rules_dir.to_str().unwrap(), lang, file_name);
637
638
639
        /// try to find a xxx_Rules.yaml file -- returns an error if none is found ()
640
257
        fn find_any_style_file(path: &Path) -> Result<PathBuf> {    
641
            // try to find a xxx_Rules.yaml file
642
            // we find the first file because this is the deepest (most language specific) speech rule file
643
257
            let rule_files = find_files_in_dir_that_ends_with_shim(path, "_Rules.yaml");
644
257
            if rule_files.is_empty() {
645
8
                bail!{"didn't find file"};
646
            } else {
647
249
                return Ok( path.join(rule_files[0].clone()) );
648
            }
649
257
        }
650
88.6k
    }
651
652
129k
    fn get_language_dir(rules_dir: &Path, lang: &str, default_lang: Option<&str>) -> Result<PathBuf> {
653
        // return 'Rules/Language/fr', 'Rules/Language/en/gb', etc, if they exist.
654
        // fall back to main language, and then to default_dir if language dir doesn't exist
655
129k
        let mut full_path = rules_dir.to_path_buf();
656
129k
        full_path.push(lang.replace('-', std::path::MAIN_SEPARATOR_STR));
657
130k
        for parent in 
full_path.ancestors()129k
{
658
130k
            if parent == rules_dir {
659
0
                break;
660
130k
            } else if is_dir_shim(parent) {
661
129k
                return Ok(parent.to_path_buf());
662
24
            }
663
        }
664
665
        // didn't find the language -- try again with the default language
666
0
        match default_lang {
667
0
            Some(default_lang) => {
668
0
                warn!("Couldn't find rules for language {lang}, ");
669
0
                return PreferenceManager::get_language_dir(rules_dir, default_lang, None);
670
            },
671
            None => {
672
                // We are done for -- MathCAT can't do anything without the required files!
673
0
                bail!("Wasn't able to find/read directory for language {}\n
674
                        Wasn't able to find/read MathCAT default language directory: {}",
675
0
                        lang, rules_dir.join(default_lang.unwrap_or("")).as_os_str().to_str().unwrap());
676
            }
677
        }
678
129k
    }
679
680
    
681
    /// Return the speech rule style file locations.
682
15.3k
    pub fn get_rule_file(&self, name: &RulesFor) -> &Path {
683
15.3k
        if !self.error.is_empty() {
684
0
            panic!("Internal error: get_rule_file called on invalid PreferenceManager -- error message\n{}", &self.error);
685
15.3k
        };
686
687
15.3k
        let files = match name {
688
3.88k
            RulesFor::Intent => &self.intent,
689
9.03k
            RulesFor::Speech => &self.speech,
690
14
            RulesFor::OverView => &self.overview,
691
549
            RulesFor::Navigation => &self.navigation,
692
1.83k
            RulesFor::Braille => &self.braille,
693
        };
694
15.3k
        return files.as_path();
695
15.3k
    }
696
697
    /// Return the unicode.yaml file locations.
698
18.9k
    pub fn get_speech_unicode_file(&self) ->(&Path, &Path) {
699
18.9k
        if !self.error.is_empty() {
700
0
            panic!("Internal error: get_speech_unicode_file called on invalid PreferenceManager -- error message\n{}", &self.error);
701
18.9k
        };
702
18.9k
        return (self.speech_unicode.as_path(), self.speech_unicode_full.as_path());
703
18.9k
    }
704
705
    /// Return the unicode.yaml file locations.
706
3.92k
    pub fn get_braille_unicode_file(&self) -> (&Path, &Path) {
707
3.92k
        if !self.error.is_empty() {
708
0
            panic!("Internal error: get_braille_unicode_file called on invalid PreferenceManager -- error message\n{}", &self.error);
709
3.92k
        };
710
711
3.92k
        return (self.braille_unicode.as_path(), self.braille_unicode_full.as_path());
712
3.92k
    }
713
714
    /// Return the definitions.yaml file locations.
715
15.3k
    pub fn get_definitions_file(&self, use_speech_defs: bool) -> &Path {
716
15.3k
        if !self.error.is_empty() {
717
0
            panic!("Internal error: get_definitions_file called on invalid PreferenceManager -- error message\n{}", &self.error);
718
15.3k
        };
719
720
15.3k
        let defs_file = if use_speech_defs {
&self.speech_defs13.4k
} else {
&self.braille_defs1.83k
};
721
15.3k
        return defs_file;
722
15.3k
    }
723
724
    /// Return the TTS engine currently in use.
725
85.4k
    pub fn get_tts(&self) -> TTS {
726
85.4k
        if !self.error.is_empty() {
727
0
            panic!("Internal error: get_tts called on invalid PreferenceManager -- error message\n{}", &self.error);
728
85.4k
        };
729
730
85.4k
        return match self.pref_to_string("TTS").as_str().to_ascii_lowercase().as_str() {
731
85.4k
            "none" => TTS::None,
732
0
            "ssml" => TTS::SSML,
733
0
            "sapi5" => TTS::SAPI5,
734
            _ => {
735
0
                warn!("found unknown value for TTS: '{}'", self.pref_to_string("TTS").as_str());
736
0
                TTS::None
737
            }
738
        }
739
85.4k
    }
740
741
    /// Set the string-valued preference.
742
    /// 
743
    /// Note: changing the language, speech style, or braille code might fail if the files don't exist.
744
    ///   If this happens, the preference is not set and an error is returned.
745
    /// If "LanguageAuto" is set, we assume "Language" has already be checked to be "Auto"
746
16.2k
    pub fn set_string_pref(&mut self, key: &str, value: &str) -> Result<()> {
747
16.2k
        if !self.error.is_empty() {
748
0
            panic!("Internal error: set_string_pref called on invalid PreferenceManager -- error message\n{}", &self.error);
749
16.2k
        };
750
751
        // verify language, braille, and SpeechStyle because these are used as access into the file system
752
        // should be an ascii string with only letters, dashes, and underscores
753
16.2k
        if 
matches!9.97k
(key, "Language" |
"BrailleCode"11.2k
|
"SpeechStyle"9.85k
) &&
754
55.9k
           !
value.chars()9.97k
.
all9.97k
(|c| matches!(c,
'a'..='z'45.9k
|
'A'..='Z'9.63k
| '_' | '-')) {
755
3
            bail!("{} is an invalid value! Must contains only ascii letters, '_', or'-'", key);
756
16.2k
        }
757
        
758
        // don't do an update if the value hasn't changed
759
16.2k
        let mut is_user_pref = true;
760
16.2k
        if let Some(
pref_value61
) = self.api_prefs.prefs.get(key) {
761
61
            if pref_value.as_str().unwrap() != value {
762
59
                is_user_pref = false;
763
59
                self.reset_files_from_preference_change(key, value)
?0
;
764
2
            }
765
16.1k
        } else if let Some(pref_value) = self.user_prefs.prefs.get(key) {
766
16.1k
            if pref_value.as_str().unwrap() != value {
767
8.77k
                self.reset_files_from_preference_change(key, value)
?0
;
768
7.40k
            }
769
        } else {
770
0
            bail!("{} is an unknown MathCAT preference!", key);
771
        }
772
773
        // debug!("Setting ({}) {} to '{}'", if is_user_pref {"user"} else {"sys"}, key, value);
774
16.2k
        if is_user_pref {
775
            // a little messy about the DecimalSeparator due immutable and mutable borrows
776
16.1k
            let current_decimal_separator = self.user_prefs.prefs.get("DecimalSeparator").unwrap().clone();
777
16.1k
            let current_decimal_separator = current_decimal_separator.as_str().unwrap();
778
16.1k
            let is_decimal_separators_changed = key == "DecimalSeparator" && 
current_decimal_separator != value1.35k
;
779
16.1k
            let is_language_changed = key == "Language" && 
self.user_prefs.prefs5.02k
.
get5.02k
("Language").unwrap().as_str().unwrap() != value;
780
16.1k
            self.user_prefs.prefs.insert(key.to_string(), Yaml::String(value.to_string()));
781
16.1k
            if is_decimal_separators_changed || (current_decimal_separator == "Auto" && is_language_changed) {
782
                // a little messy about the language due immutable and mutable borrows)
783
4.08k
                let language = self.user_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone();
784
4.08k
                let language = language.as_str().unwrap();
785
4.08k
                self.set_separators(language)
?0
;
786
12.0k
            }
787
59
        } else {
788
59
            self.api_prefs.prefs.insert(key.to_string(), Yaml::String(value.to_string()));
789
59
        }
790
16.2k
        return Ok( () );
791
16.2k
    }
792
793
30.2k
    fn reset_files_from_preference_change(&mut self, changed_pref: &str, changed_value: &str) -> Result<()> {       
794
30.2k
        if changed_pref == "Language" && 
changed_value == "Auto"4.09k
{
795
            // Language must have had a non-Auto value -- set LanguageAuto to old value so (probable) next change to LanguageAuto works well
796
0
            self.api_prefs.prefs.insert("LanguageAuto".to_string(),
797
0
                                self.api_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone() );
798
0
            return Ok( () );
799
30.2k
        }
800
801
30.2k
        let changed_pref = if changed_pref == "LanguageAuto" {
"Language"0
} else {changed_pref};
802
30.2k
        let language_dir = self.rules_dir.to_path_buf().join("Languages");
803
30.2k
        match changed_pref {
804
30.2k
            "Language" => {
805
4.09k
                self.set_speech_files(&language_dir, changed_value, None)
?0
806
            },
807
26.1k
            "SpeechStyle" => {
808
1.51k
                let language = self.pref_to_string("Language");
809
1.51k
                let language = if language.as_str() == "Auto" {
"en"62
} else {
language.as_str()1.45k
}; // avoid 'temp value dropped while borrowed' error
810
1.51k
                self.set_style_file(&language_dir, language, changed_value)
?0
811
            },
812
24.6k
            "BrailleCode" => {
813
601
                let braille_dir = self.rules_dir.to_path_buf().join("Braille");
814
601
                self.set_braille_files(&braille_dir, changed_value)
?0
815
            },
816
24.0k
            _ => (),
817
        }
818
30.2k
        return Ok( () );
819
30.2k
    }
820
821
    /// Set the number-valued preference.
822
    /// All number-valued preferences are stored with type `f64`.
823
0
    pub fn set_api_float_pref(&mut self, key: &str, value: f64) {
824
0
        if !self.error.is_empty() {
825
0
            panic!("Internal error: set_api_float_pref called on invalid PreferenceManager -- error message\n{}", &self.error);
826
0
        };
827
828
0
        self.api_prefs.prefs.insert(key.to_string(), Yaml::Real(value.to_string()));
829
0
    }
830
831
1.50k
    pub fn set_api_boolean_pref(&mut self, key: &str, value: bool) {
832
1.50k
        if !self.error.is_empty() {
833
0
            panic!("Internal error: set_api_boolean_pref called on invalid PreferenceManager -- error message\n{}", &self.error);
834
1.50k
        };
835
836
1.50k
        self.api_prefs.prefs.insert(key.to_string(), Yaml::Boolean(value));
837
1.50k
    }
838
839
    /// Return the current speech rate.
840
0
    pub fn get_rate(&self) -> f64 {
841
0
        if !self.error.is_empty() {
842
0
            panic!("Internal error: get_rate called on invalid PreferenceManager -- error message\n{}", &self.error);
843
0
        };
844
845
0
        return match &self.pref_to_string("Rate").parse::<f64>() {
846
0
            Ok(val) => *val,
847
            Err(_) => {
848
0
                warn!("Rate ('{}') can't be converted to a floating point number", &self.pref_to_string("Rate"));
849
0
                DEFAULT_API_PREFERENCES.with(|defaults| defaults.prefs["Rate"].as_f64().unwrap())
850
            }
851
        };
852
0
    }
853
854
0
    pub fn get_api_prefs(&self) -> &Preferences {
855
0
        return &self.api_prefs;
856
0
    }
857
858
    /// returns value associated with 'name' or string NO_PREFERENCE
859
    /// 
860
    /// Note: Option/Result not used because most of the time we know the preference exists, so no unwrapping is needed for 95% of calls
861
268k
    pub fn pref_to_string(&self, name: &str) -> String {
862
268k
        let mut value = self.api_prefs.prefs.get(name);
863
268k
        if value.is_none() {
864
122k
            value = self.user_prefs.prefs.get(name);
865
145k
        }
866
268k
        return match value {
867
11
            None => NO_PREFERENCE.to_string(),
868
268k
            Some(v) => match v {
869
236k
                Yaml::String(s) => s.clone(),
870
27.3k
                Yaml::Boolean(b)   => b.to_string(),
871
4.23k
                Yaml::Integer(i)    => i.to_string(),
872
0
                Yaml::Real(s) => s.clone(),
873
0
                _  => NO_PREFERENCE.to_string(),       // shouldn't happen
874
            }
875
        }
876
268k
    }
877
878
    // occasionally useful to check a pref value when debugging
879
    // fn get_pref(&self, pref_name: &str) -> String {
880
    //     return yaml_to_string(self.user_prefs.prefs.get(pref_name).unwrap(), 1);
881
    // }
882
883
    /// Warning!!! This is meant for testing only -- it overwrites any values from a user pref file and will be overwritten if the file is reread.
884
    ///  set_preference() is the function that should be called.
885
    /// This differs from set_preference in that the user preferences are changed, not the api ones
886
21.4k
    pub fn set_user_prefs(&mut self, key: &str, value: &str) -> Result<()> {
887
21.4k
        if !self.error.is_empty() {
888
0
            panic!("Internal error: set_user_prefs called on invalid PreferenceManager -- error message\n{}", &self.error);
889
21.4k
        };
890
        
891
21.4k
        self.reset_files_from_preference_change(key, value)
?0
;
892
21.4k
        let is_decimal_separators_changed = key == "DecimalSeparator" && 
self.user_prefs.prefs3.46k
.
get3.46k
("DecimalSeparator").unwrap().as_str().unwrap() != value;
893
21.4k
        let is_language_changed = key == "Language" && 
self.user_prefs.prefs13
.
get13
("Language").unwrap().as_str().unwrap() != value;
894
21.4k
        self.user_prefs.prefs.insert(key.to_string(), Yaml::String(value.to_string()));
895
21.4k
        if is_decimal_separators_changed || 
is_language_changed21.4k
{
896
            // set computed values for BLOCK_SEPARATORS and DECIMAL_SEPARATORS (a little messy about the language due immutable and mutable borrows)
897
17
            let language = self.user_prefs.prefs.get("Language").unwrap_or(&DEFAULT_LANG).clone();
898
17
            let language = language.as_str().unwrap();
899
17
            self.set_separators(language)
?0
;
900
21.3k
        }
901
902
21.4k
        return Ok(());
903
21.4k
    }
904
}
905
906
907
#[cfg(test)]
908
mod tests {
909
    #[allow(unused_imports)]
910
    use crate::init_logger;
911
912
    // For these tests, it is assumed that there are Rules subdirs zz and zz/aa dir; there is no zz/ab
913
    // definitions.yaml is in Rules, zz, aa dirs
914
    // unicode.yaml is in zz
915
    // ClearSpeak_Rules.yaml is in zz
916
    // These files are NOT in the zipped up version -- hence the config
917
    use super::*;
918
919
    /// Version of abs_rules_dir_path that returns a PathBuf
920
11
    fn abs_rules_dir_path() -> PathBuf {
921
11
        return PathBuf::from(super::super::abs_rules_dir_path());
922
11
    }
923
    /// Return a relative path to Rules dir (ie, .../Rules/zz... returns zz/...)
924
    /// strip .../Rules from file path
925
33
    fn rel_path<'a>(rules_dir: &'a Path, path: &'a Path) -> &'a Path {
926
33
        let stripped_path = path.strip_prefix(rules_dir).unwrap();
927
33
        return stripped_path
928
33
    }
929
930
    #[test]
931
1
    fn separators() {
932
1
        PREF_MANAGER.with(|pref_manager| {
933
1
            let mut pref_manager = pref_manager.borrow_mut();
934
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
935
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
936
1
            pref_manager.set_user_prefs("DecimalSeparator", "Auto").unwrap();
937
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
938
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
939
940
1
            pref_manager.set_user_prefs("Language", "sv").unwrap();
941
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
942
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ". \u{00A0}\u{202F}");
943
944
            // test potentially ambiguous language (defaults to comma decimal separator)
945
1
            pref_manager.set_user_prefs("Language", "es").unwrap();
946
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
947
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ". \u{00A0}\u{202F}");
948
949
            // test country override
950
1
            pref_manager.set_user_prefs("Language", "es-mx").unwrap();
951
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
952
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
953
954
1
            pref_manager.set_user_prefs("DecimalSeparator", ",").unwrap();
955
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
956
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ". \u{00A0}\u{202F}");
957
958
1
            pref_manager.set_user_prefs("DecimalSeparator", ".").unwrap();
959
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
960
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
961
962
            // set to illegal value -- should leave values as before
963
1
            pref_manager.set_user_prefs("DecimalSeparator", ";").unwrap();
964
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ".");
965
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), ", \u{00A0}\u{202F}");
966
967
            // manual
968
1
            pref_manager.set_user_prefs("DecimalSeparators", ",").unwrap();
969
1
            pref_manager.set_user_prefs("BlockSeparators", " ").unwrap();
970
1
            pref_manager.set_user_prefs("DecimalSeparator", "None").unwrap();
971
1
            assert_eq!(&pref_manager.pref_to_string("DecimalSeparators"), ",");
972
1
            assert_eq!(&pref_manager.pref_to_string("BlockSeparators"), " ");
973
1
        });
974
1
    }
975
976
    #[test]
977
1
    fn find_simple_style() {
978
1
        PREF_MANAGER.with(|pref_manager| {
979
1
            let mut pref_manager = pref_manager.borrow_mut();
980
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
981
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
982
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
983
1
            assert_eq!(&pref_manager.pref_to_string("Language"), "en");
984
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "ClearSpeak");
985
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/en/ClearSpeak_Rules.yaml"));
986
1
        });
987
1
    }
988
989
cfg_if::cfg_if! {if #[cfg(not(feature = "include-zip"))] {  
990
    #[test]
991
1
    fn find_style_other_language() {
992
        // zz dir should have both ClearSpeak and SimpleSpeak styles
993
        // zz-aa dir should have only ClearSpeak style and unicode.yaml that includes the zz unicode but overrides "+"
994
1
        PREF_MANAGER.with(|pref_manager| {
995
1
            let mut pref_manager = pref_manager.borrow_mut();
996
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
997
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
998
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
999
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/en/SimpleSpeak_Rules.yaml"));
1000
1001
1
            pref_manager.set_user_prefs("Language", "zz").unwrap();
1002
1
            assert_eq!(&pref_manager.pref_to_string("Language"), "zz");
1003
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "SimpleSpeak");
1004
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
1005
1006
            // make sure language stays the same
1007
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1008
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "ClearSpeak");
1009
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1010
1011
            // make sure language stays the same
1012
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
1013
1
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "SimpleSpeak");
1014
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
1015
1
        });
1016
1
    }
1017
1018
    #[test]
1019
1
    fn find_regional_overrides() {
1020
        // zz dir should have both ClearSpeak and SimpleSpeak styles
1021
        // zz-aa dir should have ClearSpeak style and unicode.yaml that includes the zz unicode but overrides "+"
1022
1
        PREF_MANAGER.with(|pref_manager| {
1023
1
            let mut pref_manager = pref_manager.borrow_mut();
1024
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1025
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1026
1
            pref_manager.set_user_prefs("Language", "zz-aa").unwrap();
1027
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/aa/ClearSpeak_Rules.yaml"));
1028
1029
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
1030
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
1031
1
        });
1032
1
    }
1033
1034
    #[test]
1035
1
    fn find_style_no_sublanguage() {
1036
1
        PREF_MANAGER.with(|pref_manager| {
1037
1
            let mut pref_manager = pref_manager.borrow_mut();
1038
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1039
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1040
1
            pref_manager.set_user_prefs("Language", "zz-ab").unwrap();
1041
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1042
1
        });
1043
1
    }
1044
1045
    #[test]
1046
1
    fn found_all_files() {
1047
1
        PREF_MANAGER.with(|pref_manager| {
1048
1
            let mut pref_manager = pref_manager.borrow_mut();
1049
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1050
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1051
1
            pref_manager.set_user_prefs("Language", "zz-aa").unwrap();
1052
1
            pref_manager.set_user_prefs("BrailleCode", "UEB").unwrap();
1053
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.intent.as_path()), PathBuf::from("intent.yaml"));
1054
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.overview.as_path()), PathBuf::from("Languages/zz/overview.yaml"));
1055
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_defs.as_path()), PathBuf::from("Languages/zz/aa/definitions.yaml"));
1056
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/aa/ClearSpeak_Rules.yaml"));
1057
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode.as_path()), PathBuf::from("Languages/zz/aa/unicode.yaml"));
1058
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode_full.as_path()), PathBuf::from("Languages/zz/unicode-full.yaml"));
1059
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille.as_path()), PathBuf::from("Braille/UEB/UEB_Rules.yaml"));
1060
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode.as_path()), PathBuf::from("Braille/UEB/unicode.yaml"));
1061
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode_full.as_path()), PathBuf::from("Braille/UEB/unicode-full.yaml"));
1062
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_defs.as_path()), PathBuf::from("Braille/UEB/definitions.yaml"));
1063
    
1064
1
            pref_manager.set_user_prefs("Language", "zz-ab").unwrap();
1065
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.intent.as_path()), PathBuf::from("intent.yaml"));
1066
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.overview.as_path()), PathBuf::from("Languages/zz/overview.yaml"));
1067
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_defs.as_path()), PathBuf::from("Languages/zz/definitions.yaml"));
1068
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1069
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode.as_path()), PathBuf::from("Languages/zz/unicode.yaml"));
1070
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech_unicode_full.as_path()), PathBuf::from("Languages/zz/unicode-full.yaml"));
1071
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille.as_path()), PathBuf::from("Braille/UEB/UEB_Rules.yaml"));
1072
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode.as_path()), PathBuf::from("Braille/UEB/unicode.yaml"));
1073
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_unicode_full.as_path()), PathBuf::from("Braille/UEB/unicode-full.yaml"));
1074
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.braille_defs.as_path()), PathBuf::from("Braille/UEB/definitions.yaml"));
1075
1
        })
1076
1
    }
1077
1078
    #[test]
1079
1
    fn test_prefs() {
1080
1
        PREF_MANAGER.with(|pref_manager| {
1081
            // first test with internal settings
1082
            {
1083
1
                let mut pref_manager = pref_manager.borrow_mut();
1084
1
                pref_manager.initialize(abs_rules_dir_path()).unwrap();
1085
    
1086
1
                pref_manager.set_user_prefs("Language", "en").unwrap();
1087
1
                pref_manager.set_user_prefs("ClearSpeak_AbsoluteValue", "Determinant").unwrap();
1088
1
                pref_manager.set_user_prefs("ResetNavMode", "true").unwrap();
1089
1
                pref_manager.set_user_prefs("BrailleCode", "Nemeth").unwrap();
1090
1
                assert_eq!(pref_manager.pref_to_string("Language").as_str(), "en");
1091
1
                assert_eq!(pref_manager.pref_to_string("SubjectArea").as_str(), "General");
1092
1
                assert_eq!(pref_manager.pref_to_string("ClearSpeak_AbsoluteValue").as_str(), "Determinant");
1093
1
                assert_eq!(pref_manager.pref_to_string("ResetNavMode").as_str(), "true");
1094
1
                assert_eq!(pref_manager.pref_to_string("BrailleCode").as_str(), "Nemeth");
1095
1
                assert_eq!(pref_manager.pref_to_string("X_Y_Z").as_str(), NO_PREFERENCE);
1096
            }
1097
1098
            // now test with the interface
1099
            {
1100
                use crate::interface::{set_preference, get_preference};
1101
1
                set_preference("Language", "zz").unwrap();
1102
1
                set_preference("ClearSpeak_AbsoluteValue", "Cardinality").unwrap();
1103
1
                set_preference("Overview", "true").unwrap();
1104
1
                set_preference("BrailleCode", "UEB").unwrap();
1105
1
                assert_eq!(&get_preference("Language").unwrap(), "zz");
1106
1
                assert_eq!(&get_preference("ClearSpeak_AbsoluteValue").unwrap(), "Cardinality");
1107
1
                assert_eq!(&get_preference("Overview").unwrap(), "true");
1108
1
                assert_eq!(&get_preference("BrailleCode").unwrap(), "UEB");
1109
1
                assert!(&get_preference("X_Y_Z").is_err());
1110
1111
            }
1112
1
        });
1113
1
    }
1114
1115
    #[test]
1116
1
    fn test_language_change() {
1117
        // set_preference borrows the pref manager, so the previous borrow's lifetime needs to be ended before using it
1118
1
        PREF_MANAGER.with(|pref_manager| {
1119
1
            let mut pref_manager = pref_manager.borrow_mut();
1120
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1121
1
        });
1122
1
        crate::interface::set_preference("Language", "en").unwrap();
1123
1
        crate::interface::set_preference("SpeechStyle", "ClearSpeak").unwrap();
1124
1
        PREF_MANAGER.with(|pref_manager| {
1125
1
            let pref_manager = pref_manager.borrow_mut();
1126
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/en/ClearSpeak_Rules.yaml"));
1127
1
        });
1128
1129
1
        crate::interface::set_preference("Language", "zz").unwrap();
1130
1
        PREF_MANAGER.with(|pref_manager| {
1131
1
            let pref_manager = pref_manager.borrow_mut();
1132
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1133
1
        });
1134
1
    }
1135
    
1136
    #[test]
1137
1
    fn test_speech_style_change() {
1138
1
        PREF_MANAGER.with(|pref_manager| {
1139
1
            let mut pref_manager = pref_manager.borrow_mut();
1140
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1141
1
            pref_manager.set_user_prefs("Language", "en").unwrap();
1142
1
            pref_manager.set_user_prefs("SpeechStyle", "ClearSpeak").unwrap();
1143
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/en/ClearSpeak_Rules.yaml"));
1144
1145
1
            pref_manager.set_user_prefs("SpeechStyle", "SimpleSpeak").unwrap();
1146
            
1147
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Speech)), PathBuf::from("Languages/en/SimpleSpeak_Rules.yaml"));
1148
1
        });
1149
1
    }
1150
1151
    #[test]
1152
1
    fn test_some_changes() {
1153
1
        PREF_MANAGER.with(|pref_manager| {
1154
1
            let mut pref_manager = pref_manager.borrow_mut();
1155
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1156
1
            pref_manager.set_user_prefs("Verbosity", "Terse").unwrap();
1157
1158
1
            assert_eq!(&pref_manager.pref_to_string("Verbosity"), "Terse");
1159
1160
1
            pref_manager.set_user_prefs("BrailleCode", "UEB").unwrap();
1161
1
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.get_rule_file(&RulesFor::Braille)), PathBuf::from("Braille/UEB/UEB_Rules.yaml"));
1162
1163
            // make sure they show up when building context for speech generation
1164
1
            let merged_prefs = pref_manager.merge_prefs();
1165
1
            assert_eq!(merged_prefs.get("Verbosity").unwrap().as_str().unwrap(), "Terse");
1166
1
        });
1167
1168
1
        crate::interface::set_preference("NavVerbosity", "Terse").unwrap();
1169
1
        PREF_MANAGER.with(|pref_manager| {
1170
1
            let pref_manager = pref_manager.borrow_mut();
1171
1
            let merged_prefs = pref_manager.merge_prefs();
1172
1
            assert_eq!(merged_prefs.get("NavVerbosity").unwrap().as_str().unwrap(), "Terse");
1173
1
        });
1174
1
    }
1175
1176
    #[test]
1177
1
    fn test_illegal_pref_values() {
1178
1
        PREF_MANAGER.with(|pref_manager| {
1179
1
            let mut pref_manager = pref_manager.borrow_mut();
1180
1
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1181
1
            assert!(pref_manager.set_string_pref("Language", "../../../my/path").is_err());
1182
1
            assert!(pref_manager.set_string_pref("BrailleCode", "C:\\my\\path").is_err());
1183
1
            assert!(pref_manager.set_string_pref("SpeechStyle", "/my/path").is_err());
1184
1
        });
1185
1
    }
1186
1187
    #[test]
1188
    #[ignore]   // this is an ugly test for #262 -- it changes the prefs file and so is a bad thing in general
1189
0
    fn test_up_to_date() {
1190
        use std::fs;
1191
        use std::thread::sleep;
1192
        use std::time::Duration;
1193
        use crate::interface;
1194
0
        PREF_MANAGER.with(|pref_manager| {
1195
0
            let mut pref_manager = pref_manager.borrow_mut();
1196
0
            pref_manager.initialize(abs_rules_dir_path()).unwrap();
1197
0
            assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "ClearSpeak");
1198
0
            assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/ClearSpeak_Rules.yaml"));
1199
0
        });
1200
0
        interface::set_mathml("<math><mo>+</mo><mn>10</mn></math>").unwrap();
1201
0
        assert_eq!(interface::get_spoken_text().unwrap(), "ClearSpeak positive from zz 10");
1202
        
1203
0
        let mut file_path = PathBuf::default();
1204
0
        let mut contents = vec![];
1205
0
        PREF_MANAGER.with(|pref_manager| {
1206
0
            let pref_manager = pref_manager.borrow();
1207
0
            if let Some(file_name) = pref_manager.user_prefs_file.as_ref().unwrap().debug_get_file() {
1208
0
                file_path = PathBuf::from(file_name);
1209
0
                contents = fs::read(&file_path).expect(&format!("Failed to write file {} during test", file_name));
1210
0
                let changed_contents = String::from_utf8(contents.clone()).unwrap()
1211
0
                                .replace("SpeechStyle: ClearSpeak", "SpeechStyle: SimpleSpeak");
1212
0
                fs::write(&file_path, changed_contents).unwrap();
1213
0
                sleep(Duration::from_millis(5));  // make sure the time changes enough to be recognized
1214
0
            }
1215
0
        });
1216
0
        assert_eq!(interface::get_spoken_text().unwrap(), "SimpleSpeak positive from zz 10");
1217
0
        fs::write(&file_path, contents).unwrap();
1218
1219
                // assert_eq!(&pref_manager.pref_to_string("SpeechStyle"), "SimpleSpeak");
1220
                // assert_eq!(rel_path(&pref_manager.rules_dir, pref_manager.speech.as_path()), PathBuf::from("Languages/zz/SimpleSpeak_Rules.yaml"));
1221
0
    }
1222
1223
}}
1224
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/pretty_print.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/pretty_print.rs.html index 7dbb099d..302eb630 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/pretty_print.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/pretty_print.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/pretty_print.rs
Line
Count
Source
1
//! Useful functions for debugging and error messages.
2
#![allow(clippy::needless_return)]
3
4
use sxd_document::dom::{Element, ChildOfElement, Attribute};
5
6
// #[allow(dead_code)]
7
// pub fn pp_doc(doc: &Document) {
8
//     for root_child in doc.root().children() {
9
//         if let ChildOfRoot::Element(e) = root_child {
10
//             format_element(&e, 0);
11
//             break;
12
//         }
13
//     };
14
// }
15
16
/// Pretty-print the MathML represented by `element`.
17
4.90k
pub fn mml_to_string(e: Element) -> String {
18
4.90k
    return format_element(e, 0);
19
4.90k
}
20
21
/// Pretty-print the MathML represented by `element`.
22
/// * `indent` -- the amount of indentation to start with
23
57.6k
pub fn format_element(e: Element, indent: usize) -> String {
24
    // let namespace = match e.name().namespace_uri() {
25
    //     None => "".to_string(),
26
    //     Some(prefix) => prefix.to_string() + ":",
27
    // };
28
    // let namespace = namespace.as_str();
29
57.6k
    let namespace = "";
30
57.6k
    let mut answer = format!("{:in$}<{ns}{name}{attrs}>", " ", in=2*indent, ns=namespace, name=e.name().local_part(), attrs=format_attrs(&e.attributes()));
31
57.6k
    let children = e.children();
32
57.6k
    let has_element = children.iter().find(|&&c| matches!(
c57.1k
, ChildOfElement::Element(
_x21.6k
)));
33
57.6k
    if has_element.is_none() {
34
        // print text content
35
35.9k
        let content = children.iter()
36
35.9k
                .map(|c| if let ChildOfElement::Text(
t35.4k
) =
c35.4k
{
t35.4k
.
text35.4k
()} else {
""0
}35.4k
)
37
35.9k
                .collect::<Vec<&str>>()
38
35.9k
                .join("");
39
35.9k
        return format!("{}{}</{}{}>\n", answer, &handle_special_chars(&content), namespace, e.name().local_part());
40
        // for child in children {
41
        //     if let ChildOfElement::Text(t) = child {
42
        //         return format!("{}{}</{}{}>\n", answer, &make_invisible_chars_visible(t.text()), namespace, e.name().local_part());
43
        //     }
44
        // };
45
    } else {
46
21.6k
       answer += "\n";        // tag with children should start on new line
47
        // recurse on each Element child
48
52.7k
        for c in 
e21.6k
.
children21.6k
() {
49
52.7k
            if let ChildOfElement::Element(e) = c {
50
52.7k
                answer += &format_element(e, indent+1);
51
52.7k
            
}0
52
        }
53
    }
54
21.6k
    return answer + &format!("{:in$}</{ns}{name}>\n", " ", in=2*indent, ns=namespace, name=e.name().local_part());
55
56
    // Use the &#x....; representation for invisible chars when printing
57
57.6k
}
58
59
/// Format a vector of attributes as a string with a leading space
60
57.6k
pub fn format_attrs(attrs: &[Attribute]) -> String {
61
57.6k
    let mut result = String::new();
62
134k
    for attr in 
attrs57.6k
{
63
134k
        result += format!(" {}='{}'", attr.name().local_part(), &handle_special_chars(attr.value())).as_str();
64
134k
    }
65
57.6k
    result
66
57.6k
}
67
68
170k
fn handle_special_chars(text: &str) -> String {
69
    // Pre-allocate a buffer. We guess the size is roughly the same as input, maybe slightly larger.
70
170k
    let mut s = String::with_capacity(text.len());
71
963k
    for ch in 
text170k
.
chars170k
() {
72
963k
        match ch {
73
32
            '"' => s.push_str("&quot;"),
74
5
            '&' => s.push_str("&amp;"),
75
277
            '\'' => s.push_str("&apos;"),
76
367
            '<' => s.push_str("&lt;"),
77
410
            '>' => s.push_str("&gt;"),
78
724
            '\u{2061}' => s.push_str("&#x2061;"),
79
3.46k
            '\u{2062}' => s.push_str("&#x2062;"),
80
571
            '\u{2063}' => s.push_str("&#x2063;"),
81
76
            '\u{2064}' => s.push_str("&#x2064;"),
82
957k
            _ => s.push(ch),
83
        }
84
    }
85
170k
    s
86
170k
}
87
88
89
// /// Pretty print an xpath value.
90
// /// If the value is a `NodeSet`, the MathML for the node/element is returned.
91
// pub fn pp_xpath_value(value: Value) {
92
//     use sxd_xpath::Value;
93
//     use sxd_xpath::nodeset::Node;
94
//     debug!("XPath value:");
95
//     if let Value::Nodeset(nodeset) = &value {
96
//         for node in nodeset.document_order() {
97
//             match node {
98
//                 Node::Element(el) => {debug!("{}", crate::pretty_print::format_element(&el, 1))},
99
//                 Node::Text(t) =>  {debug!("found Text value: {}", t.text())},
100
//                 _ => {debug!("found unexpected node type")}
101
//             }
102
//         }
103
//     }
104
// }
105
106
/// Convert YAML to a string using with `indent` amount of space.
107
2.41M
pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
108
2.41M
    let mut result = String::new();
109
2.41M
    {
110
2.41M
        let mut emitter = YamlEmitter::new(&mut result);
111
2.41M
        emitter.compact(true);
112
2.41M
        emitter.emit_node(yaml).unwrap(); // dump the YAML object to a String
113
2.41M
    }
114
2.41M
    if indent == 0 {
115
2.41M
        return result;
116
0
    }
117
0
    let indent_str = format!("{:in$}", " ", in=2*indent);
118
0
    result = result.replace('\n',&("\n".to_string() + &indent_str)); // add indentation to all but first line
119
0
    return indent_str + result.trim_end();  // add indent to first line and remove an extra indent at end
120
2.41M
}
121
122
/* --------------------- Tweaked pretty printer for YAML (from YAML code) --------------------- */
123
124
// Changed: new function to determine if more compact notation can be used (when child is a one entry simple array/hash). Writes
125
// -foo [bar: bletch]
126
// -foo {bar: bletch}
127
20.0k
fn is_scalar(v: &Yaml) -> bool {
128
20.0k
    return !matches!(v, Yaml::Hash(_) | Yaml::Array(_));
129
20.0k
}
130
131
20.0k
fn is_complex(v: &Yaml) -> bool {
132
20.0k
    return match v {
133
1
        Yaml::Hash(h) => {
134
1
            return match h.len() {
135
0
                0 => false,
136
                1 => {
137
1
                    let (key,val) = h.iter().next().unwrap();
138
1
                    return !(is_scalar(key) && is_scalar(val))
139
                },
140
0
                _ => true,
141
            }
142
        },
143
0
        Yaml::Array(v) => {
144
0
            return match v.len() {
145
0
                0 => false,
146
                1 => {
147
0
                    let hash = v[0].as_hash();
148
0
                    if let Some(hash) = hash {
149
0
                        return match hash.len() {
150
0
                            0 => false,
151
                            1 => {
152
0
                                let (key, val) = hash.iter().next().unwrap();
153
0
                                return !(is_scalar(key) && is_scalar(val));
154
                            },
155
0
                            _ => true,
156
                        }
157
                    } else {
158
0
                        return !is_scalar(&v[0]);
159
                    }    
160
                },
161
0
                _ => true,
162
            }
163
        },
164
20.0k
        _ => false,
165
    }
166
20.0k
}
167
168
use std::error::Error;
169
use std::fmt::{self, Display};
170
use yaml_rust::{Yaml, yaml::Hash};
171
172
//use crate::yaml::{Hash, Yaml};
173
174
#[derive(Copy, Clone, Debug)]
175
#[allow(dead_code)] // from original YAML code (isn't used here)
176
enum EmitError {
177
    FmtError(fmt::Error),
178
    BadHashmapKey,
179
}
180
181
impl Error for EmitError {
182
0
    fn cause(&self) -> Option<&dyn Error> {
183
0
        None
184
0
    }
185
}
186
187
impl Display for EmitError {
188
0
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
189
0
        match *self {
190
0
            EmitError::FmtError(ref err) => Display::fmt(err, formatter),
191
0
            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
192
        }
193
0
    }
194
}
195
196
impl From<fmt::Error> for EmitError {
197
0
    fn from(f: fmt::Error) -> Self {
198
0
        EmitError::FmtError(f)
199
0
    }
200
}
201
202
struct YamlEmitter<'a> {
203
    writer: &'a mut dyn fmt::Write,
204
    best_indent: usize,
205
    compact: bool,
206
207
    level: isize,
208
}
209
210
type EmitResult = Result<(), EmitError>;
211
212
// from serialize::json
213
1.12M
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
214
1.12M
    wr.write_str("\"")
?0
;
215
216
1.12M
    let mut start = 0;
217
218
101M
    for (i, byte) in 
v1.12M
.
bytes1.12M
().
enumerate1.12M
() {
219
101M
        let 
escaped0
= match byte {
220
0
            b'"' => "\\\"",
221
0
            b'\\' => "\\\\",
222
0
            b'\x00' => "\\u0000",
223
0
            b'\x01' => "\\u0001",
224
0
            b'\x02' => "\\u0002",
225
0
            b'\x03' => "\\u0003",
226
0
            b'\x04' => "\\u0004",
227
0
            b'\x05' => "\\u0005",
228
0
            b'\x06' => "\\u0006",
229
0
            b'\x07' => "\\u0007",
230
0
            b'\x08' => "\\b",
231
0
            b'\t' => "\\t",
232
0
            b'\n' => "\\n",
233
0
            b'\x0b' => "\\u000b",
234
0
            b'\x0c' => "\\f",
235
0
            b'\r' => "\\r",
236
0
            b'\x0e' => "\\u000e",
237
0
            b'\x0f' => "\\u000f",
238
0
            b'\x10' => "\\u0010",
239
0
            b'\x11' => "\\u0011",
240
0
            b'\x12' => "\\u0012",
241
0
            b'\x13' => "\\u0013",
242
0
            b'\x14' => "\\u0014",
243
0
            b'\x15' => "\\u0015",
244
0
            b'\x16' => "\\u0016",
245
0
            b'\x17' => "\\u0017",
246
0
            b'\x18' => "\\u0018",
247
0
            b'\x19' => "\\u0019",
248
0
            b'\x1a' => "\\u001a",
249
0
            b'\x1b' => "\\u001b",
250
0
            b'\x1c' => "\\u001c",
251
0
            b'\x1d' => "\\u001d",
252
0
            b'\x1e' => "\\u001e",
253
0
            b'\x1f' => "\\u001f",
254
0
            b'\x7f' => "\\u007f",
255
101M
            _ => continue,
256
        };
257
258
0
        if start < i {
259
0
            wr.write_str(&v[start..i])?;
260
0
        }
261
262
0
        wr.write_str(escaped)?;
263
264
0
        start = i + 1;
265
    }
266
267
1.12M
    if start != v.len() {
268
1.12M
        wr.write_str(&v[start..])
?0
;
269
0
    }
270
271
1.12M
    wr.write_str("\"")
?0
;
272
1.12M
    Ok(())
273
1.12M
}
274
275
impl<'a> YamlEmitter<'a> {
276
2.41M
    pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
277
2.41M
        YamlEmitter {
278
2.41M
            writer,
279
2.41M
            best_indent: 2,
280
2.41M
            compact: true,
281
2.41M
            level: -1,
282
2.41M
        }
283
2.41M
    }
284
285
    /// Set 'compact inline notation' on or off, as described for block
286
    /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
287
    /// and
288
    /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
289
    ///
290
    /// In this form, blocks cannot have any properties (such as anchors
291
    /// or tags), which should be OK, because this emitter doesn't
292
    /// (currently) emit those anyways.
293
2.41M
    pub fn compact(&mut self, compact: bool) {
294
2.41M
        self.compact = compact;
295
2.41M
    }
296
297
    /// Determine if this emitter is using 'compact inline notation'.
298
    #[allow(dead_code)]   // not all fields are used in this program
299
0
    pub fn is_compact(&self) -> bool {
300
0
        self.compact
301
0
    }
302
303
    // fn dump(&mut self, doc: &Yaml) -> EmitResult {
304
    //     // write DocumentStart
305
    //     writeln!(self.writer, "---")?;
306
    //     self.level = -1;
307
    //     self.emit_node(doc)
308
    // }
309
310
15.0k
    fn write_indent(&mut self) -> EmitResult {
311
15.0k
        if self.level <= 0 {
312
15.0k
            return Ok(());
313
0
        }
314
0
        for _ in 0..self.level {
315
0
            for _ in 0..self.best_indent {
316
0
                write!(self.writer, " ")?;
317
            }
318
        }
319
0
        Ok(())
320
15.0k
    }
321
322
2.45M
    fn emit_node(&mut self, node: &Yaml) -> EmitResult {
323
2.45M
        match *node {
324
5.02k
            Yaml::Array(ref v) => self.emit_array(v),
325
0
            Yaml::Hash(ref h) => self.emit_hash(h),
326
2.45M
            Yaml::String(ref v) => {
327
2.45M
                if need_quotes(v) {
328
1.12M
                    escape_str(self.writer, v)
?0
;
329
                } else {
330
1.32M
                    write!(self.writer, "{v}")
?0
;
331
                }
332
2.45M
                Ok(())
333
            }
334
0
            Yaml::Boolean(v) => {
335
0
                if v {
336
0
                    self.writer.write_str("true")?;
337
                } else {
338
0
                    self.writer.write_str("false")?;
339
                }
340
0
                Ok(())
341
            }
342
0
            Yaml::Integer(v) => {
343
0
                write!(self.writer, "{v}")?;
344
0
                Ok(())
345
            }
346
0
            Yaml::Real(ref v) => {
347
0
                write!(self.writer, "{v}")?;
348
0
                Ok(())
349
            }
350
            Yaml::Null | Yaml::BadValue => {
351
0
                write!(self.writer, "~")?;
352
0
                Ok(())
353
            }
354
            // XXX(chenyh) Alias
355
0
            _ => Ok(()),
356
        }
357
2.45M
    }
358
359
5.02k
    fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
360
5.02k
        if v.is_empty() {
361
0
            write!(self.writer, "[]")?;
362
5.02k
        } else if v.len() == 1 && 
!is_complex(&v[0])1
{
363
            // changed -- for arrays that have only one simple element, make them more compact by using [...] notation
364
1
            write!(self.writer, "[")
?0
;
365
1
            self.emit_val(true, &v[0])
?0
;
366
1
            write!(self.writer, "]")
?0
;
367
        } else {
368
5.02k
            self.level += 1;
369
            
370
20.0k
            for (cnt, x) in 
v5.02k
.
iter5.02k
().
enumerate5.02k
() {
371
20.0k
                if cnt > 0 {
372
15.0k
                    writeln!(self.writer)
?0
;
373
15.0k
                    self.write_indent()
?0
;
374
5.02k
                }
375
20.0k
                write!(self.writer, "- ")
?0
;
376
20.0k
                self.emit_val(true, x)
?0
;
377
            }
378
5.02k
            self.level -= 1;
379
        }
380
5.02k
        return Ok(());
381
5.02k
    }
382
383
20.0k
    fn emit_hash(&mut self, h: &Hash) -> EmitResult {
384
20.0k
        if h.is_empty() {
385
0
            self.writer.write_str("{}")?;
386
        } else {
387
          // changed -- for hashmaps that have only one simple element, make them more compact by using {...}} notation
388
20.0k
            self.level += 1;
389
20.0k
            for (cnt, (k, v)) in h.iter().enumerate() {
390
                // changed: use new function is_scalar()
391
                // let complex_key = match *k {
392
                //     Yaml::Hash(_) | Yaml::Array(_) => true,
393
                //     _ => false,
394
                // };
395
20.0k
                if cnt > 0 {
396
0
                    writeln!(self.writer)?;
397
0
                    self.write_indent()?;
398
20.0k
                }
399
20.0k
                if !is_scalar(k) {
400
0
                    write!(self.writer, "? ")?;
401
0
                    self.emit_val(true, k)?;
402
0
                    writeln!(self.writer)?;
403
0
                    self.write_indent()?;
404
0
                    write!(self.writer, ": ")?;
405
0
                    self.emit_val(true, v)?;
406
                } else {
407
20.0k
                    self.emit_node(k)
?0
;
408
20.0k
                    write!(self.writer, ": ")
?0
;
409
410
                    // changed to use braces in some cases
411
20.0k
                    let complex_value = is_complex(v);
412
20.0k
                    if !complex_value && v.as_hash().is_some() {
413
0
                        write!(self.writer, "{{")?;
414
20.0k
                    }
415
                    // changed to use complex_value from 'false'
416
20.0k
                    self.emit_val(!complex_value, v)
?0
;
417
20.0k
                    if !complex_value && v.as_hash().is_some() {
418
0
                        write!(self.writer, "}}")?;
419
20.0k
                    }
420
                }
421
            }
422
20.0k
            self.level -= 1;
423
        }   
424
20.0k
        Ok(())
425
20.0k
    }
426
427
    /// Emit a yaml as a hash or array value: i.e., which should appear
428
    /// following a ":" or "-", either after a space, or on a new line.
429
    /// If `inline` is true, then the preceding characters are distinct
430
    /// and short enough to respect the compact flag.
431
    // changed: use to always emit ' ' for inline -- that is now handled elsewhere
432
40.1k
    fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
433
40.1k
        match *val {
434
0
            Yaml::Array(ref v) => {
435
0
                if !((inline && self.compact) || v.is_empty()) {
436
0
                    writeln!(self.writer)?;
437
0
                    self.level += 1;
438
0
                    self.write_indent()?;
439
0
                    self.level -= 1;
440
0
                }
441
0
                self.emit_array(v)
442
            }
443
20.0k
            Yaml::Hash(ref h) => {
444
20.0k
                if !((inline && self.compact) || 
h0
.
is_empty0
()) {
445
0
                    writeln!(self.writer)?;
446
0
                    self.level += 1;
447
0
                    self.write_indent()?;
448
0
                    self.level -= 1;
449
20.0k
                }
450
20.0k
                self.emit_hash(h)
451
            }
452
            _ => {
453
           //     write!(self.writer, " ")?;
454
20.0k
                self.emit_node(val)
455
            }
456
        }
457
40.1k
    }
458
}
459
460
/// Check if the string requires quoting.
461
/// Strings starting with any of the following characters must be quoted.
462
/// :, &, *, ?, |, -, <, >, =, !, %, @
463
/// Strings containing any of the following characters must be quoted.
464
/// {, }, [, ], ,, #, `
465
///
466
/// If the string contains any of the following control characters, it must be escaped with double quotes:
467
/// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P
468
///
469
/// Finally, there are other cases when the strings must be quoted, no matter if you're using single or double quotes:
470
/// * When the string is true or false (otherwise, it would be treated as a boolean value);
471
/// * When the string is null or ~ (otherwise, it would be considered as a null value);
472
/// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value);
473
/// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp).
474
2.45M
fn need_quotes(string: &str) -> bool {
475
2.45M
    fn need_quotes_spaces(string: &str) -> bool {
476
2.45M
        string.starts_with(' ') || 
string2.45M
.
ends_with2.45M
(' ')
477
2.45M
    }
478
479
2.45M
    string.is_empty()
480
2.45M
        || need_quotes_spaces(string)
481
2.45M
        || string.starts_with(['&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@'])
482
19.8M
        || 
string2.26M
.
contains2.26M
(|character: char| matches!(character,
483
            ':'
484
            | '{'
485
            | '}'
486
            | '['
487
            | ']'
488
            | ','
489
            | '#'
490
            | '`'
491
            | '\"'
492
            | '\''
493
            | '\\'
494
18.9M
            | '\0'..='\x06'
495
            | '\t'
496
            | '\n'
497
            | '\r'
498
18.9M
            | '\x0e'..='\x1a'
499
18.9M
            | '\x1c'..='\x1f') )
500
1.32M
        || [
501
1.32M
            // http://yaml.org/type/bool.html
502
1.32M
            // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
503
1.32M
            // them as string, not booleans, although it is violating the YAML 1.1 specification.
504
1.32M
            // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
505
1.32M
            "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
506
1.32M
            "false", "on", "On", "ON", "off", "Off", "OFF",
507
1.32M
            // http://yaml.org/type/null.html
508
1.32M
            "null", "Null", "NULL", "~",
509
1.32M
        ]
510
1.32M
        .contains(&string)
511
1.32M
        || string.starts_with('.')
512
1.32M
        || string.starts_with("0x")
513
1.32M
        || string.parse::<i64>().is_ok()
514
1.32M
        || string.parse::<f64>().is_ok()
515
2.45M
}
516
517
#[cfg(test)]
518
mod tests {
519
    use super::*;
520
    use sxd_document::dom::{ChildOfElement, ChildOfRoot};
521
    use sxd_document::parser;
522
523
    /// helper function
524
10
    fn first_element(package: &sxd_document::Package) -> Element<'_> {
525
10
        let doc = package.as_document();
526
10
        for child in doc.root().children() {
527
10
            if let ChildOfRoot::Element(e) = child {
528
10
                return e;
529
0
            }
530
        }
531
0
        panic!("No root element found");
532
10
    }
533
534
    #[test]
535
    /// Escapes XML entities and invisible characters for safe display.
536
    /// Tests the method on a few hardcoded characters.
537
1
    fn handle_special_chars_escapes() {
538
1
        let input = "& < > \" ' \u{2061} \u{2062} \u{2063} \u{2064} x";
539
1
        let expected = "&amp; &lt; &gt; &quot; &apos; &#x2061; &#x2062; &#x2063; &#x2064; x";
540
1
        assert_eq!(handle_special_chars(input), expected);
541
1
    }
542
543
    #[test]
544
    /// Formats a leaf element as a single line with escaped text.
545
1
    fn format_element_leaf_text() {
546
1
        let package = parser::parse("<math><mi>&amp;</mi></math>").unwrap();
547
1
        let math = first_element(&package);
548
1
        let mi = math
549
1
            .children()
550
1
            .iter()
551
1
            .find_map(|c| match c {
552
1
                ChildOfElement::Element(e) => Some(*e),
553
0
                _ => None,
554
1
            })
555
1
            .unwrap();
556
1
        assert_eq!(format_element(mi, 0), " <mi>&amp;</mi>\n");
557
1
    }
558
559
    #[test]
560
    /// Formats a nested element with indentation and newlines.
561
1
    fn format_element_nested() {
562
1
        let package = parser::parse("<math><mi>x</mi><mo>+</mo></math>").unwrap();
563
1
        let math = first_element(&package);
564
1
        let rendered = format_element(math, 0);
565
1
        assert!(rendered.starts_with(" <math>\n"));
566
1
        assert!(rendered.contains("\n  <mi>x</mi>\n"));
567
1
        assert!(rendered.contains("\n  <mo>+</mo>\n"));
568
1
        assert!(rendered.ends_with("</math>\n"));
569
1
    }
570
571
    #[test]
572
    /// Escapes special characters in attribute values.
573
1
    fn format_attrs_escapes() {
574
1
        let package = parser::parse("<math a=\"&amp;\" b=\"&lt;\"></math>").unwrap();
575
1
        let math = first_element(&package);
576
1
        let rendered = format_attrs(&math.attributes());
577
1
        assert!(rendered.contains(" a='&amp;'"));
578
1
        assert!(rendered.contains(" b='&lt;'"));
579
1
    }
580
581
    #[test]
582
    /// Preserves non-BMP characters from a literal XML form.
583
1
    fn format_element_non_bmp_character_literal() {
584
1
        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
585
1
        let math = first_element(&package);
586
1
        let mi = math
587
1
            .children()
588
1
            .iter()
589
1
            .find_map(|c| match c {
590
1
                ChildOfElement::Element(e) => Some(*e),
591
0
                _ => None,
592
1
            })
593
1
            .unwrap();
594
1
        let rendered = format_element(mi, 0);
595
1
        assert!(rendered.contains("𝞪"));
596
1
    }
597
598
    #[test]
599
    /// Preserves non-BMP characters from a numeric XML form.
600
1
    fn format_element_non_bmp_character_numeric() {
601
1
        let package = parser::parse("<math><mi>&#x1d7aa;</mi></math>").unwrap();
602
1
        let math = first_element(&package);
603
1
        let mi = math
604
1
            .children()
605
1
            .iter()
606
1
            .find_map(|c| match c {
607
1
                ChildOfElement::Element(e) => Some(*e),
608
0
                _ => None,
609
1
            })
610
1
            .unwrap();
611
1
        let rendered = format_element(mi, 0);
612
1
        assert!(rendered.contains("𝞪"));
613
1
    }
614
615
    #[test]
616
    /// Evaluates non-BMP literal text through sxd_xpath.
617
1
    fn xpath_non_bmp_literal() {
618
        use sxd_xpath::{Factory, Value};
619
620
1
        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
621
1
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
622
1
        let context = sxd_xpath::Context::new();
623
624
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
625
1
        match value {
626
1
            Value::String(s) => assert_eq!(s, "𝞪"),
627
0
            _ => panic!("Expected string value from xpath"),
628
        }
629
1
    }
630
631
    #[test]
632
    /// Evaluates non-BMP numeric text through sxd_xpath.
633
1
    fn xpath_non_bmp_numeric() {
634
        use sxd_xpath::{Factory, Value};
635
636
1
        let package = parser::parse("<math><mi>&#x1d7aa;</mi></math>").unwrap();
637
1
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
638
1
        let context = sxd_xpath::Context::new();
639
640
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
641
1
        match value {
642
1
            Value::String(s) => assert_eq!(s, "𝞪"),
643
0
            _ => panic!("Expected string value from xpath"),
644
        }
645
1
    }
646
647
    #[test]
648
    /// Evaluates non-BMP literal text with a MathML namespace-qualified XPath.
649
1
    fn xpath_non_bmp_namespace_literal() {
650
        use sxd_xpath::{Factory, Value};
651
652
1
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
653
1
        let package = parser::parse(xml).unwrap();
654
1
        let xpath = Factory::new()
655
1
            .build("string(/m:math/m:mi)")
656
1
            .unwrap()
657
1
            .unwrap();
658
1
        let mut context = sxd_xpath::Context::new();
659
1
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
660
661
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
662
1
        match value {
663
1
            Value::String(s) => assert_eq!(s, "𝞪"),
664
0
            _ => panic!("Expected string value from xpath"),
665
        }
666
1
    }
667
668
    #[test]
669
    /// Evaluates non-BMP numeric text with a MathML namespace-qualified XPath.
670
1
    fn xpath_non_bmp_namespace_numeric() {
671
        use sxd_xpath::{Factory, Value};
672
673
1
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>&#120746;</mi></math>";
674
1
        let package = parser::parse(xml).unwrap();
675
1
        let xpath = Factory::new()
676
1
            .build("string(/m:math/m:mi)")
677
1
            .unwrap()
678
1
            .unwrap();
679
1
        let mut context = sxd_xpath::Context::new();
680
1
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
681
682
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
683
1
        match value {
684
1
            Value::String(s) => assert_eq!(s, "𝞪"),
685
0
            _ => panic!("Expected string value from xpath"),
686
        }
687
1
    }
688
689
    #[test]
690
    /// Extracts a text node via XPath (nodeset result) and verifies the non-BMP character survives.
691
1
    fn xpath_non_bmp_text_nodeset() {
692
        use sxd_xpath::{Factory, Value};
693
694
1
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
695
1
        let package = parser::parse(xml).unwrap();
696
1
        let xpath = Factory::new().build("/m:math/m:mi/text()").unwrap().unwrap();
697
1
        let mut context = sxd_xpath::Context::new();
698
1
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
699
700
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
701
1
        match value {
702
1
            Value::Nodeset(nodes) => {
703
1
                let ordered = nodes.document_order();
704
1
                let node = ordered.first().expect("Expected one text node");
705
1
                let text = node.text().expect("Expected text node");
706
1
                assert_eq!(text.text(), "𝞪");
707
1
                assert_eq!(ordered.len(), 1);
708
            }
709
0
            _ => panic!("Expected nodeset value from xpath"),
710
        }
711
1
    }
712
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/pretty_print.rs
Line
Count
Source
1
//! Useful functions for debugging and error messages.
2
#![allow(clippy::needless_return)]
3
4
use sxd_document::dom::{Element, ChildOfElement, Attribute};
5
6
// #[allow(dead_code)]
7
// pub fn pp_doc(doc: &Document) {
8
//     for root_child in doc.root().children() {
9
//         if let ChildOfRoot::Element(e) = root_child {
10
//             format_element(&e, 0);
11
//             break;
12
//         }
13
//     };
14
// }
15
16
/// Pretty-print the MathML represented by `element`.
17
4.90k
pub fn mml_to_string(e: Element) -> String {
18
4.90k
    return format_element(e, 0);
19
4.90k
}
20
21
/// Pretty-print the MathML represented by `element`.
22
/// * `indent` -- the amount of indentation to start with
23
57.6k
pub fn format_element(e: Element, indent: usize) -> String {
24
    // let namespace = match e.name().namespace_uri() {
25
    //     None => "".to_string(),
26
    //     Some(prefix) => prefix.to_string() + ":",
27
    // };
28
    // let namespace = namespace.as_str();
29
57.6k
    let namespace = "";
30
57.6k
    let mut answer = format!("{:in$}<{ns}{name}{attrs}>", " ", in=2*indent, ns=namespace, name=e.name().local_part(), attrs=format_attrs(&e.attributes()));
31
57.6k
    let children = e.children();
32
57.6k
    let has_element = children.iter().find(|&&c| matches!(
c57.1k
, ChildOfElement::Element(
_x21.6k
)));
33
57.6k
    if has_element.is_none() {
34
        // print text content
35
35.9k
        let content = children.iter()
36
35.9k
                .map(|c| if let ChildOfElement::Text(
t35.4k
) =
c35.4k
{
t35.4k
.
text35.4k
()} else {
""0
}35.4k
)
37
35.9k
                .collect::<Vec<&str>>()
38
35.9k
                .join("");
39
35.9k
        return format!("{}{}</{}{}>\n", answer, &handle_special_chars(&content), namespace, e.name().local_part());
40
        // for child in children {
41
        //     if let ChildOfElement::Text(t) = child {
42
        //         return format!("{}{}</{}{}>\n", answer, &make_invisible_chars_visible(t.text()), namespace, e.name().local_part());
43
        //     }
44
        // };
45
    } else {
46
21.6k
       answer += "\n";        // tag with children should start on new line
47
        // recurse on each Element child
48
52.7k
        for c in 
e21.6k
.
children21.6k
() {
49
52.7k
            if let ChildOfElement::Element(e) = c {
50
52.7k
                answer += &format_element(e, indent+1);
51
52.7k
            
}0
52
        }
53
    }
54
21.6k
    return answer + &format!("{:in$}</{ns}{name}>\n", " ", in=2*indent, ns=namespace, name=e.name().local_part());
55
56
    // Use the &#x....; representation for invisible chars when printing
57
57.6k
}
58
59
/// Format a vector of attributes as a string with a leading space
60
57.6k
pub fn format_attrs(attrs: &[Attribute]) -> String {
61
57.6k
    let mut result = String::new();
62
134k
    for attr in 
attrs57.6k
{
63
134k
        result += format!(" {}='{}'", attr.name().local_part(), &handle_special_chars(attr.value())).as_str();
64
134k
    }
65
57.6k
    result
66
57.6k
}
67
68
170k
fn handle_special_chars(text: &str) -> String {
69
    // Pre-allocate a buffer. We guess the size is roughly the same as input, maybe slightly larger.
70
170k
    let mut s = String::with_capacity(text.len());
71
963k
    for ch in 
text170k
.
chars170k
() {
72
963k
        match ch {
73
32
            '"' => s.push_str("&quot;"),
74
5
            '&' => s.push_str("&amp;"),
75
277
            '\'' => s.push_str("&apos;"),
76
367
            '<' => s.push_str("&lt;"),
77
410
            '>' => s.push_str("&gt;"),
78
724
            '\u{2061}' => s.push_str("&#x2061;"),
79
3.46k
            '\u{2062}' => s.push_str("&#x2062;"),
80
571
            '\u{2063}' => s.push_str("&#x2063;"),
81
76
            '\u{2064}' => s.push_str("&#x2064;"),
82
957k
            _ => s.push(ch),
83
        }
84
    }
85
170k
    s
86
170k
}
87
88
89
// /// Pretty print an xpath value.
90
// /// If the value is a `NodeSet`, the MathML for the node/element is returned.
91
// pub fn pp_xpath_value(value: Value) {
92
//     use sxd_xpath::Value;
93
//     use sxd_xpath::nodeset::Node;
94
//     debug!("XPath value:");
95
//     if let Value::Nodeset(nodeset) = &value {
96
//         for node in nodeset.document_order() {
97
//             match node {
98
//                 Node::Element(el) => {debug!("{}", crate::pretty_print::format_element(&el, 1))},
99
//                 Node::Text(t) =>  {debug!("found Text value: {}", t.text())},
100
//                 _ => {debug!("found unexpected node type")}
101
//             }
102
//         }
103
//     }
104
// }
105
106
/// Convert YAML to a string using with `indent` amount of space.
107
2.41M
pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
108
2.41M
    let mut result = String::new();
109
2.41M
    {
110
2.41M
        let mut emitter = YamlEmitter::new(&mut result);
111
2.41M
        emitter.compact(true);
112
2.41M
        emitter.emit_node(yaml).unwrap(); // dump the YAML object to a String
113
2.41M
    }
114
2.41M
    if indent == 0 {
115
2.41M
        return result;
116
0
    }
117
0
    let indent_str = format!("{:in$}", " ", in=2*indent);
118
0
    result = result.replace('\n',&("\n".to_string() + &indent_str)); // add indentation to all but first line
119
0
    return indent_str + result.trim_end();  // add indent to first line and remove an extra indent at end
120
2.41M
}
121
122
/* --------------------- Tweaked pretty printer for YAML (from YAML code) --------------------- */
123
124
// Changed: new function to determine if more compact notation can be used (when child is a one entry simple array/hash). Writes
125
// -foo [bar: bletch]
126
// -foo {bar: bletch}
127
20.0k
fn is_scalar(v: &Yaml) -> bool {
128
20.0k
    return !matches!(v, Yaml::Hash(_) | Yaml::Array(_));
129
20.0k
}
130
131
20.0k
fn is_complex(v: &Yaml) -> bool {
132
20.0k
    return match v {
133
1
        Yaml::Hash(h) => {
134
1
            return match h.len() {
135
0
                0 => false,
136
                1 => {
137
1
                    let (key,val) = h.iter().next().unwrap();
138
1
                    return !(is_scalar(key) && is_scalar(val))
139
                },
140
0
                _ => true,
141
            }
142
        },
143
0
        Yaml::Array(v) => {
144
0
            return match v.len() {
145
0
                0 => false,
146
                1 => {
147
0
                    let hash = v[0].as_hash();
148
0
                    if let Some(hash) = hash {
149
0
                        return match hash.len() {
150
0
                            0 => false,
151
                            1 => {
152
0
                                let (key, val) = hash.iter().next().unwrap();
153
0
                                return !(is_scalar(key) && is_scalar(val));
154
                            },
155
0
                            _ => true,
156
                        }
157
                    } else {
158
0
                        return !is_scalar(&v[0]);
159
                    }    
160
                },
161
0
                _ => true,
162
            }
163
        },
164
20.0k
        _ => false,
165
    }
166
20.0k
}
167
168
use std::error::Error;
169
use std::fmt::{self, Display};
170
use yaml_rust::{Yaml, yaml::Hash};
171
172
//use crate::yaml::{Hash, Yaml};
173
174
#[derive(Copy, Clone, Debug)]
175
#[allow(dead_code)] // from original YAML code (isn't used here)
176
enum EmitError {
177
    FmtError(fmt::Error),
178
    BadHashmapKey,
179
}
180
181
impl Error for EmitError {
182
0
    fn cause(&self) -> Option<&dyn Error> {
183
0
        None
184
0
    }
185
}
186
187
impl Display for EmitError {
188
0
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
189
0
        match *self {
190
0
            EmitError::FmtError(ref err) => Display::fmt(err, formatter),
191
0
            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
192
        }
193
0
    }
194
}
195
196
impl From<fmt::Error> for EmitError {
197
0
    fn from(f: fmt::Error) -> Self {
198
0
        EmitError::FmtError(f)
199
0
    }
200
}
201
202
struct YamlEmitter<'a> {
203
    writer: &'a mut dyn fmt::Write,
204
    best_indent: usize,
205
    compact: bool,
206
207
    level: isize,
208
}
209
210
type EmitResult = Result<(), EmitError>;
211
212
// from serialize::json
213
1.12M
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
214
1.12M
    wr.write_str("\"")
?0
;
215
216
1.12M
    let mut start = 0;
217
218
101M
    for (i, byte) in 
v1.12M
.
bytes1.12M
().
enumerate1.12M
() {
219
101M
        let 
escaped0
= match byte {
220
0
            b'"' => "\\\"",
221
0
            b'\\' => "\\\\",
222
0
            b'\x00' => "\\u0000",
223
0
            b'\x01' => "\\u0001",
224
0
            b'\x02' => "\\u0002",
225
0
            b'\x03' => "\\u0003",
226
0
            b'\x04' => "\\u0004",
227
0
            b'\x05' => "\\u0005",
228
0
            b'\x06' => "\\u0006",
229
0
            b'\x07' => "\\u0007",
230
0
            b'\x08' => "\\b",
231
0
            b'\t' => "\\t",
232
0
            b'\n' => "\\n",
233
0
            b'\x0b' => "\\u000b",
234
0
            b'\x0c' => "\\f",
235
0
            b'\r' => "\\r",
236
0
            b'\x0e' => "\\u000e",
237
0
            b'\x0f' => "\\u000f",
238
0
            b'\x10' => "\\u0010",
239
0
            b'\x11' => "\\u0011",
240
0
            b'\x12' => "\\u0012",
241
0
            b'\x13' => "\\u0013",
242
0
            b'\x14' => "\\u0014",
243
0
            b'\x15' => "\\u0015",
244
0
            b'\x16' => "\\u0016",
245
0
            b'\x17' => "\\u0017",
246
0
            b'\x18' => "\\u0018",
247
0
            b'\x19' => "\\u0019",
248
0
            b'\x1a' => "\\u001a",
249
0
            b'\x1b' => "\\u001b",
250
0
            b'\x1c' => "\\u001c",
251
0
            b'\x1d' => "\\u001d",
252
0
            b'\x1e' => "\\u001e",
253
0
            b'\x1f' => "\\u001f",
254
0
            b'\x7f' => "\\u007f",
255
101M
            _ => continue,
256
        };
257
258
0
        if start < i {
259
0
            wr.write_str(&v[start..i])?;
260
0
        }
261
262
0
        wr.write_str(escaped)?;
263
264
0
        start = i + 1;
265
    }
266
267
1.12M
    if start != v.len() {
268
1.12M
        wr.write_str(&v[start..])
?0
;
269
0
    }
270
271
1.12M
    wr.write_str("\"")
?0
;
272
1.12M
    Ok(())
273
1.12M
}
274
275
impl<'a> YamlEmitter<'a> {
276
2.41M
    pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
277
2.41M
        YamlEmitter {
278
2.41M
            writer,
279
2.41M
            best_indent: 2,
280
2.41M
            compact: true,
281
2.41M
            level: -1,
282
2.41M
        }
283
2.41M
    }
284
285
    /// Set 'compact inline notation' on or off, as described for block
286
    /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
287
    /// and
288
    /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
289
    ///
290
    /// In this form, blocks cannot have any properties (such as anchors
291
    /// or tags), which should be OK, because this emitter doesn't
292
    /// (currently) emit those anyways.
293
2.41M
    pub fn compact(&mut self, compact: bool) {
294
2.41M
        self.compact = compact;
295
2.41M
    }
296
297
    /// Determine if this emitter is using 'compact inline notation'.
298
    #[allow(dead_code)]   // not all fields are used in this program
299
0
    pub fn is_compact(&self) -> bool {
300
0
        self.compact
301
0
    }
302
303
    // fn dump(&mut self, doc: &Yaml) -> EmitResult {
304
    //     // write DocumentStart
305
    //     writeln!(self.writer, "---")?;
306
    //     self.level = -1;
307
    //     self.emit_node(doc)
308
    // }
309
310
15.0k
    fn write_indent(&mut self) -> EmitResult {
311
15.0k
        if self.level <= 0 {
312
15.0k
            return Ok(());
313
0
        }
314
0
        for _ in 0..self.level {
315
0
            for _ in 0..self.best_indent {
316
0
                write!(self.writer, " ")?;
317
            }
318
        }
319
0
        Ok(())
320
15.0k
    }
321
322
2.45M
    fn emit_node(&mut self, node: &Yaml) -> EmitResult {
323
2.45M
        match *node {
324
5.02k
            Yaml::Array(ref v) => self.emit_array(v),
325
0
            Yaml::Hash(ref h) => self.emit_hash(h),
326
2.45M
            Yaml::String(ref v) => {
327
2.45M
                if need_quotes(v) {
328
1.12M
                    escape_str(self.writer, v)
?0
;
329
                } else {
330
1.32M
                    write!(self.writer, "{v}")
?0
;
331
                }
332
2.45M
                Ok(())
333
            }
334
0
            Yaml::Boolean(v) => {
335
0
                if v {
336
0
                    self.writer.write_str("true")?;
337
                } else {
338
0
                    self.writer.write_str("false")?;
339
                }
340
0
                Ok(())
341
            }
342
0
            Yaml::Integer(v) => {
343
0
                write!(self.writer, "{v}")?;
344
0
                Ok(())
345
            }
346
0
            Yaml::Real(ref v) => {
347
0
                write!(self.writer, "{v}")?;
348
0
                Ok(())
349
            }
350
            Yaml::Null | Yaml::BadValue => {
351
0
                write!(self.writer, "~")?;
352
0
                Ok(())
353
            }
354
            // XXX(chenyh) Alias
355
0
            _ => Ok(()),
356
        }
357
2.45M
    }
358
359
5.02k
    fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
360
5.02k
        if v.is_empty() {
361
0
            write!(self.writer, "[]")?;
362
5.02k
        } else if v.len() == 1 && 
!is_complex(&v[0])1
{
363
            // changed -- for arrays that have only one simple element, make them more compact by using [...] notation
364
1
            write!(self.writer, "[")
?0
;
365
1
            self.emit_val(true, &v[0])
?0
;
366
1
            write!(self.writer, "]")
?0
;
367
        } else {
368
5.02k
            self.level += 1;
369
            
370
20.0k
            for (cnt, x) in 
v5.02k
.
iter5.02k
().
enumerate5.02k
() {
371
20.0k
                if cnt > 0 {
372
15.0k
                    writeln!(self.writer)
?0
;
373
15.0k
                    self.write_indent()
?0
;
374
5.02k
                }
375
20.0k
                write!(self.writer, "- ")
?0
;
376
20.0k
                self.emit_val(true, x)
?0
;
377
            }
378
5.02k
            self.level -= 1;
379
        }
380
5.02k
        return Ok(());
381
5.02k
    }
382
383
20.0k
    fn emit_hash(&mut self, h: &Hash) -> EmitResult {
384
20.0k
        if h.is_empty() {
385
0
            self.writer.write_str("{}")?;
386
        } else {
387
          // changed -- for hashmaps that have only one simple element, make them more compact by using {...}} notation
388
20.0k
            self.level += 1;
389
20.0k
            for (cnt, (k, v)) in h.iter().enumerate() {
390
                // changed: use new function is_scalar()
391
                // let complex_key = match *k {
392
                //     Yaml::Hash(_) | Yaml::Array(_) => true,
393
                //     _ => false,
394
                // };
395
20.0k
                if cnt > 0 {
396
0
                    writeln!(self.writer)?;
397
0
                    self.write_indent()?;
398
20.0k
                }
399
20.0k
                if !is_scalar(k) {
400
0
                    write!(self.writer, "? ")?;
401
0
                    self.emit_val(true, k)?;
402
0
                    writeln!(self.writer)?;
403
0
                    self.write_indent()?;
404
0
                    write!(self.writer, ": ")?;
405
0
                    self.emit_val(true, v)?;
406
                } else {
407
20.0k
                    self.emit_node(k)
?0
;
408
20.0k
                    write!(self.writer, ": ")
?0
;
409
410
                    // changed to use braces in some cases
411
20.0k
                    let complex_value = is_complex(v);
412
20.0k
                    if !complex_value && v.as_hash().is_some() {
413
0
                        write!(self.writer, "{{")?;
414
20.0k
                    }
415
                    // changed to use complex_value from 'false'
416
20.0k
                    self.emit_val(!complex_value, v)
?0
;
417
20.0k
                    if !complex_value && v.as_hash().is_some() {
418
0
                        write!(self.writer, "}}")?;
419
20.0k
                    }
420
                }
421
            }
422
20.0k
            self.level -= 1;
423
        }   
424
20.0k
        Ok(())
425
20.0k
    }
426
427
    /// Emit a yaml as a hash or array value: i.e., which should appear
428
    /// following a ":" or "-", either after a space, or on a new line.
429
    /// If `inline` is true, then the preceding characters are distinct
430
    /// and short enough to respect the compact flag.
431
    // changed: use to always emit ' ' for inline -- that is now handled elsewhere
432
40.1k
    fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
433
40.1k
        match *val {
434
0
            Yaml::Array(ref v) => {
435
0
                if !((inline && self.compact) || v.is_empty()) {
436
0
                    writeln!(self.writer)?;
437
0
                    self.level += 1;
438
0
                    self.write_indent()?;
439
0
                    self.level -= 1;
440
0
                }
441
0
                self.emit_array(v)
442
            }
443
20.0k
            Yaml::Hash(ref h) => {
444
20.0k
                if !((inline && self.compact) || 
h0
.
is_empty0
()) {
445
0
                    writeln!(self.writer)?;
446
0
                    self.level += 1;
447
0
                    self.write_indent()?;
448
0
                    self.level -= 1;
449
20.0k
                }
450
20.0k
                self.emit_hash(h)
451
            }
452
            _ => {
453
           //     write!(self.writer, " ")?;
454
20.0k
                self.emit_node(val)
455
            }
456
        }
457
40.1k
    }
458
}
459
460
/// Check if the string requires quoting.
461
/// Strings starting with any of the following characters must be quoted.
462
/// :, &, *, ?, |, -, <, >, =, !, %, @
463
/// Strings containing any of the following characters must be quoted.
464
/// {, }, [, ], ,, #, `
465
///
466
/// If the string contains any of the following control characters, it must be escaped with double quotes:
467
/// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P
468
///
469
/// Finally, there are other cases when the strings must be quoted, no matter if you're using single or double quotes:
470
/// * When the string is true or false (otherwise, it would be treated as a boolean value);
471
/// * When the string is null or ~ (otherwise, it would be considered as a null value);
472
/// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value);
473
/// * When the string looks like a date (e.g. 2014-12-31) (otherwise it would be automatically converted into a Unix timestamp).
474
2.45M
fn need_quotes(string: &str) -> bool {
475
2.45M
    fn need_quotes_spaces(string: &str) -> bool {
476
2.45M
        string.starts_with(' ') || 
string2.45M
.
ends_with2.45M
(' ')
477
2.45M
    }
478
479
2.45M
    string.is_empty()
480
2.45M
        || need_quotes_spaces(string)
481
2.45M
        || string.starts_with(['&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@'])
482
19.8M
        || 
string2.26M
.
contains2.26M
(|character: char| matches!(character,
483
            ':'
484
            | '{'
485
            | '}'
486
            | '['
487
            | ']'
488
            | ','
489
            | '#'
490
            | '`'
491
            | '\"'
492
            | '\''
493
            | '\\'
494
18.9M
            | '\0'..='\x06'
495
            | '\t'
496
            | '\n'
497
            | '\r'
498
18.9M
            | '\x0e'..='\x1a'
499
18.9M
            | '\x1c'..='\x1f') )
500
1.32M
        || [
501
1.32M
            // http://yaml.org/type/bool.html
502
1.32M
            // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
503
1.32M
            // them as string, not booleans, although it is violating the YAML 1.1 specification.
504
1.32M
            // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
505
1.32M
            "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
506
1.32M
            "false", "on", "On", "ON", "off", "Off", "OFF",
507
1.32M
            // http://yaml.org/type/null.html
508
1.32M
            "null", "Null", "NULL", "~",
509
1.32M
        ]
510
1.32M
        .contains(&string)
511
1.32M
        || string.starts_with('.')
512
1.32M
        || string.starts_with("0x")
513
1.32M
        || string.parse::<i64>().is_ok()
514
1.32M
        || string.parse::<f64>().is_ok()
515
2.45M
}
516
517
#[cfg(test)]
518
mod tests {
519
    use super::*;
520
    use sxd_document::dom::{ChildOfElement, ChildOfRoot};
521
    use sxd_document::parser;
522
523
    /// helper function
524
10
    fn first_element(package: &sxd_document::Package) -> Element<'_> {
525
10
        let doc = package.as_document();
526
10
        for child in doc.root().children() {
527
10
            if let ChildOfRoot::Element(e) = child {
528
10
                return e;
529
0
            }
530
        }
531
0
        panic!("No root element found");
532
10
    }
533
534
    #[test]
535
    /// Escapes XML entities and invisible characters for safe display.
536
    /// Tests the method on a few hardcoded characters.
537
1
    fn handle_special_chars_escapes() {
538
1
        let input = "& < > \" ' \u{2061} \u{2062} \u{2063} \u{2064} x";
539
1
        let expected = "&amp; &lt; &gt; &quot; &apos; &#x2061; &#x2062; &#x2063; &#x2064; x";
540
1
        assert_eq!(handle_special_chars(input), expected);
541
1
    }
542
543
    #[test]
544
    /// Formats a leaf element as a single line with escaped text.
545
1
    fn format_element_leaf_text() {
546
1
        let package = parser::parse("<math><mi>&amp;</mi></math>").unwrap();
547
1
        let math = first_element(&package);
548
1
        let mi = math
549
1
            .children()
550
1
            .iter()
551
1
            .find_map(|c| match c {
552
1
                ChildOfElement::Element(e) => Some(*e),
553
0
                _ => None,
554
1
            })
555
1
            .unwrap();
556
1
        assert_eq!(format_element(mi, 0), " <mi>&amp;</mi>\n");
557
1
    }
558
559
    #[test]
560
    /// Formats a nested element with indentation and newlines.
561
1
    fn format_element_nested() {
562
1
        let package = parser::parse("<math><mi>x</mi><mo>+</mo></math>").unwrap();
563
1
        let math = first_element(&package);
564
1
        let rendered = format_element(math, 0);
565
1
        assert!(rendered.starts_with(" <math>\n"));
566
1
        assert!(rendered.contains("\n  <mi>x</mi>\n"));
567
1
        assert!(rendered.contains("\n  <mo>+</mo>\n"));
568
1
        assert!(rendered.ends_with("</math>\n"));
569
1
    }
570
571
    #[test]
572
    /// Escapes special characters in attribute values.
573
1
    fn format_attrs_escapes() {
574
1
        let package = parser::parse("<math a=\"&amp;\" b=\"&lt;\"></math>").unwrap();
575
1
        let math = first_element(&package);
576
1
        let rendered = format_attrs(&math.attributes());
577
1
        assert!(rendered.contains(" a='&amp;'"));
578
1
        assert!(rendered.contains(" b='&lt;'"));
579
1
    }
580
581
    #[test]
582
    /// Preserves non-BMP characters from a literal XML form.
583
1
    fn format_element_non_bmp_character_literal() {
584
1
        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
585
1
        let math = first_element(&package);
586
1
        let mi = math
587
1
            .children()
588
1
            .iter()
589
1
            .find_map(|c| match c {
590
1
                ChildOfElement::Element(e) => Some(*e),
591
0
                _ => None,
592
1
            })
593
1
            .unwrap();
594
1
        let rendered = format_element(mi, 0);
595
1
        assert!(rendered.contains("𝞪"));
596
1
    }
597
598
    #[test]
599
    /// Preserves non-BMP characters from a numeric XML form.
600
1
    fn format_element_non_bmp_character_numeric() {
601
1
        let package = parser::parse("<math><mi>&#x1d7aa;</mi></math>").unwrap();
602
1
        let math = first_element(&package);
603
1
        let mi = math
604
1
            .children()
605
1
            .iter()
606
1
            .find_map(|c| match c {
607
1
                ChildOfElement::Element(e) => Some(*e),
608
0
                _ => None,
609
1
            })
610
1
            .unwrap();
611
1
        let rendered = format_element(mi, 0);
612
1
        assert!(rendered.contains("𝞪"));
613
1
    }
614
615
    #[test]
616
    /// Evaluates non-BMP literal text through sxd_xpath.
617
1
    fn xpath_non_bmp_literal() {
618
        use sxd_xpath::{Factory, Value};
619
620
1
        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
621
1
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
622
1
        let context = sxd_xpath::Context::new();
623
624
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
625
1
        match value {
626
1
            Value::String(s) => assert_eq!(s, "𝞪"),
627
0
            _ => panic!("Expected string value from xpath"),
628
        }
629
1
    }
630
631
    #[test]
632
    /// Evaluates non-BMP numeric text through sxd_xpath.
633
1
    fn xpath_non_bmp_numeric() {
634
        use sxd_xpath::{Factory, Value};
635
636
1
        let package = parser::parse("<math><mi>&#x1d7aa;</mi></math>").unwrap();
637
1
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
638
1
        let context = sxd_xpath::Context::new();
639
640
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
641
1
        match value {
642
1
            Value::String(s) => assert_eq!(s, "𝞪"),
643
0
            _ => panic!("Expected string value from xpath"),
644
        }
645
1
    }
646
647
    #[test]
648
    /// Evaluates non-BMP literal text with a MathML namespace-qualified XPath.
649
1
    fn xpath_non_bmp_namespace_literal() {
650
        use sxd_xpath::{Factory, Value};
651
652
1
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
653
1
        let package = parser::parse(xml).unwrap();
654
1
        let xpath = Factory::new()
655
1
            .build("string(/m:math/m:mi)")
656
1
            .unwrap()
657
1
            .unwrap();
658
1
        let mut context = sxd_xpath::Context::new();
659
1
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
660
661
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
662
1
        match value {
663
1
            Value::String(s) => assert_eq!(s, "𝞪"),
664
0
            _ => panic!("Expected string value from xpath"),
665
        }
666
1
    }
667
668
    #[test]
669
    /// Evaluates non-BMP numeric text with a MathML namespace-qualified XPath.
670
1
    fn xpath_non_bmp_namespace_numeric() {
671
        use sxd_xpath::{Factory, Value};
672
673
1
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>&#120746;</mi></math>";
674
1
        let package = parser::parse(xml).unwrap();
675
1
        let xpath = Factory::new()
676
1
            .build("string(/m:math/m:mi)")
677
1
            .unwrap()
678
1
            .unwrap();
679
1
        let mut context = sxd_xpath::Context::new();
680
1
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
681
682
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
683
1
        match value {
684
1
            Value::String(s) => assert_eq!(s, "𝞪"),
685
0
            _ => panic!("Expected string value from xpath"),
686
        }
687
1
    }
688
689
    #[test]
690
    /// Extracts a text node via XPath (nodeset result) and verifies the non-BMP character survives.
691
1
    fn xpath_non_bmp_text_nodeset() {
692
        use sxd_xpath::{Factory, Value};
693
694
1
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
695
1
        let package = parser::parse(xml).unwrap();
696
1
        let xpath = Factory::new().build("/m:math/m:mi/text()").unwrap().unwrap();
697
1
        let mut context = sxd_xpath::Context::new();
698
1
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
699
700
1
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
701
1
        match value {
702
1
            Value::Nodeset(nodes) => {
703
1
                let ordered = nodes.document_order();
704
1
                let node = ordered.first().expect("Expected one text node");
705
1
                let text = node.text().expect("Expected text node");
706
1
                assert_eq!(text.text(), "𝞪");
707
1
                assert_eq!(ordered.len(), 1);
708
            }
709
0
            _ => panic!("Expected nodeset value from xpath"),
710
        }
711
1
    }
712
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/shim_filesystem.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/shim_filesystem.rs.html index 20b8c18b..c23931c0 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/shim_filesystem.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/shim_filesystem.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/shim_filesystem.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
//! This is used to paste over normal reading of the Rules files and building them into the code for web assembly (WASM) which
3
//! can't do file system access. For the latter, the Rules directory is zipped up.
4
5
use std::path::{Path, PathBuf};
6
use crate::errors::*;
7
use cfg_if::cfg_if;
8
9
#[allow(unused_imports)]
10
use log::{debug};
11
12
13
// The zipped files are needed by WASM builds.
14
// However, they are also useful for other builds because there really isn't another good way to get at the rules.
15
// Other build scripts can extract these files and unzip to their needed locations.
16
// I'm not thrilled with this solution as it seems hacky, but I don't know another way for crates to allow for each access to data.
17
cfg_if! {
18
    if #[cfg(any(target_family = "wasm", feature = "include-zip"))] {
19
        // For the include-zip builds, we build a fake file system based on ZIPPED_RULE_FILES.
20
        // That stream encodes other zip files that must be unzipped.
21
        // Only one level of embedded zip files is supported.
22
        use zip::ZipArchive;
23
        pub static ZIPPED_RULE_FILES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"),"/rules.zip"));
24
25
        /// Struct to indicate where a file is located in the zip archive(s)
26
        #[derive(Debug, Copy, Clone)]
27
        struct ArchivePath {
28
            main: usize,                // index into ZIPPED_RULE_FILES
29
            zipped: Option<usize>,      // if Some, index into embedded zip file, None if top-level zip file
30
        }
31
32
        use std::cell::RefCell;
33
        use std::io::Cursor;
34
        use std::io::Read;
35
        use std::collections::{HashMap, HashSet};
36
        thread_local! {
37
            // mapping the file names to whether they are a directory or a file
38
            // Note: these are always stored with "/" as the path separator
39
            static DIRECTORIES: RefCell<HashSet<String>> = RefCell::new(HashSet::with_capacity(127));
40
            // if a file, we note whether it is in ZIPPED_RULE_FILES or the index of a zipped file within ZIPPED_RULE_FILES
41
            static FILES: RefCell<HashMap<String, ArchivePath>> = RefCell::new(HashMap::with_capacity(1023));
42
        }
43
44
        /// Canonicalize path separators to "/"
45
        fn canonicalize_path_separators(path: &Path) -> String {
46
            return path.to_str().unwrap_or_default().replace("\\", "/");
47
        }
48
        
49
        /// Return a zip archive given the zip bytes
50
        fn get_zip_archive(zip_bytes: &[u8]) -> Result<ZipArchive<Cursor<&[u8]>>> {
51
            let buf_reader = Cursor::new(zip_bytes);
52
            let archive = match zip::ZipArchive::new(buf_reader) {
53
                Err(e) => bail!("get_zip_archive: failed to create ZipArchive: {}", e),
54
                Ok(archive) => archive,
55
            };
56
            return Ok(archive);
57
        }
58
59
        /// Read ZIPPED_RULE_FILES and build up the FILES and DIRECTORIES static variables.
60
        /// This is called lazily when the first file or directory check is done.
61
        fn initialize_static_vars() -> Result<()> {
62
            let mut archive = get_zip_archive(ZIPPED_RULE_FILES)?;
63
            read_zip_file("", &mut archive, None)?;
64
65
            // Because of Rust's borrow checker, we can't recursively unzip contained zip files (FILES, etc., are borrowed mut)
66
            // Here we gather up the zip files that were found and iterate over them non-recursively.
67
            // Note: there shouldn't be embedded zip files in these files (if there are, they won't be unzipped)
68
            let zip_files = FILES.with(|files| files.borrow().iter()
69
                .filter_map(|(name, archive_path)| if name.ends_with(".zip") { Some((name.clone(), *archive_path)) } else { None } )
70
                .collect::<Vec<_>>()
71
            );
72
            // debug!("Found {:?} embedded zip files", zip_files);
73
            for (zip_file_name, archive_path) in zip_files.iter() {
74
                let bytes = get_bytes_from_index(&mut archive, archive_path.main)?;
75
                let mut inner_archive = get_zip_archive(bytes.as_slice())?;
76
                // debug!("  internal zip file {} has {} files", zip_file_name, inner_archive.len());
77
                let new_containing_dir = zip_file_name.rsplit_once("/").map(|(before, _)| before).unwrap_or("");
78
                read_zip_file(new_containing_dir, &mut inner_archive, Some(archive_path.main))?;
79
            }
80
            // FILES.with(|files| {
81
            //     let files = files.borrow();
82
            //     debug!("{} files={:?}", files.len(), files);
83
            // });
84
            return Ok(());
85
        }
86
87
        /// Get the bytes for a file in the zip archive (intended for embedded zip files)
88
        fn get_bytes_from_index(archive: &mut ZipArchive<Cursor<&[u8]>>, index: usize) -> Result<Vec<u8>> {
89
            let mut file = archive.by_index(index)
90
                .map_err(|e| anyhow!(format!("Error getting index={} from zip archive: {}", index, e)) )?;
91
            let mut contents = Vec::new();
92
            file.read_to_end(&mut contents)
93
                .map_err(|e| anyhow!(format!("Error reading index={} from zip archive: {}", index, e)) )?;
94
            return Ok(contents);
95
        }
96
        /// Unzip the zip file (given by zip_archive) and record the file and dir names
97
        /// 'containing_dir' is the rule dir (RulesDir or a subdir) and establishes a full path for unzipped file(s)
98
        /// embedded_zip_file is index into ZIPPED_RULE_FILES if this is an embedded zip file, None if it is the top-level zip file
99
        fn read_zip_file(containing_dir: &str, zip_archive: &mut ZipArchive<Cursor<&[u8]>>, embedded_zip_file: Option<usize>) -> Result<()> {
100
            // debug!("read_zip_file: containing_dir='{}', zip_archive.len()={}", containing_dir, zip_archive.len());
101
            return FILES.with(|files| {
102
                let mut files = files.borrow_mut();
103
                return DIRECTORIES.with(|dirs| {
104
                    let mut dirs = dirs.borrow_mut();
105
                    for i in 0..zip_archive.len() {
106
                        let file = zip_archive.by_index(i).unwrap();
107
                        // A little bit of safety/sanity checking
108
                        let path = match file.enclosed_name() {
109
                            Some(path) => PathBuf::from(containing_dir).join(path),
110
                            None => {
111
                                bail!("Entry {} has a suspicious path (outside of archive)", file.name());
112
                            }
113
                        };
114
                        // debug!("read_zip_file: file path='{}'", path.display());
115
                        // add all the dirs up to the containing dir -- skip the first one as that is a file
116
                        // for files like unicode.yaml, this loop is a no-op, but for files in the Shared folder, it will go one time.
117
                        for parent in path.ancestors().skip(1) {
118
                            if parent.to_str().unwrap_or_default() == containing_dir {
119
                                break;
120
                            }
121
                            dirs.insert(canonicalize_path_separators(parent));
122
                        }
123
                        let file_name = canonicalize_path_separators(&path);
124
                        if file.is_file() {
125
                            let archive_path = match embedded_zip_file {
126
                                None => ArchivePath{ main: i, zipped: None },
127
                                Some(main) => ArchivePath{ main, zipped: Some(i) },
128
                            };
129
                            files.insert(file_name, archive_path);
130
                        } else if file.is_dir() {
131
                            dirs.insert(file_name);
132
                        } else {
133
                            bail!("read_zip_file: {} is neither a file nor a directory", path.display());
134
                        }
135
                    };
136
                    // debug!("{} files={:?}", files.len(), files);
137
                    // debug!("{} dirs={:?}", dirs.len(), dirs);
138
                    return Ok::<(), Error>( () );
139
                });
140
            });
141
        }
142
143
        pub fn is_file_shim(path: &Path) -> bool {
144
            if FILES.with(|files| files.borrow().is_empty()) {
145
                let _ignore_result = initialize_static_vars();
146
            }
147
            return FILES.with(|files| files.borrow().contains_key(&canonicalize_path_separators(path)) );
148
        }
149
        
150
        pub fn is_dir_shim(path: &Path) -> bool {
151
            if FILES.with(|files| files.borrow().is_empty()) {
152
                let _ignore_result = initialize_static_vars();
153
            }
154
            return DIRECTORIES.with(|dirs| dirs.borrow().contains(&canonicalize_path_separators(path)) );
155
        }
156
157
        /// Find files in 'dir' that end with 'ending' (e.g., "_Rules.yaml")
158
        pub fn find_files_in_dir_that_ends_with_shim(dir: &Path, ending: &str) -> Vec<String> {
159
            // FIX: this is very inefficient because it looks through all the files -- maybe dirs should list the files in them?
160
            // look for files that have 'path' as a prefix
161
            return FILES.with(|files| {
162
                let files = files.borrow();
163
                let mut answer = Vec::new();
164
165
                let dir_name = canonicalize_path_separators(dir);
166
                for file_name in files.keys() {
167
                    if let Some(dir_relative_name) = file_name.strip_prefix(&dir_name) &&
168
                       file_name.ends_with(ending)
169
                    {
170
                        // this could be (e.g.) xxx_Rules.yaml or it could be subdir/xxx_Rules.yaml
171
                        let file_name = dir_relative_name.split_once("/").map(|(_, after)| after).unwrap_or(dir_relative_name);
172
                        answer.push(file_name.to_string());
173
                    }
174
                }
175
                // debug!("find_files_in_dir_that_ends_with_shim: in dir '{}' found {:?}", dir.display(), answer);
176
                return answer;
177
            });
178
        }
179
        
180
181
        pub fn find_all_dirs_shim(dir: &Path, found_dirs: &mut Vec<PathBuf> ) {
182
            return DIRECTORIES.with(|dirs| {
183
                let dirs = dirs.borrow();
184
185
                let common_dir_name = canonicalize_path_separators(dir);
186
                for dir_name in dirs.iter() {
187
                    if dir_name.starts_with(&common_dir_name) && !dir_name.contains("SharedRules") {
188
                        found_dirs.push(PathBuf::from(&dir_name));
189
                    };
190
                }
191
            });
192
        }
193
194
        
195
        pub fn canonicalize_shim(path: &Path) -> std::io::Result<PathBuf> {
196
            use std::ffi::OsStr;
197
            let dot_dot = OsStr::new("..");
198
            let mut result = PathBuf::new();
199
            for part in path.iter() {
200
                if dot_dot == part {
201
                    result.pop();
202
                } else {
203
                    result.push(part);
204
                }
205
            }
206
            return Ok(result);
207
        }
208
        
209
        /// Read the file at 'path' and return its contents as a String
210
        pub fn read_to_string_shim(path: &Path) -> Result<String> {
211
            let path = canonicalize_shim(path).unwrap();        // can't fail
212
            let file_name = canonicalize_path_separators(&path);
213
            // Is this the debugging override?
214
            if let Some(contents) = OVERRIDE_FILE_NAME.with(|override_name| {
215
                if file_name == override_name.borrow().as_str() {
216
                    // debug!("override read_to_string_shim: {}",file_name);
217
                    return OVERRIDE_FILE_CONTENTS.with(|contents| return Some(contents.borrow().clone()));
218
                } else {
219
                    return None;
220
                }
221
            }) {
222
                return Ok(contents);
223
            };
224
225
            let file_name = file_name.replace('\\', "/"); // zip files always use forward slash
226
            // top-level zip file or embedded zip file
227
            return FILES.with(|files| {
228
                let files = files.borrow();
229
                let inner_bytes;
230
                let (bytes, index) = match files.get(&file_name) {
231
                    Some(archive_path) => {
232
                        match &archive_path.zipped {
233
                            None => (ZIPPED_RULE_FILES, archive_path.main),
234
                            Some(i) => {
235
                                // debug!("read_to_string_shim: reading embedded zip file {} at index {}", file_name, *i);
236
                                let mut archive = get_zip_archive(ZIPPED_RULE_FILES)?;
237
                                inner_bytes = get_bytes_from_index(&mut archive, archive_path.main)?;  // need to hold temp value
238
                                (inner_bytes.as_slice(), *i)
239
                            }
240
                        }
241
                    },
242
                    None => bail!("read_to_string_shim: didn't find {} in zip archive", file_name),
243
                };
244
                let mut archive = get_zip_archive(bytes)?;
245
                let mut file = match archive.by_index(index) {
246
                    Ok(file) => {
247
                        // debug!("read_to_string_shim: want {}; name of zipped file={:?}", file_name, file.enclosed_name().unwrap());
248
                        file
249
                    },
250
                    Err(..) => {
251
                        bail!("Didn't find {} in zip archive", file_name);
252
                    }
253
                };
254
255
                let mut contents = String::new();
256
                if let Err(e) = file.read_to_string(&mut contents) {
257
                    bail!("read_to_string: {}", e);
258
                }
259
                return Ok(contents);
260
            });
261
        }
262
263
        pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result<bool> {
264
            let zip_file_path = dir.join(zip_file_name);
265
            let full_zip_file_name = canonicalize_path_separators(&zip_file_path);
266
            match FILES.with(|files| files.borrow().contains_key(full_zip_file_name.as_str()) ) {
267
                true => Ok(true),
268
                false => bail!("zip_extract_shim: didn't find {} in zip archive", full_zip_file_name),
269
            }
270
        }
271
272
        thread_local! {
273
            // For debugging rules files (mainly nav file) via MathCATDemo
274
            static OVERRIDE_FILE_NAME: RefCell<String> = RefCell::new("".to_string());
275
            static OVERRIDE_FILE_CONTENTS: RefCell<String> = RefCell::new("".to_string());
276
        }
277
        pub fn override_file_for_debugging_rules(file_name: &str, file_contents: &str) {
278
            // file_name should be path name starting at Rules dir: e.g, "Rules/en/navigate.yaml"
279
            OVERRIDE_FILE_NAME.with(|name| *name.borrow_mut() = file_name.to_string().replace("/", "\\"));
280
            OVERRIDE_FILE_CONTENTS.with(|contents| *contents.borrow_mut() = file_contents.to_string());
281
            crate::interface::set_rules_dir("Rules").unwrap();       // force reinitialization after the change
282
        }
283
    } else {
284
116k
        pub fn is_file_shim(path: &Path) -> bool {
285
116k
            return path.is_file();
286
116k
        }
287
        
288
135k
        pub fn is_dir_shim(path: &Path) -> bool {
289
135k
            return path.is_dir();
290
135k
        }
291
        
292
12.9k
        pub fn find_files_in_dir_that_ends_with_shim(dir: &Path, ending: &str) ->  Vec<String> {
293
12.9k
            match dir.read_dir() {
294
0
                Err(_) => return vec![],    // empty
295
12.9k
                Ok(read_dir) => {
296
12.9k
                    let mut answer = Vec::new();
297
78.4k
                    for dir_entry in 
read_dir12.9k
.
flatten12.9k
() {
298
78.4k
                        let file_name = dir_entry.file_name();
299
78.4k
                        let file_name = file_name.to_string_lossy().to_string();
300
78.4k
                        if file_name.ends_with(ending) {
301
                            // this could be (e.g.) xxx_Rules.yaml or it could be subdir/xxx_Rules.yaml
302
64.4k
                            let file_name = file_name.split_once(std::path::MAIN_SEPARATOR).map(|(_, after)| after).unwrap_or(&file_name);
303
64.4k
                            answer.push( file_name.to_string() );
304
14.0k
                        }
305
                    }
306
12.9k
                    return answer;
307
                }
308
            }
309
12.9k
        }
310
311
2.90k
        pub fn find_all_dirs_shim(dir: &Path, found_dirs: &mut Vec<PathBuf> ) {
312
            // FIX: this doesn't work for subdirectories that haven't been unzipped yet
313
2.90k
            assert!(dir.is_dir(), "find_all_dirs_shim called with non-directory path: {}", 
dir0
.
display0
());
314
2.90k
            let mut found_rules_file = false;
315
2.90k
            if let Ok(entries) = std::fs::read_dir(dir) {
316
13.0k
                for entry in 
entries2.90k
.
flatten2.90k
() {
317
13.0k
                    let path = entry.path();
318
13.0k
                    if path.is_dir() {
319
                        // skip "SharedRules" directory
320
2.90k
                        if let Some(dir_name) = path.file_name() &&
321
2.90k
                           dir_name.to_str().unwrap_or_default() != "SharedRules" {
322
1.45k
                            find_all_dirs_shim(&path, found_dirs);
323
1.45k
                        
}1.44k
324
                    } else {
325
10.1k
                        let file_name = path.file_name().unwrap_or_default().to_str().unwrap_or_default();
326
10.1k
                        if !found_rules_file &&
327
2.91k
                           (file_name.starts_with("unicode") || 
file_name1.46k
.
starts_with1.46k
("definitions") ||
file_name1.46k
.
ends_with1.46k
("_Rules.yaml") ||
file_name1.45k
.
ends_with1.45k
(".zip")) {
328
1.46k
                            found_dirs.push(path.parent().unwrap().to_path_buf());
329
                            // FIX: hack to get around not unzipping files and having zh/tw not found
330
1.46k
                            if file_name == "zh.zip" {
331
0
                                let tw_dir = path.parent().unwrap().join("tw");
332
0
                                if !found_dirs.contains(&tw_dir) {
333
0
                                    found_dirs.push(tw_dir.to_path_buf());
334
0
                                }
335
1.46k
                            }
336
1.46k
                            found_rules_file = true;
337
8.71k
                        }
338
                    }
339
                }
340
0
            }
341
2.90k
        }
342
        
343
73.1k
        pub fn canonicalize_shim(path: &Path) -> std::io::Result<PathBuf> {
344
73.1k
            return path.canonicalize();
345
73.1k
        }
346
        
347
60.5k
        pub fn read_to_string_shim(path: &Path) -> Result<String> {
348
60.5k
            let path = match path.canonicalize() {
349
60.5k
                Ok(path) => path,
350
0
                Err(e) => bail!("Read error while trying to canonicalize in read_to_string_shim {}: {}", path.display(), e),
351
            };
352
60.5k
            debug!("Reading file '{}'", 
&path.display()0
);
353
60.5k
            match std::fs::read_to_string(&path) {
354
60.5k
                Ok(str) => return Ok(str),
355
0
                Err(e) => bail!("Read error while trying to read {}: {}", &path.display(), e),
356
            }
357
60.5k
        }
358
359
12.6k
        pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result<bool> {
360
12.6k
            let zip_file = dir.join(zip_file_name);
361
12.6k
            return match std::fs::read(zip_file) {
362
12.6k
                Err(e) => {
363
                    // no zip file? -- maybe started out with all the files unzipped? See if there is a .yaml file
364
12.6k
                    let yaml_files = find_files_in_dir_that_ends_with_shim(dir, ".yaml");
365
12.6k
                    if yaml_files.is_empty() {
366
1.44k
                        bail!("{}", e)
367
                    } else {
368
11.2k
                        Ok(false)
369
                    }
370
                },
371
0
                Ok(contents) => {
372
0
                    let archive = std::io::Cursor::new(contents);
373
0
                    let mut zip_archive = zip::ZipArchive::new(archive).unwrap();
374
0
                    zip_archive.extract(dir).expect("Zip extraction failed");
375
0
                    Ok(true)
376
                },
377
            };
378
12.6k
        }
379
    }
380
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/shim_filesystem.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
//! This is used to paste over normal reading of the Rules files and building them into the code for web assembly (WASM) which
3
//! can't do file system access. For the latter, the Rules directory is zipped up.
4
5
use std::path::{Path, PathBuf};
6
use crate::errors::*;
7
use cfg_if::cfg_if;
8
9
#[allow(unused_imports)]
10
use log::{debug};
11
12
13
// The zipped files are needed by WASM builds.
14
// However, they are also useful for other builds because there really isn't another good way to get at the rules.
15
// Other build scripts can extract these files and unzip to their needed locations.
16
// I'm not thrilled with this solution as it seems hacky, but I don't know another way for crates to allow for each access to data.
17
cfg_if! {
18
    if #[cfg(any(target_family = "wasm", feature = "include-zip"))] {
19
        // For the include-zip builds, we build a fake file system based on ZIPPED_RULE_FILES.
20
        // That stream encodes other zip files that must be unzipped.
21
        // Only one level of embedded zip files is supported.
22
        use zip::ZipArchive;
23
        pub static ZIPPED_RULE_FILES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"),"/rules.zip"));
24
25
        /// Records where a file lives inside the zip archive(s).
        #[derive(Clone, Copy, Debug)]
        struct ArchivePath {
            /// Index into ZIPPED_RULE_FILES
            main: usize,
            /// `Some(i)`: index into the embedded zip file; `None`: the file is in the top-level zip file
            zipped: Option<usize>,
        }
31
32
        use std::cell::RefCell;
33
        use std::io::Cursor;
34
        use std::io::Read;
35
        use std::collections::{HashMap, HashSet};
36
        thread_local! {
37
            // mapping the file names to whether they are a directory or a file
38
            // Note: these are always stored with "/" as the path separator
39
            static DIRECTORIES: RefCell<HashSet<String>> = RefCell::new(HashSet::with_capacity(127));
40
            // if a file, we note whether it is in ZIPPED_RULE_FILES or the index of a zipped file within ZIPPED_RULE_FILES
41
            static FILES: RefCell<HashMap<String, ArchivePath>> = RefCell::new(HashMap::with_capacity(1023));
42
        }
43
44
        /// Returns `path` as a string with every "\" replaced by "/".
        /// A path that is not valid UTF-8 yields the empty string.
        fn canonicalize_path_separators(path: &Path) -> String {
            let as_str = path.to_str().unwrap_or_default();
            as_str.replace('\\', "/")
        }
48
        
49
        /// Return a zip archive given the zip bytes
50
        fn get_zip_archive(zip_bytes: &[u8]) -> Result<ZipArchive<Cursor<&[u8]>>> {
51
            let buf_reader = Cursor::new(zip_bytes);
52
            let archive = match zip::ZipArchive::new(buf_reader) {
53
                Err(e) => bail!("get_zip_archive: failed to create ZipArchive: {}", e),
54
                Ok(archive) => archive,
55
            };
56
            return Ok(archive);
57
        }
58
59
        /// Read ZIPPED_RULE_FILES and build up the FILES and DIRECTORIES static variables.
60
        /// This is called lazily when the first file or directory check is done.
61
        fn initialize_static_vars() -> Result<()> {
62
            let mut archive = get_zip_archive(ZIPPED_RULE_FILES)?;
63
            read_zip_file("", &mut archive, None)?;
64
65
            // Because of Rust's borrow checker, we can't recursively unzip contained zip files (FILES, etc., are borrowed mut)
66
            // Here we gather up the zip files that were found and iterate over them non-recursively.
67
            // Note: there shouldn't be embedded zip files in these files (if there are, they won't be unzipped)
68
            let zip_files = FILES.with(|files| files.borrow().iter()
69
                .filter_map(|(name, archive_path)| if name.ends_with(".zip") { Some((name.clone(), *archive_path)) } else { None } )
70
                .collect::<Vec<_>>()
71
            );
72
            // debug!("Found {:?} embedded zip files", zip_files);
73
            for (zip_file_name, archive_path) in zip_files.iter() {
74
                let bytes = get_bytes_from_index(&mut archive, archive_path.main)?;
75
                let mut inner_archive = get_zip_archive(bytes.as_slice())?;
76
                // debug!("  internal zip file {} has {} files", zip_file_name, inner_archive.len());
77
                let new_containing_dir = zip_file_name.rsplit_once("/").map(|(before, _)| before).unwrap_or("");
78
                read_zip_file(new_containing_dir, &mut inner_archive, Some(archive_path.main))?;
79
            }
80
            // FILES.with(|files| {
81
            //     let files = files.borrow();
82
            //     debug!("{} files={:?}", files.len(), files);
83
            // });
84
            return Ok(());
85
        }
86
87
        /// Get the bytes for a file in the zip archive (intended for embedded zip files)
88
        fn get_bytes_from_index(archive: &mut ZipArchive<Cursor<&[u8]>>, index: usize) -> Result<Vec<u8>> {
89
            let mut file = archive.by_index(index)
90
                .map_err(|e| anyhow!(format!("Error getting index={} from zip archive: {}", index, e)) )?;
91
            let mut contents = Vec::new();
92
            file.read_to_end(&mut contents)
93
                .map_err(|e| anyhow!(format!("Error reading index={} from zip archive: {}", index, e)) )?;
94
            return Ok(contents);
95
        }
96
        /// Unzip the zip file (given by zip_archive) and record the file and dir names
97
        /// 'containing_dir' is the rule dir (RulesDir or a subdir) and establishes a full path for unzipped file(s)
98
        /// embedded_zip_file is index into ZIPPED_RULE_FILES if this is an embedded zip file, None if it is the top-level zip file
99
        fn read_zip_file(containing_dir: &str, zip_archive: &mut ZipArchive<Cursor<&[u8]>>, embedded_zip_file: Option<usize>) -> Result<()> {
100
            // debug!("read_zip_file: containing_dir='{}', zip_archive.len()={}", containing_dir, zip_archive.len());
101
            return FILES.with(|files| {
102
                let mut files = files.borrow_mut();
103
                return DIRECTORIES.with(|dirs| {
104
                    let mut dirs = dirs.borrow_mut();
105
                    for i in 0..zip_archive.len() {
106
                        let file = zip_archive.by_index(i).unwrap();
107
                        // A little bit of safety/sanity checking
108
                        let path = match file.enclosed_name() {
109
                            Some(path) => PathBuf::from(containing_dir).join(path),
110
                            None => {
111
                                bail!("Entry {} has a suspicious path (outside of archive)", file.name());
112
                            }
113
                        };
114
                        // debug!("read_zip_file: file path='{}'", path.display());
115
                        // add all the dirs up to the containing dir -- skip the first one as that is a file
116
                        // for files like unicode.yaml, this loop is a no-op, but for files in the Shared folder, it will go one time.
117
                        for parent in path.ancestors().skip(1) {
118
                            if parent.to_str().unwrap_or_default() == containing_dir {
119
                                break;
120
                            }
121
                            dirs.insert(canonicalize_path_separators(parent));
122
                        }
123
                        let file_name = canonicalize_path_separators(&path);
124
                        if file.is_file() {
125
                            let archive_path = match embedded_zip_file {
126
                                None => ArchivePath{ main: i, zipped: None },
127
                                Some(main) => ArchivePath{ main, zipped: Some(i) },
128
                            };
129
                            files.insert(file_name, archive_path);
130
                        } else if file.is_dir() {
131
                            dirs.insert(file_name);
132
                        } else {
133
                            bail!("read_zip_file: {} is neither a file nor a directory", path.display());
134
                        }
135
                    };
136
                    // debug!("{} files={:?}", files.len(), files);
137
                    // debug!("{} dirs={:?}", dirs.len(), dirs);
138
                    return Ok::<(), Error>( () );
139
                });
140
            });
141
        }
142
143
        pub fn is_file_shim(path: &Path) -> bool {
144
            if FILES.with(|files| files.borrow().is_empty()) {
145
                let _ignore_result = initialize_static_vars();
146
            }
147
            return FILES.with(|files| files.borrow().contains_key(&canonicalize_path_separators(path)) );
148
        }
149
        
150
        pub fn is_dir_shim(path: &Path) -> bool {
151
            if FILES.with(|files| files.borrow().is_empty()) {
152
                let _ignore_result = initialize_static_vars();
153
            }
154
            return DIRECTORIES.with(|dirs| dirs.borrow().contains(&canonicalize_path_separators(path)) );
155
        }
156
157
        /// Find files in 'dir' that end with 'ending' (e.g., "_Rules.yaml")
158
        pub fn find_files_in_dir_that_ends_with_shim(dir: &Path, ending: &str) -> Vec<String> {
159
            // FIX: this is very inefficient because it looks through all the files -- maybe dirs should list the files in them?
160
            // look for files that have 'path' as a prefix
161
            return FILES.with(|files| {
162
                let files = files.borrow();
163
                let mut answer = Vec::new();
164
165
                let dir_name = canonicalize_path_separators(dir);
166
                for file_name in files.keys() {
167
                    if let Some(dir_relative_name) = file_name.strip_prefix(&dir_name) &&
168
                       file_name.ends_with(ending)
169
                    {
170
                        // this could be (e.g.) xxx_Rules.yaml or it could be subdir/xxx_Rules.yaml
171
                        let file_name = dir_relative_name.split_once("/").map(|(_, after)| after).unwrap_or(dir_relative_name);
172
                        answer.push(file_name.to_string());
173
                    }
174
                }
175
                // debug!("find_files_in_dir_that_ends_with_shim: in dir '{}' found {:?}", dir.display(), answer);
176
                return answer;
177
            });
178
        }
179
        
180
181
        pub fn find_all_dirs_shim(dir: &Path, found_dirs: &mut Vec<PathBuf> ) {
182
            return DIRECTORIES.with(|dirs| {
183
                let dirs = dirs.borrow();
184
185
                let common_dir_name = canonicalize_path_separators(dir);
186
                for dir_name in dirs.iter() {
187
                    if dir_name.starts_with(&common_dir_name) && !dir_name.contains("SharedRules") {
188
                        found_dirs.push(PathBuf::from(&dir_name));
189
                    };
190
                }
191
            });
192
        }
193
194
        
195
        /// Lexically resolves ".." in `path`: each ".." removes the component before it (if there is one).
        /// No file system access is done and the result is always `Ok`.
        pub fn canonicalize_shim(path: &Path) -> std::io::Result<PathBuf> {
            use std::path::Component;
            let mut resolved = PathBuf::new();
            for component in path.components() {
                match component {
                    Component::ParentDir => { resolved.pop(); },
                    other => resolved.push(other.as_os_str()),
                }
            }
            Ok(resolved)
        }
208
        
209
        /// Read the file at 'path' and return its contents as a String
210
        pub fn read_to_string_shim(path: &Path) -> Result<String> {
211
            let path = canonicalize_shim(path).unwrap();        // can't fail
212
            let file_name = canonicalize_path_separators(&path);
213
            // Is this the debugging override?
214
            if let Some(contents) = OVERRIDE_FILE_NAME.with(|override_name| {
215
                if file_name == override_name.borrow().as_str() {
216
                    // debug!("override read_to_string_shim: {}",file_name);
217
                    return OVERRIDE_FILE_CONTENTS.with(|contents| return Some(contents.borrow().clone()));
218
                } else {
219
                    return None;
220
                }
221
            }) {
222
                return Ok(contents);
223
            };
224
225
            let file_name = file_name.replace('\\', "/"); // zip files always use forward slash
226
            // top-level zip file or embedded zip file
227
            return FILES.with(|files| {
228
                let files = files.borrow();
229
                let inner_bytes;
230
                let (bytes, index) = match files.get(&file_name) {
231
                    Some(archive_path) => {
232
                        match &archive_path.zipped {
233
                            None => (ZIPPED_RULE_FILES, archive_path.main),
234
                            Some(i) => {
235
                                // debug!("read_to_string_shim: reading embedded zip file {} at index {}", file_name, *i);
236
                                let mut archive = get_zip_archive(ZIPPED_RULE_FILES)?;
237
                                inner_bytes = get_bytes_from_index(&mut archive, archive_path.main)?;  // need to hold temp value
238
                                (inner_bytes.as_slice(), *i)
239
                            }
240
                        }
241
                    },
242
                    None => bail!("read_to_string_shim: didn't find {} in zip archive", file_name),
243
                };
244
                let mut archive = get_zip_archive(bytes)?;
245
                let mut file = match archive.by_index(index) {
246
                    Ok(file) => {
247
                        // debug!("read_to_string_shim: want {}; name of zipped file={:?}", file_name, file.enclosed_name().unwrap());
248
                        file
249
                    },
250
                    Err(..) => {
251
                        bail!("Didn't find {} in zip archive", file_name);
252
                    }
253
                };
254
255
                let mut contents = String::new();
256
                if let Err(e) = file.read_to_string(&mut contents) {
257
                    bail!("read_to_string: {}", e);
258
                }
259
                return Ok(contents);
260
            });
261
        }
262
263
        pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result<bool> {
264
            let zip_file_path = dir.join(zip_file_name);
265
            let full_zip_file_name = canonicalize_path_separators(&zip_file_path);
266
            match FILES.with(|files| files.borrow().contains_key(full_zip_file_name.as_str()) ) {
267
                true => Ok(true),
268
                false => bail!("zip_extract_shim: didn't find {} in zip archive", full_zip_file_name),
269
            }
270
        }
271
272
        thread_local! {
            // For debugging rules files (mainly nav file) via MathCATDemo:
            // the name of the file being overridden and the contents to use in place of the file's contents
            static OVERRIDE_FILE_NAME: RefCell<String> = RefCell::new(String::new());
            static OVERRIDE_FILE_CONTENTS: RefCell<String> = RefCell::new(String::new());
        }
277
        pub fn override_file_for_debugging_rules(file_name: &str, file_contents: &str) {
278
            // file_name should be path name starting at Rules dir: e.g, "Rules/en/navigate.yaml"
279
            OVERRIDE_FILE_NAME.with(|name| *name.borrow_mut() = file_name.to_string().replace("/", "\\"));
280
            OVERRIDE_FILE_CONTENTS.with(|contents| *contents.borrow_mut() = file_contents.to_string());
281
            crate::interface::set_rules_dir("Rules").unwrap();       // force reinitialization after the change
282
        }
283
    } else {
284
116k
        /// Returns true if `path` names an existing regular file (see `Path::is_file`).
        pub fn is_file_shim(path: &Path) -> bool {
            path.is_file()
        }
287
        
288
135k
        /// Returns true if `path` names an existing directory (see `Path::is_dir`).
        pub fn is_dir_shim(path: &Path) -> bool {
            path.is_dir()
        }
291
        
292
12.9k
        /// Find the files directly in 'dir' whose names end with 'ending' (e.g., "_Rules.yaml")
        ///
        /// Only the entries of `dir` itself are examined (subdirectories are not searched) and the
        /// names returned are bare file names. An empty list is returned if `dir` can't be read;
        /// entries that can't be read are skipped.
        pub fn find_files_in_dir_that_ends_with_shim(dir: &Path, ending: &str) ->  Vec<String> {
            let Ok(read_dir) = dir.read_dir() else {
                return vec![];    // empty
            };
            // A directory entry's name never contains a path separator, so there is nothing to strip from it.
            return read_dir
                .flatten()
                .map(|dir_entry| dir_entry.file_name().to_string_lossy().into_owned())
                .filter(|file_name| file_name.ends_with(ending))
                .collect();
        }
310
311
2.90k
        /// Recursively collects into `found_dirs` the directories at or below `dir` that hold rule files.
        ///
        /// A directory is added (once) when it directly contains a file whose name starts with "unicode" or
        /// "definitions", or ends with "_Rules.yaml" or ".zip". Directories named "SharedRules" are not descended into.
        ///
        /// # Panics
        /// Panics if `dir` is not a directory.
        pub fn find_all_dirs_shim(dir: &Path, found_dirs: &mut Vec<PathBuf> ) {
            // FIX: this doesn't work for subdirectories that haven't been unzipped yet
            assert!(dir.is_dir(), "find_all_dirs_shim called with non-directory path: {}", dir.display());
            let Ok(entries) = std::fs::read_dir(dir) else {
                return;
            };

            let mut found_rules_file = false;
            for entry in entries.flatten() {
                let path = entry.path();
                if path.is_dir() {
                    // skip "SharedRules" directory
                    match path.file_name() {
                        Some(dir_name) if dir_name.to_str().unwrap_or_default() != "SharedRules" => {
                            find_all_dirs_shim(&path, found_dirs);
                        },
                        _ => (),
                    }
                    continue;
                }

                if found_rules_file {
                    continue;       // this directory has already been recorded
                }
                let file_name = path.file_name().unwrap_or_default().to_str().unwrap_or_default();
                let is_rules_file = file_name.starts_with("unicode")
                    || file_name.starts_with("definitions")
                    || file_name.ends_with("_Rules.yaml")
                    || file_name.ends_with(".zip");
                if !is_rules_file {
                    continue;
                }

                let containing_dir = path.parent().unwrap();
                found_dirs.push(containing_dir.to_path_buf());
                // FIX: hack to get around not unzipping files and having zh/tw not found
                if file_name == "zh.zip" {
                    let tw_dir = containing_dir.join("tw");
                    if !found_dirs.contains(&tw_dir) {
                        found_dirs.push(tw_dir);
                    }
                }
                found_rules_file = true;
            }
        }
342
        
343
        /// Resolves the path to an absolute, canonical form using the OS.
        /// If `canonicalize()` fails (e.g., ACCESS_DENIED in containers, or the path doesn't exist),
        /// falls back to `std::path::absolute()`, which makes the path absolute without accessing the file system.
        /// Note: the fallback does not resolve symlinks.
        ///
        /// # Errors
        /// Returns an error only if the fallback also fails: `std::path::absolute()` fails if the path is empty
        /// or if the current directory can't be determined.
        pub fn canonicalize_shim(path: &Path) -> std::io::Result<PathBuf> {
            // Prefer the OS answer; otherwise settle for an absolute path rather than silently returning a relative one.
            return path.canonicalize().or_else(|_| std::path::absolute(path));
        }
363
        
364
60.5k
        pub fn read_to_string_shim(path: &Path) -> Result<String> {
365
60.5k
            let path = match path.canonicalize() {
366
60.5k
                Ok(path) => path,
367
0
                Err(_) => path.to_path_buf(),
368
            };
369
60.5k
            debug!("Reading file '{}'", 
&path.display()0
);
370
60.5k
            match std::fs::read_to_string(&path) {
371
60.5k
                Ok(str) => return Ok(str),
372
0
                Err(e) => bail!("Read error while trying to read {}: {}", &path.display(), e),
373
            }
374
60.5k
        }
375
376
12.6k
        pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result<bool> {
377
12.6k
            let zip_file = dir.join(zip_file_name);
378
12.6k
            return match std::fs::read(zip_file) {
379
12.6k
                Err(e) => {
380
                    // no zip file? -- maybe started out with all the files unzipped? See if there is a .yaml file
381
12.6k
                    let yaml_files = find_files_in_dir_that_ends_with_shim(dir, ".yaml");
382
12.6k
                    if yaml_files.is_empty() {
383
1.44k
                        bail!("{}", e)
384
                    } else {
385
11.2k
                        Ok(false)
386
                    }
387
                },
388
0
                Ok(contents) => {
389
0
                    let archive = std::io::Cursor::new(contents);
390
0
                    let mut zip_archive = zip::ZipArchive::new(archive).unwrap();
391
0
                    zip_archive.extract(dir).expect("Zip extraction failed");
392
0
                    Ok(true)
393
                },
394
            };
395
12.6k
        }
396
    }
397
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/speech.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/speech.rs.html index a7b14262..c2be949f 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/speech.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/speech.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/speech.rs
Line
Count
Source
1
//! The speech module is where the speech rules are read in and speech generated.
2
//!
3
//! The speech rules call out to the preferences and tts modules and the dividing line is not always clean.
4
//! A number of useful utility functions used by other modules are defined here.
5
#![allow(clippy::needless_return)]
6
use std::path::PathBuf;
7
use std::collections::HashMap;
8
use std::cell::{RefCell, RefMut};
9
use std::sync::LazyLock;
10
use sxd_document::dom::{ChildOfElement, Document, Element};
11
use sxd_document::{Package, QName};
12
use sxd_xpath::context::Evaluation;
13
use sxd_xpath::{Factory, Value, XPath};
14
use sxd_xpath::nodeset::Node;
15
use std::fmt;
16
use std::time::SystemTime;
17
use crate::definitions::read_definitions_file;
18
use crate::errors::*;
19
use crate::prefs::*;
20
use crate::xpath_functions::is_leaf;
21
use yaml_rust::{YamlLoader, Yaml, yaml::Hash};
22
use crate::tts::*;
23
use crate::infer_intent::*;
24
use crate::pretty_print::{mml_to_string, yaml_to_string};
25
use std::path::Path;
26
use std::rc::Rc;
27
use crate::shim_filesystem::{read_to_string_shim, canonicalize_shim};
28
use crate::canonicalize::{as_element, create_mathml_element, set_mathml_name, name, MATHML_FROM_NAME_ATTR};
29
use regex::Regex;
30
use log::{debug, error, info};
31
32
33
pub const NAV_NODE_SPEECH_NOT_FOUND: &str = "NAV_NODE_NOT_FOUND";
34
35
/// Like lisp's ' (quote foo), this marks a string so that "replace_chars" is not called on it.
///   Unlike lisp, the marker is appended to the end of the string (more efficient)
/// At the moment, the only use is BrailleChars(...) -- internally, it calls replace_chars and we don't want it called again.
/// Note: an alternative to this hack is to add "xq" (execute but don't eval the result), but that's heavy-handed for the current need
const NO_EVAL_QUOTE_CHAR: char = '\u{efff}';            // a private use area char
/// Number of bytes in the UTF-8 encoding of NO_EVAL_QUOTE_CHAR
const N_BYTES_NO_EVAL_QUOTE_CHAR: usize = NO_EVAL_QUOTE_CHAR.len_utf8();
/// The UTF-8 encoding of NO_EVAL_QUOTE_CHAR
const NO_EVAL_QUOTE_CHAR_AS_BYTES: [u8; N_BYTES_NO_EVAL_QUOTE_CHAR] = [0xEE, 0xBF, 0xBF];
42
43
/// Converts 'string' into a "quoted" string -- use is_quoted_string and unquote_string
44
12.5k
pub fn make_quoted_string(mut string: String) -> String {
45
12.5k
    string.push(NO_EVAL_QUOTE_CHAR);
46
12.5k
    return string;
47
12.5k
}
48
49
/// Checks the string to see if it is "quoted"
50
58.0k
pub fn is_quoted_string(str: &str) -> bool {
51
58.0k
    if str.len() < N_BYTES_NO_EVAL_QUOTE_CHAR {
52
34.1k
        return false;
53
23.9k
    }
54
23.9k
    let bytes = str.as_bytes();
55
23.9k
    return bytes[bytes.len()-N_BYTES_NO_EVAL_QUOTE_CHAR..] == NO_EVAL_QUOTE_CHAR_AS_BYTES;
56
58.0k
}
57
58
/// Converts 'string' into a "quoted" string -- use is_quoted_string and unquote_string
59
/// IMPORTANT: this assumes the string is quoted -- no check is made
60
12.5k
pub fn unquote_string(str: &str) -> &str {
61
12.5k
    return &str[..str.len()-N_BYTES_NO_EVAL_QUOTE_CHAR];
62
12.5k
}
63
64
65
/// The main external call, `intent_from_mathml` returns a string for the speech associated with the `mathml`.
66
///   It matches against the rules that are computed by user prefs such as "Language" and "SpeechStyle".
67
///
68
/// The speech rules assume `mathml` has been "cleaned" via the canonicalization step.
69
///
70
/// If the preferences change (and hence the speech rules to use change), or if the rule file changes,
71
///   `intent_from_mathml` will detect that and (re)load the proper rules.
72
///
73
/// A string is returned in call cases.
74
/// If there is an error, the speech string will indicate an error.
75
3.88k
pub fn intent_from_mathml<'m>(mathml: Element, doc: Document<'m>) -> Result<Element<'m>> {
76
3.88k
    let 
intent_tree3.87k
= intent_rules(&INTENT_RULES, doc, mathml, "")
?9
;
77
3.87k
    doc.root().append_child(intent_tree);
78
3.87k
    return Ok(intent_tree);
79
3.88k
}
80
81
3.96k
pub fn speak_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
82
3.96k
    return speak_rules(&SPEECH_RULES, mathml, nav_node_id, nav_node_offset);
83
3.96k
}
84
85
14
pub fn overview_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
86
14
    return speak_rules(&OVERVIEW_RULES, mathml, nav_node_id, nav_node_offset);
87
14
}
88
89
90
3.88k
fn intent_rules<'m>(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, doc: Document<'m>, mathml: Element, nav_node_id: &'m str) -> Result<Element<'m>> {
91
3.88k
    rules.with(|rules| {
92
3.88k
        rules.borrow_mut().read_files()
?0
;
93
3.88k
        let rules = rules.borrow();
94
        // debug!("intent_rules:\n{}", mml_to_string(mathml));
95
3.88k
        let should_set_literal_intent = rules.pref_manager.borrow().pref_to_string("SpeechStyle").as_str() == "LiteralSpeak";
96
3.88k
        let original_intent = mathml.attribute_value("intent");
97
3.88k
        if should_set_literal_intent {
98
10
            if let Some(
intent4
) = original_intent {
99
4
                let intent = if intent.contains('(') {
intent2
.
replace2
('(',
":literal("2
)} else {
intent2
.to_string() + ":literal"};
100
4
                mathml.set_attribute_value("intent", &intent);
101
6
            } else {
102
6
                mathml.set_attribute_value("intent", ":literal");
103
6
            };
104
3.87k
        }
105
3.88k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, doc, nav_node_id, 0);
106
3.88k
        let 
intent3.87k
= rules_with_context.match_pattern::<Element<'m>>(mathml)
107
3.88k
                    .context("Pattern match/replacement failure!")
?9
;
108
3.87k
        let answer = if name(intent) == "TEMP_NAME" {   // unneeded extra layer
109
0
            assert_eq!(intent.children().len(), 1);
110
0
            as_element(intent.children()[0])
111
        } else {
112
3.87k
            intent
113
        };
114
3.87k
        if should_set_literal_intent {
115
10
            if let Some(
original_intent4
) = original_intent {
116
4
                mathml.set_attribute_value("intent", original_intent);
117
6
            } else {
118
6
                mathml.remove_attribute("intent");
119
6
            }
120
3.86k
        }
121
3.87k
        return Ok(answer);
122
3.88k
    })
123
3.88k
}
124
125
/// Speak the MathML
126
/// If 'nav_node_id' is not an empty string, then the element with that id will have [[...]] around it
127
3.98k
fn speak_rules(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
128
3.98k
    return rules.with(|rules| {
129
3.98k
        rules.borrow_mut().read_files()
?0
;
130
3.98k
        let rules = rules.borrow();
131
        // debug!("speak_rules:\n{}", mml_to_string(mathml));
132
3.98k
        let new_package = Package::new();
133
3.98k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), nav_node_id, nav_node_offset);
134
3.98k
        let 
speech_string3.98k
= nestable_speak_rules(& mut rules_with_context, mathml)
?1
;
135
3.98k
        return Ok( rules.pref_manager.borrow().get_tts()
136
3.98k
            .merge_pauses(remove_optional_indicators(
137
3.98k
                &speech_string.replace(CONCAT_STRING, "")
138
3.98k
                                    .replace(CONCAT_INDICATOR, "")                            
139
3.98k
                            )
140
3.98k
            .trim_start().trim_end_matches([' ', ',', ';'])) );
141
3.98k
    });
142
143
3.99k
    fn nestable_speak_rules<'c, 's:'c, 'm:'c>(rules_with_context: &mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
144
3.99k
        let mut speech_string = rules_with_context.match_pattern::<String>(mathml)
145
3.99k
                    .context("Pattern match/replacement failure!")
?0
;
146
        // Note: [[...]] is added around a matching child, but if the "id" is on 'mathml', the whole string is used
147
3.99k
        if !rules_with_context.nav_node_id.is_empty() {
148
            // See https://github.com/NSoiffer/MathCAT/issues/174 for why we can just start the speech at the nav node
149
536
            let intent_attr = mathml.attribute_value("data-intent-property").unwrap_or_default();
150
536
            if let Some(
start521
) = speech_string.find("[[") {
151
521
                match speech_string[start+2..].find("]]") {
152
0
                    None => bail!("Internal error: looking for '[[...]]' during navigation -- only found '[[' in '{}'", speech_string),
153
521
                    Some(end) => speech_string = speech_string[start+2..start+2+end].to_string(),
154
                }
155
15
            } else if !intent_attr.contains(":literal:") {
156
                // try again with LiteralSpeak -- some parts might have been elided in other SpeechStyles
157
14
                mathml.set_attribute_value("data-intent-property", (":literal:".to_string() + intent_attr).as_str());
158
14
                let speech = nestable_speak_rules(rules_with_context, mathml);
159
14
                mathml.set_attribute_value("data-intent-property", intent_attr);
160
14
                return speech;
161
            } else {
162
1
                bail!(NAV_NODE_SPEECH_NOT_FOUND); //  NAV_NODE_SPEECH_NOT_FOUND is tested for later
163
            }
164
3.46k
        }
165
3.98k
        return Ok(speech_string);
166
3.99k
    }
167
3.98k
}
168
169
/// Converts its argument to a string that can be used in a debugging message.
170
0
pub fn yaml_to_type(yaml: &Yaml) -> String {
171
0
    return match yaml {
172
0
        Yaml::Real(v)=> format!("real='{v:#}'"),
173
0
        Yaml::Integer(v)=> format!("integer='{v:#}'"),
174
0
        Yaml::String(v)=> format!("string='{v:#}'"),
175
0
        Yaml::Boolean(v)=> format!("boolean='{v:#}'"),
176
0
        Yaml::Array(v)=> match v.len() {
177
0
            0 => "array with no entries".to_string(),
178
0
            1 => format!("array with the entry: {}", yaml_to_type(&v[0])),
179
0
            _ => format!("array with {} entries. First entry: {}", v.len(), yaml_to_type(&v[0])),
180
        }
181
0
        Yaml::Hash(h)=> {
182
0
            let first_pair = 
183
0
                if h.is_empty() {
184
0
                    "no pairs".to_string()
185
                } else {
186
0
                    let (key, val) = h.iter().next().unwrap();
187
0
                    format!("({}, {})", yaml_to_type(key), yaml_to_type(val))
188
                };
189
0
            format!("dictionary with {} pair{}. A pair: {}", h.len(), if h.len()==1 {""} else {"s"}, first_pair)
190
        }
191
0
        Yaml::Alias(_)=> "Alias".to_string(),
192
0
        Yaml::Null=> "Null".to_string(),
193
0
        Yaml::BadValue=> "BadValue".to_string(),       
194
    }
195
0
}
196
197
0
fn yaml_type_err(yaml: &Yaml, str: &str) -> Error {
198
0
    anyhow!("Expected {}, found {}", str, yaml_to_type(yaml))
199
0
}
200
201
// fn yaml_key_err(dict: &Yaml, key: &str, yaml_type: &str) -> String {
202
//     if dict.as_hash().is_none() {
203
//        return format!("Expected dictionary with key '{}', found\n{}", key, yaml_to_string(dict, 1));
204
//     }
205
//     let str = &dict[key];
206
//     if str.is_badvalue() {
207
//         return format!("Did not find '{}' in\n{}", key,  yaml_to_string(dict, 1));
208
//     }
209
//     return format!("Type of '{}' is not a {}.\nIt is a {}. YAML value is\n{}", 
210
//             key, yaml_type, yaml_to_type(str), yaml_to_string(dict, 0));
211
// }
212
213
4.86M
fn find_str<'a>(dict: &'a Yaml, key: &'a str) -> Option<&'a str> {
214
4.86M
    return dict[key].as_str();
215
4.86M
}
216
217
/// Returns the Yaml as a `Hash` or an error if it isn't.
218
175k
pub fn as_hash_checked(value: &Yaml) -> Result<&Hash> {
219
175k
    let result = value.as_hash();
220
175k
    let result = result.ok_or_else(|| 
yaml_type_err0
(
value0
,
"hashmap"0
))
?0
;
221
175k
    return Ok( result );
222
175k
}
223
224
/// Returns the Yaml as a `Vec` or an error if it isn't.
225
11.7k
pub fn as_vec_checked(value: &Yaml) -> Result<&Vec<Yaml>> {
226
11.7k
    let result = value.as_vec();
227
11.7k
    let result = result.ok_or_else(|| 
yaml_type_err0
(
value0
,
"array"0
))
?0
;
228
11.7k
    return Ok( result );
229
11.7k
}
230
231
/// Returns the Yaml as a `&str` or an error if it isn't.
232
8.08M
pub fn as_str_checked(yaml: &Yaml) -> Result<&str> {
233
8.08M
    return yaml.as_str().ok_or_else(|| 
yaml_type_err0
(
yaml0
,
"string"0
));
234
8.08M
}
235
236
237
/// A bit of a hack to concatenate replacements (without a ' ').
238
/// The CONCAT_INDICATOR is added by a "ct:" (instead of 't:') in the speech rules
239
/// and checked for by the tts code.
240
pub const CONCAT_INDICATOR: &str = "\u{F8FE}";
241
242
// This is the pattern that needs to be matched (and deleted)
243
pub const CONCAT_STRING: &str = " \u{F8FE}";
244
245
// a similar hack to potentially delete (repetitive) optional replacements
246
// the OPTIONAL_INDICATOR is added by "ot:" before and after the optional string
247
const OPTIONAL_INDICATOR: &str  = "\u{F8FD}";
248
const OPTIONAL_INDICATOR_LEN: usize = OPTIONAL_INDICATOR.len();
249
250
5.10k
pub fn remove_optional_indicators(str: &str) -> String {
251
5.10k
    return str.replace(OPTIONAL_INDICATOR, "");
252
5.10k
}
253
254
/// Given a string that should be Yaml, it calls `build_fn` with that string.
255
/// The build function/closure should process the Yaml as appropriate and capture any errors and write them to `std_err`.
256
/// The returned value should be a Vector containing the paths of all the files that were included.
257
56.3k
pub fn compile_rule<F>(str: &str, mut build_fn: F) -> Result<Vec<PathBuf>> where
258
56.3k
            F: FnMut(&Yaml) -> Result<Vec<PathBuf>> {
259
56.3k
    let docs = YamlLoader::load_from_str(str);
260
56.3k
    match docs {
261
0
        Err(e) => {
262
0
            bail!("Parse error!!: {}", e);
263
        },
264
56.3k
        Ok(docs) => {
265
56.3k
            if docs.len() != 1 {
266
0
                bail!("Didn't find rules!");
267
56.3k
            }
268
56.3k
            return build_fn(&docs[0]);
269
        }
270
    }
271
56.3k
}
272
273
36.5k
pub fn process_include<F>(current_file: &Path, new_file_name: &str, mut read_new_file: F) -> Result<Vec<PathBuf>>
274
36.5k
                    where F: FnMut(&Path) -> Result<Vec<PathBuf>> {
275
36.5k
    let parent_path = current_file.parent();
276
36.5k
    if parent_path.is_none() {
277
0
        bail!("Internal error: {:?} is not a valid file name", current_file);
278
36.5k
    }
279
36.5k
    let mut new_file = match canonicalize_shim(parent_path.unwrap()) {
280
36.5k
        Ok(path) => path,
281
0
        Err(e) => bail!("process_include: canonicalize failed for {} with message {}", parent_path.unwrap().display(), e),
282
    };
283
284
    // the referenced file might be in a directory that hasn't been zipped up -- find the dir and call the unzip function
285
89.0k
    for unzip_dir in 
new_file.ancestors()36.5k
{
286
89.0k
        if unzip_dir.ends_with("Rules") {
287
36.5k
            break;      // nothing to unzip
288
52.4k
        }
289
52.4k
        if unzip_dir.ends_with("Languages") || 
unzip_dir28.5k
.
ends_with28.5k
("Braille") {
290
            // get the subdir ...Rules/Braille/en/...
291
            // could have ...Rules/Braille/definitions.yaml, so 'next()' doesn't exist in this case, but the file wasn't zipped up
292
26.0k
            if let Some(
subdir24.9k
) = new_file.strip_prefix(unzip_dir).unwrap().iter().next() {
293
24.9k
                let default_lang = if unzip_dir.ends_with("Languages") {
"en"23.9k
} else {
"UEB;"1.06k
};
294
24.9k
                PreferenceManager::unzip_files(unzip_dir, subdir.to_str().unwrap(), Some(default_lang)).unwrap_or_default();
295
1.06k
            }
296
26.4k
        }
297
    }
298
36.5k
    new_file.push(new_file_name);
299
36.5k
    info!("...processing include: {new_file_name}...");
300
36.5k
    let new_file = match crate::shim_filesystem::canonicalize_shim(new_file.as_path()) {
301
36.5k
        Ok(buf) => buf,
302
0
        Err(msg) => bail!("-include: constructed file name '{}' causes error '{}'",
303
0
                                 new_file.to_str().unwrap(), msg),
304
    };
305
306
36.5k
    let mut included_files = read_new_file(new_file.as_path())
?0
;
307
36.5k
    let mut files_read = vec![new_file];
308
36.5k
    files_read.append(&mut included_files);
309
36.5k
    return Ok(files_read);
310
36.5k
}
311
312
/// As the name says, TreeOrString is either a Tree (Element) or a String
/// It is used to share code during pattern matching
///
/// The trait is implemented below for `String` (speech/braille output) and `Element` (intent trees).
/// Operations that make no sense for one of the two either return an error or panic there.
pub trait TreeOrString<'c, 'm:'c, T> {
    /// Turns an element into a `T`. The `String` implementation always returns an error.
    fn from_element(e: Element<'m>) -> Result<T>;
    /// Turns a string into a `T`. The `Element` implementation wraps the text in a new 'mi' leaf created in `doc`.
    fn from_string(s: String, doc: Document<'m>) -> Result<T>;
    /// Applies a TTS command rule. The `Element` implementation always returns an error.
    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
    /// Applies all the replacements in `ra` to `mathml`.
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
    /// Applies the rules to each of `nodes` (the result of evaluating an xpath against `mathml`).
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T>;
    /// Highlights braille output. The `Element` implementation panics.
    fn highlight_braille(braille: T, highlight_style: String) -> T;
    /// Marks speech that belongs to the navigation node. The `Element` implementation panics.
    fn mark_nav_speech(speech: T) -> T;
}
323
324
impl<'c, 'm:'c> TreeOrString<'c, 'm, String> for String {
325
0
    fn from_element(_e: Element<'m>) -> Result<String> {
326
0
         bail!("from_element not allowed for strings");
327
0
    }
328
329
180k
    fn from_string(s: String, _doc: Document<'m>) -> Result<String> {
330
180k
        return Ok(s);
331
180k
    }
332
333
60.7k
    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
334
60.7k
        return tts.replace_string(command, prefs, rules_with_context, mathml);
335
60.7k
    }
336
337
142k
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
338
142k
        return ra.replace_array_string(rules_with_context, mathml);
339
142k
    }
340
341
72.9k
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
342
72.9k
        return rules.replace_nodes_string(nodes, mathml);
343
72.9k
    }
344
345
469
    fn highlight_braille(braille: String, highlight_style: String) -> String {
346
469
        return SpeechRulesWithContext::highlight_braille_string(braille, highlight_style);
347
469
    }
348
349
521
    fn mark_nav_speech(speech: String) -> String {
350
521
        return SpeechRulesWithContext::mark_nav_speech(speech);
351
521
    }
352
}
353
354
impl<'c, 'm:'c> TreeOrString<'c, 'm, Element<'m>> for Element<'m> {
355
48.0k
    fn from_element(e: Element<'m>) -> Result<Element<'m>> {
356
48.0k
         return Ok(e);
357
48.0k
    }
358
359
213
    fn from_string(s: String, doc: Document<'m>) -> Result<Element<'m>> {
360
        // FIX: is 'mi' really ok?  Don't want to use TEMP_NAME because this name needs to move to the outside world
361
213
        let leaf = create_mathml_element(&doc, "mi");
362
213
        leaf.set_text(&s);
363
213
        return Ok(leaf);
364
213
}
365
366
0
    fn replace_tts<'s:'c, 'r>(_tts: &TTS, _command: &TTSCommandRule, _prefs: &PreferenceManager, _rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, _mathml: Element<'c>) -> Result<Element<'m>> {
367
0
        bail!("Internal error: applying a TTS rule to a tree");
368
0
    }
369
370
132k
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
371
132k
        return ra.replace_array_tree(rules_with_context, mathml);
372
132k
    }
373
374
48.6k
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<Element<'m>> {
375
48.6k
        return rules.replace_nodes_tree(nodes, mathml);
376
48.6k
    }
377
378
0
    fn highlight_braille(_braille: Element<'c>, _highlight_style: String) -> Element<'m> {
379
0
        panic!("Internal error: highlight_braille called on a tree");
380
    }
381
382
0
    fn mark_nav_speech(_speech: Element<'c>) -> Element<'m> {
383
0
        panic!("Internal error: mark_nav_speech called on a tree");
384
    }
385
}
386
387
/// 'Replacement' is an enum that contains all the potential replacement types/structs
388
/// Hence there are fields 'Test' ("test:"), 'Text" ("t:"), "XPath", etc
389
#[derive(Debug, Clone)]
390
#[allow(clippy::upper_case_acronyms)]
391
enum Replacement {
392
    // Note: all of these are pointer types
393
    Text(String),
394
    XPath(MyXPath),
395
    Intent(Box<Intent>),
396
    Test(Box<TestArray>),
397
    TTS(Box<TTSCommandRule>),
398
    With(Box<With>),
399
    SetVariables(Box<SetVariables>),
400
    Insert(Box<InsertChildren>),
401
    Translate(TranslateExpression),
402
}
403
404
impl fmt::Display for Replacement {
405
10
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
406
10
        return write!(f, "{}",
407
10
            match self {
408
0
                Replacement::Test(c) => c.to_string(),
409
0
                Replacement::Text(t) => format!("t: \"{t}\""),
410
10
                Replacement::XPath(x) => x.to_string(),
411
0
                Replacement::Intent(i) => i.to_string(),
412
0
                Replacement::TTS(t) => t.to_string(),
413
0
                Replacement::With(w) => w.to_string(),
414
0
                Replacement::SetVariables(v) => v.to_string(),
415
0
                Replacement::Insert(ic) => ic.to_string(),
416
0
                Replacement::Translate(x) => x.to_string(),
417
            }
418
        );
419
10
    }
420
}
421
422
impl Replacement {   
423
13.5M
    fn build(replacement: &Yaml) -> Result<Replacement> {
424
        // Replacement -- single key/value (see below for allowed values)
425
13.5M
        let dictionary = replacement.as_hash();
426
13.5M
        if dictionary.is_none() {
427
0
            bail!("  expected a key/value pair. Found {}.",  yaml_to_string(replacement, 0));
428
13.5M
        };
429
13.5M
        let dictionary = dictionary.unwrap();
430
13.5M
        if dictionary.is_empty() { 
431
0
            bail!("No key/value pairs found for key 'replace'.\n\
432
                Suggestion: are the following lines indented properly?");
433
13.5M
        }
434
13.5M
        if dictionary.len() > 1 { 
435
0
            bail!("Should only be one key/value pair for the replacement.\n    \
436
                    Suggestion: are the following lines indented properly?\n    \
437
0
                    The key/value pairs found are\n{}", yaml_to_string(replacement, 2));
438
13.5M
        }
439
440
        // get the single value
441
13.5M
        let (key, value) = dictionary.iter().next().unwrap();
442
13.5M
        let key = key.as_str().ok_or_else(|| 
anyhow!0
("replacement key(e.g, 't') is not a string"))
?0
;
443
13.5M
        match key {
444
13.5M
            "t" | 
"T"10.4M
=> {
445
5.76M
                return Ok( Replacement::Text( as_str_checked(value)
?0
.to_string() ) );
446
            },
447
7.75M
            "ct" | 
"CT"7.72M
=> {
448
24.7k
                return Ok( Replacement::Text( CONCAT_INDICATOR.to_string() + as_str_checked(value)
?0
) );
449
            },
450
7.72M
            "ot" | 
"OT"7.69M
=> {
451
36.2k
                return Ok( Replacement::Text( OPTIONAL_INDICATOR.to_string() + as_str_checked(value)
?0
+ OPTIONAL_INDICATOR ) );
452
            },
453
7.69M
            "x" => {
454
2.27M
                return Ok( Replacement::XPath( MyXPath::build(value)
455
2.27M
                    .context("while trying to evaluate value of 'x:'")
?0
) );
456
            },
457
5.41M
            "pause" | 
"rate"4.58M
|
"pitch"4.58M
|
"volume"4.36M
|
"audio"4.36M
|
"gender"4.13M
|
"voice"4.13M
|
"spell"4.13M
|
"SPELL"3.47M
|
"bookmark"3.18M
|
"pronounce"3.00M
|
"PRONOUNCE"2.99M
=> {
458
2.41M
                return Ok( Replacement::TTS( TTS::build(&key.to_ascii_lowercase(), value)
?0
) );
459
            },
460
2.99M
            "intent" => {
461
283k
                return Ok( Replacement::Intent( Intent::build(value)
?0
) );
462
            },
463
2.71M
            "test" => {
464
2.58M
                return Ok( Replacement::Test( Box::new( TestArray::build(value)
?0
) ) );
465
            },
466
128k
            "with" => {
467
77.5k
                return Ok( Replacement::With( With::build(value)
?0
) );
468
            },
469
51.3k
            "set_variables" => {
470
30.3k
                return Ok( Replacement::SetVariables( SetVariables::build(value)
?0
) );
471
            },
472
21.0k
            "insert" => {
473
20.9k
                return Ok( Replacement::Insert( InsertChildren::build(value)
?0
) );
474
            },
475
102
            "translate" => {
476
102
                return Ok( Replacement::Translate( TranslateExpression::build(value)
477
102
                    .context("while trying to evaluate value of 'speak:'")
?0
) );
478
            },
479
            _ => {
480
0
                bail!("Unknown 'replace' command ({}) with value: {}", key, yaml_to_string(value, 0));
481
            }
482
        }
483
13.5M
    }
484
}
485
486
// structure used when "insert:" is encountered in a rule
487
// the 'replacements' are inserted between each node in the 'xpath'
488
#[derive(Debug, Clone)]
489
struct InsertChildren {
490
    xpath: MyXPath,                     // the replacement nodes
491
    replacements: ReplacementArray,     // what is inserted between each node
492
}
493
494
impl fmt::Display for InsertChildren {
495
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
496
0
        return write!(f, "InsertChildren:\n  nodes {}\n  replacements {}", self.xpath, &self.replacements);
497
0
    }
498
}
499
500
impl InsertChildren {
501
20.9k
    fn build(insert: &Yaml) -> Result<Box<InsertChildren>> {
502
        // 'insert:' -- 'nodes': xxx 'replace': xxx
503
20.9k
        if insert.as_hash().is_none() {
504
0
            bail!("")
505
20.9k
        }
506
20.9k
        let nodes = &insert["nodes"];
507
20.9k
        if nodes.is_badvalue() { 
508
0
            bail!("Missing 'nodes' as part of 'insert'.\n    \
509
                  Suggestion: add 'nodes:' or if present, indent so it is contained in 'insert'");
510
20.9k
        }
511
20.9k
        let nodes = as_str_checked(nodes)
?0
;
512
20.9k
        let replace = &insert["replace"];
513
20.9k
        if replace.is_badvalue() { 
514
0
            bail!("Missing 'replace' as part of 'insert'.\n    \
515
                  Suggestion: add 'replace:' or if present, indent so it is contained in 'insert'");
516
20.9k
        }
517
20.9k
        return Ok( Box::new( InsertChildren {
518
20.9k
            xpath: MyXPath::new(nodes.to_string())
?0
,
519
20.9k
            replacements: ReplacementArray::build(replace).context("'replace:'")
?0
,
520
        } ) );
521
20.9k
    }
522
    
523
    // It would be most efficient to do an xpath eval, get the nodes (type: NodeSet) and then intersperse the node_replace()
524
    //   calls with replacements for the ReplacementArray parts. But that causes problems with the "pause: auto" calculation because
525
    //   the replacements are segmented (can't look to neighbors for the calculation there)
526
    // An alternative is to introduce another Replacement enum value, but that's a lot of complication for not that much
527
    //    gain (and Node's have contagious lifetimes)
528
    // The solution adopted is to find out the number of nodes and build up MyXPaths with each node selected (e.g, "*" => "*[3]")
529
    //    and put those nodes into a flat ReplacementArray and then do a standard replace on that.
530
    //    This is slower than the alternatives, but reuses a bunch of code and hence is less complicated.
531
7.45k
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
532
7.45k
        let result = self.xpath.evaluate(&rules_with_context.context_stack.base, mathml)
533
7.45k
                .with_context(||
format!0
("in '{}' replacing after pattern match",
&self.xpath.rc.string0
) )
?0
;
534
7.45k
        match result {
535
7.45k
            Value::Nodeset(nodes) => {
536
7.45k
                if nodes.size() == 0 {
537
0
                    bail!("During replacement, no matching element found");
538
7.45k
                };
539
7.45k
                let nodes = nodes.document_order();
540
7.45k
                let n_nodes = nodes.len();
541
7.45k
                let mut expanded_result = Vec::with_capacity(n_nodes + (n_nodes+1)*self.replacements.replacements.len());
542
7.45k
                expanded_result.push(
543
                    Replacement::XPath(
544
7.45k
                        MyXPath::new(format!("{}[{}]", self.xpath.rc.string , 1))
?0
545
                    )
546
                );
547
19.3k
                for i in 
2..n_nodes+17.45k
{
548
19.3k
                    expanded_result.extend_from_slice(&self.replacements.replacements);
549
19.3k
                    expanded_result.push(
550
                        Replacement::XPath(
551
19.3k
                            MyXPath::new(format!("{}[{}]", self.xpath.rc.string , i))
?0
552
                        )
553
                    );
554
                }
555
7.45k
                let replacements = ReplacementArray{ replacements: expanded_result };
556
7.45k
                return replacements.replace(rules_with_context, mathml);
557
            },
558
559
            // FIX: should the options be errors???
560
0
            Value::String(t) => { return T::from_string(rules_with_context.replace_chars(&t, mathml)?, rules_with_context.doc); },
561
0
            Value::Number(num)  => { return T::from_string( num.to_string(), rules_with_context.doc ); },
562
0
            Value::Boolean(b)  => { return T::from_string( b.to_string(), rules_with_context.doc ); },          // FIX: is this right???
563
        }
564
        
565
7.45k
    }    
566
}
567
568
569
2
static ATTR_NAME_VALUE: LazyLock<Regex> = LazyLock::new(|| {
570
2
    Regex::new(
571
        // match name='value', where name is sort of an NCNAME (see CONCEPT_OR_LITERAL in infer_intent.rs)
572
        // The quotes can be either single or double quotes
573
2
        r#"(?P<name>[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*)\s*=\s*('(?P<value>[^']+)'|"(?P<dqvalue>[^"]+)")"#
574
2
    ).unwrap()
575
2
});
576
577
// structure used when "intent:" is encountered in a rule
578
// the name is either a string or an xpath that needs evaluation. 99% of the time it is a string
579
#[derive(Debug, Clone)]
580
struct Intent {
581
    name: Option<String>,           // name of node
582
    xpath: Option<MyXPath>,         // alternative to directly using the string
583
    attrs: String,                  // optional attrs -- format "attr1='val1' [attr2='val2'...]"
584
    children: ReplacementArray,     // children of node
585
}
586
587
impl fmt::Display for Intent {
588
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
589
0
        let name = if let Some(name) = &self.name {
590
0
            name.to_string()
591
        } else {
592
0
            self.xpath.as_ref().unwrap().to_string()
593
        };
594
0
        return write!(f, "intent: {}: {},  attrs='{}'>\n      children: {}",
595
0
                        if self.name.is_some() {"name"} else {"xpath-name"}, name,
596
                        self.attrs,
597
0
                        &self.children);
598
0
    }
599
}
600
601
impl Intent {
602
283k
    fn build(yaml_dict: &Yaml) -> Result<Box<Intent>> {
603
        // 'intent:' -- 'name': xxx 'children': xxx
604
283k
        if yaml_dict.as_hash().is_none() {
605
0
            bail!("Array found for contents of 'intent' -- should be dictionary with keys 'name' and 'children'")
606
283k
        }
607
283k
        let name = &yaml_dict["name"];
608
283k
        let xpath_name = &yaml_dict["xpath-name"];
609
283k
        if name.is_badvalue() && 
xpath_name31.5k
.
is_badvalue31.5k
(){
610
0
            bail!("Missing 'name' or 'xpath-name' as part of 'intent'.\n    \
611
                  Suggestion: add 'name:' or if present, indent so it is contained in 'intent'");
612
283k
        }
613
283k
        let attrs = &yaml_dict["attrs"];
614
283k
        let replace = &yaml_dict["children"];
615
283k
        if replace.is_badvalue() {
616
0
            bail!("Missing 'children' as part of 'intent'.\n    \
617
                  Suggestion: add 'children:' or if present, indent so it is contained in 'intent'");
618
283k
        }
619
283k
        return Ok( Box::new( Intent {
620
283k
            name: if name.is_badvalue() {
None31.5k
} else {Some(
as_str_checked252k
(
name252k
).
context252k
("'name'")
?0
.
to_string252k
())},
621
283k
            xpath: if xpath_name.is_badvalue() {
None252k
} else {Some(
MyXPath::build31.5k
(
xpath_name31.5k
).
context31.5k
("'intent'")
?0
)},
622
283k
            attrs: if attrs.is_badvalue() {
""128k
.
to_string128k
()} else {
as_str_checked154k
(
attrs154k
).
context154k
("'attrs'")
?0
.
to_string154k
()},
623
283k
            children: ReplacementArray::build(replace).context("'children:'")
?0
,
624
        } ) );
625
283k
    }
626
        
627
45.5k
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
628
45.5k
        let result = self.children.replace::<Element<'m>>(rules_with_context, mathml)
629
45.5k
                    .context("replacing inside 'intent'")
?0
;
630
45.5k
        let mut result = lift_children(result);
631
45.5k
        if name(result) != "TEMP_NAME" && 
name(result) != "Unknown"3.43k
{
632
235
            // this case happens when you have an 'intent' replacement as a direct child of an 'intent' replacement
633
235
            let temp = create_mathml_element(&result.document(), "TEMP_NAME");
634
235
            temp.append_child(result);
635
235
            result = temp;
636
45.3k
        }
637
45.5k
        if let Some(
intent_name11.2k
) = &self.name {
638
11.2k
            result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
639
11.2k
            set_mathml_name(result, intent_name.as_str());
640
34.2k
        }
641
45.5k
        if let Some(
my_xpath34.2k
) = &self.xpath{ // self.xpath_name must be != None
642
34.2k
            let xpath_value = my_xpath.evaluate(rules_with_context.get_context(), mathml)
?0
;
643
34.2k
            match xpath_value {
644
34.2k
                Value::String(intent_name) => {
645
34.2k
                    result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
646
34.2k
                    set_mathml_name(result, intent_name.as_str())
647
                },
648
0
                _ => bail!("'xpath-name' value '{}' was not a string", &my_xpath),
649
            }
650
11.2k
        }
651
45.5k
        if self.name.is_none() && 
self.xpath34.2k
.
is_none34.2k
() {
652
0
            bail!("Intent::replace: internal error -- neither 'name' nor 'xpath' is set");
653
45.5k
        };
654
        
655
100k
        for attr in 
mathml45.5k
.
attributes45.5k
() {
656
100k
            result.set_attribute_value(attr.name(), attr.value());
657
100k
        }
658
659
        // can't test against name == "math" because intent might a new element
660
45.5k
        if mathml.parent().is_some() && mathml.parent().unwrap().element().is_some() &&
661
41.7k
           result.attribute_value("id") == crate::canonicalize::get_parent(mathml).attribute_value("id") {
662
32
            // avoid duplicate ids -- it's a bug if it does, but this helps in that case
663
32
            result.remove_attribute("id");
664
45.5k
        }
665
666
45.5k
        if !self.attrs.is_empty() {
667
            // debug!("MathML after children, before attr processing:\n{}", mml_to_string(mathml));
668
            // debug!("Result after children, before attr processing:\n{}", mml_to_string(result));
669
            // debug!("Intent::replace attrs = \"{}\"", &self.attrs);
670
5.63k
            for cap in 
ATTR_NAME_VALUE5.58k
.captures_iter(&self.attrs) {
671
5.63k
                let matched_value = if cap["value"].is_empty() {
&cap["dqvalue"]0
} else {&cap["value"]};
672
5.63k
                let value_as_xpath = MyXPath::new(matched_value.to_string()).context("attr value inside 'intent'")
?0
;
673
5.63k
                let value = value_as_xpath.evaluate(rules_with_context.get_context(), result)
674
5.63k
                        .context("attr xpath evaluation value inside 'intent'")
?0
;
675
5.63k
                let mut value = value.into_string();
676
5.63k
                if &cap["name"] == INTENT_PROPERTY {
677
5.23k
                    value = simplify_fixity_properties(&value);
678
5.23k
                
}397
679
                // debug!("Intent::replace match\n  name={}\n  value={}\n  xpath value={}", &cap["name"], &cap["value"], &value);
680
5.63k
                if &cap["name"] == INTENT_PROPERTY && 
value == ":"5.23k
{
681
1.81k
                    // should have been an empty string, so remove the attribute
682
1.81k
                    result.remove_attribute(INTENT_PROPERTY);
683
3.82k
                } else {
684
3.82k
                    result.set_attribute_value(&cap["name"], &value);
685
3.82k
                }
686
            };
687
39.9k
        }
688
689
        // debug!("Result from 'intent:'\n{}", mml_to_string(result));
690
45.5k
        return T::from_element(result);
691
692
693
        /// "lift" up the children any "TEMP_NAME" child -- could short circuit when only one child
694
45.5k
        fn lift_children(result: Element) -> Element {
695
            // debug!("lift_children:\n{}", mml_to_string(result));
696
            // most likely there will be the same number of new children as result has, but there could be more
697
45.5k
            let mut new_children = Vec::with_capacity(2*result.children().len());
698
69.6k
            for child_of_element in 
result45.5k
.
children45.5k
() {
699
69.6k
                match child_of_element {
700
69.6k
                    ChildOfElement::Element(child) => {
701
69.6k
                        if name(child) == "TEMP_NAME" {
702
34.1k
                            new_children.append(&mut child.children());  // almost always just one
703
35.5k
                        } else {
704
35.5k
                            new_children.push(child_of_element);
705
35.5k
                        }
706
                    },
707
7
                    _ => new_children.push(child_of_element),      // text()
708
                }
709
            }
710
45.5k
            result.replace_children(new_children);
711
45.5k
            return result;
712
45.5k
        }
713
45.5k
    }    
714
}
715
716
// structure used when "with:" is encountered in a rule
717
// the variables are placed on (and later) popped of a variable stack before/after the replacement
718
#[derive(Debug, Clone)]
719
struct With {
720
    variables: VariableDefinitions,     // variables and values
721
    replacements: ReplacementArray,     // what to do with these vars
722
}
723
724
impl fmt::Display for With {
725
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
726
0
        return write!(f, "with:\n      variables: {}\n      replace: {}", &self.variables, &self.replacements);
727
0
    }
728
}
729
730
impl With {
731
77.5k
    fn build(vars_replacements: &Yaml) -> Result<Box<With>> {
732
        // 'with:' -- 'variables': xxx 'replace': xxx
733
77.5k
        if vars_replacements.as_hash().is_none() {
734
0
            bail!("Array found for contents of 'with' -- should be dictionary with keys 'variables' and 'replace'")
735
77.5k
        }
736
77.5k
        let var_defs = &vars_replacements["variables"];
737
77.5k
        if var_defs.is_badvalue() { 
738
0
            bail!("Missing 'variables' as part of 'with'.\n    \
739
                  Suggestion: add 'variables:' or if present, indent so it is contained in 'with'");
740
77.5k
        }
741
77.5k
        let replace = &vars_replacements["replace"];
742
77.5k
        if replace.is_badvalue() { 
743
0
            bail!("Missing 'replace' as part of 'with'.\n    \
744
                  Suggestion: add 'replace:' or if present, indent so it is contained in 'with'");
745
77.5k
        }
746
77.5k
        return Ok( Box::new( With {
747
77.5k
            variables: VariableDefinitions::build(var_defs).context("'variables'")
?0
,
748
77.5k
            replacements: ReplacementArray::build(replace).context("'replace:'")
?0
,
749
        } ) );
750
77.5k
    }
751
752
7.28k
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
753
7.28k
        rules_with_context.context_stack.push(self.variables.clone(), mathml)
?0
;
754
7.28k
        let result = self.replacements.replace(rules_with_context, mathml)
755
7.28k
                    .context("replacing inside 'with'")
?0
;
756
7.28k
        rules_with_context.context_stack.pop();
757
7.28k
        return Ok( result );
758
7.28k
    }    
759
}
760
761
// structure used when "set_variables:" is encountered in a rule
762
// the variables are global and are placed in the base context and never popped off
763
#[derive(Debug, Clone)]
764
struct SetVariables {
765
    variables: VariableDefinitions,     // variables and values
766
}
767
768
impl fmt::Display for SetVariables {
769
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
770
0
        return write!(f, "SetVariables: variables {}", &self.variables);
771
0
    }
772
}
773
774
impl SetVariables {
775
30.3k
    fn build(vars: &Yaml) -> Result<Box<SetVariables>> {
776
        // 'set_variables:' -- 'variables': xxx (array)
777
30.3k
        if vars.as_vec().is_none() {
778
0
            bail!("'set_variables' -- should be an array of variable name, xpath value");
779
30.3k
        }
780
30.3k
        return Ok( Box::new( SetVariables {
781
30.3k
            variables: VariableDefinitions::build(vars).context("'set_variables'")
?0
782
        } ) );
783
30.3k
    }
784
        
785
3.78k
    /// Installs the variables as globals on the context stack.
    ///
    /// Nothing is spoken for this entry, so the result is built from an empty string.
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
        let globals = self.variables.clone();
        rules_with_context.context_stack.set_globals(globals, mathml)?;
        T::from_string( String::new(), rules_with_context.doc )
    }
789
}
790
791
792
/// Allow speech of an expression in the middle of a rule (used by "WhereAmI" for navigation)
793
#[derive(Debug, Clone)]
794
struct TranslateExpression {
795
    xpath: MyXPath,     // variables and values
796
}
797
798
impl fmt::Display for TranslateExpression {
799
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
800
0
        return write!(f, "speak: {}", &self.xpath);
801
0
    }
802
}
803
impl TranslateExpression {
804
102
    fn build(vars: &Yaml) -> Result<TranslateExpression> {
805
        // 'translate:' -- xpath (should evaluate to an id)
806
102
        return Ok( TranslateExpression { xpath: MyXPath::build(vars).context("'translate'")
?0
} );
807
102
    }
808
        
809
2
    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
810
2
        if self.xpath.rc.string.starts_with('@') {
811
2
            let xpath_value = self.xpath.evaluate(rules_with_context.get_context(), mathml)
?0
;
812
2
            let id = match xpath_value {
813
0
                Value::String(s) => Some(s),
814
2
                Value::Nodeset(nodes) => {
815
2
                    if nodes.size() == 1 {
816
2
                        nodes.document_order_first().unwrap().attribute().map(|attr| attr.value().to_string())
817
                    } else {
818
0
                        None
819
                    }
820
                },
821
0
                _ => None,
822
            };
823
2
            match id {
824
0
                None => bail!("'translate' value '{}' is not a string or an attribute value (correct by using '@id'??):\n", self.xpath),
825
2
                Some(id) => {
826
2
                    let speech = speak_mathml(mathml, &id, 0)
?0
;
827
2
                    return T::from_string(speech, rules_with_context.doc);
828
                }
829
            }
830
        } else {
831
0
            return T::from_string(
832
0
                self.xpath.replace(rules_with_context, mathml).context("'translate'")?,
833
0
                rules_with_context.doc
834
            );
835
        }  
836
2
    } 
837
}
838
839
840
/// An array of rule `Replacement`s (text, xpath, tts commands, etc)
841
#[derive(Debug, Clone)]
842
pub struct ReplacementArray {
843
    replacements: Vec<Replacement>
844
}
845
846
impl fmt::Display for ReplacementArray {
847
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
848
1
        return write!(f, "{}", self.pretty_print_replacements());
849
1
    }
850
}
851
852
impl ReplacementArray {
853
    /// Return an empty `ReplacementArray`
854
1.98M
    pub fn build_empty() -> ReplacementArray {
855
1.98M
        return ReplacementArray {
856
1.98M
            replacements: vec![]
857
1.98M
        }
858
1.98M
    }
859
860
    /// Convert a Yaml input into a [`ReplacementArray`].
861
    /// Any errors are passed back out.
862
9.24M
    pub fn build(replacements: &Yaml) -> Result<ReplacementArray> {
863
        // replacements is either a single replacement or an array of replacements
864
9.24M
        let result= if replacements.is_array() {
865
9.22M
            let replacements = replacements.as_vec().unwrap();
866
9.22M
            replacements
867
9.22M
                .iter()
868
9.22M
                .enumerate()    // useful for errors
869
13.4M
                .
map9.22M
(|(i, r)| Replacement::build(r)
870
13.4M
                            .with_context(|| 
format!0
("replacement #{} of {}",
i+10
,
replacements0
.
len0
())))
871
9.22M
                .collect::<Result<Vec<Replacement>>>()
?0
872
        } else {
873
21.2k
            vec![ Replacement::build(replacements)
?0
]
874
        };
875
876
9.24M
        return Ok( ReplacementArray{ replacements: result } );
877
9.24M
    }
878
879
    /// Do all the replacements in `mathml` using `rules`.
880
275k
    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
881
275k
        return T::replace(self, rules_with_context, mathml);
882
275k
    }
883
884
142k
    pub fn replace_array_string<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
885
        // loop over the replacements and build up a vector of strings, excluding empty ones.
886
        // * eliminate any redundance
887
        // * add/replace auto-pauses
888
        // * join the remaining vector together
889
142k
        let mut replacement_strings = Vec::with_capacity(self.replacements.len());   // probably conservative guess
890
271k
        for replacement in 
self.replacements.iter()142k
{
891
271k
            let string: String = rules_with_context.replace(replacement, mathml)
?0
;
892
271k
            if !string.is_empty() {
893
200k
                replacement_strings.push(string);
894
200k
            
}70.3k
895
        }
896
897
142k
        if replacement_strings.is_empty() {
898
12.9k
            return Ok( "".to_string() );
899
129k
        }
900
        // delete an optional text that is repetitive
901
        // we do this by looking for the optional text marker, and if present, check for repetition at end of previous string
902
        // if repetitive, we delete the optional string
903
        // if not, we leave the markers because the repetition might happen several "levels" up
904
        // this could also be done in a final cleanup of the entire string (where we remove any markers),
905
        //   but the match is harder (rust regex lacks look behind pattern match) and it is less efficient
906
        // Note: we skip the first string since it can't be repetitive of something at this level
907
129k
        for 
i45.4k
in 1..replacement_strings.len()-1 {
908
45.4k
            if let Some(
bytes13
) = is_repetitive(&replacement_strings[i-1], &replacement_strings[i]) {
909
13
                replacement_strings[i] = bytes.to_string();
910
45.4k
            } 
911
        }
912
                        
913
200k
        for i in 
0..replacement_strings.len()129k
{
914
200k
            if replacement_strings[i].contains(PAUSE_AUTO_STR) {
915
19.5k
                let before = if i == 0 {
""194
} else {
&replacement_strings[i-1]19.3k
};
916
19.5k
                let after = if i+1 == replacement_strings.len() {
""230
} else {
&replacement_strings[i+1]19.3k
};
917
19.5k
                replacement_strings[i] = replacement_strings[i].replace(
918
19.5k
                    PAUSE_AUTO_STR,
919
19.5k
                    &rules_with_context.speech_rules.pref_manager.borrow().get_tts().compute_auto_pause(&rules_with_context.speech_rules.pref_manager.borrow(), before, after));
920
181k
            }
921
        }
922
923
        // join the strings together with spaces in between
924
        // concatenation (removal of spaces) is saved for the top level because they otherwise are stripped at the wrong sometimes
925
129k
        return Ok( replacement_strings.join(" ") );
926
927
        /// delete an optional text (in 'next') that is repetitive at the end of 'prev'
928
        /// we do this by looking for the optional text marker, and if present, check for repetition at end of previous string
929
        /// if repetitive, we delete the optional string
930
45.4k
        fn is_repetitive<'a>(prev: &str, next: &'a str) -> Option<&'a str> {
931
            // OPTIONAL_INDICATOR optionally surrounds the end of 'prev'(ignoring trailing whitespace)
932
            // OPTIONAL_INDICATOR surrounds the start of 'next'
933
            // minor optimization -- lots of short strings and the OPTIONAL_INDICATOR takes a few bytes, so skip the check for those strings
934
45.4k
            if next.len() <=  2 * OPTIONAL_INDICATOR_LEN {
935
14.2k
                return None;
936
31.2k
            }
937
938
            // should be exactly one match -- ignore more than one for now
939
31.2k
            let 
i_start36
= next.find(OPTIONAL_INDICATOR)
?31.2k
;
940
36
            let start_repeat_word_in_next = &next[i_start + OPTIONAL_INDICATOR_LEN..];
941
36
            let i_end = start_repeat_word_in_next.find(OPTIONAL_INDICATOR)
942
36
                .unwrap_or_else(|| 
panic!0
("Internal error: missing end optional char -- text handling is corrupted!"));
943
36
            let repeat_word = &start_repeat_word_in_next[..i_end];
944
            // debug!("check if '{}' is repetitive, end_index={}", repeat_word, i_end);
945
            // debug!("   prev: '{}', next '{}'", prev, next);
946
947
36
            let prev_trimmed = prev.trim_end();
948
36
            let ends_with_word = prev_trimmed.len() > repeat_word.len() && 
prev_trimmed35
.
ends_with35
(
repeat_word35
);
949
36
            let ends_with_wrapped_word =
950
36
                prev_trimmed
951
36
                    .strip_suffix(OPTIONAL_INDICATOR)
952
36
                    .and_then(|s| 
s0
.
strip_suffix0
(
repeat_word0
))
953
36
                    .and_then(|s| 
s0
.
strip_suffix0
(OPTIONAL_INDICATOR))
954
36
                    .is_some();
955
36
            if ends_with_word || 
ends_with_wrapped_word23
{
956
                // debug!("  is repetitive");
957
13
                Some(start_repeat_word_in_next[i_end + OPTIONAL_INDICATOR_LEN..].trim_start())  // remove repeat word and OPTIONAL_INDICATOR
958
            } else {
959
23
                None
960
            }
961
45.4k
        }
962
142k
    }
963
964
132k
    pub fn replace_array_tree<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
965
        // shortcut for common case (don't build a new tree node)
966
132k
        if self.replacements.len() == 1 {
967
129k
            return rules_with_context.replace::<Element<'m>>(&self.replacements[0], mathml);
968
3.20k
        }
969
970
3.20k
        let new_element = create_mathml_element(&rules_with_context.doc, "Unknown");  // Hopefully set later (in Intent::Replace())
971
3.20k
        let mut new_children = Vec::with_capacity(self.replacements.len());
972
6.12k
        for child in 
self.replacements.iter()3.20k
{
973
6.12k
            let child = rules_with_context.replace::<Element<'m>>(child, mathml)
?0
;
974
6.12k
            new_children.push(ChildOfElement::Element(child));
975
        };
976
3.20k
        new_element.append_children(new_children);
977
3.20k
        return Ok(new_element);
978
132k
    }
979
980
981
    /// Return true if there are no replacements.
982
29.8k
    pub fn is_empty(&self) -> bool {
983
29.8k
        return self.replacements.is_empty();
984
29.8k
    }
985
    
986
10
    fn pretty_print_replacements(&self) -> String {
987
10
        let mut group_string = String::with_capacity(128);
988
10
        if self.replacements.len() == 1 {
989
9
            group_string += &format!("[{}]", self.replacements[0]);
990
9
        } else {
991
1
            group_string += &self.replacements.iter()
992
1
                    .map(|replacement| 
format!0
("\n - {replacement}"))
993
1
                    .collect::<Vec<String>>()
994
1
                    .join("");
995
1
            group_string += "\n";
996
        }
997
10
        return group_string;
998
10
    }
999
}
1000
1001
1002
1003
// MyXPath is a wrapper around an 'XPath' that keeps around the original xpath expr (as a string) so it can be used in error reporting.
1004
// Because we want to be able to clone them and XPath doesn't support clone(), this is a wrapper around an internal MyXPath.
1005
// It supports the standard SpeechRule functionality of building and replacing.
1006
#[derive(Debug)]
1007
struct RCMyXPath {
1008
    xpath: XPath,
1009
    string: String,        // store for error reporting
1010
}
1011
1012
#[derive(Debug, Clone)]
1013
pub struct MyXPath {
1014
    rc: Rc<RCMyXPath>        // rather than putting Rc around both 'xpath' and 'string', just use one and indirect to internal RCMyXPath
1015
}
1016
1017
1018
impl fmt::Display for MyXPath {
1019
2.79k
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1020
2.79k
        return write!(f, "\"{}\"", self.rc.string);
1021
2.79k
    }
1022
}
1023
1024
// pub fn xpath_count() -> (usize, usize) {
1025
//     return (XPATH_CACHE.with( |cache| cache.borrow().len()), unsafe{XPATH_CACHE_HITS} );
1026
// }
1027
thread_local!{
1028
    static XPATH_CACHE: RefCell<HashMap<String, MyXPath>> = RefCell::new( HashMap::with_capacity(2047) );
1029
}
1030
// static mut XPATH_CACHE_HITS: usize = 0;
1031
1032
impl MyXPath {
1033
8.88M
    fn new(xpath: String) -> Result<MyXPath> {
1034
8.88M
        return XPATH_CACHE.with( |cache|  {
1035
8.88M
            let mut cache = cache.borrow_mut();
1036
            return Ok(
1037
8.88M
                match cache.get(&xpath) {
1038
5.82M
                    Some(compiled_xpath) => {
1039
                        // unsafe{ XPATH_CACHE_HITS += 1;};
1040
5.82M
                        compiled_xpath.clone()
1041
                    },
1042
                    None => {
1043
3.06M
                        let new_xpath = MyXPath {
1044
3.06M
                            rc: Rc::new( RCMyXPath {
1045
3.06M
                                xpath: MyXPath::compile_xpath(&xpath)
?0
,
1046
3.06M
                                string: xpath.clone()
1047
                            })};
1048
3.06M
                        cache.insert(xpath.clone(), new_xpath.clone());
1049
3.06M
                        new_xpath
1050
                    },
1051
                }
1052
            )
1053
8.88M
        });
1054
8.88M
    }
1055
1056
8.83M
    pub fn build(xpath: &Yaml) -> Result<MyXPath> {
1057
8.83M
        let xpath = match xpath {
1058
8.64M
            Yaml::String(s) => s.to_string(),
1059
0
            Yaml::Integer(i) => i.to_string(),
1060
0
            Yaml::Real(s) => s.to_string(),
1061
0
            Yaml::Boolean(s) => s.to_string(),
1062
193k
            Yaml::Array(v) =>
1063
                // array of strings -- concatenate them together
1064
193k
                v.iter()
1065
193k
                    .map(as_str_checked)
1066
193k
                    .collect::<Result<Vec<&str>>>()
?0
1067
193k
                    .join(" "),
1068
0
            _ => bail!("Bad value when trying to create an xpath: {}", yaml_to_string(xpath, 1)),
1069
        };
1070
8.83M
        return MyXPath::new(xpath);
1071
8.83M
    }
1072
1073
3.06M
    fn compile_xpath(xpath: &str) -> Result<XPath> {
1074
3.06M
        let factory = Factory::new();
1075
3.06M
        let xpath_with_debug_info = MyXPath::add_debug_string_arg(xpath)
?0
;
1076
3.06M
        let compiled_xpath = factory.build(&xpath_with_debug_info)
1077
3.06M
                        .with_context(|| 
format!0
(
1078
                            "Could not compile XPath for pattern:\n{}{}",
1079
0
                            &xpath, more_details(xpath)))?;
1080
3.06M
        return match compiled_xpath {
1081
3.06M
            Some(xpath) => Ok(xpath),
1082
0
            None => bail!("Problem compiling Xpath for pattern:\n{}{}",
1083
0
                            &xpath, more_details(xpath)),
1084
        };
1085
1086
        
1087
0
        fn more_details(xpath: &str) -> String {
1088
            // try to give a better error message by counting [], (), 's, and "s
1089
0
            if xpath.is_empty() {
1090
0
                return "xpath is empty string".to_string();
1091
0
            }
1092
0
            let as_bytes = xpath.trim().as_bytes();
1093
0
            if as_bytes[0] == b'\'' && as_bytes[as_bytes.len()-1] != b'\'' {
1094
0
                return "\nmissing \"'\"".to_string();
1095
0
            }
1096
0
            if (as_bytes[0] == b'"' && as_bytes[as_bytes.len()-1] != b'"') ||
1097
0
               (as_bytes[0] != b'"' && as_bytes[as_bytes.len()-1] == b'"'){
1098
0
                return "\nmissing '\"'".to_string();
1099
0
            }
1100
1101
0
            let mut i_bytes = 0;      // keep track of # of bytes into string for error reporting
1102
0
            let mut paren_count = 0;    // counter to make sure they are balanced
1103
0
            let mut i_paren = 0;      // position of the outermost open paren
1104
0
            let mut bracket_count = 0;
1105
0
            let mut i_bracket = 0;
1106
0
            for ch in xpath.chars() {
1107
0
                if ch == '(' {
1108
0
                    if paren_count == 0 {
1109
0
                        i_paren = i_bytes;
1110
0
                    }
1111
0
                    paren_count += 1;
1112
0
                } else if ch == '[' {
1113
0
                    if bracket_count == 0 {
1114
0
                        i_bracket = i_bytes;
1115
0
                    }
1116
0
                    bracket_count += 1;
1117
0
                } else if ch == ')' {
1118
0
                    if paren_count == 0 {
1119
0
                        return format!("\nExtra ')' found after '{}'", &xpath[i_paren..i_bytes]);
1120
0
                    }
1121
0
                    paren_count -= 1;
1122
0
                    if paren_count == 0 && bracket_count > 0 && i_bracket > i_paren {
1123
0
                        return format!("\nUnclosed brackets found at '{}'", &xpath[i_paren..i_bytes]);
1124
0
                    }
1125
0
                } else if ch == ']' {
1126
0
                    if bracket_count == 0 {
1127
0
                        return format!("\nExtra ']' found after '{}'", &xpath[i_bracket..i_bytes]);
1128
0
                    }
1129
0
                    bracket_count -= 1;
1130
0
                    if bracket_count == 0 && paren_count > 0 && i_paren > i_bracket {
1131
0
                        return format!("\nUnclosed parens found at '{}'", &xpath[i_bracket..i_bytes]);
1132
0
                    }
1133
0
                }
1134
0
                i_bytes += ch.len_utf8();
1135
            }
1136
0
            return "".to_string();
1137
0
        }
1138
3.06M
    }
1139
1140
    /// Convert DEBUG(...) input to the internal function which is DEBUG(arg, arg_as_string)
1141
3.06M
    fn add_debug_string_arg(xpath: &str) -> Result<String> {
1142
        // do a quick check to see if "DEBUG" is in the string -- this is the common case
1143
3.06M
        let debug_start = xpath.find("DEBUG(");
1144
3.06M
        if debug_start.is_none() {
1145
3.06M
            return Ok( xpath.to_string() );
1146
1.56k
        }
1147
1148
1.56k
        let debug_start = debug_start.unwrap();
1149
1.56k
        let mut before_paren = xpath[..debug_start+5].to_string();   // includes "DEBUG"
1150
1.56k
        let chars = xpath[debug_start+5..].chars().collect::<Vec<char>>();     // begins at '('
1151
1.56k
        before_paren.push_str(&chars_add_debug_string_arg(&chars).with_context(|| 
format!0
("In xpath='{xpath}'"))
?0
);
1152
        // debug!("add_debug_string_arg: {}", before_paren);
1153
1.56k
        return Ok(before_paren);
1154
1155
1.56k
        fn chars_add_debug_string_arg(chars: &[char]) -> Result<String>  {
1156
            // Find all the DEBUG(...) commands in 'xpath' and adds a string argument.
1157
            // The DEBUG function that is used internally takes two arguments, the second one being a string version of the DEBUG arg.
1158
            //   Being a string, any quotes need to be escaped, and DEBUGs inside of DEBUGs need more escaping.
1159
            //   This is done via recursive calls to this function.
1160
1.56k
            assert_eq!(chars[0], '(', "{} does not start with ')'", 
chars0
.
iter0
().
collect0
::<String>());
1161
1.56k
            let mut count = 1;  // open/close count
1162
1.56k
            let mut i = 1;
1163
1.56k
            let mut inside_quote = false;
1164
50.8k
            while i < chars.len() {
1165
50.8k
                let ch = chars[i];
1166
805
                match ch {
1167
                    '\\' => {
1168
0
                        if i+1 == chars.len() {
1169
0
                            bail!("Syntax error in DEBUG: last char is escape char\nDebug string: '{}'", chars.iter().collect::<String>());
1170
0
                        }
1171
0
                        i += 1;
1172
                    },
1173
2.21k
                    '\'' => inside_quote = !inside_quote,
1174
804
                    '(' if !inside_quote => {
1175
804
                        count += 1;
1176
804
                        // FIX: it would be more efficient to spot "DEBUG" preceding this and recurse rather than matching the whole string and recursing
1177
804
                    },
1178
1
                    '(' => (),
1179
2.36k
                    ')' if !inside_quote => {
1180
2.36k
                        count -= 1;
1181
2.36k
                        if count == 0 {
1182
1.56k
                            let arg = &chars[1..i].iter().collect::<String>();
1183
1.56k
                            let escaped_arg = arg.replace('"', "\\\"");
1184
                            // DEBUG(...) may be inside 'arg' -- recurse
1185
1.56k
                            let processed_arg = MyXPath::add_debug_string_arg(arg)
?0
;
1186
1187
                            // DEBUG(...) may be in the remainder of the string -- recurse
1188
1.56k
                            let processed_rest = MyXPath::add_debug_string_arg(&chars[i+1..].iter().collect::<String>())
?0
;
1189
1.56k
                            return Ok( format!("({processed_arg}, \"{escaped_arg}\"){processed_rest}") );
1190
804
                        }
1191
                    },
1192
0
                    ')' => (),
1193
45.4k
                    _ => (),
1194
                }
1195
49.2k
                i += 1;
1196
            }
1197
0
            bail!("Syntax error in DEBUG: didn't find matching closing paren\nDEBUG{}", chars.iter().collect::<String>());
1198
1.56k
        }
1199
3.06M
    }
1200
1201
156k
    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1202
        // return true if there is no condition or if the condition evaluates to true
1203
        return Ok(
1204
156k
            match self.evaluate(context, mathml)
?0
{
1205
115k
                Value::Boolean(b) => b,
1206
40.6k
                Value::Nodeset(nodes) => nodes.size() > 0,
1207
0
                _                      => false,      
1208
            }
1209
        )
1210
156k
    }
1211
1212
153k
    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1213
153k
        if self.rc.string == "process-intent(.)" {
1214
2.47k
            return 
T::from_element2.46k
( infer_intent(rules_with_context, mathml)
?9
);
1215
150k
        }
1216
        
1217
150k
        let result = self.evaluate(&rules_with_context.context_stack.base, mathml)
1218
150k
                .with_context(|| 
format!0
("in '{}' replacing after pattern match",
&self.rc.string0
) )
?0
;
1219
150k
        let 
string28.9k
= match result {
1220
121k
                Value::Nodeset(nodes) => {
1221
121k
                    if nodes.size() == 0 {
1222
0
                        bail!("During replacement, no matching element found");
1223
121k
                    }
1224
121k
                    return rules_with_context.replace_nodes(nodes.document_order(), mathml);
1225
                },
1226
25.1k
                Value::String(s) => s,
1227
3.80k
                Value::Number(num) => num.to_string(),
1228
0
                Value::Boolean(b) => b.to_string(),          // FIX: is this right???
1229
        };
1230
        // Hack!: this test for input that starts with a '$' (defined variable), avoids a double evaluate;
1231
        // We don't need NO_EVAL_QUOTE_CHAR here, but the more general solution of a quoted execute (- xq:) would avoid this hack
1232
28.9k
        let result = if self.rc.string.starts_with('$') {
string5.63k
} else {
rules_with_context23.3k
.
replace_chars23.3k
(
&string23.3k
,
mathml23.3k
)
?0
};
1233
28.9k
        return T::from_string(result, rules_with_context.doc );
1234
153k
    }
1235
    
1236
1.29M
    pub fn evaluate<'c>(&self, context: &sxd_xpath::Context<'c>, mathml: Element<'c>) -> Result<Value<'c>> {
1237
        // debug!("evaluate: {}", self);
1238
1.29M
        let result = self.rc.xpath.evaluate(context, mathml);
1239
1.29M
        return match result {
1240
1.29M
            Ok(val) => Ok( val ),
1241
0
            Err(e) => {
1242
                // debug!("MyXPath::trying to evaluate:\n  '{}'\n caused the error\n'{}'", self, e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", ""));
1243
0
                bail!( "{}\n\n",
1244
                     // remove confusing parts of error message from xpath
1245
0
                    e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", "") );
1246
            }
1247
        };
1248
1.29M
    }
1249
1250
0
    pub fn test_input<F>(self, f: F) -> bool where F: Fn(&str) -> bool {
1251
0
        return f(self.rc.string.as_ref());
1252
0
    }
1253
}
1254
1255
// 'SpeechPattern' holds a single pattern.
1256
// Some info is not needed beyond converting the Yaml to the SpeechPattern, but is useful for error reporting.
1257
// The two main parts are the pattern to be matched and the replacements to do if there is a match.
1258
// Any variables/prefs that are defined/set are also stored.
1259
#[derive(Debug)]
1260
struct SpeechPattern {
1261
    pattern_name: String,
1262
    tag_name: String,
1263
    file_name: String,
1264
    pattern: MyXPath,                     // the xpath expr to attempt to match
1265
    match_uses_var_defs: bool,            // include var_defs in context for matching
1266
    var_defs: VariableDefinitions,        // any variable definitions [can be and probably is an empty vector most of the time]
1267
    replacements: ReplacementArray,       // the replacements in case there is a match
1268
}
1269
1270
impl fmt::Display for SpeechPattern {
1271
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1272
0
        return write!(f, "[name: {}, tag: {},\n  variables: {:?}, pattern: {},\n  replacement: {}]",
1273
                self.pattern_name, self.tag_name, self.var_defs, self.pattern,
1274
0
                self.replacements.pretty_print_replacements());
1275
0
    }
1276
}
1277
1278
impl SpeechPattern  {
1279
894k
    fn build(dict: &Yaml, file: &Path, rules: &mut SpeechRules) -> Result<Option<Vec<PathBuf>>> {
1280
        // Rule::SpeechPattern
1281
        //   build { "pattern_name", "tag_name", "pattern", "replacement" }
1282
        // or recurse via include: file_name
1283
1284
        // debug!("\nbuild_speech_pattern: dict:\n{}", yaml_to_string(dict, 0));
1285
894k
        if let Some(
include_file_name30.2k
) = find_str(dict, "include") {
1286
30.2k
            let do_include_fn = |new_file: &Path| {
1287
30.2k
                rules.read_patterns(new_file)
1288
30.2k
            };
1289
1290
30.2k
            return Ok( Some(process_include(file, include_file_name, do_include_fn)
?0
) );
1291
864k
        }
1292
1293
864k
        let pattern_name = find_str(dict, "name");
1294
1295
        // tag_named can be either a string (most common) or an array of strings
1296
864k
        let mut tag_names: Vec<&str> = Vec::new();
1297
864k
        match find_str(dict, "tag") {
1298
740k
            Some(str) => tag_names.push(str),
1299
            None => {
1300
                // check for array
1301
124k
                let tag_array  = &dict["tag"];
1302
124k
                tag_names = vec![];
1303
124k
                if tag_array.is_array() {
1304
263k
                    for (i, name) in 
tag_array124k
.as_vec().unwrap().iter().
enumerate124k
() {
1305
263k
                        match as_str_checked(name) {
1306
0
                            Err(e) => return Err(
1307
0
                                e.context(
1308
0
                                    format!("tag name '{}' is not a string in:\n{}",
1309
0
                                        &yaml_to_string(&tag_array.as_vec().unwrap()[i], 0),
1310
0
                                        &yaml_to_string(dict, 1)))
1311
0
                            ),
1312
263k
                            Ok(str) => tag_names.push(str),
1313
                        };
1314
                    }
1315
                } else {
1316
0
                    bail!("Errors trying to find 'tag' in:\n{}", &yaml_to_string(dict, 1));
1317
                }
1318
            }
1319
        }
1320
1321
864k
        if pattern_name.is_none() {
1322
0
            if dict.is_null() {
1323
0
                bail!("Error trying to find 'name': empty value (two consecutive '-'s?");
1324
            } else {
1325
0
                bail!("Errors trying to find 'name' in:\n{}", &yaml_to_string(dict, 1));
1326
            };
1327
864k
        };
1328
864k
        let pattern_name = pattern_name.unwrap().to_string();
1329
1330
        // FIX: add check to make sure tag_name is a valid MathML tag name
1331
864k
        if dict["match"].is_badvalue() {
1332
0
            bail!("Did not find 'match' in\n{}", yaml_to_string(dict, 1));
1333
864k
        }
1334
864k
        if dict["replace"].is_badvalue() {
1335
0
            bail!("Did not find 'replace' in\n{}", yaml_to_string(dict, 1));
1336
864k
        }
1337
    
1338
        // xpath's can't be cloned, so we need to do a 'build_xxx' for each tag name
1339
1.00M
        for tag_name in 
tag_names864k
{
1340
1.00M
            let tag_name = tag_name.to_string();
1341
1.00M
            let pattern_xpath = MyXPath::build(&dict["match"])
1342
1.00M
                    .with_context(|| 
{0
1343
0
                        format!("value for 'match' in rule ({}: {}):\n{}",
1344
0
                                tag_name, pattern_name, yaml_to_string(dict, 1))
1345
0
                    })?;
1346
1.00M
            let speech_pattern =
1347
1.00M
                Box::new( SpeechPattern{
1348
1.00M
                    pattern_name: pattern_name.clone(),
1349
1.00M
                    tag_name: tag_name.clone(),
1350
1.00M
                    file_name: file.to_str().unwrap().to_string(),
1351
1.00M
                    match_uses_var_defs: dict["variables"].is_array() && 
pattern_xpath.rc.string.contains('$')169k
, // FIX: should look at var_defs for actual name
1352
1.00M
                    pattern: pattern_xpath,
1353
1.00M
                    var_defs: VariableDefinitions::build(&dict["variables"])
1354
1.00M
                        .with_context(|| 
{0
1355
0
                            format!("value for 'variables' in rule ({}: {}):\n{}",
1356
0
                                    tag_name, pattern_name, yaml_to_string(dict, 1))
1357
0
                        })?,
1358
1.00M
                    replacements: ReplacementArray::build(&dict["replace"])
1359
1.00M
                        .with_context(|| 
{0
1360
0
                            format!("value for 'replace' in rule ({}: {}). Replacements:\n{}",
1361
0
                                    tag_name, pattern_name, yaml_to_string(&dict["replace"], 1))
1362
0
                    })?
1363
                } );
1364
            // get the array of rules for the tag name
1365
1.00M
            let rule_value = rules.rules.entry(tag_name).or_default();
1366
1367
            // if the name exists, replace it. Otherwise add the new rule
1368
2.67M
            match 
rule_value.iter().enumerate()1.00M
.
find1.00M
(|&pattern| pattern.1.pattern_name == speech_pattern.pattern_name) {
1369
1.00M
                None => rule_value.push(speech_pattern),
1370
9
                Some((i, _old_pattern)) => {
1371
9
                    let old_rule = &rule_value[i];
1372
9
                    info!("\n\n***WARNING***: replacing {}/'{}' in {} with rule from {}\n",
1373
                            old_rule.tag_name, old_rule.pattern_name, old_rule.file_name, speech_pattern.file_name);
1374
9
                    rule_value[i] = speech_pattern;
1375
                },
1376
            }
1377
        }
1378
1379
864k
        return Ok(None);
1380
894k
    }
1381
1382
870k
    fn is_match(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1383
870k
        if self.tag_name != mathml.name().local_part() && 
self.tag_name != "*"224k
&&
self.tag_name != "!*"164k
{
1384
0
            return Ok( false );
1385
870k
        }
1386
1387
        // debug!("\nis_match: pattern='{}'", self.pattern_name);
1388
        // debug!("    pattern_expr {:?}", self.pattern);
1389
        // debug!("is_match: mathml is\n{}", mml_to_string(mathml));
1390
        return Ok(
1391
870k
            match self.pattern.evaluate(context, mathml)
?0
{
1392
652k
                Value::Boolean(b)       => b,
1393
217k
                Value::Nodeset(nodes) => nodes.size() > 0,
1394
0
                _                             => false,
1395
            }
1396
        );
1397
870k
    }
1398
}
1399
1400
1401
// 'Test' holds information used if the replacement is a "test:" clause.
1402
// The condition is an xpath expr and the "else:" part is optional.
1403
1404
#[derive(Debug, Clone)]
1405
struct TestArray {
1406
    tests: Vec<Test>
1407
}
1408
1409
impl fmt::Display for TestArray {
1410
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1411
0
        for test in &self.tests {
1412
0
            writeln!(f, "{test}")?;
1413
        }
1414
0
        return Ok( () );
1415
0
    }
1416
}
1417
1418
impl TestArray {
1419
3.29M
    fn build(test: &Yaml) -> Result<TestArray> {
1420
        // 'test:' for convenience takes either a dictionary with keys if/else_if/then/then_test/else/else_test or
1421
        //      or an array of those values (there should be at most one else/else_test)
1422
1423
        // if 'test' is a dictionary ('Hash'), we convert it to an array with one entry and proceed
1424
3.29M
        let tests = if test.as_hash().is_some() {
1425
3.01M
            vec![test]
1426
287k
        } else if let Some(vec) = test.as_vec() {
1427
287k
            vec.iter().collect()
1428
        } else {
1429
0
            bail!("Value for 'test:' is neither a dictionary or an array.")
1430
        };
1431
1432
        // each entry in 'tests' should be a dictionary with keys if/then/then_test/else/else_test
1433
        // a valid entry is one of:
1434
        //   if:/else_if:, then:/then_test: and optional else:/else_test:
1435
        //   else:/else_test: -- if this case, it should be the last entry in 'tests'
1436
        // 'if:' should only be the first entry in the array; 'else_if' should never be the first entry. Otherwise, they are the same
1437
3.29M
        let mut test_array = vec![];
1438
3.73M
        for test in 
tests3.29M
{
1439
3.73M
            if test.as_hash().is_none() {
1440
0
                bail!("Value for array entry in 'test:' must be a dictionary/contain keys");
1441
3.73M
            }
1442
3.73M
            let if_part = &test[if test_array.is_empty() {
"if"3.29M
} else {
"else_if"437k
}];
1443
3.73M
            if !if_part.is_badvalue() {
1444
                // first case: if:, then:, optional else:
1445
3.69M
                let condition = Some( MyXPath::build(if_part)
?0
);
1446
3.69M
                let then_part = TestOrReplacements::build(test, "then", "then_test", true)
?0
;
1447
3.69M
                let else_part = TestOrReplacements::build(test, "else", "else_test", false)
?0
;
1448
3.69M
                let n_keys = if else_part.is_none() {
22.45M
} else {
31.23M
};
1449
3.69M
                if test.as_hash().unwrap().len() > n_keys {
1450
0
                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found in the 'then' clause of 'test'");
1451
3.69M
                };
1452
3.69M
                test_array.push(
1453
3.69M
                    Test { condition, then_part, else_part }
1454
                );
1455
            } else {
1456
                // second case: should be else/else_test
1457
42.3k
                let else_part = TestOrReplacements::build(test, "else", "else_test", true)
?0
;
1458
42.3k
                if test.as_hash().unwrap().len() > 1 {
1459
0
                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found the 'else' clause of 'test'");
1460
42.3k
                };
1461
42.3k
                test_array.push(
1462
42.3k
                    Test { condition: None, then_part: None, else_part }
1463
                );
1464
                
1465
                // there shouldn't be any trailing tests
1466
42.3k
                if test_array.len() < test.as_hash().unwrap().len() {
1467
0
                    bail!("'else'/'else_test' key is not last key in 'test:'");
1468
42.3k
                }
1469
            }
1470
        };
1471
1472
3.29M
        if test_array.is_empty() {
1473
0
            bail!("No entries for 'test:'");
1474
3.29M
        }
1475
1476
3.29M
        return Ok( TestArray { tests: test_array } );
1477
3.29M
    }
1478
1479
121k
    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1480
156k
        for test in 
&self.tests121k
{
1481
156k
            if test.is_true(&rules_with_context.context_stack.base, mathml)
?0
{
1482
85.2k
                assert!(test.then_part.is_some());
1483
85.2k
                return test.then_part.as_ref().unwrap().replace(rules_with_context, mathml);
1484
71.1k
            } else if let Some(
else_part12.9k
) = test.else_part.as_ref() {
1485
12.9k
                return else_part.replace(rules_with_context, mathml);
1486
58.1k
            }
1487
        }
1488
23.4k
        return T::from_string("".to_string(), rules_with_context.doc);
1489
121k
    }
1490
}
1491
1492
#[derive(Debug, Clone)]
1493
// Used to hold then/then_test and also else/else_test -- only one of these can be present at a time
1494
enum TestOrReplacements {
1495
    Replacements(ReplacementArray),     // replacements to use when a test is true
1496
    Test(TestArray),                    // the array of if/then/else tests
1497
}
1498
1499
impl fmt::Display for TestOrReplacements {
1500
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1501
0
        if let TestOrReplacements::Test(_) = self {
1502
0
            write!(f, "  _test")?;
1503
0
        }
1504
0
        write!(f, ":")?;
1505
0
        return match self {
1506
0
            TestOrReplacements::Test(t) => write!(f, "{t}"),
1507
0
            TestOrReplacements::Replacements(r) => write!(f, "{r}"),
1508
        };
1509
0
    }
1510
}
1511
1512
impl TestOrReplacements {
1513
7.43M
    fn build(test: &Yaml, replace_key: &str, test_key: &str, key_required: bool) -> Result<Option<TestOrReplacements>> {
1514
7.43M
        let part = &test[replace_key];
1515
7.43M
        let test_part = &test[test_key];
1516
7.43M
        if !part.is_badvalue() && 
!test_part.is_badvalue()4.26M
{
1517
0
            bail!(format!("Only one of '{}' or '{}' is allowed as part of 'test'.\n{}\n    \
1518
                  Suggestion: delete one or adjust indentation",
1519
0
                    replace_key, test_key, yaml_to_string(test, 2)));
1520
7.43M
        }
1521
7.43M
        if part.is_badvalue() && 
test_part3.16M
.
is_badvalue3.16M
() {
1522
2.45M
            if key_required {
1523
0
                bail!(format!("Missing one of '{}'/'{}:' as part of 'test:'\n{}\n   \
1524
                    Suggestion: add the missing key or indent so it is contained in 'test'",
1525
0
                    replace_key, test_key, yaml_to_string(test, 2)))
1526
            } else {
1527
2.45M
                return Ok( None );
1528
            }
1529
4.97M
        }
1530
        // at this point, we have only one of the two options
1531
4.97M
        if test_part.is_badvalue() {
1532
4.26M
            return Ok( Some( TestOrReplacements::Replacements( ReplacementArray::build(part)
?0
) ) );
1533
        } else {
1534
712k
            return Ok( Some( TestOrReplacements::Test( TestArray::build(test_part)
?0
) ) );
1535
        }
1536
7.43M
    }
1537
1538
98.2k
    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1539
98.2k
        return match self {
1540
92.4k
            TestOrReplacements::Replacements(r) => r.replace(rules_with_context, mathml),
1541
5.74k
            TestOrReplacements::Test(t) => t.replace(rules_with_context, mathml),
1542
        }
1543
98.2k
    }
1544
}
1545
1546
#[derive(Debug, Clone)]
1547
struct Test {
1548
    condition: Option<MyXPath>,
1549
    then_part: Option<TestOrReplacements>,
1550
    else_part: Option<TestOrReplacements>,
1551
}
1552
impl fmt::Display for Test {
1553
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1554
0
        write!(f, "test: [ ")?;
1555
0
        if let Some(if_part) = &self.condition {
1556
0
            write!(f, " if: '{if_part}'")?;
1557
0
        }
1558
0
        if let Some(then_part) = &self.then_part {
1559
0
            write!(f, " then{then_part}")?;
1560
0
        }
1561
0
        if let Some(else_part) = &self.else_part {
1562
0
            write!(f, " else{else_part}")?;
1563
0
        }
1564
0
        return write!(f, "]");
1565
0
    }
1566
}
1567
1568
impl Test {
1569
156k
    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1570
156k
        return match self.condition.as_ref() {
1571
136
            None => Ok( false ),     // trivially false -- want to do else part
1572
156k
            Some(condition) => condition.is_true(context, mathml)
1573
156k
                                .context("Failure in conditional test"),
1574
        }
1575
156k
    }
1576
}
1577
1578
// Used for speech rules with "variables: ..."
1579
#[derive(Debug, Clone)]
1580
struct VariableDefinition {
1581
    name: String,     // name of variable
1582
    value: MyXPath,   // xpath value, typically a constant like "true" or "0", but could be "*/*[1]" to store some nodes   
1583
}
1584
1585
impl fmt::Display for VariableDefinition {
1586
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1587
0
        return write!(f, "[name: {}={}]", self.name, self.value);
1588
0
    }   
1589
}
1590
1591
// Used for speech rules with "variables: ..."
1592
#[derive(Debug)]
1593
struct VariableValue<'v> {
1594
    name: String,       // name of variable
1595
    value: Option<Value<'v>>,   // xpath value, typically a constant like "true" or "0", but could be "*/*[1]" to store some nodes   
1596
}
1597
1598
impl fmt::Display for VariableValue<'_> {
1599
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1600
0
        let value = match &self.value {
1601
0
            None => "unset".to_string(),
1602
0
            Some(val) => format!("{val:?}")
1603
        };
1604
0
        return write!(f, "[name: {}, value: {}]", self.name, value);
1605
0
    }   
1606
}
1607
1608
impl VariableDefinition {
1609
472k
    fn build(name_value_def: &Yaml) -> Result<VariableDefinition> {
1610
472k
        match name_value_def.as_hash() {
1611
472k
            Some(map) => {
1612
472k
                if map.len() != 1 {
1613
0
                    bail!("definition is not a key/value pair. Found {}",
1614
0
                            yaml_to_string(name_value_def, 1) );
1615
472k
                }
1616
472k
                let (name, value) = map.iter().next().unwrap();
1617
472k
                let name = as_str_checked( name)
1618
472k
                    .with_context(|| 
format!0
( "definition name is not a string: {}",
1619
472k
                            
yaml_to_string0
(
name0
, 1) ))
?0
.to_string();
1620
472k
                match value {
1621
472k
                    Yaml::Boolean(_) | Yaml::String(_)  | Yaml::Integer(_) | Yaml::Real(_) => (),
1622
0
                    _ => bail!("definition value is not a string, boolean, or number. Found {}",
1623
0
                            yaml_to_string(value, 1) )
1624
                };
1625
                return Ok(
1626
                    VariableDefinition{
1627
472k
                        name,
1628
472k
                        value: MyXPath::build(value)
?0
1629
                    }
1630
                );
1631
            },
1632
0
            None => bail!("definition is not a key/value pair. Found {}",
1633
0
                            yaml_to_string(name_value_def, 1) )
1634
        }
1635
472k
    }
1636
}
1637
1638
1639
#[derive(Debug, Clone)]
1640
struct VariableDefinitions {
1641
    defs: Vec<VariableDefinition>
1642
}
1643
1644
impl fmt::Display for VariableDefinitions {
1645
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1646
0
        for def in &self.defs {
1647
0
            write!(f, "{def},")?;
1648
        }
1649
0
        return Ok( () );
1650
0
    }
1651
}
1652
1653
struct VariableValues<'v> {
1654
    defs: Vec<VariableValue<'v>>
1655
}
1656
1657
impl fmt::Display for VariableValues<'_> {
1658
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1659
0
        for value in &self.defs {
1660
0
            write!(f, "{value}")?;
1661
        }
1662
0
        return writeln!(f);
1663
0
    }
1664
}
1665
1666
impl VariableDefinitions {
1667
1.11M
    fn new(len: usize) -> VariableDefinitions {
1668
1.11M
        return VariableDefinitions{ defs: Vec::with_capacity(len) };
1669
1.11M
    }
1670
1671
1.11M
    fn build(defs: &Yaml) -> Result<VariableDefinitions> {
1672
1.11M
        if defs.is_badvalue() {
1673
834k
            return Ok( VariableDefinitions::new(0) );
1674
277k
        };
1675
277k
        if defs.is_array() {
1676
277k
            let defs = defs.as_vec().unwrap();
1677
277k
            let mut definitions = VariableDefinitions::new(defs.len());
1678
472k
            for def in 
defs277k
{
1679
472k
                let variable_def = VariableDefinition::build(def)
1680
472k
                        .context("definition of 'variables'")
?0
;
1681
472k
                definitions.push( variable_def);
1682
            };
1683
277k
            return Ok (definitions );
1684
0
        }
1685
0
        bail!( "'variables' is not an array of {{name: xpath-value}} definitions. Found {}'",
1686
0
                yaml_to_string(defs, 1) );
1687
1.11M
    }
1688
1689
472k
    fn push(&mut self, var_def: VariableDefinition) {
1690
472k
        self.defs.push(var_def);
1691
472k
    }
1692
1693
241k
    fn len(&self) -> usize {
1694
241k
        return self.defs.len();
1695
241k
    }
1696
}
1697
1698
struct ContextStack<'c> {
1699
    // Note: values are generated by calling value_of on an Evaluation -- that makes the two lifetimes the same
1700
    old_values: Vec<VariableValues<'c>>,   // store old values so they can be set on pop 
1701
    base: sxd_xpath::Context<'c>                      // initial context -- contains all the function defs and pref variables
1702
}
1703
1704
impl fmt::Display for ContextStack<'_> {
1705
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1706
0
        writeln!(f, " {} old_values", self.old_values.len())?;
1707
0
        for values in &self.old_values {
1708
0
            writeln!(f, "  {values}")?;
1709
        }
1710
0
        return writeln!(f);
1711
0
    }
1712
}
1713
1714
impl<'c, 'r> ContextStack<'c> {
1715
22.7k
    fn new<'a,>(pref_manager: &'a PreferenceManager) -> ContextStack<'c> {
1716
22.7k
        let prefs = pref_manager.merge_prefs();
1717
22.7k
        let mut context_stack = ContextStack {
1718
22.7k
            base: ContextStack::base_context(prefs),
1719
22.7k
            old_values: Vec::with_capacity(31)      // should avoid allocations
1720
22.7k
        };
1721
        // FIX: the list of variables to set should come from definitions.yaml
1722
        // These can't be set on the <math> tag because of the "translate" command which starts speech at an 'id'
1723
22.7k
        context_stack.base.set_variable("MatchingPause", Value::Boolean(false));
1724
22.7k
        context_stack.base.set_variable("IsColumnSilent", Value::Boolean(false));
1725
1726
1727
22.7k
        return context_stack;
1728
22.7k
    }
1729
1730
22.7k
    fn base_context(var_defs: PreferenceHashMap) -> sxd_xpath::Context<'c> {
1731
22.7k
        let mut context  = sxd_xpath::Context::new();
1732
22.7k
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
1733
22.7k
        crate::xpath_functions::add_builtin_functions(&mut context);
1734
1.88M
        for (key, value) in 
var_defs22.7k
{
1735
1.88M
            context.set_variable(key.as_str(), yaml_to_value(&value));
1736
1.88M
            // if let Some(str_value) = value.as_str() {
1737
1.88M
            //     if str_value != "Auto" {
1738
1.88M
            //         debug!("Set {}='{}'", key.as_str(), str_value);
1739
1.88M
            //     }
1740
1.88M
            // }
1741
1.88M
        };
1742
22.7k
        return context;
1743
22.7k
    }
1744
1745
3.78k
    fn set_globals(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1746
        // for each var/value pair, evaluate the value and add the var/value to the base context
1747
4.84k
        for def in 
&new_vars.defs3.78k
{
1748
            // set the new value
1749
4.84k
            let new_value = match def.value.evaluate(&self.base, mathml) {
1750
4.84k
                Ok(val) => val,
1751
0
                Err(_) => bail!(format!("Can't evaluate variable def for {}", def)),
1752
            };
1753
4.84k
            let qname = QName::new(def.name.as_str());
1754
4.84k
            self.base.set_variable(qname, new_value);
1755
        }
1756
3.78k
        return Ok( () );
1757
3.78k
    }
1758
1759
27.3k
    fn push(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1760
        // store the old value and set the new one 
1761
27.3k
        let mut old_values = VariableValues {defs: Vec::with_capacity(new_vars.defs.len()) };
1762
27.3k
        let evaluation = Evaluation::new(&self.base, Node::Element(mathml));
1763
66.9k
        for def in 
&new_vars.defs27.3k
{
1764
66.9k
            // get the old value (might not be defined)
1765
66.9k
            let qname = QName::new(def.name.as_str());
1766
66.9k
            let old_value = evaluation.value_of(qname).cloned();
1767
66.9k
            old_values.defs.push( VariableValue{ name: def.name.clone(), value: old_value} );
1768
66.9k
        }
1769
1770
        // use a second loop because of borrow problem with self.base and 'evaluation'
1771
66.9k
        for def in 
&new_vars.defs27.3k
{
1772
            // set the new value
1773
66.9k
            let new_value = match def.value.evaluate(&self.base, mathml) {
1774
66.9k
                Ok(val) => val,
1775
0
                Err(_) => Value::Nodeset(sxd_xpath::nodeset::Nodeset::new()),
1776
            };
1777
66.9k
            let qname = QName::new(def.name.as_str());
1778
66.9k
            self.base.set_variable(qname, new_value);
1779
        }
1780
27.3k
        self.old_values.push(old_values);
1781
27.3k
        return Ok( () );
1782
27.3k
    }
1783
1784
27.3k
    fn pop(&mut self) {
1785
        const MISSING_VALUE: &str = "-- unset value --";     // can't remove a variable from context, so use this value
1786
27.3k
        let old_values = self.old_values.pop().unwrap();
1787
66.9k
        for variable in 
old_values.defs27.3k
{
1788
66.9k
            let qname = QName::new(&variable.name);
1789
66.9k
            let old_value = match variable.value {
1790
22.8k
                None => Value::String(MISSING_VALUE.to_string()),
1791
44.1k
                Some(val) => val,
1792
            };
1793
66.9k
            self.base.set_variable(qname, old_value);
1794
        }
1795
27.3k
    }
1796
}
1797
1798
1799
1.88M
fn yaml_to_value<'b>(yaml: &Yaml) -> Value<'b> {
1800
1.88M
    return match yaml {
1801
1.47M
        Yaml::String(s) => Value::String(s.clone()),
1802
295k
        Yaml::Boolean(b)  => Value::Boolean(*b),
1803
31.7k
        Yaml::Integer(i)   => Value::Number(*i as f64),
1804
91.0k
        Yaml::Real(s)   => Value::Number(s.parse::<f64>().unwrap()),
1805
        _  => {
1806
0
            error!("yaml_to_value: illegal type found in Yaml value: {}", yaml_to_string(yaml, 1));
1807
0
            Value::String("".to_string())
1808
        },
1809
    }
1810
1.88M
}
1811
1812
1813
// Information for matching a Unicode char (defined in unicode.yaml) and building its replacement
1814
struct UnicodeDef {
1815
    ch: u32,
1816
    speech: ReplacementArray
1817
}
1818
1819
impl  fmt::Display for UnicodeDef {
1820
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1821
0
        return write!(f, "UnicodeDef{{ch: {}, speech: {:?}}}", self.ch, self.speech);
1822
0
    }
1823
}
1824
1825
impl UnicodeDef {
1826
2.24M
    fn build(unicode_def: &Yaml, file_name: &Path, speech_rules: &SpeechRules, use_short: bool) -> Result<Option<Vec<PathBuf>>> {
1827
2.24M
        if let Some(
include_file_name3
) = find_str(unicode_def, "include") {
1828
3
            let do_include_fn = |new_file: &Path| {
1829
3
                speech_rules.read_unicode(Some(new_file.to_path_buf()), use_short)
1830
3
            };
1831
3
            return Ok( Some(process_include(file_name, include_file_name, do_include_fn)
?0
) );
1832
2.24M
        }
1833
        // key: char, value is replacement or array of replacements
1834
2.24M
        let dictionary = unicode_def.as_hash();
1835
2.24M
        if dictionary.is_none() {
1836
0
            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1837
2.24M
        }
1838
1839
2.24M
        let dictionary = dictionary.unwrap();
1840
2.24M
        if dictionary.len() != 1 {
1841
0
            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1842
2.24M
        }
1843
1844
2.24M
        let (ch, replacements) = dictionary.iter().next().ok_or_else(|| 
anyhow!0
("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}",
yaml_to_string0
(
unicode_def0
, 0)))
?0
;
1845
2.24M
        let mut unicode_table = if use_short {
1846
1.06M
            speech_rules.unicode_short.borrow_mut()
1847
        } else {
1848
1.17M
            speech_rules.unicode_full.borrow_mut()
1849
        };
1850
2.24M
        if let Some(str) = ch.as_str() {
1851
2.24M
            if str.is_empty() {
1852
0
                bail!("Empty character definition. Replacement is {}", replacements.as_str().unwrap());
1853
2.24M
            }
1854
2.24M
            let mut chars = str.chars();
1855
2.24M
            let first_ch = chars.next().unwrap();       // non-empty string, so a char exists
1856
2.24M
            if chars.next().is_some() {                       // more than one char
1857
54.7k
                if str.contains('-')  {
1858
38.4k
                    return process_range(str, replacements, unicode_table);
1859
16.2k
                } else if first_ch != '0' {     // exclude 0xDDDD
1860
74.5k
                    for ch in 
str16.2k
.
chars16.2k
() { // restart the iterator
1861
74.5k
                        let ch_as_str = ch.to_string();
1862
74.5k
                        if unicode_table.insert(ch as u32, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1863
74.5k
                                            .with_context(|| 
format!0
("In definition of char: '{str}'"))
?0
.replacements).is_some() {
1864
0
                            error!("*** Character '{}' (0x{:X}) is repeated", ch, ch as u32);
1865
74.5k
                        }
1866
                    }
1867
16.2k
                    return Ok(None);
1868
0
                }
1869
2.18M
            }
1870
0
        }
1871
1872
2.18M
        let ch = UnicodeDef::get_unicode_char(ch)
?0
;
1873
2.18M
        if unicode_table.insert(ch, ReplacementArray::build(replacements)
1874
2.18M
                                        .with_context(|| 
format!0
("In definition of char: '{}' (0x{})",
1875
2.18M
                                                                        
char::from_u320
(
ch0
).
unwrap0
(), ch))
?0
.replacements).is_some() {
1876
147
            error!("*** Character '{}' (0x{:X}) is repeated", 
char::from_u320
(
ch0
).
unwrap0
(), ch);
1877
2.18M
        }
1878
2.18M
        return Ok(None);
1879
1880
38.4k
        fn process_range(def_range: &str, replacements: &Yaml, mut unicode_table: RefMut<HashMap<u32,Vec<Replacement>>>) -> Result<Option<Vec<PathBuf>>> {
1881
            // should be a character range (e.g., "A-Z")
1882
            // iterate over that range and also substitute the char for '.' in the 
1883
38.4k
            let mut range = def_range.split('-');
1884
38.4k
            let first = range.next().unwrap().chars().next().unwrap() as u32;
1885
38.4k
            let last = range.next().unwrap().chars().next().unwrap() as u32;
1886
38.4k
            if range.next().is_some() {
1887
0
                bail!("Character range definition has more than one '-': '{}'", def_range);
1888
38.4k
            }
1889
1890
889k
            for ch in 
first..last+138.4k
{
1891
889k
                let ch_as_str = char::from_u32(ch).unwrap().to_string();
1892
889k
                unicode_table.insert(ch, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1893
889k
                                        .with_context(|| 
format!0
("In definition of char: '{def_range}'"))
?0
.replacements);
1894
            };
1895
1896
38.4k
            return Ok(None)
1897
38.4k
        }
1898
1899
10.3M
        fn substitute_ch(yaml: &Yaml, ch: &str) -> Yaml {
1900
10.3M
            return match yaml {
1901
2.34M
                Yaml::Array(v) => {
1902
                    Yaml::Array(
1903
2.34M
                        v.iter()
1904
3.03M
                         .
map2.34M
(|e| substitute_ch(e, ch))
1905
2.34M
                         .collect::<Vec<Yaml>>()
1906
                    )
1907
                },
1908
4.52M
                Yaml::Hash(h) => {
1909
                    Yaml::Hash(
1910
4.52M
                        h.iter()
1911
6.34M
                         .
map4.52M
(|(key,val)| (key.clone(), substitute_ch(val, ch)) )
1912
4.52M
                         .collect::<Hash>()
1913
                    )
1914
                },
1915
3.47M
                Yaml::String(s) => Yaml::String( s.replace('.', ch) ),
1916
0
                _ => yaml.clone(),
1917
            }
1918
10.3M
        }
1919
2.24M
    }
1920
    
1921
2.18M
    fn get_unicode_char(ch: &Yaml) -> Result<u32> {
1922
        // either "a" or 0x1234 (number)
1923
2.18M
        if let Some(ch) = ch.as_str() {
1924
2.18M
            let mut ch_iter = ch.chars();
1925
2.18M
            let unicode_ch = ch_iter.next();
1926
2.18M
            if unicode_ch.is_none() || ch_iter.next().is_some() {
1927
0
                bail!("Wanted unicode char, found string '{}')", ch);
1928
2.18M
            };
1929
2.18M
            return Ok( unicode_ch.unwrap() as u32 );
1930
0
        }
1931
    
1932
0
        if let Some(num) = ch.as_i64() {
1933
0
            return Ok( num as u32 );
1934
0
        }
1935
0
        bail!("Unicode character '{}' can't be converted to an code point", yaml_to_string(ch, 0));
1936
2.18M
    }    
1937
}
1938
1939
// Fix: there should be a cache so subsequent library calls don't have to read in the same speech rules
1940
//   likely a cache of size 1 is fine
1941
// Fix: all statics should be gathered together into one structure that is a Mutex
1942
//   for each library call, we should grab a lock on the Mutex in case others try to call
1943
//   at the same time.
1944
//   If this turns out to be something that others actually do, then a cache > 1 would be good
1945
1946
 type RuleTable = HashMap<String, Vec<Box<SpeechPattern>>>;
1947
 type UnicodeTable = Rc<RefCell<HashMap<u32,Vec<Replacement>>>>;
1948
 type FilesAndTimesShared = Rc<RefCell<FilesAndTimes>>;
1949
1950
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
1951
 pub enum RulesFor {
1952
     Intent,
1953
     Speech,
1954
     OverView,
1955
     Navigation,
1956
     Braille,
1957
 }
1958
1959
 impl fmt::Display for RulesFor {
1960
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1961
0
        let name = match self {
1962
0
            RulesFor::Intent => "Intent",
1963
0
            RulesFor::Speech => "Speech",
1964
0
            RulesFor::OverView => "OverView",
1965
0
            RulesFor::Navigation => "Navigation",
1966
0
            RulesFor::Braille => "Braille",
1967
        };
1968
0
       return write!(f, "{name}");
1969
0
    }
1970
 }
1971
1972
 
1973
#[derive(Debug, Clone)]
1974
pub struct FileAndTime {
1975
    file: PathBuf,
1976
    time: SystemTime,
1977
}
1978
1979
impl FileAndTime {
1980
0
    fn new(file: PathBuf) -> FileAndTime {
1981
0
        return FileAndTime {
1982
0
            file,
1983
0
            time: SystemTime::UNIX_EPOCH,
1984
0
        }
1985
0
    }
1986
1987
    // used for debugging preference settings
1988
0
    pub fn debug_get_file(&self) -> Option<&str> {
1989
0
        return self.file.to_str();
1990
0
    }
1991
1992
8.29k
    pub fn new_with_time(file: PathBuf) -> FileAndTime {
1993
8.29k
        return FileAndTime {
1994
8.29k
            time: FileAndTime::get_metadata(&file),
1995
8.29k
            file,
1996
8.29k
        }
1997
8.29k
    }
1998
1999
33.7k
    pub fn is_up_to_date(&self) -> bool {
2000
33.7k
        let file_mod_time = FileAndTime::get_metadata(&self.file);
2001
33.7k
        return self.time >= file_mod_time;
2002
33.7k
    }
2003
2004
140k
    fn get_metadata(path: &Path) -> SystemTime {
2005
        use std::fs;
2006
140k
        if !cfg!(target_family = "wasm") {
2007
140k
            let metadata = fs::metadata(path);
2008
140k
            if let Ok(
metadata120k
) = metadata &&
2009
120k
               let Ok(mod_time) = metadata.modified() {
2010
120k
                    return mod_time;
2011
20.3k
                }
2012
0
        }
2013
20.3k
        return SystemTime::UNIX_EPOCH
2014
140k
    }
2015
2016
}
2017
#[derive(Debug, Default)]
2018
pub struct FilesAndTimes {
2019
    // ft[0] is the main file -- other files are included by it (or recursively)
2020
    // We could be a little smarter about invalidation by tracking what file is the parent (including file),
2021
    // but it seems more complicated than it is worth
2022
    ft: Vec<FileAndTime>
2023
}
2024
2025
impl FilesAndTimes {
2026
0
    pub fn new(start_path: PathBuf) -> FilesAndTimes {
2027
0
        let mut ft = Vec::with_capacity(8);
2028
0
        ft.push( FileAndTime::new(start_path) );
2029
0
        return FilesAndTimes{ ft };
2030
0
    }
2031
2032
    /// Returns true if the main file matches the corresponding preference location and files' times are all current
2033
33.4k
    pub fn is_file_up_to_date(&self, pref_path: &Path, should_ignore_file_time: bool) -> bool {
2034
2035
        // if the time isn't set or the path is different from the preference (which might have changed), return false
2036
33.4k
        if self.ft.is_empty() || 
self.as_path() != pref_path28.0k
{
2037
5.74k
            return false;
2038
27.7k
        }
2039
27.7k
        if should_ignore_file_time || 
cfg!1.18k
(target_family = "wasm") {
2040
26.5k
            return true;
2041
1.18k
        }
2042
1.18k
        if  self.ft[0].time == SystemTime::UNIX_EPOCH {
2043
0
            return false;
2044
1.18k
        }
2045
2046
2047
        // check the time stamp on the included files -- if the head file hasn't changed, the paths for the included files will be the same
2048
1.19k
        for file in 
&self.ft1.18k
{
2049
1.19k
            if !file.is_up_to_date() {
2050
1
                return false;
2051
1.19k
            }
2052
        }
2053
1.18k
        return true;
2054
33.4k
    }
2055
2056
19.8k
    fn set_files_and_times(&mut self, new_files: Vec<PathBuf>)  {
2057
19.8k
        self.ft.clear();
2058
98.4k
        for path in 
new_files19.8k
{
2059
98.4k
            let time = FileAndTime::get_metadata(&path);      // do before move below
2060
98.4k
            self.ft.push( FileAndTime{ file: path, time })
2061
        }
2062
19.8k
    }
2063
2064
28.0k
    pub fn as_path(&self) -> &Path {
2065
28.0k
        assert!(!self.ft.is_empty());
2066
28.0k
        return &self.ft[0].file;
2067
28.0k
    }
2068
2069
0
    pub fn paths(&self) -> Vec<PathBuf> {
2070
0
        return self.ft.iter().map(|ft| ft.file.clone()).collect::<Vec<PathBuf>>();
2071
0
    }
2072
2073
}
2074
2075
2076
/// `SpeechRulesWithContext` encapsulates a named group of speech rules (e.g, "ClearSpeak")
2077
/// along with the preferences to be used for speech.
2078
// Note: if we can't read the files, an error message is stored in the structure and needs to be checked.
2079
// I tried using Result<SpeechRules>, but it was a mess with all the unwrapping.
2080
// Important: the code needs to be careful to check this at the top level calls
2081
pub struct SpeechRules {
2082
    error: String,
2083
    name: RulesFor,
2084
    pub pref_manager: Rc<RefCell<PreferenceManager>>,
2085
    rules: RuleTable,                              // the speech rules used (partitioned into MathML tags in hashmap, then linearly searched)
2086
    rule_files: FilesAndTimes,                     // files that were read
2087
    translate_single_chars_only: bool,             // strings like "half" don't want 'a's translated, but braille does
2088
    unicode_short: UnicodeTable,                   // the short list of rules used for Unicode characters
2089
    unicode_short_files: FilesAndTimesShared,     // files that were read
2090
    unicode_full:  UnicodeTable,                   // the long remaining rules used for Unicode characters
2091
    unicode_full_files: FilesAndTimesShared,      // files that were read
2092
    definitions_files: FilesAndTimesShared,       // files that were read
2093
}
2094
2095
impl fmt::Display for SpeechRules {
2096
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2097
0
        writeln!(f, "SpeechRules '{}'\n{})", self.name, self.pref_manager.borrow())?;
2098
0
        let mut rules_vec: Vec<(&String, &Vec<Box<SpeechPattern>>)> = self.rules.iter().collect();
2099
0
        rules_vec.sort_by_key(|(tag_name, _)| tag_name.as_str());
2100
0
        for (tag_name, rules) in rules_vec {
2101
0
            writeln!(f, "   {}: #patterns {}", tag_name, rules.len())?;
2102
        };
2103
0
        return writeln!(f, "   {}+{} unicode entries", &self.unicode_short.borrow().len(), &self.unicode_full.borrow().len());
2104
0
    }
2105
}
2106
2107
2108
/// `SpeechRulesWithContext` encapsulates a named group of speech rules (e.g, "ClearSpeak")
2109
/// along with the preferences to be used for speech.
2110
/// Because speech rules can define variables, there is also a context that is carried with them
2111
pub struct SpeechRulesWithContext<'c, 's:'c, 'm:'c> {
2112
    speech_rules: &'s SpeechRules,
2113
    context_stack: ContextStack<'c>,   // current value of (context) variables
2114
    doc: Document<'m>,
2115
    nav_node_id: &'m str,
2116
    nav_node_offset: usize,
2117
    pub inside_spell: bool,     // hack to allow 'spell' to avoid infinite loop (see 'spell' implementation in tts.rs)
2118
    pub translate_count: usize, // hack to avoid 'translate' infinite loop (see 'spell' implementation in tts.rs)
2119
}
2120
2121
impl<'c, 's:'c, 'm:'c> fmt::Display for SpeechRulesWithContext<'c, 's,'m> {
2122
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2123
0
        writeln!(f, "SpeechRulesWithContext \n{})", self.speech_rules)?;
2124
0
        return writeln!(f, "   {} context entries, nav node id '({}, {})'", &self.context_stack, self.nav_node_id, self.nav_node_offset);
2125
0
    }
2126
}
2127
2128
thread_local!{
2129
    /// SPEECH_UNICODE_SHORT is shared among several rules, so "RC" is used
2130
    static SPEECH_UNICODE_SHORT: UnicodeTable =
2131
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
2132
        
2133
    /// SPEECH_UNICODE_FULL is shared among several rules, so "RC" is used
2134
    static SPEECH_UNICODE_FULL: UnicodeTable =
2135
        Rc::new( RefCell::new( HashMap::with_capacity(6500) ) );
2136
        
2137
    /// BRAILLE_UNICODE_SHORT is shared among several rules, so "RC" is used
2138
    static BRAILLE_UNICODE_SHORT: UnicodeTable =
2139
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
2140
        
2141
    /// BRAILLE_UNICODE_FULL is shared among several rules, so "RC" is used
2142
    static BRAILLE_UNICODE_FULL: UnicodeTable =
2143
        Rc::new( RefCell::new( HashMap::with_capacity(5000) ) );
2144
2145
    /// SPEECH_DEFINITION_FILES_AND_TIMES is shared among several rules, so "RC" is used
2146
    static SPEECH_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
2147
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2148
        
2149
    /// BRAILLE_DEFINITION_FILES_AND_TIMES is shared among several rules, so "RC" is used
2150
    static BRAILLE_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
2151
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2152
        
2153
    /// SPEECH_UNICODE_SHORT_FILES_AND_TIMES is shared among several rules, so "RC" is used
2154
    static SPEECH_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
2155
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2156
        
2157
    /// SPEECH_UNICODE_FULL_FILES_AND_TIMES is shared among several rules, so "RC" is used
2158
    static SPEECH_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
2159
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2160
        
2161
    /// BRAILLE_UNICODE_SHORT_FILES_AND_TIMES is shared among several rules, so "RC" is used
2162
    static BRAILLE_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
2163
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2164
        
2165
    /// BRAILLE_UNICODE_FULL_FILES_AND_TIMES is shared among several rules, so "RC" is used
2166
    static BRAILLE_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
2167
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2168
        
2169
    /// The current set of speech rules
2170
    // maybe this should be a small cache of rules in case people switch rules/prefs?
2171
    pub static INTENT_RULES: RefCell<SpeechRules> =
2172
            RefCell::new( SpeechRules::new(RulesFor::Intent, true) );
2173
2174
    pub static SPEECH_RULES: RefCell<SpeechRules> =
2175
            RefCell::new( SpeechRules::new(RulesFor::Speech, true) );
2176
2177
    pub static OVERVIEW_RULES: RefCell<SpeechRules> =
2178
            RefCell::new( SpeechRules::new(RulesFor::OverView, true) );
2179
2180
    pub static NAVIGATION_RULES: RefCell<SpeechRules> =
2181
            RefCell::new( SpeechRules::new(RulesFor::Navigation, true) );
2182
2183
    pub static BRAILLE_RULES: RefCell<SpeechRules> =
2184
            RefCell::new( SpeechRules::new(RulesFor::Braille, false) );
2185
}
2186
2187
impl SpeechRules {
2188
8.16k
    pub fn new(name: RulesFor, translate_single_chars_only: bool) -> SpeechRules {
2189
8.16k
        let globals = if name == RulesFor::Braille {
2190
1.35k
            (
2191
1.35k
                (BRAILLE_UNICODE_SHORT.with(Rc::clone), BRAILLE_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2192
1.35k
                (BRAILLE_UNICODE_FULL. with(Rc::clone), BRAILLE_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2193
1.35k
                BRAILLE_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2194
1.35k
            )
2195
        } else {
2196
6.80k
            (
2197
6.80k
                (SPEECH_UNICODE_SHORT.with(Rc::clone), SPEECH_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2198
6.80k
                (SPEECH_UNICODE_FULL. with(Rc::clone), SPEECH_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2199
6.80k
                SPEECH_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2200
6.80k
            )
2201
        };
2202
2203
        return SpeechRules {
2204
8.16k
            error: Default::default(),
2205
8.16k
            name,
2206
8.16k
            rules: HashMap::with_capacity(if name == RulesFor::Intent || 
name == RulesFor::Speech5.53k
{
5006.76k
} else {
501.39k
}), // lazy load them
2207
8.16k
            rule_files: FilesAndTimes::default(),
2208
8.16k
            unicode_short: globals.0.0,       // lazy load them
2209
8.16k
            unicode_short_files: globals.0.1,
2210
8.16k
            unicode_full: globals.1.0,        // lazy load them
2211
8.16k
            unicode_full_files: globals.1.1,
2212
8.16k
            definitions_files: globals.2,
2213
8.16k
            translate_single_chars_only,
2214
8.16k
            pref_manager: PreferenceManager::get(),
2215
        };
2216
8.16k
}
2217
2218
17.7k
    pub fn get_error(&self) -> Option<&str> {
2219
17.7k
        return if self.error.is_empty() {
2220
17.7k
             None
2221
        } else {
2222
0
            Some(&self.error)
2223
        }
2224
17.7k
    }
2225
2226
15.3k
    pub fn read_files(&mut self) -> Result<()> {
2227
15.3k
        let check_rule_files = self.pref_manager.borrow().pref_to_string("CheckRuleFiles");
2228
15.3k
        if check_rule_files != "None" {  // "Prefs" or "All" are other values
2229
15.3k
            self.pref_manager.borrow_mut().set_preference_files()
?0
;
2230
2
        }
2231
15.3k
        let should_ignore_file_time = self.pref_manager.borrow().pref_to_string("CheckRuleFiles") != "All";     // ignore for "None", "Prefs"
2232
15.3k
        let rule_file = self.pref_manager.borrow().get_rule_file(&self.name).to_path_buf();     // need to create PathBuf to avoid a move/use problem
2233
15.3k
        if self.rules.is_empty() || 
!7.17k
self.rule_files7.17k
.
is_file_up_to_date7.17k
(&rule_file, should_ignore_file_time) {
2234
8.35k
            self.rules.clear();
2235
8.35k
            let files_read = self.read_patterns(&rule_file)
?0
;
2236
8.35k
            self.rule_files.set_files_and_times(files_read);
2237
6.94k
        }
2238
2239
15.3k
        let pref_manager = self.pref_manager.borrow();
2240
15.3k
        let unicode_pref_files = if self.name == RulesFor::Braille {
pref_manager.get_braille_unicode_file()1.82k
} else {
pref_manager.get_speech_unicode_file()13.4k
};
2241
2242
15.3k
        if !self.unicode_short_files.borrow().is_file_up_to_date(unicode_pref_files.0, should_ignore_file_time) {
2243
5.50k
            self.unicode_short.borrow_mut().clear();
2244
5.50k
            self.unicode_short_files.borrow_mut().set_files_and_times(self.read_unicode(None, true)
?0
);
2245
9.80k
        }
2246
2247
15.3k
        if self.definitions_files.borrow().ft.is_empty() || 
!9.82k
self.definitions_files.borrow()9.82k
.
is_file_up_to_date9.82k
(
2248
9.82k
                            pref_manager.get_definitions_file(self.name != RulesFor::Braille),
2249
9.82k
                            should_ignore_file_time
2250
9.82k
        ) {
2251
5.49k
            self.definitions_files.borrow_mut().set_files_and_times(read_definitions_file(self.name != RulesFor::Braille)
?0
);
2252
9.80k
        }
2253
15.3k
        return Ok( () );
2254
15.3k
    }
2255
2256
38.6k
    fn read_patterns(&mut self, path: &Path) -> Result<Vec<PathBuf>> {
2257
        // info!("Reading rule file: {}", p.to_str().unwrap());
2258
38.6k
        let rule_file_contents = read_to_string_shim(path).with_context(|| 
format!0
("cannot read file '{}'",
path0
.
to_str0
().
unwrap0
()))
?0
;
2259
38.6k
        let rules_build_fn = |pattern: &Yaml| {
2260
38.6k
            self.build_speech_patterns(pattern, path)
2261
38.6k
                .with_context(||
format!0
("in file {:?}",
path0
.
to_str0
().
unwrap0
()))
2262
38.6k
        };
2263
38.6k
        return compile_rule(&rule_file_contents, rules_build_fn)
2264
38.6k
                .with_context(||
format!0
("in file {:?}",
path0
.
to_str0
().
unwrap0
()));
2265
38.6k
    }
2266
2267
38.6k
    fn build_speech_patterns(&mut self, patterns: &Yaml, file_name: &Path) -> Result<Vec<PathBuf>> {
2268
        // Rule::SpeechPatternList
2269
38.6k
        let patterns_vec = patterns.as_vec();
2270
38.6k
        if patterns_vec.is_none() {
2271
0
            bail!(yaml_type_err(patterns, "array"));
2272
38.6k
        }
2273
38.6k
        let patterns_vec = patterns.as_vec().unwrap();
2274
38.6k
        let mut files_read = vec![file_name.to_path_buf()];
2275
894k
        for entry in 
patterns_vec.iter()38.6k
{
2276
894k
            if let Some(
mut added_files30.2k
) = SpeechPattern::build(entry, file_name, self)
?0
{
2277
30.2k
                files_read.append(&mut added_files);
2278
864k
            }
2279
        }
2280
38.6k
        return Ok(files_read)
2281
38.6k
    }
2282
    
2283
5.97k
    fn read_unicode(&self, path: Option<PathBuf>, use_short: bool) -> Result<Vec<PathBuf>> {
2284
5.97k
        let path = match path {
2285
3
            Some(p) => p,
2286
            None => {
2287
                // get the path to either the short or long unicode file
2288
5.97k
                let pref_manager = self.pref_manager.borrow();
2289
5.97k
                let unicode_files = if self.name == RulesFor::Braille {
2290
1.57k
                    pref_manager.get_braille_unicode_file()
2291
                } else {
2292
4.40k
                    pref_manager.get_speech_unicode_file()
2293
                };
2294
5.97k
                let unicode_files = if use_short {
unicode_files.05.50k
} else {
unicode_files.1468
};
2295
5.97k
                unicode_files.to_path_buf()
2296
            }
2297
        };
2298
2299
        // FIX: should read first (lang), then supplement with second (region)
2300
        // info!("Reading unicode file {}", path.to_str().unwrap());
2301
5.97k
        let unicode_file_contents = read_to_string_shim(&path)
?0
;
2302
5.97k
        let unicode_build_fn = |unicode_def_list: &Yaml| {
2303
5.97k
            let unicode_defs = unicode_def_list.as_vec();
2304
5.97k
            if unicode_defs.is_none() {
2305
0
                bail!("File '{}' does not begin with an array", yaml_to_type(unicode_def_list));
2306
5.97k
            };
2307
5.97k
            let mut files_read = vec![path.to_path_buf()];
2308
2.24M
            for unicode_def in 
unicode_defs5.97k
.
unwrap5.97k
() {
2309
2.24M
                if let Some(
mut added_files3
) = UnicodeDef::build(unicode_def, &path, self, use_short)
2310
2.24M
                                                                .with_context(|| 
{format!0
("In file {:?}",
path.to_str()0
)
}0
)
?0
{
2311
3
                    files_read.append(&mut added_files);
2312
2.24M
                }
2313
            };
2314
5.97k
            return Ok(files_read)
2315
5.97k
        };
2316
2317
5.97k
        return compile_rule(&unicode_file_contents, unicode_build_fn)
2318
5.97k
                    .with_context(||
format!0
("in file {:?}",
path.to_str()0
.
unwrap0
()));
2319
5.97k
    }
2320
2321
0
    pub fn print_sizes() -> String {
2322
        // let _ = &SPEECH_RULES.with_borrow(|rules| {
2323
        //     debug!("SPEECH RULES entries\n");
2324
        //     let rules = &rules.rules;
2325
        //     for (key, _) in rules.iter() {
2326
        //         debug!("key: {}", key);
2327
        //     }
2328
        // });
2329
0
        let mut answer = rule_size(&SPEECH_RULES, "SPEECH_RULES");
2330
0
        answer += &rule_size(&INTENT_RULES, "INTENT_RULES");
2331
0
        answer += &rule_size(&BRAILLE_RULES, "BRAILLE_RULES");
2332
0
        answer += &rule_size(&NAVIGATION_RULES, "NAVIGATION_RULES");
2333
0
        answer += &rule_size(&OVERVIEW_RULES, "OVERVIEW_RULES");
2334
0
        SPEECH_RULES.with_borrow(|rule| {
2335
0
            answer += &format!("Speech Unicode tables: short={}/{}, long={}/{}\n",
2336
0
                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2337
0
                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2338
0
        });
2339
0
        BRAILLE_RULES.with_borrow(|rule| {
2340
0
            answer += &format!("Braille Unicode tables: short={}/{}, long={}/{}\n",
2341
0
                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2342
0
                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2343
0
        });
2344
0
        return answer;
2345
2346
0
        fn rule_size(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, name: &str) -> String {
2347
0
            rules.with_borrow(|rule| {
2348
0
                let hash_map = &rule.rules;
2349
0
                return format!("{}: {}/{}\n", name, hash_map.len(), hash_map.capacity());
2350
0
            })
2351
0
        }
2352
0
    }
2353
}
2354
2355
2356
/// We track four different lifetimes:
///   'c -- the lifetime of the context and mathml
///   's -- the lifetime of the speech rules (which is static)
///   'r -- the lifetime of the reference (this seems to be key to keep the rust memory checker happy)
///   'm -- the lifetime of the document (`doc`) and of the navigation node id
2360
impl<'c, 's:'c, 'r, 'm:'c> SpeechRulesWithContext<'c, 's,'m> {
2361
22.7k
    pub fn new(speech_rules: &'s SpeechRules, doc: Document<'m>, nav_node_id: &'m str, nav_node_offset: usize) -> SpeechRulesWithContext<'c, 's, 'm> {
2362
22.7k
        return SpeechRulesWithContext {
2363
22.7k
            speech_rules,
2364
22.7k
            context_stack: ContextStack::new(&speech_rules.pref_manager.borrow()),
2365
22.7k
            doc,
2366
22.7k
            nav_node_id,
2367
22.7k
            nav_node_offset,
2368
22.7k
            inside_spell: false,
2369
22.7k
            translate_count: 0,
2370
22.7k
        }
2371
22.7k
    }
2372
2373
1.84k
    pub fn get_rules(&mut self) -> &SpeechRules {
2374
1.84k
        return self.speech_rules;
2375
1.84k
    }
2376
2377
45.5k
    pub fn get_context(&mut self) -> &mut sxd_xpath::Context<'c> {
2378
45.5k
        return &mut self.context_stack.base;
2379
45.5k
    }
2380
2381
3.23k
    pub fn get_document(&mut self) -> Document<'m> {
2382
3.23k
        return self.doc;
2383
3.23k
    }
2384
2385
1.13k
    pub fn set_nav_node_offset(&mut self, offset: usize) {
2386
        // debug!("Setting nav node offset to {}", offset);
2387
1.13k
        self.nav_node_offset = offset;
2388
1.13k
    }
2389
2390
121k
    pub fn match_pattern<T:TreeOrString<'c, 'm, T>>(&'r mut self, mathml: Element<'c>) -> Result<T> {
2391
        // debug!("Looking for a match for: \n{}", mml_to_string(mathml));
2392
121k
        let tag_name = mathml.name().local_part();
2393
121k
        let rules = &self.speech_rules.rules;
2394
2395
        // start with priority rules that apply to any node (should be a very small number)
2396
121k
        if let Some(
rule_vector95.8k
) = rules.get("!*") &&
2397
95.8k
           let Some(
result3.18k
) = self.find_match(rule_vector, mathml)
?9
{
2398
3.18k
                return Ok(result);      // found a match
2399
118k
            }
2400
        
2401
118k
        if let Some(
rule_vector116k
) = rules.get(tag_name) &&
2402
116k
           let Some(
result82.1k
) = self.find_match(rule_vector, mathml)
?0
{
2403
82.1k
                return Ok(result);      // found a match
2404
35.9k
            }
2405
2406
        // no rules for specific element, fall back to rules for "*" which *should* be present in all rule files as fallback
2407
35.9k
        if let Some(rule_vector) = rules.get("*") &&
2408
35.9k
           let Some(result) = self.find_match(rule_vector, mathml)
?0
{
2409
35.9k
                return Ok(result);      // found a match
2410
0
            }
2411
2412
        // no rules matched -- poorly written rule file -- let flow through to default error
2413
        // report error message with file name
2414
0
        let speech_manager = self.speech_rules.pref_manager.borrow();
2415
0
        let file_name = speech_manager.get_rule_file(&self.speech_rules.name);
2416
        // FIX: handle error appropriately 
2417
0
        bail!("\nNo match found!\nMissing patterns in {} for MathML.\n{}", file_name.to_string_lossy(), mml_to_string(mathml));
2418
121k
    }
2419
2420
248k
    fn find_match<T:TreeOrString<'c, 'm, T>>(&'r mut self, rule_vector: &[Box<SpeechPattern>], mathml: Element<'c>) -> Result<Option<T>> {
2421
870k
        for pattern in 
rule_vector248k
{
2422
            // debug!("Pattern name: {}", pattern.pattern_name);
2423
            // always pushing and popping around the is_match would be a little cleaner, but push/pop is relatively expensive,
2424
            //   so we optimize and only push first if the variables are needed to do the match
2425
870k
            if pattern.match_uses_var_defs {
2426
7.05k
                self.context_stack.push(pattern.var_defs.clone(), mathml)
?0
;
2427
863k
            }
2428
870k
            if pattern.is_match(&self.context_stack.base, mathml)
2429
870k
                    .with_context(|| 
error_string0
(
pattern0
,
mathml0
) )
?0
{
2430
                // debug!("  find_match: FOUND!!!");
2431
121k
                if !pattern.match_uses_var_defs && 
pattern.var_defs.len() > 0119k
{ // don't push them on twice
2432
13.0k
                    self.context_stack.push(pattern.var_defs.clone(), mathml)
?0
;
2433
108k
                }
2434
121k
                let result = if self.nav_node_offset > 0 &&
2435
47
                            self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() && 
is_leaf7
(
mathml7
) {
2436
7
                    let ch = crate::canonicalize::as_text(mathml).chars().nth(self.nav_node_offset-1).unwrap_or_default();
2437
7
                    let ch = self.replace_single_char(ch, mathml)
?0
;
2438
                    // debug!("find_match: ch={} from '{}'; matched pattern name/tag: {}/{} with nav_node_offset={}",
2439
                    //     ch, crate::canonicalize::as_text(mathml),
2440
                    //     pattern.pattern_name, pattern.tag_name, self.nav_node_offset);
2441
7
                    T::from_string(ch.to_string(), self.doc)
2442
                } else {
2443
121k
                    pattern.replacements.replace(self, mathml)
2444
                };
2445
121k
                if pattern.var_defs.len() > 0 {
2446
14.5k
                    self.context_stack.pop();
2447
106k
                }
2448
121k
                return match result {
2449
121k
                    Ok(s) => {
2450
                        // for all except braille and navigation, nav_node_id will be an empty string and will not match
2451
121k
                        if self.nav_node_id.is_empty() {
2452
102k
                            Ok( Some(s) )
2453
                        } else {
2454
18.5k
                            if self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() {
debug!990
("Matched pattern name/tag: {}/{}", pattern.pattern_name, pattern.tag_name)
}17.5k
;
2455
18.5k
                            Ok ( Some(self.nav_node_adjust(s, mathml)) )
2456
                        }
2457
                    },
2458
9
                    Err(e) => Err( e.context(
2459
9
                        format!(
2460
9
                            "attempting replacement pattern: \"{}\" for \"{}\".\n\
2461
9
                            Replacement\n{}\n...due to matching the MathML\n{} with the pattern\n\
2462
9
                            {}\n\
2463
9
                            The patterns are in {}.\n",
2464
9
                            pattern.pattern_name, pattern.tag_name,
2465
9
                            pattern.replacements.pretty_print_replacements(),
2466
9
                            mml_to_string(mathml), pattern.pattern,
2467
9
                            pattern.file_name
2468
9
                        )
2469
9
                    ))
2470
                }
2471
749k
            } else if pattern.match_uses_var_defs {
2472
5.60k
                self.context_stack.pop();
2473
743k
            }
2474
        };
2475
127k
        return Ok(None);    // no matches
2476
2477
0
        fn error_string(pattern: &SpeechPattern, mathml: Element) -> String {
2478
0
            return format!(
2479
                "error during pattern match using: \"{}\" for \"{}\".\n\
2480
                Pattern is \n{}\nMathML for the match:\n\
2481
                {}\
2482
                The patterns are in {}.\n",
2483
                pattern.pattern_name, pattern.tag_name,
2484
                pattern.pattern,
2485
0
                mml_to_string(mathml),
2486
                pattern.file_name
2487
            );
2488
0
        }
2489
2490
248k
    }
2491
2492
18.5k
    fn nav_node_adjust<T:TreeOrString<'c, 'm, T>>(&self, speech: T, mathml: Element<'c>) -> T {
2493
18.5k
      if let Some(id) = mathml.attribute_value("id") &&
2494
18.5k
         self.nav_node_id == id {
2495
990
        let offset = mathml.attribute_value(crate::navigate::ID_OFFSET).unwrap_or("0");
2496
990
        debug!("nav_node_adjust: id/name='{}/{}' offset?='{}'", id, 
name0
(
mathml0
),
2497
0
               self.nav_node_offset.to_string().as_str() == offset
2498
        );
2499
990
        if is_leaf(mathml) || 
self.nav_node_offset.to_string().as_str() == offset527
{
2500
990
          if self.speech_rules.name == RulesFor::Braille {
2501
469
            let highlight_style =  self.speech_rules.pref_manager.borrow().pref_to_string("BrailleNavHighlight");
2502
469
            return T::highlight_braille(speech, highlight_style);
2503
          } else {
2504
521
            debug!("nav_node_adjust: id='{}' offset='{}/{}'", id, self.nav_node_offset, offset);
2505
521
            return T::mark_nav_speech(speech)
2506
          }
2507
0
        }
2508
17.5k
      }
2509
17.5k
      return speech;
2510
18.5k
    }
2511
    
2512
469
    fn highlight_braille_string(braille: String, highlight_style: String) -> String {
2513
        // add dots 7 & 8 to the Unicode braille (28xx)
2514
469
        if &highlight_style == "Off" || braille.is_empty() {
2515
6
            return braille;
2516
463
        }
2517
        
2518
        // FIX: this seems needlessly complex. It is much simpler if the char can be changed in place...
2519
        // find first char that can get the dots and add them
2520
463
        let mut chars = braille.chars().collect::<Vec<char>>();
2521
2522
        // the 'b' for baseline indicator is really part of the previous token, so it needs to be highlighted but isn't because it is not Unicode braille
2523
463
        let baseline_indicator_hack = PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth";
2524
        // debug!("highlight_braille_string: highlight_style={}\n braille={}", highlight_style, braille);
2525
463
        let mut i_first_modified = 0;
2526
760
        for (i, ch) in 
chars.iter_mut()463
.
enumerate463
() {
2527
760
            let modified_ch = add_dots_to_braille_char(*ch, baseline_indicator_hack);
2528
760
            if *ch != modified_ch {
2529
463
                *ch = modified_ch; 
2530
463
                i_first_modified = i;
2531
463
                break;
2532
297
            };
2533
        };
2534
2535
463
        let mut i_last_modified = i_first_modified;
2536
463
        if &highlight_style != "FirstChar" {
2537
            // find last char so that we know when to modify the char
2538
491
            for i in (
i_first_modified463
..chars.len()).
rev463
(){
2539
491
                let ch = chars[i];
2540
491
                let modified_ch = add_dots_to_braille_char(ch, baseline_indicator_hack);
2541
491
                chars[i] = modified_ch;
2542
491
                if ch !=  modified_ch {
2543
390
                    i_last_modified = i;
2544
390
                    break;
2545
101
                }
2546
            }
2547
0
        }
2548
2549
463
        if &highlight_style == "All" {
2550
            // finish going through the string
2551
      #[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
2552
4
            for 
i0
in i_first_modified+1..i_last_modified {
2553
0
                chars[i] = add_dots_to_braille_char(chars[i], baseline_indicator_hack);
2554
0
            };
2555
459
        }
2556
2557
463
        let result = chars.into_iter().collect::<String>(); 
2558
        // debug!("    result={}", result);
2559
463
        return result;
2560
2561
1.25k
        fn add_dots_to_braille_char(ch: char, baseline_indicator_hack: bool) -> char {
2562
1.25k
            let as_u32 = ch as u32;
2563
1.25k
            if (0x2800..0x28FF).contains(&as_u32) {
2564
919
                return unsafe {char::from_u32_unchecked(as_u32 | 0xC0)};  // safe because we have checked the range
2565
332
            } else if baseline_indicator_hack && 
ch == 'b'89
{
2566
7
                return '𝑏'
2567
            } else {
2568
325
                return ch;
2569
            }
2570
1.25k
        }
2571
469
    }
2572
2573
521
    fn mark_nav_speech(speech: String) -> String {
2574
        // add unique markers (since speech is mostly ascii letters and digits, most any symbol will do)
2575
        // it's a bug (but happened during intent generation), we might have identical id's, choose innermost one
2576
521
        debug!("mark_nav_speech: adding [[ {} ]] ", 
&speech0
);
2577
521
        if !speech.contains("[[") {
2578
521
            return "[[".to_string() + &speech + "]]";
2579
        } else {
2580
0
            return speech
2581
        }
2582
521
    }
2583
2584
456k
    fn replace<T:TreeOrString<'c, 'm, T>>(&'r mut self, replacement: &Replacement, mathml: Element<'c>) -> Result<T> {
2585
        return Ok(
2586
456k
            match replacement {
2587
63.6k
                Replacement::Text(t) => T::from_string(t.clone(), self.doc)
?0
,
2588
151k
                Replacement::XPath(xpath) => xpath.replace(self, mathml)
?9
,
2589
60.7k
                Replacement::TTS(tts) => {
2590
60.7k
                    T::from_string(
2591
60.7k
                        self.speech_rules.pref_manager.borrow().get_tts().replace(tts, &self.speech_rules.pref_manager.borrow(), self, mathml)
?0
,
2592
60.7k
                        self.doc
2593
0
                    )?
2594
                },
2595
45.5k
                Replacement::Intent(intent) => {
2596
45.5k
                    intent.replace(self, mathml)
?0
2597
                },
2598
115k
                Replacement::Test(test) => {
2599
115k
                    test.replace(self, mathml)
?0
2600
                },
2601
7.28k
                Replacement::With(with) => {
2602
7.28k
                    with.replace(self, mathml)
?0
2603
                },
2604
3.78k
                Replacement::SetVariables(vars) => {
2605
3.78k
                    vars.replace(self, mathml)
?0
2606
                },
2607
7.45k
                Replacement::Insert(ic) => {
2608
7.45k
                    ic.replace(self, mathml)
?0
2609
                },
2610
2
                Replacement::Translate(id) => {
2611
2
                    id.replace(self, mathml)
?0
2612
                },
2613
            }
2614
        )
2615
456k
    }
2616
2617
    /// Iterate over all the nodes, concatenating the result strings together with a ' ' between them
2618
    /// If the node is an element, pattern match it
2619
    /// For 'Text' and 'Attribute' nodes, convert them to strings
2620
121k
    fn replace_nodes<T:TreeOrString<'c, 'm, T>>(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T> {
2621
121k
        return T::replace_nodes(self, nodes, mathml);
2622
121k
    }
2623
2624
    /// Iterate over all the nodes finding matches for the elements
2625
    /// For this case of returning MathML, everything else is an error
2626
48.6k
    fn replace_nodes_tree(&'r mut self, nodes: Vec<Node<'c>>, _mathml: Element<'c>) -> Result<Element<'m>> {
2627
48.6k
        let mut children = Vec::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2628
69.6k
        for node in 
nodes48.6k
{
2629
69.6k
            let matched = match node {
2630
41.9k
                Node::Element(n) => self.match_pattern::<Element<'m>>(n)
?0
,
2631
27.5k
                Node::Text(t) =>  {
2632
27.5k
                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2633
27.5k
                    leaf.set_text(t.text());
2634
27.5k
                    leaf
2635
                },
2636
32
                Node::Attribute(attr) => {
2637
                    // debug!("  from attr with text '{}'", attr.value());
2638
32
                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2639
32
                    leaf.set_text(attr.value());
2640
32
                    leaf
2641
                },
2642
                _ => {
2643
0
                    bail!("replace_nodes: found unexpected node type!!!");
2644
                },
2645
            };
2646
69.6k
            children.push(matched);
2647
        }
2648
2649
48.6k
        let result = create_mathml_element(&self.doc, "TEMP_NAME");    // FIX: what name should be used?
2650
48.6k
        result.append_children(children);
2651
        // debug!("replace_nodes_tree\n{}\n====>>>>>\n", mml_to_string(result));
2652
48.6k
        return Ok( result );
2653
48.6k
    }
2654
2655
72.9k
    fn replace_nodes_string(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
2656
        // debug!("replace_nodes: working on {} nodes", nodes.len());
2657
72.9k
        let mut result = String::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2658
72.9k
        let mut first_time = true;
2659
85.7k
        for node in 
nodes72.9k
{
2660
85.7k
            if first_time {
2661
72.9k
                first_time = false;
2662
72.9k
            } else {
2663
12.8k
                result.push(' ');
2664
12.8k
            };
2665
85.7k
            let matched = match node {
2666
66.5k
                Node::Element(n) => self.match_pattern::<String>(n)
?0
,
2667
19.2k
                Node::Text(t) =>  self.replace_chars(t.text(), mathml)
?0
,
2668
14
                Node::Attribute(attr) => self.replace_chars(attr.value(), mathml)
?0
,
2669
0
                _ => bail!("replace_nodes: found unexpected node type!!!"),
2670
            };
2671
85.7k
            result += &matched;
2672
        }
2673
72.9k
        return Ok( result );
2674
72.9k
    }
2675
2676
    /// Lookup unicode "pronunciation" of char.
2677
    /// Note: TTS is not supported here (not needed and a little less efficient)
2678
58.0k
    pub fn replace_chars(&'r mut self, str: &str, mathml: Element<'c>) -> Result<String> {
2679
58.0k
        let chars = str.chars().collect::<Vec<char>>();
2680
58.0k
        let rules = self.speech_rules;
2681
        // handled in match_pattern -- temporarily leaving as comments in case something is missed and needed here
2682
        // if self.nav_node_offset > 0 && chars.len() > 1 {
2683
        //     if self.nav_node_offset > chars.len() {
2684
        //         debug!("replace_chars: nav_node_offset {} is larger than string length {}", self.nav_node_offset, chars.len());
2685
        //         self.nav_node_offset = chars.len();
2686
        //     }
2687
        //     let ch = chars[self.nav_node_offset-1];
2688
        //     debug!("replace_chars: adjusted string to '{}' based on nav_node_offset {}", ch, self.nav_node_offset);
2689
        //     if rules.translate_single_chars_only {
2690
        //         return self.replace_single_char(ch, mathml);
2691
        //     } else {
2692
        //         return Ok( ch.to_string() );
2693
        //     }
2694
        // }
2695
58.0k
        if is_quoted_string(str) {  // quoted string -- already translated (set in get_braille_chars)
2696
12.5k
            return Ok(unquote_string(str).to_string());
2697
45.5k
        }
2698
        // in a string, avoid "a" -> "eigh", "." -> "point", etc
2699
45.5k
        if rules.translate_single_chars_only {
2700
30.0k
            if chars.len() == 1 {
2701
27.3k
                return self.replace_single_char(chars[0], mathml)
2702
            } else {
2703
                // more than one char -- fix up non-breaking space
2704
2.69k
                return Ok(str.replace('\u{00A0}', " ").replace(['\u{2061}', '\u{2062}', '\u{2063}', '\u{2064}'], ""))
2705
            }
2706
15.5k
        };
2707
2708
15.5k
        let result = chars.iter()
2709
18.2k
            .
map15.5k
(|&ch| self.replace_single_char(ch, mathml))
2710
15.5k
            .collect::<Result<Vec<String>>>()
?0
2711
15.5k
            .join("");
2712
15.5k
        return Ok( result );
2713
58.0k
    }
2714
2715
45.6k
    fn replace_single_char(&'r mut self, ch: char, mathml: Element<'c>) -> Result<String> {
2716
45.6k
        let ch_as_u32 = ch as u32;
2717
45.6k
        let rules =  self.speech_rules;
2718
45.6k
        let mut unicode = rules.unicode_short.borrow();
2719
45.6k
        let mut replacements = unicode.get( &ch_as_u32 );
2720
        // debug!("replace_single_char: looking for unicode {} for char '{}'/{:#06x}, found: {:?}", rules.name, ch, ch_as_u32, replacements);
2721
45.6k
        if replacements.is_none() {
2722
            // see if it in the full unicode table (if it isn't loaded already)
2723
1.64k
            let pref_manager = rules.pref_manager.borrow();
2724
1.64k
            let unicode_pref_files = if rules.name == RulesFor::Braille {
pref_manager.get_braille_unicode_file()525
} else {
pref_manager.get_speech_unicode_file()1.12k
};
2725
1.64k
            let should_ignore_file_time = pref_manager.pref_to_string("CheckRuleFiles") == "All";
2726
1.64k
            if rules.unicode_full.borrow().is_empty() || 
!1.18k
rules.unicode_full_files.borrow()1.18k
.
is_file_up_to_date1.18k
(unicode_pref_files.1, should_ignore_file_time) {
2727
468
                info!("*** Loading full unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2728
468
                rules.unicode_full.borrow_mut().clear();
2729
468
                rules.unicode_full_files.borrow_mut().set_files_and_times(rules.read_unicode(None, false)
?0
);
2730
468
                info!("# Unicode defs = {}/{}", 
rules.unicode_short.borrow().len()0
,
rules.unicode_full.borrow().len()0
);
2731
1.18k
            }
2732
1.64k
            unicode = rules.unicode_full.borrow();
2733
1.64k
            replacements = unicode.get( &ch_as_u32 );
2734
1.64k
            if replacements.is_none() {
2735
269
              self.translate_count = 0;     // not in loop
2736
              // debug!("*** Did not find unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2737
269
              if rules.translate_single_chars_only || 
ch247
.
is_ascii247
() { // speech or if braille, avoid loop (ASCII remains ASCII if not found)
2738
269
                return Ok(String::from(ch));   // no replacement, so just return the char and hope for the best
2739
              } else { // braille -- must turn into braille dots
2740
                // Emulate what NVDA does: generate (including single quotes) '\xhhhh' or '\yhhhhhh'
2741
0
                let ch_as_int = ch as u32;
2742
0
                let prefix_indicator = if ch_as_int < 1<<16 {'x'} else {'y'};
2743
0
                return self.replace_chars( &format!("'\\{prefix_indicator}{:06x}'", ch_as_int), mathml);
2744
              }
2745
1.37k
            }
2746
43.9k
        };
2747
2748
        // map across all the parts of the replacement, collect them up into a Vec, and then concat them together
2749
45.3k
        let result = replacements.unwrap()
2750
45.3k
                    .iter()
2751
45.3k
                    .map(|replacement|
2752
49.1k
                         self.replace(replacement, mathml)
2753
49.1k
                                .with_context(|| 
format!0
("Unicode replacement error: {replacement}")) )
2754
45.3k
                    .collect::<Result<Vec<String>>>()
?0
2755
45.3k
                    .join(" ");
2756
45.3k
         self.translate_count = 0;     // found a replacement, so not in a loop
2757
45.3k
        return Ok(result);
2758
45.6k
    }
2759
}
2760
2761
/// Hack to allow replacement of `str` with braille chars.
2762
12.5k
pub fn braille_replace_chars(str: &str, mathml: Element) -> Result<String> {
2763
12.5k
    return BRAILLE_RULES.with(|rules| {
2764
12.5k
        let rules = rules.borrow();
2765
12.5k
        let new_package = Package::new();
2766
12.5k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), "", 0);
2767
12.5k
        return rules_with_context.replace_chars(str, mathml);
2768
12.5k
    })
2769
12.5k
}
2770
2771
2772
2773
#[cfg(test)]
2774
mod tests {
2775
    #[allow(unused_imports)]
2776
    use crate::init_logger;
2777
2778
    use super::*;
2779
2780
    #[test]
2781
1
    fn test_read_statement() {
2782
1
        let str = r#"---
2783
1
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;
2784
1
        let doc = YamlLoader::load_from_str(str).unwrap();
2785
1
        assert_eq!(doc.len(), 1);
2786
1
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
2787
2788
1
        SpeechPattern::build(&doc[0], Path::new("testing"), &mut rules).unwrap();
2789
1
        assert_eq!(rules.rules["math"].len(), 1, "\nshould only be one rule");
2790
2791
1
        let speech_pattern = &rules.rules["math"][0];
2792
1
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
2793
1
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
2794
1
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
2795
1
        assert_eq!(speech_pattern.replacements.replacements.len(), 1, "\nreplacement failure");
2796
1
        assert_eq!(speech_pattern.replacements.replacements[0].to_string(), r#""./*""#, "\nreplacement failure");
2797
1
    }
2798
2799
    #[test]
2800
1
    fn test_read_statements_with_replace() {
2801
1
        let str = r#"---
2802
1
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;
2803
1
        let doc = YamlLoader::load_from_str(str).unwrap();
2804
1
        assert_eq!(doc.len(), 1);
2805
1
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
2806
1
        SpeechPattern::build(&doc[0], Path::new("testing"), &mut rules).unwrap();
2807
2808
1
        let str = r#"---
2809
1
        {name: default, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;
2810
1
        let doc2 = YamlLoader::load_from_str(str).unwrap();
2811
1
        assert_eq!(doc2.len(), 1);
2812
1
        SpeechPattern::build(&doc2[0], Path::new("testing"), &mut rules).unwrap();
2813
1
        assert_eq!(rules.rules["math"].len(), 1, "\nfirst rule not replaced");
2814
2815
1
        let speech_pattern = &rules.rules["math"][0];
2816
1
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
2817
1
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
2818
1
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
2819
1
        assert_eq!(speech_pattern.replacements.replacements.len(), 2, "\nreplacement failure");
2820
1
    }
2821
2822
    #[test]
2823
1
    fn test_read_statements_with_add() {
2824
1
        let str = r#"---
2825
1
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;
2826
1
        let doc = YamlLoader::load_from_str(str).unwrap();
2827
1
        assert_eq!(doc.len(), 1);
2828
1
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
2829
1
        SpeechPattern::build(&doc[0], Path::new("testing"), &mut rules).unwrap();
2830
2831
1
        let str = r#"---
2832
1
        {name: another-rule, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;
2833
1
        let doc2 = YamlLoader::load_from_str(str).unwrap();
2834
1
        assert_eq!(doc2.len(), 1);
2835
1
        SpeechPattern::build(&doc2[0], Path::new("testing"), &mut rules).unwrap();
2836
1
        assert_eq!(rules.rules["math"].len(), 2, "\nsecond rule not added");
2837
2838
1
        let speech_pattern = &rules.rules["math"][0];
2839
1
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
2840
1
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
2841
1
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
2842
1
        assert_eq!(speech_pattern.replacements.replacements.len(), 1, "\nreplacement failure");
2843
1
    }
2844
2845
    #[test]
2846
1
    fn test_debug_no_debug() {
2847
1
        let str = r#"*[2]/*[3][text()='3']"#;
2848
1
        let result = MyXPath::add_debug_string_arg(str);
2849
1
        assert!(result.is_ok());
2850
1
        assert_eq!(result.unwrap(), str);
2851
1
    }
2852
2853
    #[test]
2854
1
    fn test_debug_no_debug_with_quote() {
2855
1
        let str = r#"*[2]/*[3][text()='(']"#;
2856
1
        let result = MyXPath::add_debug_string_arg(str);
2857
1
        assert!(result.is_ok());
2858
1
        assert_eq!(result.unwrap(), str);
2859
1
    }
2860
2861
    #[test]
2862
1
    fn test_debug_no_quoted_paren() {
2863
1
        let str = r#"DEBUG(*[2]/*[3][text()='3'])"#;
2864
1
        let result = MyXPath::add_debug_string_arg(str);
2865
1
        assert!(result.is_ok());
2866
1
        assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][text()='3'], "*[2]/*[3][text()='3']")"#);
2867
1
    }
2868
2869
    #[test]
2870
1
    fn test_debug_quoted_paren() {
2871
1
        let str = r#"DEBUG(*[2]/*[3][text()='('])"#;
2872
1
        let result = MyXPath::add_debug_string_arg(str);
2873
1
        assert!(result.is_ok());
2874
1
        assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][text()='('], "*[2]/*[3][text()='(']")"#);
2875
1
    }
2876
2877
    #[test]
2878
1
    fn test_debug_quoted_paren_before_paren() {
2879
1
        let str = r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics') and IsBracketed(., '(', ')')"#;
2880
1
        let result = MyXPath::add_debug_string_arg(str);
2881
1
        assert!(result.is_ok());
2882
1
        assert_eq!(result.unwrap(), r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics', "ClearSpeak_Matrix = 'Combinatorics'") and IsBracketed(., '(', ')')"#);
2883
1
    }
2884
2885
2886
// zipped files do NOT include "zz", hence we need to exclude this test
2887
cfg_if::cfg_if! {if #[cfg(not(feature = "include-zip"))] {  
2888
    #[test]
2889
1
    fn test_up_to_date() {
2890
        use crate::interface::*;
2891
        // initialize and move to a directory where making a time change doesn't really matter
2892
1
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
2893
1
        set_preference("Language", "zz-aa").unwrap();
2894
        // not much is support in zz
2895
1
        if let Err(
e0
) = set_mathml("<math><mi>x</mi></math>") {
2896
0
            error!("{}", crate::errors_to_string(&e));
2897
0
            panic!("Should not be an error in setting MathML")
2898
1
        }
2899
2900
1
        set_preference("CheckRuleFiles", "All").unwrap();
2901
1
        assert!(!is_file_time_same(), "file's time did not get updated");
2902
1
        set_preference("CheckRuleFiles", "None").unwrap();
2903
1
        assert!(is_file_time_same(), "file's time was wrongly updated (preference 'CheckRuleFiles' should have prevented updating)");
2904
2905
        // change a file, cause read_files to be called, and return if MathCAT noticed the change and updated its time
2906
2
        fn is_file_time_same() -> bool {
2907
            // read and write a unicode file in a test dir
2908
            // files are read in due to setting the MathML
2909
2910
            use std::time::Duration;
2911
2
            return SPEECH_RULES.with(|rules| {
2912
2
                let start_main_file = rules.borrow().unicode_short_files.borrow().ft[0].clone();
2913
2914
                // open the file, read all the contents, then write them back so the time changes
2915
2
                let contents = std::fs::read(&start_main_file.file).expect(&format!("Failed to read file {} during test", &start_main_file.file.to_string_lossy()));
2916
2
                std::fs::write(start_main_file.file, contents).unwrap();
2917
2
                std::thread::sleep(Duration::from_millis(5));       // pause a little to make sure the time changes
2918
2919
                // speak should cause the file stored to have a new time
2920
2
                if let Err(
e0
) = get_spoken_text() {
2921
0
                    error!("{}", crate::errors_to_string(&e));
2922
0
                    panic!("Should not be an error in speech")
2923
2
                }
2924
2
                return rules.borrow().unicode_short_files.borrow().ft[0].time == start_main_file.time;
2925
2
            });
2926
2
        }    
2927
1
    }
2928
}}
2929
2930
    // #[test]
2931
    // fn test_nested_debug_quoted_paren() {
2932
    //     let str = r#"DEBUG(*[2]/*[3][DEBUG(text()='(')])"#;
2933
    //     let result = MyXPath::add_debug_string_arg(str);
2934
    //     assert!(result.is_ok());
2935
    //     assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][DEBUG(text()='(')], "DEBUG(*[2]/*[3][DEBUG(text()='(')], \"text()='(')]\")"#);
2936
    // }
2937
2938
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/speech.rs
Line
Count
Source
1
//! The speech module is where the speech rules are read in and speech generated.
2
//!
3
//! The speech rules call out to the preferences and tts modules and the dividing line is not always clean.
4
//! A number of useful utility functions used by other modules are defined here.
5
#![allow(clippy::needless_return)]
6
use std::path::PathBuf;
7
use std::collections::HashMap;
8
use std::cell::{RefCell, RefMut};
9
use std::sync::LazyLock;
10
use sxd_document::dom::{ChildOfElement, Document, Element};
11
use sxd_document::{Package, QName};
12
use sxd_xpath::context::Evaluation;
13
use sxd_xpath::{Factory, Value, XPath};
14
use sxd_xpath::nodeset::Node;
15
use std::fmt;
16
use std::time::SystemTime;
17
use crate::definitions::read_definitions_file;
18
use crate::errors::*;
19
use crate::prefs::*;
20
use crate::xpath_functions::is_leaf;
21
use yaml_rust::{YamlLoader, Yaml, yaml::Hash};
22
use crate::tts::*;
23
use crate::infer_intent::*;
24
use crate::pretty_print::{mml_to_string, yaml_to_string};
25
use std::path::Path;
26
use std::rc::Rc;
27
use crate::shim_filesystem::{read_to_string_shim, canonicalize_shim};
28
use crate::canonicalize::{as_element, create_mathml_element, set_mathml_name, name, MATHML_FROM_NAME_ATTR};
29
use regex::Regex;
30
use log::{debug, error, info};
31
32
33
pub const NAV_NODE_SPEECH_NOT_FOUND: &str = "NAV_NODE_NOT_FOUND";
34
35
/// Like lisp's ' (quote foo), this is used to block "replace_chars" being called.
36
///   Unlike lisp, this appended to the end of a string (more efficient)
37
/// At the moment, the only use is BrailleChars(...) -- internally, it calls replace_chars and we don't want it called again.
38
/// Note: an alternative to this hack is to add "xq" (execute but don't eval the result), but that's heavy-handed for the current need
39
const NO_EVAL_QUOTE_CHAR: char = '\u{efff}';            // a private space char
40
const NO_EVAL_QUOTE_CHAR_AS_BYTES: [u8;3] = [0xee,0xbf,0xbf];
41
const N_BYTES_NO_EVAL_QUOTE_CHAR: usize = NO_EVAL_QUOTE_CHAR.len_utf8();
42
43
/// Converts 'string' into a "quoted" string -- use is_quoted_string and unquote_string
44
12.5k
pub fn make_quoted_string(mut string: String) -> String {
45
12.5k
    string.push(NO_EVAL_QUOTE_CHAR);
46
12.5k
    return string;
47
12.5k
}
48
49
/// Checks the string to see if it is "quoted"
50
58.0k
pub fn is_quoted_string(str: &str) -> bool {
51
58.0k
    if str.len() < N_BYTES_NO_EVAL_QUOTE_CHAR {
52
34.1k
        return false;
53
23.9k
    }
54
23.9k
    let bytes = str.as_bytes();
55
23.9k
    return bytes[bytes.len()-N_BYTES_NO_EVAL_QUOTE_CHAR..] == NO_EVAL_QUOTE_CHAR_AS_BYTES;
56
58.0k
}
57
58
/// Converts 'string' into a "quoted" string -- use is_quoted_string and unquote_string
59
/// IMPORTANT: this assumes the string is quoted -- no check is made
60
12.5k
pub fn unquote_string(str: &str) -> &str {
61
12.5k
    return &str[..str.len()-N_BYTES_NO_EVAL_QUOTE_CHAR];
62
12.5k
}
63
64
65
/// The main external call, `intent_from_mathml` returns a string for the speech associated with the `mathml`.
66
///   It matches against the rules that are computed by user prefs such as "Language" and "SpeechStyle".
67
///
68
/// The speech rules assume `mathml` has been "cleaned" via the canonicalization step.
69
///
70
/// If the preferences change (and hence the speech rules to use change), or if the rule file changes,
71
///   `intent_from_mathml` will detect that and (re)load the proper rules.
72
///
73
/// A string is returned in call cases.
74
/// If there is an error, the speech string will indicate an error.
75
3.88k
pub fn intent_from_mathml<'m>(mathml: Element, doc: Document<'m>) -> Result<Element<'m>> {
76
3.88k
    let 
intent_tree3.87k
= intent_rules(&INTENT_RULES, doc, mathml, "")
?9
;
77
3.87k
    doc.root().append_child(intent_tree);
78
3.87k
    return Ok(intent_tree);
79
3.88k
}
80
81
3.96k
pub fn speak_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
82
3.96k
    return speak_rules(&SPEECH_RULES, mathml, nav_node_id, nav_node_offset);
83
3.96k
}
84
85
14
pub fn overview_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
86
14
    return speak_rules(&OVERVIEW_RULES, mathml, nav_node_id, nav_node_offset);
87
14
}
88
89
90
3.88k
fn intent_rules<'m>(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, doc: Document<'m>, mathml: Element, nav_node_id: &'m str) -> Result<Element<'m>> {
91
3.88k
    rules.with(|rules| {
92
3.88k
        rules.borrow_mut().read_files()
?0
;
93
3.88k
        let rules = rules.borrow();
94
        // debug!("intent_rules:\n{}", mml_to_string(mathml));
95
3.88k
        let should_set_literal_intent = rules.pref_manager.borrow().pref_to_string("SpeechStyle").as_str() == "LiteralSpeak";
96
3.88k
        let original_intent = mathml.attribute_value("intent");
97
3.88k
        if should_set_literal_intent {
98
10
            if let Some(
intent4
) = original_intent {
99
4
                let intent = if intent.contains('(') {
intent2
.
replace2
('(',
":literal("2
)} else {
intent2
.to_string() + ":literal"};
100
4
                mathml.set_attribute_value("intent", &intent);
101
6
            } else {
102
6
                mathml.set_attribute_value("intent", ":literal");
103
6
            };
104
3.87k
        }
105
3.88k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, doc, nav_node_id, 0);
106
3.88k
        let 
intent3.87k
= rules_with_context.match_pattern::<Element<'m>>(mathml)
107
3.88k
                    .context("Pattern match/replacement failure!")
?9
;
108
3.87k
        let answer = if name(intent) == "TEMP_NAME" {   // unneeded extra layer
109
0
            assert_eq!(intent.children().len(), 1);
110
0
            as_element(intent.children()[0])
111
        } else {
112
3.87k
            intent
113
        };
114
3.87k
        if should_set_literal_intent {
115
10
            if let Some(
original_intent4
) = original_intent {
116
4
                mathml.set_attribute_value("intent", original_intent);
117
6
            } else {
118
6
                mathml.remove_attribute("intent");
119
6
            }
120
3.86k
        }
121
3.87k
        return Ok(answer);
122
3.88k
    })
123
3.88k
}
124
125
/// Speak the MathML
126
/// If 'nav_node_id' is not an empty string, then the element with that id will have [[...]] around it
127
3.98k
fn speak_rules(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
128
3.98k
    return rules.with(|rules| {
129
3.98k
        rules.borrow_mut().read_files()
?0
;
130
3.98k
        let rules = rules.borrow();
131
        // debug!("speak_rules:\n{}", mml_to_string(mathml));
132
3.98k
        let new_package = Package::new();
133
3.98k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), nav_node_id, nav_node_offset);
134
3.98k
        let 
speech_string3.98k
= nestable_speak_rules(& mut rules_with_context, mathml)
?1
;
135
3.98k
        return Ok( rules.pref_manager.borrow().get_tts()
136
3.98k
            .merge_pauses(remove_optional_indicators(
137
3.98k
                &speech_string.replace(CONCAT_STRING, "")
138
3.98k
                                    .replace(CONCAT_INDICATOR, "")                            
139
3.98k
                            )
140
3.98k
            .trim_start().trim_end_matches([' ', ',', ';'])) );
141
3.98k
    });
142
143
3.99k
    fn nestable_speak_rules<'c, 's:'c, 'm:'c>(rules_with_context: &mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
144
3.99k
        let mut speech_string = rules_with_context.match_pattern::<String>(mathml)
145
3.99k
                    .context("Pattern match/replacement failure!")
?0
;
146
        // Note: [[...]] is added around a matching child, but if the "id" is on 'mathml', the whole string is used
147
3.99k
        if !rules_with_context.nav_node_id.is_empty() {
148
            // See https://github.com/NSoiffer/MathCAT/issues/174 for why we can just start the speech at the nav node
149
536
            let intent_attr = mathml.attribute_value("data-intent-property").unwrap_or_default();
150
536
            if let Some(
start521
) = speech_string.find("[[") {
151
521
                match speech_string[start+2..].find("]]") {
152
0
                    None => bail!("Internal error: looking for '[[...]]' during navigation -- only found '[[' in '{}'", speech_string),
153
521
                    Some(end) => speech_string = speech_string[start+2..start+2+end].to_string(),
154
                }
155
15
            } else if !intent_attr.contains(":literal:") {
156
                // try again with LiteralSpeak -- some parts might have been elided in other SpeechStyles
157
14
                mathml.set_attribute_value("data-intent-property", (":literal:".to_string() + intent_attr).as_str());
158
14
                let speech = nestable_speak_rules(rules_with_context, mathml);
159
14
                mathml.set_attribute_value("data-intent-property", intent_attr);
160
14
                return speech;
161
            } else {
162
1
                bail!(NAV_NODE_SPEECH_NOT_FOUND); //  NAV_NODE_SPEECH_NOT_FOUND is tested for later
163
            }
164
3.46k
        }
165
3.98k
        return Ok(speech_string);
166
3.99k
    }
167
3.98k
}
168
169
/// Converts its argument to a string that can be used in a debugging message.
170
0
pub fn yaml_to_type(yaml: &Yaml) -> String {
171
0
    return match yaml {
172
0
        Yaml::Real(v)=> format!("real='{v:#}'"),
173
0
        Yaml::Integer(v)=> format!("integer='{v:#}'"),
174
0
        Yaml::String(v)=> format!("string='{v:#}'"),
175
0
        Yaml::Boolean(v)=> format!("boolean='{v:#}'"),
176
0
        Yaml::Array(v)=> match v.len() {
177
0
            0 => "array with no entries".to_string(),
178
0
            1 => format!("array with the entry: {}", yaml_to_type(&v[0])),
179
0
            _ => format!("array with {} entries. First entry: {}", v.len(), yaml_to_type(&v[0])),
180
        }
181
0
        Yaml::Hash(h)=> {
182
0
            let first_pair = 
183
0
                if h.is_empty() {
184
0
                    "no pairs".to_string()
185
                } else {
186
0
                    let (key, val) = h.iter().next().unwrap();
187
0
                    format!("({}, {})", yaml_to_type(key), yaml_to_type(val))
188
                };
189
0
            format!("dictionary with {} pair{}. A pair: {}", h.len(), if h.len()==1 {""} else {"s"}, first_pair)
190
        }
191
0
        Yaml::Alias(_)=> "Alias".to_string(),
192
0
        Yaml::Null=> "Null".to_string(),
193
0
        Yaml::BadValue=> "BadValue".to_string(),       
194
    }
195
0
}
196
197
0
fn yaml_type_err(yaml: &Yaml, str: &str) -> Error {
198
0
    anyhow!("Expected {}, found {}", str, yaml_to_type(yaml))
199
0
}
200
201
// fn yaml_key_err(dict: &Yaml, key: &str, yaml_type: &str) -> String {
202
//     if dict.as_hash().is_none() {
203
//        return format!("Expected dictionary with key '{}', found\n{}", key, yaml_to_string(dict, 1));
204
//     }
205
//     let str = &dict[key];
206
//     if str.is_badvalue() {
207
//         return format!("Did not find '{}' in\n{}", key,  yaml_to_string(dict, 1));
208
//     }
209
//     return format!("Type of '{}' is not a {}.\nIt is a {}. YAML value is\n{}", 
210
//             key, yaml_type, yaml_to_type(str), yaml_to_string(dict, 0));
211
// }
212
213
4.86M
fn find_str<'a>(dict: &'a Yaml, key: &'a str) -> Option<&'a str> {
214
4.86M
    return dict[key].as_str();
215
4.86M
}
216
217
/// Returns the Yaml as a `Hash` or an error if it isn't.
218
175k
pub fn as_hash_checked(value: &Yaml) -> Result<&Hash> {
219
175k
    let result = value.as_hash();
220
175k
    let result = result.ok_or_else(|| 
yaml_type_err0
(
value0
,
"hashmap"0
))
?0
;
221
175k
    return Ok( result );
222
175k
}
223
224
/// Returns the Yaml as a `Vec` or an error if it isn't.
225
11.7k
pub fn as_vec_checked(value: &Yaml) -> Result<&Vec<Yaml>> {
226
11.7k
    let result = value.as_vec();
227
11.7k
    let result = result.ok_or_else(|| 
yaml_type_err0
(
value0
,
"array"0
))
?0
;
228
11.7k
    return Ok( result );
229
11.7k
}
230
231
/// Returns the Yaml as a `&str` or an error if it isn't.
232
8.08M
pub fn as_str_checked(yaml: &Yaml) -> Result<&str> {
233
8.08M
    return yaml.as_str().ok_or_else(|| 
yaml_type_err0
(
yaml0
,
"string"0
));
234
8.08M
}
235
236
237
/// A bit of a hack to concatenate replacements (without a ' ').
238
/// The CONCAT_INDICATOR is added by a "ct:" (instead of 't:') in the speech rules
239
/// and checked for by the tts code.
240
pub const CONCAT_INDICATOR: &str = "\u{F8FE}";
241
242
// This is the pattern that needs to be matched (and deleted)
243
pub const CONCAT_STRING: &str = " \u{F8FE}";
244
245
// a similar hack to potentially delete (repetitive) optional replacements
246
// the OPTIONAL_INDICATOR is added by "ot:" before and after the optional string
247
const OPTIONAL_INDICATOR: &str  = "\u{F8FD}";
248
const OPTIONAL_INDICATOR_LEN: usize = OPTIONAL_INDICATOR.len();
249
250
5.10k
pub fn remove_optional_indicators(str: &str) -> String {
251
5.10k
    return str.replace(OPTIONAL_INDICATOR, "");
252
5.10k
}
253
254
/// Given a string that should be Yaml, it calls `build_fn` with that string.
255
/// The build function/closure should process the Yaml as appropriate and capture any errors and write them to `std_err`.
256
/// The returned value should be a Vector containing the paths of all the files that were included.
257
56.3k
pub fn compile_rule<F>(str: &str, mut build_fn: F) -> Result<Vec<PathBuf>> where
258
56.3k
            F: FnMut(&Yaml) -> Result<Vec<PathBuf>> {
259
56.3k
    let docs = YamlLoader::load_from_str(str);
260
56.3k
    match docs {
261
0
        Err(e) => {
262
0
            bail!("Parse error!!: {}", e);
263
        },
264
56.3k
        Ok(docs) => {
265
56.3k
            if docs.len() != 1 {
266
0
                bail!("Didn't find rules!");
267
56.3k
            }
268
56.3k
            return build_fn(&docs[0]);
269
        }
270
    }
271
56.3k
}
272
273
36.5k
pub fn process_include<F>(current_file: &Path, new_file_name: &str, mut read_new_file: F) -> Result<Vec<PathBuf>>
274
36.5k
                    where F: FnMut(&Path) -> Result<Vec<PathBuf>> {
275
36.5k
    let parent_path = current_file.parent();
276
36.5k
    if parent_path.is_none() {
277
0
        bail!("Internal error: {:?} is not a valid file name", current_file);
278
36.5k
    }
279
36.5k
    let mut new_file = match canonicalize_shim(parent_path.unwrap()) {
280
36.5k
        Ok(path) => path,
281
0
        Err(e) => bail!("process_include: canonicalize failed for {} with message {}", parent_path.unwrap().display(), e),
282
    };
283
284
    // the referenced file might be in a directory that hasn't been zipped up -- find the dir and call the unzip function
285
89.0k
    for unzip_dir in 
new_file.ancestors()36.5k
{
286
89.0k
        if unzip_dir.ends_with("Rules") {
287
36.5k
            break;      // nothing to unzip
288
52.4k
        }
289
52.4k
        if unzip_dir.ends_with("Languages") || 
unzip_dir28.5k
.
ends_with28.5k
("Braille") {
290
            // get the subdir ...Rules/Braille/en/...
291
            // could have ...Rules/Braille/definitions.yaml, so 'next()' doesn't exist in this case, but the file wasn't zipped up
292
26.0k
            if let Some(
subdir24.9k
) = new_file.strip_prefix(unzip_dir).unwrap().iter().next() {
293
24.9k
                let default_lang = if unzip_dir.ends_with("Languages") {
"en"23.9k
} else {
"UEB;"1.06k
};
294
24.9k
                PreferenceManager::unzip_files(unzip_dir, subdir.to_str().unwrap(), Some(default_lang)).unwrap_or_default();
295
1.06k
            }
296
26.4k
        }
297
    }
298
36.5k
    new_file.push(new_file_name);
299
36.5k
    info!("...processing include: {new_file_name}...");
300
36.5k
    let new_file = match crate::shim_filesystem::canonicalize_shim(new_file.as_path()) {
301
36.5k
        Ok(buf) => buf,
302
0
        Err(msg) => bail!("-include: constructed file name '{}' causes error '{}'",
303
0
                                 new_file.to_str().unwrap(), msg),
304
    };
305
306
36.5k
    let mut included_files = read_new_file(new_file.as_path())
?0
;
307
36.5k
    let mut files_read = vec![new_file];
308
36.5k
    files_read.append(&mut included_files);
309
36.5k
    return Ok(files_read);
310
36.5k
}
311
312
/// As the name says, TreeOrString is either a Tree (Element) or a String
313
/// It is used to share code during pattern matching
314
pub trait TreeOrString<'c, 'm:'c, T> {
315
    fn from_element(e: Element<'m>) -> Result<T>;
316
    fn from_string(s: String, doc: Document<'m>) -> Result<T>;
317
    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
318
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
319
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T>;
320
    fn highlight_braille(braille: T, highlight_style: String) -> T;
321
    fn mark_nav_speech(speech: T) -> T;
322
}
323
324
impl<'c, 'm:'c> TreeOrString<'c, 'm, String> for String {
325
0
    fn from_element(_e: Element<'m>) -> Result<String> {
326
0
         bail!("from_element not allowed for strings");
327
0
    }
328
329
180k
    fn from_string(s: String, _doc: Document<'m>) -> Result<String> {
330
180k
        return Ok(s);
331
180k
    }
332
333
60.7k
    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
334
60.7k
        return tts.replace_string(command, prefs, rules_with_context, mathml);
335
60.7k
    }
336
337
142k
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
338
142k
        return ra.replace_array_string(rules_with_context, mathml);
339
142k
    }
340
341
72.9k
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
342
72.9k
        return rules.replace_nodes_string(nodes, mathml);
343
72.9k
    }
344
345
469
    fn highlight_braille(braille: String, highlight_style: String) -> String {
346
469
        return SpeechRulesWithContext::highlight_braille_string(braille, highlight_style);
347
469
    }
348
349
521
    fn mark_nav_speech(speech: String) -> String {
350
521
        return SpeechRulesWithContext::mark_nav_speech(speech);
351
521
    }
352
}
353
354
impl<'c, 'm:'c> TreeOrString<'c, 'm, Element<'m>> for Element<'m> {
355
48.0k
    fn from_element(e: Element<'m>) -> Result<Element<'m>> {
356
48.0k
         return Ok(e);
357
48.0k
    }
358
359
213
    fn from_string(s: String, doc: Document<'m>) -> Result<Element<'m>> {
360
        // FIX: is 'mi' really ok?  Don't want to use TEMP_NAME because this name needs to move to the outside world
361
213
        let leaf = create_mathml_element(&doc, "mi");
362
213
        leaf.set_text(&s);
363
213
        return Ok(leaf);
364
213
}
365
366
0
    fn replace_tts<'s:'c, 'r>(_tts: &TTS, _command: &TTSCommandRule, _prefs: &PreferenceManager, _rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, _mathml: Element<'c>) -> Result<Element<'m>> {
367
0
        bail!("Internal error: applying a TTS rule to a tree");
368
0
    }
369
370
132k
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
371
132k
        return ra.replace_array_tree(rules_with_context, mathml);
372
132k
    }
373
374
48.6k
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<Element<'m>> {
375
48.6k
        return rules.replace_nodes_tree(nodes, mathml);
376
48.6k
    }
377
378
0
    fn highlight_braille(_braille: Element<'c>, _highlight_style: String) -> Element<'m> {
379
0
        panic!("Internal error: highlight_braille called on a tree");
380
    }
381
382
0
    fn mark_nav_speech(_speech: Element<'c>) -> Element<'m> {
383
0
        panic!("Internal error: mark_nav_speech called on a tree");
384
    }
385
}
386
387
/// 'Replacement' is an enum that contains all the potential replacement types/structs
388
/// Hence there are fields 'Test' ("test:"), 'Text" ("t:"), "XPath", etc
389
#[derive(Debug, Clone)]
390
#[allow(clippy::upper_case_acronyms)]
391
enum Replacement {
392
    // Note: all of these are pointer types
393
    Text(String),
394
    XPath(MyXPath),
395
    Intent(Box<Intent>),
396
    Test(Box<TestArray>),
397
    TTS(Box<TTSCommandRule>),
398
    With(Box<With>),
399
    SetVariables(Box<SetVariables>),
400
    Insert(Box<InsertChildren>),
401
    Translate(TranslateExpression),
402
}
403
404
impl fmt::Display for Replacement {
405
10
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
406
10
        return write!(f, "{}",
407
10
            match self {
408
0
                Replacement::Test(c) => c.to_string(),
409
0
                Replacement::Text(t) => format!("t: \"{t}\""),
410
10
                Replacement::XPath(x) => x.to_string(),
411
0
                Replacement::Intent(i) => i.to_string(),
412
0
                Replacement::TTS(t) => t.to_string(),
413
0
                Replacement::With(w) => w.to_string(),
414
0
                Replacement::SetVariables(v) => v.to_string(),
415
0
                Replacement::Insert(ic) => ic.to_string(),
416
0
                Replacement::Translate(x) => x.to_string(),
417
            }
418
        );
419
10
    }
420
}
421
422
impl Replacement {   
423
13.5M
    fn build(replacement: &Yaml) -> Result<Replacement> {
424
        // Replacement -- single key/value (see below for allowed values)
425
13.5M
        let dictionary = replacement.as_hash();
426
13.5M
        if dictionary.is_none() {
427
0
            bail!("  expected a key/value pair. Found {}.",  yaml_to_string(replacement, 0));
428
13.5M
        };
429
13.5M
        let dictionary = dictionary.unwrap();
430
13.5M
        if dictionary.is_empty() { 
431
0
            bail!("No key/value pairs found for key 'replace'.\n\
432
                Suggestion: are the following lines indented properly?");
433
13.5M
        }
434
13.5M
        if dictionary.len() > 1 { 
435
0
            bail!("Should only be one key/value pair for the replacement.\n    \
436
                    Suggestion: are the following lines indented properly?\n    \
437
0
                    The key/value pairs found are\n{}", yaml_to_string(replacement, 2));
438
13.5M
        }
439
440
        // get the single value
441
13.5M
        let (key, value) = dictionary.iter().next().unwrap();
442
13.5M
        let key = key.as_str().ok_or_else(|| 
anyhow!0
("replacement key(e.g, 't') is not a string"))
?0
;
443
13.5M
        match key {
444
13.5M
            "t" | 
"T"10.4M
=> {
445
5.76M
                return Ok( Replacement::Text( as_str_checked(value)
?0
.to_string() ) );
446
            },
447
7.75M
            "ct" | 
"CT"7.72M
=> {
448
24.7k
                return Ok( Replacement::Text( CONCAT_INDICATOR.to_string() + as_str_checked(value)
?0
) );
449
            },
450
7.72M
            "ot" | 
"OT"7.69M
=> {
451
36.2k
                return Ok( Replacement::Text( OPTIONAL_INDICATOR.to_string() + as_str_checked(value)
?0
+ OPTIONAL_INDICATOR ) );
452
            },
453
7.69M
            "x" => {
454
2.27M
                return Ok( Replacement::XPath( MyXPath::build(value)
455
2.27M
                    .context("while trying to evaluate value of 'x:'")
?0
) );
456
            },
457
5.41M
            "pause" | 
"rate"4.58M
|
"pitch"4.58M
|
"volume"4.36M
|
"audio"4.36M
|
"gender"4.13M
|
"voice"4.13M
|
"spell"4.13M
|
"SPELL"3.47M
|
"bookmark"3.18M
|
"pronounce"3.00M
|
"PRONOUNCE"2.99M
=> {
458
2.41M
                return Ok( Replacement::TTS( TTS::build(&key.to_ascii_lowercase(), value)
?0
) );
459
            },
460
2.99M
            "intent" => {
461
283k
                return Ok( Replacement::Intent( Intent::build(value)
?0
) );
462
            },
463
2.71M
            "test" => {
464
2.58M
                return Ok( Replacement::Test( Box::new( TestArray::build(value)
?0
) ) );
465
            },
466
128k
            "with" => {
467
77.5k
                return Ok( Replacement::With( With::build(value)
?0
) );
468
            },
469
51.3k
            "set_variables" => {
470
30.3k
                return Ok( Replacement::SetVariables( SetVariables::build(value)
?0
) );
471
            },
472
21.0k
            "insert" => {
473
20.9k
                return Ok( Replacement::Insert( InsertChildren::build(value)
?0
) );
474
            },
475
102
            "translate" => {
476
102
                return Ok( Replacement::Translate( TranslateExpression::build(value)
477
102
                    .context("while trying to evaluate value of 'speak:'")
?0
) );
478
            },
479
            _ => {
480
0
                bail!("Unknown 'replace' command ({}) with value: {}", key, yaml_to_string(value, 0));
481
            }
482
        }
483
13.5M
    }
484
}
485
486
// structure used when "insert:" is encountered in a rule
487
// the 'replacements' are inserted between each node in the 'xpath'
488
#[derive(Debug, Clone)]
489
struct InsertChildren {
490
    xpath: MyXPath,                     // the replacement nodes
491
    replacements: ReplacementArray,     // what is inserted between each node
492
}
493
494
impl fmt::Display for InsertChildren {
495
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
496
0
        return write!(f, "InsertChildren:\n  nodes {}\n  replacements {}", self.xpath, &self.replacements);
497
0
    }
498
}
499
500
impl InsertChildren {
501
20.9k
    fn build(insert: &Yaml) -> Result<Box<InsertChildren>> {
502
        // 'insert:' -- 'nodes': xxx 'replace': xxx
503
20.9k
        if insert.as_hash().is_none() {
504
0
            bail!("")
505
20.9k
        }
506
20.9k
        let nodes = &insert["nodes"];
507
20.9k
        if nodes.is_badvalue() { 
508
0
            bail!("Missing 'nodes' as part of 'insert'.\n    \
509
                  Suggestion: add 'nodes:' or if present, indent so it is contained in 'insert'");
510
20.9k
        }
511
20.9k
        let nodes = as_str_checked(nodes)
?0
;
512
20.9k
        let replace = &insert["replace"];
513
20.9k
        if replace.is_badvalue() { 
514
0
            bail!("Missing 'replace' as part of 'insert'.\n    \
515
                  Suggestion: add 'replace:' or if present, indent so it is contained in 'insert'");
516
20.9k
        }
517
20.9k
        return Ok( Box::new( InsertChildren {
518
20.9k
            xpath: MyXPath::new(nodes.to_string())
?0
,
519
20.9k
            replacements: ReplacementArray::build(replace).context("'replace:'")
?0
,
520
        } ) );
521
20.9k
    }
522
    
523
    // It would be most efficient to do an xpath eval, get the nodes (type: NodeSet) and then intersperse the node_replace()
524
    //   calls with replacements for the ReplacementArray parts. But that causes problems with the "pause: auto" calculation because
525
    //   the replacements are segmented (can't look to neighbors for the calculation there)
526
    // An alternative is to introduce another Replacement enum value, but that's a lot of complication for not that much
527
    //    gain (and Node's have contagious lifetimes)
528
    // The solution adopted is to find out the number of nodes and build up MyXPaths with each node selected (e.g, "*" => "*[3]")
529
    //    and put those nodes into a flat ReplacementArray and then do a standard replace on that.
530
    //    This is slower than the alternatives, but reuses a bunch of code and hence is less complicated.
531
7.45k
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
532
7.45k
        let result = self.xpath.evaluate(&rules_with_context.context_stack.base, mathml)
533
7.45k
                .with_context(||
format!0
("in '{}' replacing after pattern match",
&self.xpath.rc.string0
) )
?0
;
534
7.45k
        match result {
535
7.45k
            Value::Nodeset(nodes) => {
536
7.45k
                if nodes.size() == 0 {
537
0
                    bail!("During replacement, no matching element found");
538
7.45k
                };
539
7.45k
                let nodes = nodes.document_order();
540
7.45k
                let n_nodes = nodes.len();
541
7.45k
                let mut expanded_result = Vec::with_capacity(n_nodes + (n_nodes+1)*self.replacements.replacements.len());
542
7.45k
                expanded_result.push(
543
                    Replacement::XPath(
544
7.45k
                        MyXPath::new(format!("{}[{}]", self.xpath.rc.string , 1))
?0
545
                    )
546
                );
547
19.3k
                for i in 
2..n_nodes+17.45k
{
548
19.3k
                    expanded_result.extend_from_slice(&self.replacements.replacements);
549
19.3k
                    expanded_result.push(
550
                        Replacement::XPath(
551
19.3k
                            MyXPath::new(format!("{}[{}]", self.xpath.rc.string , i))
?0
552
                        )
553
                    );
554
                }
555
7.45k
                let replacements = ReplacementArray{ replacements: expanded_result };
556
7.45k
                return replacements.replace(rules_with_context, mathml);
557
            },
558
559
            // FIX: should the options be errors???
560
0
            Value::String(t) => { return T::from_string(rules_with_context.replace_chars(&t, mathml)?, rules_with_context.doc); },
561
0
            Value::Number(num)  => { return T::from_string( num.to_string(), rules_with_context.doc ); },
562
0
            Value::Boolean(b)  => { return T::from_string( b.to_string(), rules_with_context.doc ); },          // FIX: is this right???
563
        }
564
        
565
7.45k
    }    
566
}
567
568
569
2
static ATTR_NAME_VALUE: LazyLock<Regex> = LazyLock::new(|| {
570
2
    Regex::new(
571
        // match name='value', where name is sort of an NCNAME (see CONCEPT_OR_LITERAL in infer_intent.rs)
572
        // The quotes can be either single or double quotes
573
2
        r#"(?P<name>[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*)\s*=\s*('(?P<value>[^']+)'|"(?P<dqvalue>[^"]+)")"#
574
2
    ).unwrap()
575
2
});
576
577
// structure used when "intent:" is encountered in a rule
578
// the name is either a string or an xpath that needs evaluation. 99% of the time it is a string
579
#[derive(Debug, Clone)]
580
struct Intent {
581
    name: Option<String>,           // name of node
582
    xpath: Option<MyXPath>,         // alternative to directly using the string
583
    attrs: String,                  // optional attrs -- format "attr1='val1' [attr2='val2'...]"
584
    children: ReplacementArray,     // children of node
585
}
586
587
impl fmt::Display for Intent {
588
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
589
0
        let name = if let Some(name) = &self.name {
590
0
            name.to_string()
591
        } else {
592
0
            self.xpath.as_ref().unwrap().to_string()
593
        };
594
0
        return write!(f, "intent: {}: {},  attrs='{}'>\n      children: {}",
595
0
                        if self.name.is_some() {"name"} else {"xpath-name"}, name,
596
                        self.attrs,
597
0
                        &self.children);
598
0
    }
599
}
600
601
impl Intent {
602
283k
    fn build(yaml_dict: &Yaml) -> Result<Box<Intent>> {
603
        // 'intent:' -- 'name': xxx 'children': xxx
604
283k
        if yaml_dict.as_hash().is_none() {
605
0
            bail!("Array found for contents of 'intent' -- should be dictionary with keys 'name' and 'children'")
606
283k
        }
607
283k
        let name = &yaml_dict["name"];
608
283k
        let xpath_name = &yaml_dict["xpath-name"];
609
283k
        if name.is_badvalue() && 
xpath_name31.5k
.
is_badvalue31.5k
(){
610
0
            bail!("Missing 'name' or 'xpath-name' as part of 'intent'.\n    \
611
                  Suggestion: add 'name:' or if present, indent so it is contained in 'intent'");
612
283k
        }
613
283k
        let attrs = &yaml_dict["attrs"];
614
283k
        let replace = &yaml_dict["children"];
615
283k
        if replace.is_badvalue() {
616
0
            bail!("Missing 'children' as part of 'intent'.\n    \
617
                  Suggestion: add 'children:' or if present, indent so it is contained in 'intent'");
618
283k
        }
619
283k
        return Ok( Box::new( Intent {
620
283k
            name: if name.is_badvalue() {
None31.5k
} else {Some(
as_str_checked252k
(
name252k
).
context252k
("'name'")
?0
.
to_string252k
())},
621
283k
            xpath: if xpath_name.is_badvalue() {
None252k
} else {Some(
MyXPath::build31.5k
(
xpath_name31.5k
).
context31.5k
("'intent'")
?0
)},
622
283k
            attrs: if attrs.is_badvalue() {
""128k
.
to_string128k
()} else {
as_str_checked154k
(
attrs154k
).
context154k
("'attrs'")
?0
.
to_string154k
()},
623
283k
            children: ReplacementArray::build(replace).context("'children:'")
?0
,
624
        } ) );
625
283k
    }
626
        
627
45.5k
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
628
45.5k
        let result = self.children.replace::<Element<'m>>(rules_with_context, mathml)
629
45.5k
                    .context("replacing inside 'intent'")
?0
;
630
45.5k
        let mut result = lift_children(result);
631
45.5k
        if name(result) != "TEMP_NAME" && 
name(result) != "Unknown"3.43k
{
632
235
            // this case happens when you have an 'intent' replacement as a direct child of an 'intent' replacement
633
235
            let temp = create_mathml_element(&result.document(), "TEMP_NAME");
634
235
            temp.append_child(result);
635
235
            result = temp;
636
45.3k
        }
637
45.5k
        if let Some(
intent_name11.2k
) = &self.name {
638
11.2k
            result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
639
11.2k
            set_mathml_name(result, intent_name.as_str());
640
34.2k
        }
641
45.5k
        if let Some(
my_xpath34.2k
) = &self.xpath{ // self.xpath_name must be != None
642
34.2k
            let xpath_value = my_xpath.evaluate(rules_with_context.get_context(), mathml)
?0
;
643
34.2k
            match xpath_value {
644
34.2k
                Value::String(intent_name) => {
645
34.2k
                    result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
646
34.2k
                    set_mathml_name(result, intent_name.as_str())
647
                },
648
0
                _ => bail!("'xpath-name' value '{}' was not a string", &my_xpath),
649
            }
650
11.2k
        }
651
45.5k
        if self.name.is_none() && 
self.xpath34.2k
.
is_none34.2k
() {
652
0
            bail!("Intent::replace: internal error -- neither 'name' nor 'xpath' is set");
653
45.5k
        };
654
        
655
100k
        for attr in 
mathml45.5k
.
attributes45.5k
() {
656
100k
            result.set_attribute_value(attr.name(), attr.value());
657
100k
        }
658
659
        // can't test against name == "math" because intent might a new element
660
45.5k
        if mathml.parent().is_some() && mathml.parent().unwrap().element().is_some() &&
661
41.7k
           result.attribute_value("id") == crate::canonicalize::get_parent(mathml).attribute_value("id") {
662
32
            // avoid duplicate ids -- it's a bug if it does, but this helps in that case
663
32
            result.remove_attribute("id");
664
45.5k
        }
665
666
45.5k
        if !self.attrs.is_empty() {
667
            // debug!("MathML after children, before attr processing:\n{}", mml_to_string(mathml));
668
            // debug!("Result after children, before attr processing:\n{}", mml_to_string(result));
669
            // debug!("Intent::replace attrs = \"{}\"", &self.attrs);
670
5.63k
            for cap in 
ATTR_NAME_VALUE5.58k
.captures_iter(&self.attrs) {
671
5.63k
                let matched_value = if cap["value"].is_empty() {
&cap["dqvalue"]0
} else {&cap["value"]};
672
5.63k
                let value_as_xpath = MyXPath::new(matched_value.to_string()).context("attr value inside 'intent'")
?0
;
673
5.63k
                let value = value_as_xpath.evaluate(rules_with_context.get_context(), result)
674
5.63k
                        .context("attr xpath evaluation value inside 'intent'")
?0
;
675
5.63k
                let mut value = value.into_string();
676
5.63k
                if &cap["name"] == INTENT_PROPERTY {
677
5.23k
                    value = simplify_fixity_properties(&value);
678
5.23k
                
}397
679
                // debug!("Intent::replace match\n  name={}\n  value={}\n  xpath value={}", &cap["name"], &cap["value"], &value);
680
5.63k
                if &cap["name"] == INTENT_PROPERTY && 
value == ":"5.23k
{
681
1.81k
                    // should have been an empty string, so remove the attribute
682
1.81k
                    result.remove_attribute(INTENT_PROPERTY);
683
3.82k
                } else {
684
3.82k
                    result.set_attribute_value(&cap["name"], &value);
685
3.82k
                }
686
            };
687
39.9k
        }
688
689
        // debug!("Result from 'intent:'\n{}", mml_to_string(result));
690
45.5k
        return T::from_element(result);
691
692
693
        /// "lift" up the children any "TEMP_NAME" child -- could short circuit when only one child
694
45.5k
        fn lift_children(result: Element) -> Element {
695
            // debug!("lift_children:\n{}", mml_to_string(result));
696
            // most likely there will be the same number of new children as result has, but there could be more
697
45.5k
            let mut new_children = Vec::with_capacity(2*result.children().len());
698
69.6k
            for child_of_element in 
result45.5k
.
children45.5k
() {
699
69.6k
                match child_of_element {
700
69.6k
                    ChildOfElement::Element(child) => {
701
69.6k
                        if name(child) == "TEMP_NAME" {
702
34.1k
                            new_children.append(&mut child.children());  // almost always just one
703
35.5k
                        } else {
704
35.5k
                            new_children.push(child_of_element);
705
35.5k
                        }
706
                    },
707
7
                    _ => new_children.push(child_of_element),      // text()
708
                }
709
            }
710
45.5k
            result.replace_children(new_children);
711
45.5k
            return result;
712
45.5k
        }
713
45.5k
    }    
714
}
715
716
// structure used when "with:" is encountered in a rule
717
// the variables are placed on (and later) popped of a variable stack before/after the replacement
718
#[derive(Debug, Clone)]
719
struct With {
720
    variables: VariableDefinitions,     // variables and values
721
    replacements: ReplacementArray,     // what to do with these vars
722
}
723
724
impl fmt::Display for With {
725
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
726
0
        return write!(f, "with:\n      variables: {}\n      replace: {}", &self.variables, &self.replacements);
727
0
    }
728
}
729
730
impl With {
731
77.5k
    fn build(vars_replacements: &Yaml) -> Result<Box<With>> {
732
        // 'with:' -- 'variables': xxx 'replace': xxx
733
77.5k
        if vars_replacements.as_hash().is_none() {
734
0
            bail!("Array found for contents of 'with' -- should be dictionary with keys 'variables' and 'replace'")
735
77.5k
        }
736
77.5k
        let var_defs = &vars_replacements["variables"];
737
77.5k
        if var_defs.is_badvalue() { 
738
0
            bail!("Missing 'variables' as part of 'with'.\n    \
739
                  Suggestion: add 'variables:' or if present, indent so it is contained in 'with'");
740
77.5k
        }
741
77.5k
        let replace = &vars_replacements["replace"];
742
77.5k
        if replace.is_badvalue() { 
743
0
            bail!("Missing 'replace' as part of 'with'.\n    \
744
                  Suggestion: add 'replace:' or if present, indent so it is contained in 'with'");
745
77.5k
        }
746
77.5k
        return Ok( Box::new( With {
747
77.5k
            variables: VariableDefinitions::build(var_defs).context("'variables'")
?0
,
748
77.5k
            replacements: ReplacementArray::build(replace).context("'replace:'")
?0
,
749
        } ) );
750
77.5k
    }
751
752
7.28k
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
753
7.28k
        rules_with_context.context_stack.push(self.variables.clone(), mathml)
?0
;
754
7.28k
        let result = self.replacements.replace(rules_with_context, mathml)
755
7.28k
                    .context("replacing inside 'with'")
?0
;
756
7.28k
        rules_with_context.context_stack.pop();
757
7.28k
        return Ok( result );
758
7.28k
    }    
759
}
760
761
// structure used when "set_variables:" is encountered in a rule
762
// the variables are global and are placed in the base context and never popped off
763
#[derive(Debug, Clone)]
764
struct SetVariables {
765
    variables: VariableDefinitions,     // variables and values
766
}
767
768
impl fmt::Display for SetVariables {
769
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
770
0
        return write!(f, "SetVariables: variables {}", &self.variables);
771
0
    }
772
}
773
774
impl SetVariables {
775
30.3k
    fn build(vars: &Yaml) -> Result<Box<SetVariables>> {
776
        // 'set_variables:' -- 'variables': xxx (array)
777
30.3k
        if vars.as_vec().is_none() {
778
0
            bail!("'set_variables' -- should be an array of variable name, xpath value");
779
30.3k
        }
780
30.3k
        return Ok( Box::new( SetVariables {
781
30.3k
            variables: VariableDefinitions::build(vars).context("'set_variables'")
?0
782
        } ) );
783
30.3k
    }
784
        
785
3.78k
    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
786
3.78k
        rules_with_context.context_stack.set_globals(self.variables.clone(), mathml)
?0
;
787
3.78k
        return T::from_string( "".to_string(), rules_with_context.doc );
788
3.78k
    }    
789
}
790
791
792
/// Allow speech of an expression in the middle of a rule (used by "WhereAmI" for navigation)
793
#[derive(Debug, Clone)]
794
struct TranslateExpression {
795
    xpath: MyXPath,     // variables and values
796
}
797
798
impl fmt::Display for TranslateExpression {
799
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
800
0
        return write!(f, "speak: {}", &self.xpath);
801
0
    }
802
}
803
impl TranslateExpression {
804
102
    fn build(vars: &Yaml) -> Result<TranslateExpression> {
805
        // 'translate:' -- xpath (should evaluate to an id)
806
102
        return Ok( TranslateExpression { xpath: MyXPath::build(vars).context("'translate'")
?0
} );
807
102
    }
808
        
809
2
    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
810
2
        if self.xpath.rc.string.starts_with('@') {
811
2
            let xpath_value = self.xpath.evaluate(rules_with_context.get_context(), mathml)
?0
;
812
2
            let id = match xpath_value {
813
0
                Value::String(s) => Some(s),
814
2
                Value::Nodeset(nodes) => {
815
2
                    if nodes.size() == 1 {
816
2
                        nodes.document_order_first().unwrap().attribute().map(|attr| attr.value().to_string())
817
                    } else {
818
0
                        None
819
                    }
820
                },
821
0
                _ => None,
822
            };
823
2
            match id {
824
0
                None => bail!("'translate' value '{}' is not a string or an attribute value (correct by using '@id'??):\n", self.xpath),
825
2
                Some(id) => {
826
2
                    let speech = speak_mathml(mathml, &id, 0)
?0
;
827
2
                    return T::from_string(speech, rules_with_context.doc);
828
                }
829
            }
830
        } else {
831
0
            return T::from_string(
832
0
                self.xpath.replace(rules_with_context, mathml).context("'translate'")?,
833
0
                rules_with_context.doc
834
            );
835
        }  
836
2
    } 
837
}
838
839
840
/// An array of rule `Replacement`s (text, xpath, tts commands, etc)
841
#[derive(Debug, Clone)]
842
pub struct ReplacementArray {
843
    replacements: Vec<Replacement>
844
}
845
846
impl fmt::Display for ReplacementArray {
847
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
848
1
        return write!(f, "{}", self.pretty_print_replacements());
849
1
    }
850
}
851
852
impl ReplacementArray {
853
    /// Return an empty `ReplacementArray`
854
1.98M
    pub fn build_empty() -> ReplacementArray {
855
1.98M
        return ReplacementArray {
856
1.98M
            replacements: vec![]
857
1.98M
        }
858
1.98M
    }
859
860
    /// Convert a Yaml input into a [`ReplacementArray`].
861
    /// Any errors are passed back out.
862
9.24M
    pub fn build(replacements: &Yaml) -> Result<ReplacementArray> {
863
        // replacements is either a single replacement or an array of replacements
864
9.24M
        let result= if replacements.is_array() {
865
9.22M
            let replacements = replacements.as_vec().unwrap();
866
9.22M
            replacements
867
9.22M
                .iter()
868
9.22M
                .enumerate()    // useful for errors
869
13.4M
                .
map9.22M
(|(i, r)| Replacement::build(r)
870
13.4M
                            .with_context(|| 
format!0
("replacement #{} of {}",
i+10
,
replacements0
.
len0
())))
871
9.22M
                .collect::<Result<Vec<Replacement>>>()
?0
872
        } else {
873
21.2k
            vec![ Replacement::build(replacements)
?0
]
874
        };
875
876
9.24M
        return Ok( ReplacementArray{ replacements: result } );
877
9.24M
    }
878
879
    /// Do all the replacements in `mathml` using `rules`.
880
275k
    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
881
275k
        return T::replace(self, rules_with_context, mathml);
882
275k
    }
883
884
142k
    pub fn replace_array_string<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
885
        // loop over the replacements and build up a vector of strings, excluding empty ones.
886
        // * eliminate any redundance
887
        // * add/replace auto-pauses
888
        // * join the remaining vector together
889
142k
        let mut replacement_strings = Vec::with_capacity(self.replacements.len());   // probably conservative guess
890
271k
        for replacement in 
self.replacements.iter()142k
{
891
271k
            let string: String = rules_with_context.replace(replacement, mathml)
?0
;
892
271k
            if !string.is_empty() {
893
200k
                replacement_strings.push(string);
894
200k
            
}70.3k
895
        }
896
897
142k
        if replacement_strings.is_empty() {
898
12.9k
            return Ok( "".to_string() );
899
129k
        }
900
        // delete an optional text that is repetitive
901
        // we do this by looking for the optional text marker, and if present, check for repetition at end of previous string
902
        // if repetitive, we delete the optional string
903
        // if not, we leave the markers because the repetition might happen several "levels" up
904
        // this could also be done in a final cleanup of the entire string (where we remove any markers),
905
        //   but the match is harder (rust regex lacks look behind pattern match) and it is less efficient
906
        // Note: we skip the first string since it can't be repetitive of something at this level
907
129k
        for 
i45.4k
in 1..replacement_strings.len()-1 {
908
45.4k
            if let Some(
bytes13
) = is_repetitive(&replacement_strings[i-1], &replacement_strings[i]) {
909
13
                replacement_strings[i] = bytes.to_string();
910
45.4k
            } 
911
        }
912
                        
913
200k
        for i in 
0..replacement_strings.len()129k
{
914
200k
            if replacement_strings[i].contains(PAUSE_AUTO_STR) {
915
19.5k
                let before = if i == 0 {
""194
} else {
&replacement_strings[i-1]19.3k
};
916
19.5k
                let after = if i+1 == replacement_strings.len() {
""230
} else {
&replacement_strings[i+1]19.3k
};
917
19.5k
                replacement_strings[i] = replacement_strings[i].replace(
918
19.5k
                    PAUSE_AUTO_STR,
919
19.5k
                    &rules_with_context.speech_rules.pref_manager.borrow().get_tts().compute_auto_pause(&rules_with_context.speech_rules.pref_manager.borrow(), before, after));
920
181k
            }
921
        }
922
923
        // join the strings together with spaces in between
924
        // concatenation (removal of spaces) is saved for the top level because they otherwise are stripped at the wrong sometimes
925
129k
        return Ok( replacement_strings.join(" ") );
926
927
        /// delete an optional text (in 'next') that is repetitive at the end of 'prev'
928
        /// we do this by looking for the optional text marker, and if present, check for repetition at end of previous string
929
        /// if repetitive, we delete the optional string
930
45.4k
        fn is_repetitive<'a>(prev: &str, next: &'a str) -> Option<&'a str> {
931
            // OPTIONAL_INDICATOR optionally surrounds the end of 'prev'(ignoring trailing whitespace)
932
            // OPTIONAL_INDICATOR surrounds the start of 'next'
933
            // minor optimization -- lots of short strings and the OPTIONAL_INDICATOR takes a few bytes, so skip the check for those strings
934
45.4k
            if next.len() <=  2 * OPTIONAL_INDICATOR_LEN {
935
14.2k
                return None;
936
31.2k
            }
937
938
            // should be exactly one match -- ignore more than one for now
939
31.2k
            let 
i_start36
= next.find(OPTIONAL_INDICATOR)
?31.2k
;
940
36
            let start_repeat_word_in_next = &next[i_start + OPTIONAL_INDICATOR_LEN..];
941
36
            let i_end = start_repeat_word_in_next.find(OPTIONAL_INDICATOR)
942
36
                .unwrap_or_else(|| 
panic!0
("Internal error: missing end optional char -- text handling is corrupted!"));
943
36
            let repeat_word = &start_repeat_word_in_next[..i_end];
944
            // debug!("check if '{}' is repetitive, end_index={}", repeat_word, i_end);
945
            // debug!("   prev: '{}', next '{}'", prev, next);
946
947
36
            let prev_trimmed = prev.trim_end();
948
36
            let ends_with_word = prev_trimmed.len() > repeat_word.len() && 
prev_trimmed35
.
ends_with35
(
repeat_word35
);
949
36
            let ends_with_wrapped_word =
950
36
                prev_trimmed
951
36
                    .strip_suffix(OPTIONAL_INDICATOR)
952
36
                    .and_then(|s| 
s0
.
strip_suffix0
(
repeat_word0
))
953
36
                    .and_then(|s| 
s0
.
strip_suffix0
(OPTIONAL_INDICATOR))
954
36
                    .is_some();
955
36
            if ends_with_word || 
ends_with_wrapped_word23
{
956
                // debug!("  is repetitive");
957
13
                Some(start_repeat_word_in_next[i_end + OPTIONAL_INDICATOR_LEN..].trim_start())  // remove repeat word and OPTIONAL_INDICATOR
958
            } else {
959
23
                None
960
            }
961
45.4k
        }
962
142k
    }
963
964
132k
    pub fn replace_array_tree<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
965
        // shortcut for common case (don't build a new tree node)
966
132k
        if self.replacements.len() == 1 {
967
129k
            return rules_with_context.replace::<Element<'m>>(&self.replacements[0], mathml);
968
3.20k
        }
969
970
3.20k
        let new_element = create_mathml_element(&rules_with_context.doc, "Unknown");  // Hopefully set later (in Intent::Replace())
971
3.20k
        let mut new_children = Vec::with_capacity(self.replacements.len());
972
6.12k
        for child in 
self.replacements.iter()3.20k
{
973
6.12k
            let child = rules_with_context.replace::<Element<'m>>(child, mathml)
?0
;
974
6.12k
            new_children.push(ChildOfElement::Element(child));
975
        };
976
3.20k
        new_element.append_children(new_children);
977
3.20k
        return Ok(new_element);
978
132k
    }
979
980
981
    /// Return true if there are no replacements.
982
29.8k
    pub fn is_empty(&self) -> bool {
983
29.8k
        return self.replacements.is_empty();
984
29.8k
    }
985
    
986
10
    fn pretty_print_replacements(&self) -> String {
987
10
        let mut group_string = String::with_capacity(128);
988
10
        if self.replacements.len() == 1 {
989
9
            group_string += &format!("[{}]", self.replacements[0]);
990
9
        } else {
991
1
            group_string += &self.replacements.iter()
992
1
                    .map(|replacement| 
format!0
("\n - {replacement}"))
993
1
                    .collect::<Vec<String>>()
994
1
                    .join("");
995
1
            group_string += "\n";
996
        }
997
10
        return group_string;
998
10
    }
999
}
1000
1001
1002
1003
// MyXPath is a wrapper around an 'XPath' that keeps around the original xpath expr (as a string) so it can be used in error reporting.
1004
// Because we want to be able to clone them and XPath doesn't support clone(), this is a wrapper around an internal MyXPath.
1005
// It supports the standard SpeechRule functionality of building and replacing.
1006
#[derive(Debug)]
1007
struct RCMyXPath {
1008
    xpath: XPath,
1009
    string: String,        // store for error reporting
1010
}
1011
1012
#[derive(Debug, Clone)]
1013
pub struct MyXPath {
1014
    rc: Rc<RCMyXPath>        // rather than putting Rc around both 'xpath' and 'string', just use one and indirect to internal RCMyXPath
1015
}
1016
1017
1018
impl fmt::Display for MyXPath {
1019
2.79k
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1020
2.79k
        return write!(f, "\"{}\"", self.rc.string);
1021
2.79k
    }
1022
}
1023
1024
// pub fn xpath_count() -> (usize, usize) {
1025
//     return (XPATH_CACHE.with( |cache| cache.borrow().len()), unsafe{XPATH_CACHE_HITS} );
1026
// }
1027
thread_local!{
1028
    static XPATH_CACHE: RefCell<HashMap<String, MyXPath>> = RefCell::new( HashMap::with_capacity(2047) );
1029
}
1030
// static mut XPATH_CACHE_HITS: usize = 0;
1031
1032
impl MyXPath {
1033
8.88M
    fn new(xpath: String) -> Result<MyXPath> {
1034
8.88M
        return XPATH_CACHE.with( |cache|  {
1035
8.88M
            let mut cache = cache.borrow_mut();
1036
            return Ok(
1037
8.88M
                match cache.get(&xpath) {
1038
5.82M
                    Some(compiled_xpath) => {
1039
                        // unsafe{ XPATH_CACHE_HITS += 1;};
1040
5.82M
                        compiled_xpath.clone()
1041
                    },
1042
                    None => {
1043
3.06M
                        let new_xpath = MyXPath {
1044
3.06M
                            rc: Rc::new( RCMyXPath {
1045
3.06M
                                xpath: MyXPath::compile_xpath(&xpath)
?0
,
1046
3.06M
                                string: xpath.clone()
1047
                            })};
1048
3.06M
                        cache.insert(xpath.clone(), new_xpath.clone());
1049
3.06M
                        new_xpath
1050
                    },
1051
                }
1052
            )
1053
8.88M
        });
1054
8.88M
    }
1055
1056
8.83M
    pub fn build(xpath: &Yaml) -> Result<MyXPath> {
1057
8.83M
        let xpath = match xpath {
1058
8.64M
            Yaml::String(s) => s.to_string(),
1059
0
            Yaml::Integer(i) => i.to_string(),
1060
0
            Yaml::Real(s) => s.to_string(),
1061
0
            Yaml::Boolean(s) => s.to_string(),
1062
193k
            Yaml::Array(v) =>
1063
                // array of strings -- concatenate them together
1064
193k
                v.iter()
1065
193k
                    .map(as_str_checked)
1066
193k
                    .collect::<Result<Vec<&str>>>()
?0
1067
193k
                    .join(" "),
1068
0
            _ => bail!("Bad value when trying to create an xpath: {}", yaml_to_string(xpath, 1)),
1069
        };
1070
8.83M
        return MyXPath::new(xpath);
1071
8.83M
    }
1072
1073
3.06M
    fn compile_xpath(xpath: &str) -> Result<XPath> {
1074
3.06M
        let factory = Factory::new();
1075
3.06M
        let xpath_with_debug_info = MyXPath::add_debug_string_arg(xpath)
?0
;
1076
3.06M
        let compiled_xpath = factory.build(&xpath_with_debug_info)
1077
3.06M
                        .with_context(|| 
format!0
(
1078
                            "Could not compile XPath for pattern:\n{}{}",
1079
0
                            &xpath, more_details(xpath)))?;
1080
3.06M
        return match compiled_xpath {
1081
3.06M
            Some(xpath) => Ok(xpath),
1082
0
            None => bail!("Problem compiling Xpath for pattern:\n{}{}",
1083
0
                            &xpath, more_details(xpath)),
1084
        };
1085
1086
        
1087
0
        fn more_details(xpath: &str) -> String {
1088
            // try to give a better error message by counting [], (), 's, and "s
1089
0
            if xpath.is_empty() {
1090
0
                return "xpath is empty string".to_string();
1091
0
            }
1092
0
            let as_bytes = xpath.trim().as_bytes();
1093
0
            if as_bytes[0] == b'\'' && as_bytes[as_bytes.len()-1] != b'\'' {
1094
0
                return "\nmissing \"'\"".to_string();
1095
0
            }
1096
0
            if (as_bytes[0] == b'"' && as_bytes[as_bytes.len()-1] != b'"') ||
1097
0
               (as_bytes[0] != b'"' && as_bytes[as_bytes.len()-1] == b'"'){
1098
0
                return "\nmissing '\"'".to_string();
1099
0
            }
1100
1101
0
            let mut i_bytes = 0;      // keep track of # of bytes into string for error reporting
1102
0
            let mut paren_count = 0;    // counter to make sure they are balanced
1103
0
            let mut i_paren = 0;      // position of the outermost open paren
1104
0
            let mut bracket_count = 0;
1105
0
            let mut i_bracket = 0;
1106
0
            for ch in xpath.chars() {
1107
0
                if ch == '(' {
1108
0
                    if paren_count == 0 {
1109
0
                        i_paren = i_bytes;
1110
0
                    }
1111
0
                    paren_count += 1;
1112
0
                } else if ch == '[' {
1113
0
                    if bracket_count == 0 {
1114
0
                        i_bracket = i_bytes;
1115
0
                    }
1116
0
                    bracket_count += 1;
1117
0
                } else if ch == ')' {
1118
0
                    if paren_count == 0 {
1119
0
                        return format!("\nExtra ')' found after '{}'", &xpath[i_paren..i_bytes]);
1120
0
                    }
1121
0
                    paren_count -= 1;
1122
0
                    if paren_count == 0 && bracket_count > 0 && i_bracket > i_paren {
1123
0
                        return format!("\nUnclosed brackets found at '{}'", &xpath[i_paren..i_bytes]);
1124
0
                    }
1125
0
                } else if ch == ']' {
1126
0
                    if bracket_count == 0 {
1127
0
                        return format!("\nExtra ']' found after '{}'", &xpath[i_bracket..i_bytes]);
1128
0
                    }
1129
0
                    bracket_count -= 1;
1130
0
                    if bracket_count == 0 && paren_count > 0 && i_paren > i_bracket {
1131
0
                        return format!("\nUnclosed parens found at '{}'", &xpath[i_bracket..i_bytes]);
1132
0
                    }
1133
0
                }
1134
0
                i_bytes += ch.len_utf8();
1135
            }
1136
0
            return "".to_string();
1137
0
        }
1138
3.06M
    }
1139
1140
    /// Convert DEBUG(...) input to the internal function which is DEBUG(arg, arg_as_string)
1141
3.06M
    fn add_debug_string_arg(xpath: &str) -> Result<String> {
1142
        // do a quick check to see if "DEBUG" is in the string -- this is the common case
1143
3.06M
        let debug_start = xpath.find("DEBUG(");
1144
3.06M
        if debug_start.is_none() {
1145
3.06M
            return Ok( xpath.to_string() );
1146
1.56k
        }
1147
1148
1.56k
        let debug_start = debug_start.unwrap();
1149
1.56k
        let mut before_paren = xpath[..debug_start+5].to_string();   // includes "DEBUG"
1150
1.56k
        let chars = xpath[debug_start+5..].chars().collect::<Vec<char>>();     // begins at '('
1151
1.56k
        before_paren.push_str(&chars_add_debug_string_arg(&chars).with_context(|| 
format!0
("In xpath='{xpath}'"))
?0
);
1152
        // debug!("add_debug_string_arg: {}", before_paren);
1153
1.56k
        return Ok(before_paren);
1154
1155
1.56k
        fn chars_add_debug_string_arg(chars: &[char]) -> Result<String>  {
1156
            // Find all the DEBUG(...) commands in 'xpath' and adds a string argument.
1157
            // The DEBUG function that is used internally takes two arguments, the second one being a string version of the DEBUG arg.
1158
            //   Being a string, any quotes need to be escaped, and DEBUGs inside of DEBUGs need more escaping.
1159
            //   This is done via recursive calls to this function.
1160
1.56k
            assert_eq!(chars[0], '(', "{} does not start with ')'", 
chars0
.
iter0
().
collect0
::<String>());
1161
1.56k
            let mut count = 1;  // open/close count
1162
1.56k
            let mut i = 1;
1163
1.56k
            let mut inside_quote = false;
1164
50.8k
            while i < chars.len() {
1165
50.8k
                let ch = chars[i];
1166
805
                match ch {
1167
                    '\\' => {
1168
0
                        if i+1 == chars.len() {
1169
0
                            bail!("Syntax error in DEBUG: last char is escape char\nDebug string: '{}'", chars.iter().collect::<String>());
1170
0
                        }
1171
0
                        i += 1;
1172
                    },
1173
2.21k
                    '\'' => inside_quote = !inside_quote,
1174
804
                    '(' if !inside_quote => {
1175
804
                        count += 1;
1176
804
                        // FIX: it would be more efficient to spot "DEBUG" preceding this and recurse rather than matching the whole string and recursing
1177
804
                    },
1178
1
                    '(' => (),
1179
2.36k
                    ')' if !inside_quote => {
1180
2.36k
                        count -= 1;
1181
2.36k
                        if count == 0 {
1182
1.56k
                            let arg = &chars[1..i].iter().collect::<String>();
1183
1.56k
                            let escaped_arg = arg.replace('"', "\\\"");
1184
                            // DEBUG(...) may be inside 'arg' -- recurse
1185
1.56k
                            let processed_arg = MyXPath::add_debug_string_arg(arg)
?0
;
1186
1187
                            // DEBUG(...) may be in the remainder of the string -- recurse
1188
1.56k
                            let processed_rest = MyXPath::add_debug_string_arg(&chars[i+1..].iter().collect::<String>())
?0
;
1189
1.56k
                            return Ok( format!("({processed_arg}, \"{escaped_arg}\"){processed_rest}") );
1190
804
                        }
1191
                    },
1192
0
                    ')' => (),
1193
45.4k
                    _ => (),
1194
                }
1195
49.2k
                i += 1;
1196
            }
1197
0
            bail!("Syntax error in DEBUG: didn't find matching closing paren\nDEBUG{}", chars.iter().collect::<String>());
1198
1.56k
        }
1199
3.06M
    }
1200
1201
156k
    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1202
        // return true if there is no condition or if the condition evaluates to true
1203
        return Ok(
1204
156k
            match self.evaluate(context, mathml)
?0
{
1205
115k
                Value::Boolean(b) => b,
1206
40.6k
                Value::Nodeset(nodes) => nodes.size() > 0,
1207
0
                _                      => false,      
1208
            }
1209
        )
1210
156k
    }
1211
1212
153k
    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1213
153k
        if self.rc.string == "process-intent(.)" {
1214
2.47k
            return 
T::from_element2.46k
( infer_intent(rules_with_context, mathml)
?9
);
1215
150k
        }
1216
        
1217
150k
        let result = self.evaluate(&rules_with_context.context_stack.base, mathml)
1218
150k
                .with_context(|| 
format!0
("in '{}' replacing after pattern match",
&self.rc.string0
) )
?0
;
1219
150k
        let 
string28.9k
= match result {
1220
121k
                Value::Nodeset(nodes) => {
1221
121k
                    if nodes.size() == 0 {
1222
0
                        bail!("During replacement, no matching element found");
1223
121k
                    }
1224
121k
                    return rules_with_context.replace_nodes(nodes.document_order(), mathml);
1225
                },
1226
25.1k
                Value::String(s) => s,
1227
3.80k
                Value::Number(num) => num.to_string(),
1228
0
                Value::Boolean(b) => b.to_string(),          // FIX: is this right???
1229
        };
1230
        // Hack!: this test for input that starts with a '$' (defined variable), avoids a double evaluate;
1231
        // We don't need NO_EVAL_QUOTE_CHAR here, but the more general solution of a quoted execute (- xq:) would avoid this hack
1232
28.9k
        let result = if self.rc.string.starts_with('$') {
string5.63k
} else {
rules_with_context23.3k
.
replace_chars23.3k
(
&string23.3k
,
mathml23.3k
)
?0
};
1233
28.9k
        return T::from_string(result, rules_with_context.doc );
1234
153k
    }
1235
    
1236
1.29M
    pub fn evaluate<'c>(&self, context: &sxd_xpath::Context<'c>, mathml: Element<'c>) -> Result<Value<'c>> {
1237
        // debug!("evaluate: {}", self);
1238
1.29M
        let result = self.rc.xpath.evaluate(context, mathml);
1239
1.29M
        return match result {
1240
1.29M
            Ok(val) => Ok( val ),
1241
0
            Err(e) => {
1242
                // debug!("MyXPath::trying to evaluate:\n  '{}'\n caused the error\n'{}'", self, e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", ""));
1243
0
                bail!( "{}\n\n",
1244
                     // remove confusing parts of error message from xpath
1245
0
                    e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", "") );
1246
            }
1247
        };
1248
1.29M
    }
1249
1250
0
    pub fn test_input<F>(self, f: F) -> bool where F: Fn(&str) -> bool {
1251
0
        return f(self.rc.string.as_ref());
1252
0
    }
1253
}
1254
1255
// 'SpeechPattern' holds a single pattern.
1256
// Some info is not needed beyond converting the Yaml to the SpeechPattern, but is useful for error reporting.
1257
// The two main parts are the pattern to be matched and the replacements to do if there is a match.
1258
// Any variables/prefs that are defined/set are also stored.
1259
#[derive(Debug)]
1260
struct SpeechPattern {
1261
    pattern_name: String,
1262
    tag_name: String,
1263
    file_name: String,
1264
    pattern: MyXPath,                     // the xpath expr to attempt to match
1265
    match_uses_var_defs: bool,            // include var_defs in context for matching
1266
    var_defs: VariableDefinitions,        // any variable definitions [can be and probably is an empty vector most of the time]
1267
    replacements: ReplacementArray,       // the replacements in case there is a match
1268
}
1269
1270
impl fmt::Display for SpeechPattern {
1271
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1272
0
        return write!(f, "[name: {}, tag: {},\n  variables: {:?}, pattern: {},\n  replacement: {}]",
1273
                self.pattern_name, self.tag_name, self.var_defs, self.pattern,
1274
0
                self.replacements.pretty_print_replacements());
1275
0
    }
1276
}
1277
1278
impl SpeechPattern  {
1279
894k
    fn build(dict: &Yaml, file: &Path, rules: &mut SpeechRules) -> Result<Option<Vec<PathBuf>>> {
1280
        // Rule::SpeechPattern
1281
        //   build { "pattern_name", "tag_name", "pattern", "replacement" }
1282
        // or recurse via include: file_name
1283
1284
        // debug!("\nbuild_speech_pattern: dict:\n{}", yaml_to_string(dict, 0));
1285
894k
        if let Some(
include_file_name30.2k
) = find_str(dict, "include") {
1286
30.2k
            let do_include_fn = |new_file: &Path| {
1287
30.2k
                rules.read_patterns(new_file)
1288
30.2k
            };
1289
1290
30.2k
            return Ok( Some(process_include(file, include_file_name, do_include_fn)
?0
) );
1291
864k
        }
1292
1293
864k
        let pattern_name = find_str(dict, "name");
1294
1295
        // tag_named can be either a string (most common) or an array of strings
1296
864k
        let mut tag_names: Vec<&str> = Vec::new();
1297
864k
        match find_str(dict, "tag") {
1298
740k
            Some(str) => tag_names.push(str),
1299
            None => {
1300
                // check for array
1301
124k
                let tag_array  = &dict["tag"];
1302
124k
                tag_names = vec![];
1303
124k
                if tag_array.is_array() {
1304
263k
                    for (i, name) in 
tag_array124k
.as_vec().unwrap().iter().
enumerate124k
() {
1305
263k
                        match as_str_checked(name) {
1306
0
                            Err(e) => return Err(
1307
0
                                e.context(
1308
0
                                    format!("tag name '{}' is not a string in:\n{}",
1309
0
                                        &yaml_to_string(&tag_array.as_vec().unwrap()[i], 0),
1310
0
                                        &yaml_to_string(dict, 1)))
1311
0
                            ),
1312
263k
                            Ok(str) => tag_names.push(str),
1313
                        };
1314
                    }
1315
                } else {
1316
0
                    bail!("Errors trying to find 'tag' in:\n{}", &yaml_to_string(dict, 1));
1317
                }
1318
            }
1319
        }
1320
1321
864k
        if pattern_name.is_none() {
1322
0
            if dict.is_null() {
1323
0
                bail!("Error trying to find 'name': empty value (two consecutive '-'s?");
1324
            } else {
1325
0
                bail!("Errors trying to find 'name' in:\n{}", &yaml_to_string(dict, 1));
1326
            };
1327
864k
        };
1328
864k
        let pattern_name = pattern_name.unwrap().to_string();
1329
1330
        // FIX: add check to make sure tag_name is a valid MathML tag name
1331
864k
        if dict["match"].is_badvalue() {
1332
0
            bail!("Did not find 'match' in\n{}", yaml_to_string(dict, 1));
1333
864k
        }
1334
864k
        if dict["replace"].is_badvalue() {
1335
0
            bail!("Did not find 'replace' in\n{}", yaml_to_string(dict, 1));
1336
864k
        }
1337
    
1338
        // xpath's can't be cloned, so we need to do a 'build_xxx' for each tag name
1339
1.00M
        for tag_name in 
tag_names864k
{
1340
1.00M
            let tag_name = tag_name.to_string();
1341
1.00M
            let pattern_xpath = MyXPath::build(&dict["match"])
1342
1.00M
                    .with_context(|| 
{0
1343
0
                        format!("value for 'match' in rule ({}: {}):\n{}",
1344
0
                                tag_name, pattern_name, yaml_to_string(dict, 1))
1345
0
                    })?;
1346
1.00M
            let speech_pattern =
1347
1.00M
                Box::new( SpeechPattern{
1348
1.00M
                    pattern_name: pattern_name.clone(),
1349
1.00M
                    tag_name: tag_name.clone(),
1350
1.00M
                    file_name: file.to_str().unwrap().to_string(),
1351
1.00M
                    match_uses_var_defs: dict["variables"].is_array() && 
pattern_xpath.rc.string.contains('$')169k
, // FIX: should look at var_defs for actual name
1352
1.00M
                    pattern: pattern_xpath,
1353
1.00M
                    var_defs: VariableDefinitions::build(&dict["variables"])
1354
1.00M
                        .with_context(|| 
{0
1355
0
                            format!("value for 'variables' in rule ({}: {}):\n{}",
1356
0
                                    tag_name, pattern_name, yaml_to_string(dict, 1))
1357
0
                        })?,
1358
1.00M
                    replacements: ReplacementArray::build(&dict["replace"])
1359
1.00M
                        .with_context(|| 
{0
1360
0
                            format!("value for 'replace' in rule ({}: {}). Replacements:\n{}",
1361
0
                                    tag_name, pattern_name, yaml_to_string(&dict["replace"], 1))
1362
0
                    })?
1363
                } );
1364
            // get the array of rules for the tag name
1365
1.00M
            let rule_value = rules.rules.entry(tag_name).or_default();
1366
1367
            // if the name exists, replace it. Otherwise add the new rule
1368
2.67M
            match 
rule_value.iter().enumerate()1.00M
.
find1.00M
(|&pattern| pattern.1.pattern_name == speech_pattern.pattern_name) {
1369
1.00M
                None => rule_value.push(speech_pattern),
1370
9
                Some((i, _old_pattern)) => {
1371
9
                    let old_rule = &rule_value[i];
1372
9
                    info!("\n\n***WARNING***: replacing {}/'{}' in {} with rule from {}\n",
1373
                            old_rule.tag_name, old_rule.pattern_name, old_rule.file_name, speech_pattern.file_name);
1374
9
                    rule_value[i] = speech_pattern;
1375
                },
1376
            }
1377
        }
1378
1379
864k
        return Ok(None);
1380
894k
    }
1381
1382
870k
    fn is_match(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1383
870k
        if self.tag_name != mathml.name().local_part() && 
self.tag_name != "*"224k
&&
self.tag_name != "!*"164k
{
1384
0
            return Ok( false );
1385
870k
        }
1386
1387
        // debug!("\nis_match: pattern='{}'", self.pattern_name);
1388
        // debug!("    pattern_expr {:?}", self.pattern);
1389
        // debug!("is_match: mathml is\n{}", mml_to_string(mathml));
1390
        return Ok(
1391
870k
            match self.pattern.evaluate(context, mathml)
?0
{
1392
652k
                Value::Boolean(b)       => b,
1393
217k
                Value::Nodeset(nodes) => nodes.size() > 0,
1394
0
                _                             => false,
1395
            }
1396
        );
1397
870k
    }
1398
}
1399
1400
1401
// 'Test' holds information used if the replacement is a "test:" clause.
1402
// The condition is an xpath expr and the "else:" part is optional.
1403
1404
#[derive(Debug, Clone)]
1405
struct TestArray {
1406
    tests: Vec<Test>
1407
}
1408
1409
impl fmt::Display for TestArray {
1410
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1411
0
        for test in &self.tests {
1412
0
            writeln!(f, "{test}")?;
1413
        }
1414
0
        return Ok( () );
1415
0
    }
1416
}
1417
1418
impl TestArray {
1419
3.29M
    fn build(test: &Yaml) -> Result<TestArray> {
1420
        // 'test:' for convenience takes either a dictionary with keys if/else_if/then/then_test/else/else_test or
1421
        //      or an array of those values (there should be at most one else/else_test)
1422
1423
        // if 'test' is a dictionary ('Hash'), we convert it to an array with one entry and proceed
1424
3.29M
        let tests = if test.as_hash().is_some() {
1425
3.01M
            vec![test]
1426
287k
        } else if let Some(vec) = test.as_vec() {
1427
287k
            vec.iter().collect()
1428
        } else {
1429
0
            bail!("Value for 'test:' is neither a dictionary or an array.")
1430
        };
1431
1432
        // each entry in 'tests' should be a dictionary with keys if/then/then_test/else/else_test
1433
        // a valid entry is one of:
1434
        //   if:/else_if:, then:/then_test: and optional else:/else_test:
1435
        //   else:/else_test: -- if this case, it should be the last entry in 'tests'
1436
        // 'if:' should only be the first entry in the array; 'else_if' should never be the first entry. Otherwise, they are the same
1437
3.29M
        let mut test_array = vec![];
1438
3.73M
        for test in 
tests3.29M
{
1439
3.73M
            if test.as_hash().is_none() {
1440
0
                bail!("Value for array entry in 'test:' must be a dictionary/contain keys");
1441
3.73M
            }
1442
3.73M
            let if_part = &test[if test_array.is_empty() {
"if"3.29M
} else {
"else_if"437k
}];
1443
3.73M
            if !if_part.is_badvalue() {
1444
                // first case: if:, then:, optional else:
1445
3.69M
                let condition = Some( MyXPath::build(if_part)
?0
);
1446
3.69M
                let then_part = TestOrReplacements::build(test, "then", "then_test", true)
?0
;
1447
3.69M
                let else_part = TestOrReplacements::build(test, "else", "else_test", false)
?0
;
1448
3.69M
                let n_keys = if else_part.is_none() {
22.45M
} else {
31.23M
};
1449
3.69M
                if test.as_hash().unwrap().len() > n_keys {
1450
0
                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found in the 'then' clause of 'test'");
1451
3.69M
                };
1452
3.69M
                test_array.push(
1453
3.69M
                    Test { condition, then_part, else_part }
1454
                );
1455
            } else {
1456
                // second case: should be else/else_test
1457
42.3k
                let else_part = TestOrReplacements::build(test, "else", "else_test", true)
?0
;
1458
42.3k
                if test.as_hash().unwrap().len() > 1 {
1459
0
                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found the 'else' clause of 'test'");
1460
42.3k
                };
1461
42.3k
                test_array.push(
1462
42.3k
                    Test { condition: None, then_part: None, else_part }
1463
                );
1464
                
1465
                // there shouldn't be any trailing tests
1466
42.3k
                if test_array.len() < test.as_hash().unwrap().len() {
1467
0
                    bail!("'else'/'else_test' key is not last key in 'test:'");
1468
42.3k
                }
1469
            }
1470
        };
1471
1472
3.29M
        if test_array.is_empty() {
1473
0
            bail!("No entries for 'test:'");
1474
3.29M
        }
1475
1476
3.29M
        return Ok( TestArray { tests: test_array } );
1477
3.29M
    }
1478
1479
121k
    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1480
156k
        for test in 
&self.tests121k
{
1481
156k
            if test.is_true(&rules_with_context.context_stack.base, mathml)
?0
{
1482
85.2k
                assert!(test.then_part.is_some());
1483
85.2k
                return test.then_part.as_ref().unwrap().replace(rules_with_context, mathml);
1484
71.1k
            } else if let Some(
else_part12.9k
) = test.else_part.as_ref() {
1485
12.9k
                return else_part.replace(rules_with_context, mathml);
1486
58.1k
            }
1487
        }
1488
23.4k
        return T::from_string("".to_string(), rules_with_context.doc);
1489
121k
    }
1490
}
1491
1492
#[derive(Debug, Clone)]
1493
// Used to hold then/then_test and also else/else_test -- only one of these can be present at a time
1494
enum TestOrReplacements {
1495
    Replacements(ReplacementArray),     // replacements to use when a test is true
1496
    Test(TestArray),                    // the array of if/then/else tests
1497
}
1498
1499
impl fmt::Display for TestOrReplacements {
1500
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1501
0
        if let TestOrReplacements::Test(_) = self {
1502
0
            write!(f, "  _test")?;
1503
0
        }
1504
0
        write!(f, ":")?;
1505
0
        return match self {
1506
0
            TestOrReplacements::Test(t) => write!(f, "{t}"),
1507
0
            TestOrReplacements::Replacements(r) => write!(f, "{r}"),
1508
        };
1509
0
    }
1510
}
1511
1512
impl TestOrReplacements {
1513
7.43M
    fn build(test: &Yaml, replace_key: &str, test_key: &str, key_required: bool) -> Result<Option<TestOrReplacements>> {
1514
7.43M
        let part = &test[replace_key];
1515
7.43M
        let test_part = &test[test_key];
1516
7.43M
        if !part.is_badvalue() && 
!test_part.is_badvalue()4.26M
{
1517
0
            bail!(format!("Only one of '{}' or '{}' is allowed as part of 'test'.\n{}\n    \
1518
                  Suggestion: delete one or adjust indentation",
1519
0
                    replace_key, test_key, yaml_to_string(test, 2)));
1520
7.43M
        }
1521
7.43M
        if part.is_badvalue() && 
test_part3.16M
.
is_badvalue3.16M
() {
1522
2.45M
            if key_required {
1523
0
                bail!(format!("Missing one of '{}'/'{}:' as part of 'test:'\n{}\n   \
1524
                    Suggestion: add the missing key or indent so it is contained in 'test'",
1525
0
                    replace_key, test_key, yaml_to_string(test, 2)))
1526
            } else {
1527
2.45M
                return Ok( None );
1528
            }
1529
4.97M
        }
1530
        // at this point, we have only one of the two options
1531
4.97M
        if test_part.is_badvalue() {
1532
4.26M
            return Ok( Some( TestOrReplacements::Replacements( ReplacementArray::build(part)
?0
) ) );
1533
        } else {
1534
712k
            return Ok( Some( TestOrReplacements::Test( TestArray::build(test_part)
?0
) ) );
1535
        }
1536
7.43M
    }
1537
1538
98.2k
    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1539
98.2k
        return match self {
1540
92.4k
            TestOrReplacements::Replacements(r) => r.replace(rules_with_context, mathml),
1541
5.74k
            TestOrReplacements::Test(t) => t.replace(rules_with_context, mathml),
1542
        }
1543
98.2k
    }
1544
}
1545
1546
#[derive(Debug, Clone)]
1547
struct Test {
1548
    condition: Option<MyXPath>,
1549
    then_part: Option<TestOrReplacements>,
1550
    else_part: Option<TestOrReplacements>,
1551
}
1552
impl fmt::Display for Test {
1553
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1554
0
        write!(f, "test: [ ")?;
1555
0
        if let Some(if_part) = &self.condition {
1556
0
            write!(f, " if: '{if_part}'")?;
1557
0
        }
1558
0
        if let Some(then_part) = &self.then_part {
1559
0
            write!(f, " then{then_part}")?;
1560
0
        }
1561
0
        if let Some(else_part) = &self.else_part {
1562
0
            write!(f, " else{else_part}")?;
1563
0
        }
1564
0
        return write!(f, "]");
1565
0
    }
1566
}
1567
1568
impl Test {
1569
156k
    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1570
156k
        return match self.condition.as_ref() {
1571
136
            None => Ok( false ),     // trivially false -- want to do else part
1572
156k
            Some(condition) => condition.is_true(context, mathml)
1573
156k
                                .context("Failure in conditional test"),
1574
        }
1575
156k
    }
1576
}
1577
1578
// Used for speech rules with "variables: ..."
1579
#[derive(Debug, Clone)]
1580
struct VariableDefinition {
1581
    name: String,     // name of variable
1582
    value: MyXPath,   // xpath value, typically a constant like "true" or "0", but could be "*/*[1]" to store some nodes   
1583
}
1584
1585
impl fmt::Display for VariableDefinition {
1586
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1587
0
        return write!(f, "[name: {}={}]", self.name, self.value);
1588
0
    }   
1589
}
1590
1591
// Used for speech rules with "variables: ..."
1592
#[derive(Debug)]
1593
struct VariableValue<'v> {
1594
    name: String,       // name of variable
1595
    value: Option<Value<'v>>,   // xpath value, typically a constant like "true" or "0", but could be "*/*[1]" to store some nodes   
1596
}
1597
1598
impl fmt::Display for VariableValue<'_> {
1599
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1600
0
        let value = match &self.value {
1601
0
            None => "unset".to_string(),
1602
0
            Some(val) => format!("{val:?}")
1603
        };
1604
0
        return write!(f, "[name: {}, value: {}]", self.name, value);
1605
0
    }   
1606
}
1607
1608
impl VariableDefinition {
1609
472k
    fn build(name_value_def: &Yaml) -> Result<VariableDefinition> {
1610
472k
        match name_value_def.as_hash() {
1611
472k
            Some(map) => {
1612
472k
                if map.len() != 1 {
1613
0
                    bail!("definition is not a key/value pair. Found {}",
1614
0
                            yaml_to_string(name_value_def, 1) );
1615
472k
                }
1616
472k
                let (name, value) = map.iter().next().unwrap();
1617
472k
                let name = as_str_checked( name)
1618
472k
                    .with_context(|| 
format!0
( "definition name is not a string: {}",
1619
472k
                            
yaml_to_string0
(
name0
, 1) ))
?0
.to_string();
1620
472k
                match value {
1621
472k
                    Yaml::Boolean(_) | Yaml::String(_)  | Yaml::Integer(_) | Yaml::Real(_) => (),
1622
0
                    _ => bail!("definition value is not a string, boolean, or number. Found {}",
1623
0
                            yaml_to_string(value, 1) )
1624
                };
1625
                return Ok(
1626
                    VariableDefinition{
1627
472k
                        name,
1628
472k
                        value: MyXPath::build(value)
?0
1629
                    }
1630
                );
1631
            },
1632
0
            None => bail!("definition is not a key/value pair. Found {}",
1633
0
                            yaml_to_string(name_value_def, 1) )
1634
        }
1635
472k
    }
1636
}
1637
1638
1639
#[derive(Debug, Clone)]
1640
struct VariableDefinitions {
1641
    defs: Vec<VariableDefinition>
1642
}
1643
1644
impl fmt::Display for VariableDefinitions {
1645
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1646
0
        for def in &self.defs {
1647
0
            write!(f, "{def},")?;
1648
        }
1649
0
        return Ok( () );
1650
0
    }
1651
}
1652
1653
struct VariableValues<'v> {
1654
    defs: Vec<VariableValue<'v>>
1655
}
1656
1657
impl fmt::Display for VariableValues<'_> {
1658
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1659
0
        for value in &self.defs {
1660
0
            write!(f, "{value}")?;
1661
        }
1662
0
        return writeln!(f);
1663
0
    }
1664
}
1665
1666
impl VariableDefinitions {
1667
1.11M
    fn new(len: usize) -> VariableDefinitions {
1668
1.11M
        return VariableDefinitions{ defs: Vec::with_capacity(len) };
1669
1.11M
    }
1670
1671
1.11M
    fn build(defs: &Yaml) -> Result<VariableDefinitions> {
1672
1.11M
        if defs.is_badvalue() {
1673
834k
            return Ok( VariableDefinitions::new(0) );
1674
277k
        };
1675
277k
        if defs.is_array() {
1676
277k
            let defs = defs.as_vec().unwrap();
1677
277k
            let mut definitions = VariableDefinitions::new(defs.len());
1678
472k
            for def in 
defs277k
{
1679
472k
                let variable_def = VariableDefinition::build(def)
1680
472k
                        .context("definition of 'variables'")
?0
;
1681
472k
                definitions.push( variable_def);
1682
            };
1683
277k
            return Ok (definitions );
1684
0
        }
1685
0
        bail!( "'variables' is not an array of {{name: xpath-value}} definitions. Found {}'",
1686
0
                yaml_to_string(defs, 1) );
1687
1.11M
    }
1688
1689
472k
    fn push(&mut self, var_def: VariableDefinition) {
1690
472k
        self.defs.push(var_def);
1691
472k
    }
1692
1693
241k
    fn len(&self) -> usize {
1694
241k
        return self.defs.len();
1695
241k
    }
1696
}
1697
1698
struct ContextStack<'c> {
1699
    // Note: values are generated by calling value_of on an Evaluation -- that makes the two lifetimes the same
1700
    old_values: Vec<VariableValues<'c>>,   // store old values so they can be set on pop 
1701
    base: sxd_xpath::Context<'c>                      // initial context -- contains all the function defs and pref variables
1702
}
1703
1704
impl fmt::Display for ContextStack<'_> {
1705
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1706
0
        writeln!(f, " {} old_values", self.old_values.len())?;
1707
0
        for values in &self.old_values {
1708
0
            writeln!(f, "  {values}")?;
1709
        }
1710
0
        return writeln!(f);
1711
0
    }
1712
}
1713
1714
impl<'c, 'r> ContextStack<'c> {
1715
22.7k
    fn new<'a,>(pref_manager: &'a PreferenceManager) -> ContextStack<'c> {
1716
22.7k
        let prefs = pref_manager.merge_prefs();
1717
22.7k
        let mut context_stack = ContextStack {
1718
22.7k
            base: ContextStack::base_context(prefs),
1719
22.7k
            old_values: Vec::with_capacity(31)      // should avoid allocations
1720
22.7k
        };
1721
        // FIX: the list of variables to set should come from definitions.yaml
1722
        // These can't be set on the <math> tag because of the "translate" command which starts speech at an 'id'
1723
22.7k
        context_stack.base.set_variable("MatchingPause", Value::Boolean(false));
1724
22.7k
        context_stack.base.set_variable("IsColumnSilent", Value::Boolean(false));
1725
1726
1727
22.7k
        return context_stack;
1728
22.7k
    }
1729
1730
22.7k
    fn base_context(var_defs: PreferenceHashMap) -> sxd_xpath::Context<'c> {
1731
22.7k
        let mut context  = sxd_xpath::Context::new();
1732
22.7k
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
1733
22.7k
        crate::xpath_functions::add_builtin_functions(&mut context);
1734
1.88M
        for (key, value) in 
var_defs22.7k
{
1735
1.88M
            context.set_variable(key.as_str(), yaml_to_value(&value));
1736
1.88M
            // if let Some(str_value) = value.as_str() {
1737
1.88M
            //     if str_value != "Auto" {
1738
1.88M
            //         debug!("Set {}='{}'", key.as_str(), str_value);
1739
1.88M
            //     }
1740
1.88M
            // }
1741
1.88M
        };
1742
22.7k
        return context;
1743
22.7k
    }
1744
1745
3.78k
    fn set_globals(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1746
        // for each var/value pair, evaluate the value and add the var/value to the base context
1747
4.84k
        for def in 
&new_vars.defs3.78k
{
1748
            // set the new value
1749
4.84k
            let new_value = match def.value.evaluate(&self.base, mathml) {
1750
4.84k
                Ok(val) => val,
1751
0
                Err(_) => bail!(format!("Can't evaluate variable def for {}", def)),
1752
            };
1753
4.84k
            let qname = QName::new(def.name.as_str());
1754
4.84k
            self.base.set_variable(qname, new_value);
1755
        }
1756
3.78k
        return Ok( () );
1757
3.78k
    }
1758
1759
27.3k
    fn push(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1760
        // store the old value and set the new one 
1761
27.3k
        let mut old_values = VariableValues {defs: Vec::with_capacity(new_vars.defs.len()) };
1762
27.3k
        let evaluation = Evaluation::new(&self.base, Node::Element(mathml));
1763
66.9k
        for def in 
&new_vars.defs27.3k
{
1764
66.9k
            // get the old value (might not be defined)
1765
66.9k
            let qname = QName::new(def.name.as_str());
1766
66.9k
            let old_value = evaluation.value_of(qname).cloned();
1767
66.9k
            old_values.defs.push( VariableValue{ name: def.name.clone(), value: old_value} );
1768
66.9k
        }
1769
1770
        // use a second loop because of borrow problem with self.base and 'evaluation'
1771
66.9k
        for def in 
&new_vars.defs27.3k
{
1772
            // set the new value
1773
66.9k
            let new_value = match def.value.evaluate(&self.base, mathml) {
1774
66.9k
                Ok(val) => val,
1775
0
                Err(_) => Value::Nodeset(sxd_xpath::nodeset::Nodeset::new()),
1776
            };
1777
66.9k
            let qname = QName::new(def.name.as_str());
1778
66.9k
            self.base.set_variable(qname, new_value);
1779
        }
1780
27.3k
        self.old_values.push(old_values);
1781
27.3k
        return Ok( () );
1782
27.3k
    }
1783
1784
27.3k
    fn pop(&mut self) {
1785
        const MISSING_VALUE: &str = "-- unset value --";     // can't remove a variable from context, so use this value
1786
27.3k
        let old_values = self.old_values.pop().unwrap();
1787
66.9k
        for variable in 
old_values.defs27.3k
{
1788
66.9k
            let qname = QName::new(&variable.name);
1789
66.9k
            let old_value = match variable.value {
1790
22.8k
                None => Value::String(MISSING_VALUE.to_string()),
1791
44.1k
                Some(val) => val,
1792
            };
1793
66.9k
            self.base.set_variable(qname, old_value);
1794
        }
1795
27.3k
    }
1796
}
1797
1798
1799
1.88M
fn yaml_to_value<'b>(yaml: &Yaml) -> Value<'b> {
1800
1.88M
    return match yaml {
1801
1.47M
        Yaml::String(s) => Value::String(s.clone()),
1802
295k
        Yaml::Boolean(b)  => Value::Boolean(*b),
1803
31.7k
        Yaml::Integer(i)   => Value::Number(*i as f64),
1804
91.0k
        Yaml::Real(s)   => Value::Number(s.parse::<f64>().unwrap()),
1805
        _  => {
1806
0
            error!("yaml_to_value: illegal type found in Yaml value: {}", yaml_to_string(yaml, 1));
1807
0
            Value::String("".to_string())
1808
        },
1809
    }
1810
1.88M
}
1811
1812
1813
// Information for matching a Unicode char (defined in unicode.yaml) and building its replacement
1814
struct UnicodeDef {
1815
    ch: u32,
1816
    speech: ReplacementArray
1817
}
1818
1819
impl  fmt::Display for UnicodeDef {
1820
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1821
0
        return write!(f, "UnicodeDef{{ch: {}, speech: {:?}}}", self.ch, self.speech);
1822
0
    }
1823
}
1824
1825
impl UnicodeDef {
1826
2.24M
    fn build(unicode_def: &Yaml, file_name: &Path, speech_rules: &SpeechRules, use_short: bool) -> Result<Option<Vec<PathBuf>>> {
1827
2.24M
        if let Some(
include_file_name3
) = find_str(unicode_def, "include") {
1828
3
            let do_include_fn = |new_file: &Path| {
1829
3
                speech_rules.read_unicode(Some(new_file.to_path_buf()), use_short)
1830
3
            };
1831
3
            return Ok( Some(process_include(file_name, include_file_name, do_include_fn)
?0
) );
1832
2.24M
        }
1833
        // key: char, value is replacement or array of replacements
1834
2.24M
        let dictionary = unicode_def.as_hash();
1835
2.24M
        if dictionary.is_none() {
1836
0
            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1837
2.24M
        }
1838
1839
2.24M
        let dictionary = dictionary.unwrap();
1840
2.24M
        if dictionary.len() != 1 {
1841
0
            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1842
2.24M
        }
1843
1844
2.24M
        let (ch, replacements) = dictionary.iter().next().ok_or_else(|| 
anyhow!0
("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}",
yaml_to_string0
(
unicode_def0
, 0)))
?0
;
1845
2.24M
        let mut unicode_table = if use_short {
1846
1.06M
            speech_rules.unicode_short.borrow_mut()
1847
        } else {
1848
1.17M
            speech_rules.unicode_full.borrow_mut()
1849
        };
1850
2.24M
        if let Some(str) = ch.as_str() {
1851
2.24M
            if str.is_empty() {
1852
0
                bail!("Empty character definition. Replacement is {}", replacements.as_str().unwrap());
1853
2.24M
            }
1854
2.24M
            let mut chars = str.chars();
1855
2.24M
            let first_ch = chars.next().unwrap();       // non-empty string, so a char exists
1856
2.24M
            if chars.next().is_some() {                       // more than one char
1857
54.7k
                if str.contains('-')  {
1858
38.4k
                    return process_range(str, replacements, unicode_table);
1859
16.2k
                } else if first_ch != '0' {     // exclude 0xDDDD
1860
74.5k
                    for ch in 
str16.2k
.
chars16.2k
() { // restart the iterator
1861
74.5k
                        let ch_as_str = ch.to_string();
1862
74.5k
                        if unicode_table.insert(ch as u32, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1863
74.5k
                                            .with_context(|| 
format!0
("In definition of char: '{str}'"))
?0
.replacements).is_some() {
1864
0
                            error!("*** Character '{}' (0x{:X}) is repeated", ch, ch as u32);
1865
74.5k
                        }
1866
                    }
1867
16.2k
                    return Ok(None);
1868
0
                }
1869
2.18M
            }
1870
0
        }
1871
1872
2.18M
        let ch = UnicodeDef::get_unicode_char(ch)
?0
;
1873
2.18M
        if unicode_table.insert(ch, ReplacementArray::build(replacements)
1874
2.18M
                                        .with_context(|| 
format!0
("In definition of char: '{}' (0x{})",
1875
2.18M
                                                                        
char::from_u320
(
ch0
).
unwrap0
(), ch))
?0
.replacements).is_some() {
1876
147
            error!("*** Character '{}' (0x{:X}) is repeated", 
char::from_u320
(
ch0
).
unwrap0
(), ch);
1877
2.18M
        }
1878
2.18M
        return Ok(None);
1879
1880
38.4k
        fn process_range(def_range: &str, replacements: &Yaml, mut unicode_table: RefMut<HashMap<u32,Vec<Replacement>>>) -> Result<Option<Vec<PathBuf>>> {
1881
            // should be a character range (e.g., "A-Z")
1882
            // iterate over that range and also substitute the char for '.' in the 
1883
38.4k
            let mut range = def_range.split('-');
1884
38.4k
            let first = range.next().unwrap().chars().next().unwrap() as u32;
1885
38.4k
            let last = range.next().unwrap().chars().next().unwrap() as u32;
1886
38.4k
            if range.next().is_some() {
1887
0
                bail!("Character range definition has more than one '-': '{}'", def_range);
1888
38.4k
            }
1889
1890
889k
            for ch in 
first..last+138.4k
{
1891
889k
                let ch_as_str = char::from_u32(ch).unwrap().to_string();
1892
889k
                unicode_table.insert(ch, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1893
889k
                                        .with_context(|| 
format!0
("In definition of char: '{def_range}'"))
?0
.replacements);
1894
            };
1895
1896
38.4k
            return Ok(None)
1897
38.4k
        }
1898
1899
10.3M
        fn substitute_ch(yaml: &Yaml, ch: &str) -> Yaml {
1900
10.3M
            return match yaml {
1901
2.34M
                Yaml::Array(v) => {
1902
                    Yaml::Array(
1903
2.34M
                        v.iter()
1904
3.03M
                         .
map2.34M
(|e| substitute_ch(e, ch))
1905
2.34M
                         .collect::<Vec<Yaml>>()
1906
                    )
1907
                },
1908
4.52M
                Yaml::Hash(h) => {
1909
                    Yaml::Hash(
1910
4.52M
                        h.iter()
1911
6.34M
                         .
map4.52M
(|(key,val)| (key.clone(), substitute_ch(val, ch)) )
1912
4.52M
                         .collect::<Hash>()
1913
                    )
1914
                },
1915
3.47M
                Yaml::String(s) => Yaml::String( s.replace('.', ch) ),
1916
0
                _ => yaml.clone(),
1917
            }
1918
10.3M
        }
1919
2.24M
    }
1920
    
1921
2.18M
    fn get_unicode_char(ch: &Yaml) -> Result<u32> {
1922
        // either "a" or 0x1234 (number)
1923
2.18M
        if let Some(ch) = ch.as_str() {
1924
2.18M
            let mut ch_iter = ch.chars();
1925
2.18M
            let unicode_ch = ch_iter.next();
1926
2.18M
            if unicode_ch.is_none() || ch_iter.next().is_some() {
1927
0
                bail!("Wanted unicode char, found string '{}')", ch);
1928
2.18M
            };
1929
2.18M
            return Ok( unicode_ch.unwrap() as u32 );
1930
0
        }
1931
    
1932
0
        if let Some(num) = ch.as_i64() {
1933
0
            return Ok( num as u32 );
1934
0
        }
1935
0
        bail!("Unicode character '{}' can't be converted to an code point", yaml_to_string(ch, 0));
1936
2.18M
    }    
1937
}
1938
1939
// Fix: there should be a cache so subsequent library calls don't have to read in the same speech rules
1940
//   likely a cache of size 1 is fine
1941
// Fix: all statics should be gathered together into one structure that is a Mutex
1942
//   for each library call, we should grab a lock on the Mutex in case others try to call
1943
//   at the same time.
1944
//   If this turns out to be something that others actually do, then a cache > 1 would be good
1945
1946
 type RuleTable = HashMap<String, Vec<Box<SpeechPattern>>>;
1947
 type UnicodeTable = Rc<RefCell<HashMap<u32,Vec<Replacement>>>>;
1948
 type FilesAndTimesShared = Rc<RefCell<FilesAndTimes>>;
1949
1950
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
1951
 pub enum RulesFor {
1952
     Intent,
1953
     Speech,
1954
     OverView,
1955
     Navigation,
1956
     Braille,
1957
 }
1958
1959
 impl fmt::Display for RulesFor {
1960
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1961
0
        let name = match self {
1962
0
            RulesFor::Intent => "Intent",
1963
0
            RulesFor::Speech => "Speech",
1964
0
            RulesFor::OverView => "OverView",
1965
0
            RulesFor::Navigation => "Navigation",
1966
0
            RulesFor::Braille => "Braille",
1967
        };
1968
0
       return write!(f, "{name}");
1969
0
    }
1970
 }
1971
1972
 
1973
#[derive(Debug, Clone)]
1974
pub struct FileAndTime {
1975
    file: PathBuf,
1976
    time: SystemTime,
1977
}
1978
1979
impl FileAndTime {
1980
0
    fn new(file: PathBuf) -> FileAndTime {
1981
0
        return FileAndTime {
1982
0
            file,
1983
0
            time: SystemTime::UNIX_EPOCH,
1984
0
        }
1985
0
    }
1986
1987
    // used for debugging preference settings
1988
0
    pub fn debug_get_file(&self) -> Option<&str> {
1989
0
        return self.file.to_str();
1990
0
    }
1991
1992
8.29k
    pub fn new_with_time(file: PathBuf) -> FileAndTime {
1993
8.29k
        return FileAndTime {
1994
8.29k
            time: FileAndTime::get_metadata(&file),
1995
8.29k
            file,
1996
8.29k
        }
1997
8.29k
    }
1998
1999
33.7k
    pub fn is_up_to_date(&self) -> bool {
2000
33.7k
        let file_mod_time = FileAndTime::get_metadata(&self.file);
2001
33.7k
        return self.time >= file_mod_time;
2002
33.7k
    }
2003
2004
140k
    fn get_metadata(path: &Path) -> SystemTime {
2005
        use std::fs;
2006
140k
        if !cfg!(target_family = "wasm") {
2007
140k
            let metadata = fs::metadata(path);
2008
140k
            if let Ok(
metadata120k
) = metadata &&
2009
120k
               let Ok(mod_time) = metadata.modified() {
2010
120k
                    return mod_time;
2011
20.3k
                }
2012
0
        }
2013
20.3k
        return SystemTime::UNIX_EPOCH
2014
140k
    }
2015
2016
}
2017
#[derive(Debug, Default)]
2018
pub struct FilesAndTimes {
2019
    // ft[0] is the main file -- other files are included by it (or recursively)
2020
    // We could be a little smarter about invalidation by tracking what file is the parent (including file),
2021
    // but it seems more complicated than it is worth
2022
    ft: Vec<FileAndTime>
2023
}
2024
2025
impl FilesAndTimes {
2026
0
    pub fn new(start_path: PathBuf) -> FilesAndTimes {
2027
0
        let mut ft = Vec::with_capacity(8);
2028
0
        ft.push( FileAndTime::new(start_path) );
2029
0
        return FilesAndTimes{ ft };
2030
0
    }
2031
2032
    /// Returns true if the main file matches the corresponding preference location and files' times are all current
2033
33.4k
    pub fn is_file_up_to_date(&self, pref_path: &Path, should_ignore_file_time: bool) -> bool {
2034
2035
        // if the time isn't set or the path is different from the preference (which might have changed), return false
2036
33.4k
        if self.ft.is_empty() || 
self.as_path() != pref_path28.0k
{
2037
5.74k
            return false;
2038
27.7k
        }
2039
27.7k
        if should_ignore_file_time || 
cfg!1.18k
(target_family = "wasm") {
2040
26.5k
            return true;
2041
1.18k
        }
2042
1.18k
        if  self.ft[0].time == SystemTime::UNIX_EPOCH {
2043
0
            return false;
2044
1.18k
        }
2045
2046
2047
        // check the time stamp on the included files -- if the head file hasn't changed, the paths for the included files will be the same
2048
1.19k
        for file in 
&self.ft1.18k
{
2049
1.19k
            if !file.is_up_to_date() {
2050
1
                return false;
2051
1.19k
            }
2052
        }
2053
1.18k
        return true;
2054
33.4k
    }
2055
2056
19.8k
    fn set_files_and_times(&mut self, new_files: Vec<PathBuf>)  {
2057
19.8k
        self.ft.clear();
2058
98.4k
        for path in 
new_files19.8k
{
2059
98.4k
            let time = FileAndTime::get_metadata(&path);      // do before move below
2060
98.4k
            self.ft.push( FileAndTime{ file: path, time })
2061
        }
2062
19.8k
    }
2063
2064
28.0k
    pub fn as_path(&self) -> &Path {
2065
28.0k
        assert!(!self.ft.is_empty());
2066
28.0k
        return &self.ft[0].file;
2067
28.0k
    }
2068
2069
0
    pub fn paths(&self) -> Vec<PathBuf> {
2070
0
        return self.ft.iter().map(|ft| ft.file.clone()).collect::<Vec<PathBuf>>();
2071
0
    }
2072
2073
}
2074
2075
2076
/// `SpeechRulesWithContext` encapsulates a named group of speech rules (e.g, "ClearSpeak")
2077
/// along with the preferences to be used for speech.
2078
// Note: if we can't read the files, an error message is stored in the structure and needs to be checked.
2079
// I tried using Result<SpeechRules>, but it was a mess with all the unwrapping.
2080
// Important: the code needs to be careful to check this at the top level calls
2081
pub struct SpeechRules {
2082
    error: String,
2083
    name: RulesFor,
2084
    pub pref_manager: Rc<RefCell<PreferenceManager>>,
2085
    rules: RuleTable,                              // the speech rules used (partitioned into MathML tags in hashmap, then linearly searched)
2086
    rule_files: FilesAndTimes,                     // files that were read
2087
    translate_single_chars_only: bool,             // strings like "half" don't want 'a's translated, but braille does
2088
    unicode_short: UnicodeTable,                   // the short list of rules used for Unicode characters
2089
    unicode_short_files: FilesAndTimesShared,     // files that were read
2090
    unicode_full:  UnicodeTable,                   // the long remaining rules used for Unicode characters
2091
    unicode_full_files: FilesAndTimesShared,      // files that were read
2092
    definitions_files: FilesAndTimesShared,       // files that were read
2093
}
2094
2095
impl fmt::Display for SpeechRules {
2096
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2097
0
        writeln!(f, "SpeechRules '{}'\n{})", self.name, self.pref_manager.borrow())?;
2098
0
        let mut rules_vec: Vec<(&String, &Vec<Box<SpeechPattern>>)> = self.rules.iter().collect();
2099
0
        rules_vec.sort_by_key(|(tag_name, _)| tag_name.as_str());
2100
0
        for (tag_name, rules) in rules_vec {
2101
0
            writeln!(f, "   {}: #patterns {}", tag_name, rules.len())?;
2102
        };
2103
0
        return writeln!(f, "   {}+{} unicode entries", &self.unicode_short.borrow().len(), &self.unicode_full.borrow().len());
2104
0
    }
2105
}
2106
2107
2108
/// `SpeechRulesWithContext` encapsulates a named group of speech rules (e.g, "ClearSpeak")
2109
/// along with the preferences to be used for speech.
2110
/// Because speech rules can define variables, there is also a context that is carried with them
2111
pub struct SpeechRulesWithContext<'c, 's:'c, 'm:'c> {
2112
    speech_rules: &'s SpeechRules,
2113
    context_stack: ContextStack<'c>,   // current value of (context) variables
2114
    doc: Document<'m>,
2115
    nav_node_id: &'m str,
2116
    nav_node_offset: usize,
2117
    pub inside_spell: bool,     // hack to allow 'spell' to avoid infinite loop (see 'spell' implementation in tts.rs)
2118
    pub translate_count: usize, // hack to avoid 'translate' infinite loop (see 'spell' implementation in tts.rs)
2119
}
2120
2121
impl<'c, 's:'c, 'm:'c> fmt::Display for SpeechRulesWithContext<'c, 's,'m> {
2122
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2123
0
        writeln!(f, "SpeechRulesWithContext \n{})", self.speech_rules)?;
2124
0
        return writeln!(f, "   {} context entries, nav node id '({}, {})'", &self.context_stack, self.nav_node_id, self.nav_node_offset);
2125
0
    }
2126
}
2127
2128
thread_local!{
2129
    /// SPEECH_UNICODE_SHORT is shared among several rules, so "RC" is used
2130
    static SPEECH_UNICODE_SHORT: UnicodeTable =
2131
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
2132
        
2133
    /// SPEECH_UNICODE_FULL is shared among several rules, so "RC" is used
2134
    static SPEECH_UNICODE_FULL: UnicodeTable =
2135
        Rc::new( RefCell::new( HashMap::with_capacity(6500) ) );
2136
        
2137
    /// BRAILLE_UNICODE_SHORT is shared among several rules, so "RC" is used
2138
    static BRAILLE_UNICODE_SHORT: UnicodeTable =
2139
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
2140
        
2141
    /// BRAILLE_UNICODE_FULL is shared among several rules, so "RC" is used
2142
    static BRAILLE_UNICODE_FULL: UnicodeTable =
2143
        Rc::new( RefCell::new( HashMap::with_capacity(5000) ) );
2144
2145
    /// SPEECH_DEFINITION_FILES_AND_TIMES is shared among several rules, so "RC" is used
2146
    static SPEECH_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
2147
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2148
        
2149
    /// BRAILLE_DEFINITION_FILES_AND_TIMES is shared among several rules, so "RC" is used
2150
    static BRAILLE_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
2151
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2152
        
2153
    /// SPEECH_UNICODE_SHORT_FILES_AND_TIMES is shared among several rules, so "RC" is used
2154
    static SPEECH_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
2155
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2156
        
2157
    /// SPEECH_UNICODE_FULL_FILES_AND_TIMES is shared among several rules, so "RC" is used
2158
    static SPEECH_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
2159
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2160
        
2161
    /// BRAILLE_UNICODE_SHORT_FILES_AND_TIMES is shared among several rules, so "RC" is used
2162
    static BRAILLE_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
2163
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2164
        
2165
    /// BRAILLE_UNICODE_FULL_FILES_AND_TIMES is shared among several rules, so "RC" is used
2166
    static BRAILLE_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
2167
        Rc::new( RefCell::new(FilesAndTimes::default()) );
2168
        
2169
    /// The current set of speech rules
2170
    // maybe this should be a small cache of rules in case people switch rules/prefs?
2171
    pub static INTENT_RULES: RefCell<SpeechRules> =
2172
            RefCell::new( SpeechRules::new(RulesFor::Intent, true) );
2173
2174
    pub static SPEECH_RULES: RefCell<SpeechRules> =
2175
            RefCell::new( SpeechRules::new(RulesFor::Speech, true) );
2176
2177
    pub static OVERVIEW_RULES: RefCell<SpeechRules> =
2178
            RefCell::new( SpeechRules::new(RulesFor::OverView, true) );
2179
2180
    pub static NAVIGATION_RULES: RefCell<SpeechRules> =
2181
            RefCell::new( SpeechRules::new(RulesFor::Navigation, true) );
2182
2183
    pub static BRAILLE_RULES: RefCell<SpeechRules> =
2184
            RefCell::new( SpeechRules::new(RulesFor::Braille, false) );
2185
}
2186
2187
impl SpeechRules {
2188
8.16k
    pub fn new(name: RulesFor, translate_single_chars_only: bool) -> SpeechRules {
2189
8.16k
        let globals = if name == RulesFor::Braille {
2190
1.35k
            (
2191
1.35k
                (BRAILLE_UNICODE_SHORT.with(Rc::clone), BRAILLE_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2192
1.35k
                (BRAILLE_UNICODE_FULL. with(Rc::clone), BRAILLE_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2193
1.35k
                BRAILLE_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2194
1.35k
            )
2195
        } else {
2196
6.80k
            (
2197
6.80k
                (SPEECH_UNICODE_SHORT.with(Rc::clone), SPEECH_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2198
6.80k
                (SPEECH_UNICODE_FULL. with(Rc::clone), SPEECH_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2199
6.80k
                SPEECH_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2200
6.80k
            )
2201
        };
2202
2203
        return SpeechRules {
2204
8.16k
            error: Default::default(),
2205
8.16k
            name,
2206
8.16k
            rules: HashMap::with_capacity(if name == RulesFor::Intent || 
name == RulesFor::Speech5.53k
{
5006.76k
} else {
501.39k
}), // lazy load them
2207
8.16k
            rule_files: FilesAndTimes::default(),
2208
8.16k
            unicode_short: globals.0.0,       // lazy load them
2209
8.16k
            unicode_short_files: globals.0.1,
2210
8.16k
            unicode_full: globals.1.0,        // lazy load them
2211
8.16k
            unicode_full_files: globals.1.1,
2212
8.16k
            definitions_files: globals.2,
2213
8.16k
            translate_single_chars_only,
2214
8.16k
            pref_manager: PreferenceManager::get(),
2215
        };
2216
8.16k
}
2217
2218
17.7k
    pub fn get_error(&self) -> Option<&str> {
2219
17.7k
        return if self.error.is_empty() {
2220
17.7k
             None
2221
        } else {
2222
0
            Some(&self.error)
2223
        }
2224
17.7k
    }
2225
2226
15.3k
    pub fn read_files(&mut self) -> Result<()> {
2227
15.3k
        let check_rule_files = self.pref_manager.borrow().pref_to_string("CheckRuleFiles");
2228
15.3k
        if check_rule_files != "None" {  // "Prefs" or "All" are other values
2229
15.3k
            self.pref_manager.borrow_mut().set_preference_files()
?0
;
2230
2
        }
2231
15.3k
        let should_ignore_file_time = self.pref_manager.borrow().pref_to_string("CheckRuleFiles") != "All";     // ignore for "None", "Prefs"
2232
15.3k
        let rule_file = self.pref_manager.borrow().get_rule_file(&self.name).to_path_buf();     // need to create PathBuf to avoid a move/use problem
2233
15.3k
        if self.rules.is_empty() || 
!7.17k
self.rule_files7.17k
.
is_file_up_to_date7.17k
(&rule_file, should_ignore_file_time) {
2234
8.35k
            self.rules.clear();
2235
8.35k
            let files_read = self.read_patterns(&rule_file)
?0
;
2236
8.35k
            self.rule_files.set_files_and_times(files_read);
2237
6.94k
        }
2238
2239
15.3k
        let pref_manager = self.pref_manager.borrow();
2240
15.3k
        let unicode_pref_files = if self.name == RulesFor::Braille {
pref_manager.get_braille_unicode_file()1.82k
} else {
pref_manager.get_speech_unicode_file()13.4k
};
2241
2242
15.3k
        if !self.unicode_short_files.borrow().is_file_up_to_date(unicode_pref_files.0, should_ignore_file_time) {
2243
5.50k
            self.unicode_short.borrow_mut().clear();
2244
5.50k
            self.unicode_short_files.borrow_mut().set_files_and_times(self.read_unicode(None, true)
?0
);
2245
9.80k
        }
2246
2247
15.3k
        if self.definitions_files.borrow().ft.is_empty() || 
!9.82k
self.definitions_files.borrow()9.82k
.
is_file_up_to_date9.82k
(
2248
9.82k
                            pref_manager.get_definitions_file(self.name != RulesFor::Braille),
2249
9.82k
                            should_ignore_file_time
2250
9.82k
        ) {
2251
5.49k
            self.definitions_files.borrow_mut().set_files_and_times(read_definitions_file(self.name != RulesFor::Braille)
?0
);
2252
9.80k
        }
2253
15.3k
        return Ok( () );
2254
15.3k
    }
2255
2256
38.6k
    fn read_patterns(&mut self, path: &Path) -> Result<Vec<PathBuf>> {
2257
        // info!("Reading rule file: {}", p.to_str().unwrap());
2258
38.6k
        let rule_file_contents = read_to_string_shim(path).with_context(|| 
format!0
("cannot read file '{}'",
path0
.
to_str0
().
unwrap0
()))
?0
;
2259
38.6k
        let rules_build_fn = |pattern: &Yaml| {
2260
38.6k
            self.build_speech_patterns(pattern, path)
2261
38.6k
                .with_context(||
format!0
("in file {:?}",
path0
.
to_str0
().
unwrap0
()))
2262
38.6k
        };
2263
38.6k
        return compile_rule(&rule_file_contents, rules_build_fn)
2264
38.6k
                .with_context(||
format!0
("in file {:?}",
path0
.
to_str0
().
unwrap0
()));
2265
38.6k
    }
2266
2267
38.6k
    fn build_speech_patterns(&mut self, patterns: &Yaml, file_name: &Path) -> Result<Vec<PathBuf>> {
2268
        // Rule::SpeechPatternList
2269
38.6k
        let patterns_vec = patterns.as_vec();
2270
38.6k
        if patterns_vec.is_none() {
2271
0
            bail!(yaml_type_err(patterns, "array"));
2272
38.6k
        }
2273
38.6k
        let patterns_vec = patterns.as_vec().unwrap();
2274
38.6k
        let mut files_read = vec![file_name.to_path_buf()];
2275
894k
        for entry in 
patterns_vec.iter()38.6k
{
2276
894k
            if let Some(
mut added_files30.2k
) = SpeechPattern::build(entry, file_name, self)
?0
{
2277
30.2k
                files_read.append(&mut added_files);
2278
864k
            }
2279
        }
2280
38.6k
        return Ok(files_read)
2281
38.6k
    }
2282
    
2283
5.97k
    fn read_unicode(&self, path: Option<PathBuf>, use_short: bool) -> Result<Vec<PathBuf>> {
2284
5.97k
        let path = match path {
2285
3
            Some(p) => p,
2286
            None => {
2287
                // get the path to either the short or long unicode file
2288
5.97k
                let pref_manager = self.pref_manager.borrow();
2289
5.97k
                let unicode_files = if self.name == RulesFor::Braille {
2290
1.57k
                    pref_manager.get_braille_unicode_file()
2291
                } else {
2292
4.40k
                    pref_manager.get_speech_unicode_file()
2293
                };
2294
5.97k
                let unicode_files = if use_short {
unicode_files.05.50k
} else {
unicode_files.1468
};
2295
5.97k
                unicode_files.to_path_buf()
2296
            }
2297
        };
2298
2299
        // FIX: should read first (lang), then supplement with second (region)
2300
        // info!("Reading unicode file {}", path.to_str().unwrap());
2301
5.97k
        let unicode_file_contents = read_to_string_shim(&path)
?0
;
2302
5.97k
        let unicode_build_fn = |unicode_def_list: &Yaml| {
2303
5.97k
            let unicode_defs = unicode_def_list.as_vec();
2304
5.97k
            if unicode_defs.is_none() {
2305
0
                bail!("File '{}' does not begin with an array", yaml_to_type(unicode_def_list));
2306
5.97k
            };
2307
5.97k
            let mut files_read = vec![path.to_path_buf()];
2308
2.24M
            for unicode_def in 
unicode_defs5.97k
.
unwrap5.97k
() {
2309
2.24M
                if let Some(
mut added_files3
) = UnicodeDef::build(unicode_def, &path, self, use_short)
2310
2.24M
                                                                .with_context(|| 
{format!0
("In file {:?}",
path.to_str()0
)
}0
)
?0
{
2311
3
                    files_read.append(&mut added_files);
2312
2.24M
                }
2313
            };
2314
5.97k
            return Ok(files_read)
2315
5.97k
        };
2316
2317
5.97k
        return compile_rule(&unicode_file_contents, unicode_build_fn)
2318
5.97k
                    .with_context(||
format!0
("in file {:?}",
path.to_str()0
.
unwrap0
()));
2319
5.97k
    }
2320
2321
0
    pub fn print_sizes() -> String {
2322
        // let _ = &SPEECH_RULES.with_borrow(|rules| {
2323
        //     debug!("SPEECH RULES entries\n");
2324
        //     let rules = &rules.rules;
2325
        //     for (key, _) in rules.iter() {
2326
        //         debug!("key: {}", key);
2327
        //     }
2328
        // });
2329
0
        let mut answer = rule_size(&SPEECH_RULES, "SPEECH_RULES");
2330
0
        answer += &rule_size(&INTENT_RULES, "INTENT_RULES");
2331
0
        answer += &rule_size(&BRAILLE_RULES, "BRAILLE_RULES");
2332
0
        answer += &rule_size(&NAVIGATION_RULES, "NAVIGATION_RULES");
2333
0
        answer += &rule_size(&OVERVIEW_RULES, "OVERVIEW_RULES");
2334
0
        SPEECH_RULES.with_borrow(|rule| {
2335
0
            answer += &format!("Speech Unicode tables: short={}/{}, long={}/{}\n",
2336
0
                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2337
0
                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2338
0
        });
2339
0
        BRAILLE_RULES.with_borrow(|rule| {
2340
0
            answer += &format!("Braille Unicode tables: short={}/{}, long={}/{}\n",
2341
0
                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2342
0
                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2343
0
        });
2344
0
        return answer;
2345
2346
0
        fn rule_size(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, name: &str) -> String {
2347
0
            rules.with_borrow(|rule| {
2348
0
                let hash_map = &rule.rules;
2349
0
                return format!("{}: {}/{}\n", name, hash_map.len(), hash_map.capacity());
2350
0
            })
2351
0
        }
2352
0
    }
2353
}
2354
2355
2356
/// We track three different lifetimes:
2357
///   'c -- the lifetime of the context and mathml
2358
///   's -- the lifetime of the speech rules (which is static)
2359
///   'r -- the lifetime of the reference (this seems to be key to keep the rust memory checker happy)
2360
impl<'c, 's:'c, 'r, 'm:'c> SpeechRulesWithContext<'c, 's,'m> {
2361
22.7k
    pub fn new(speech_rules: &'s SpeechRules, doc: Document<'m>, nav_node_id: &'m str, nav_node_offset: usize) -> SpeechRulesWithContext<'c, 's, 'm> {
2362
22.7k
        return SpeechRulesWithContext {
2363
22.7k
            speech_rules,
2364
22.7k
            context_stack: ContextStack::new(&speech_rules.pref_manager.borrow()),
2365
22.7k
            doc,
2366
22.7k
            nav_node_id,
2367
22.7k
            nav_node_offset,
2368
22.7k
            inside_spell: false,
2369
22.7k
            translate_count: 0,
2370
22.7k
        }
2371
22.7k
    }
2372
2373
1.84k
    pub fn get_rules(&mut self) -> &SpeechRules {
2374
1.84k
        return self.speech_rules;
2375
1.84k
    }
2376
2377
45.5k
    pub fn get_context(&mut self) -> &mut sxd_xpath::Context<'c> {
2378
45.5k
        return &mut self.context_stack.base;
2379
45.5k
    }
2380
2381
3.23k
    pub fn get_document(&mut self) -> Document<'m> {
2382
3.23k
        return self.doc;
2383
3.23k
    }
2384
2385
1.13k
    pub fn set_nav_node_offset(&mut self, offset: usize) {
2386
        // debug!("Setting nav node offset to {}", offset);
2387
1.13k
        self.nav_node_offset = offset;
2388
1.13k
    }
2389
2390
121k
    pub fn match_pattern<T:TreeOrString<'c, 'm, T>>(&'r mut self, mathml: Element<'c>) -> Result<T> {
2391
        // debug!("Looking for a match for: \n{}", mml_to_string(mathml));
2392
121k
        let tag_name = mathml.name().local_part();
2393
121k
        let rules = &self.speech_rules.rules;
2394
2395
        // start with priority rules that apply to any node (should be a very small number)
2396
121k
        if let Some(
rule_vector95.8k
) = rules.get("!*") &&
2397
95.8k
           let Some(
result3.18k
) = self.find_match(rule_vector, mathml)
?9
{
2398
3.18k
                return Ok(result);      // found a match
2399
118k
            }
2400
        
2401
118k
        if let Some(
rule_vector116k
) = rules.get(tag_name) &&
2402
116k
           let Some(
result82.1k
) = self.find_match(rule_vector, mathml)
?0
{
2403
82.1k
                return Ok(result);      // found a match
2404
35.9k
            }
2405
2406
        // no rules for specific element, fall back to rules for "*" which *should* be present in all rule files as fallback
2407
35.9k
        if let Some(rule_vector) = rules.get("*") &&
2408
35.9k
           let Some(result) = self.find_match(rule_vector, mathml)
?0
{
2409
35.9k
                return Ok(result);      // found a match
2410
0
            }
2411
2412
        // no rules matched -- poorly written rule file -- let flow through to default error
2413
        // report error message with file name
2414
0
        let speech_manager = self.speech_rules.pref_manager.borrow();
2415
0
        let file_name = speech_manager.get_rule_file(&self.speech_rules.name);
2416
        // FIX: handle error appropriately 
2417
0
        bail!("\nNo match found!\nMissing patterns in {} for MathML.\n{}", file_name.to_string_lossy(), mml_to_string(mathml));
2418
121k
    }
2419
2420
248k
    fn find_match<T:TreeOrString<'c, 'm, T>>(&'r mut self, rule_vector: &[Box<SpeechPattern>], mathml: Element<'c>) -> Result<Option<T>> {
2421
870k
        for pattern in 
rule_vector248k
{
2422
            // debug!("Pattern name: {}", pattern.pattern_name);
2423
            // always pushing and popping around the is_match would be a little cleaner, but push/pop is relatively expensive,
2424
            //   so we optimize and only push first if the variables are needed to do the match
2425
870k
            if pattern.match_uses_var_defs {
2426
7.05k
                self.context_stack.push(pattern.var_defs.clone(), mathml)
?0
;
2427
863k
            }
2428
870k
            if pattern.is_match(&self.context_stack.base, mathml)
2429
870k
                    .with_context(|| 
error_string0
(
pattern0
,
mathml0
) )
?0
{
2430
                // debug!("  find_match: FOUND!!!");
2431
121k
                if !pattern.match_uses_var_defs && 
pattern.var_defs.len() > 0119k
{ // don't push them on twice
2432
13.0k
                    self.context_stack.push(pattern.var_defs.clone(), mathml)
?0
;
2433
108k
                }
2434
121k
                let result = if self.nav_node_offset > 0 &&
2435
47
                            self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() && 
is_leaf7
(
mathml7
) {
2436
7
                    let ch = crate::canonicalize::as_text(mathml).chars().nth(self.nav_node_offset-1).unwrap_or_default();
2437
7
                    let ch = self.replace_single_char(ch, mathml)
?0
;
2438
                    // debug!("find_match: ch={} from '{}'; matched pattern name/tag: {}/{} with nav_node_offset={}",
2439
                    //     ch, crate::canonicalize::as_text(mathml),
2440
                    //     pattern.pattern_name, pattern.tag_name, self.nav_node_offset);
2441
7
                    T::from_string(ch.to_string(), self.doc)
2442
                } else {
2443
121k
                    pattern.replacements.replace(self, mathml)
2444
                };
2445
121k
                if pattern.var_defs.len() > 0 {
2446
14.5k
                    self.context_stack.pop();
2447
106k
                }
2448
121k
                return match result {
2449
121k
                    Ok(s) => {
2450
                        // for all except braille and navigation, nav_node_id will be an empty string and will not match
2451
121k
                        if self.nav_node_id.is_empty() {
2452
102k
                            Ok( Some(s) )
2453
                        } else {
2454
18.5k
                            if self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() {
debug!990
("Matched pattern name/tag: {}/{}", pattern.pattern_name, pattern.tag_name)
}17.5k
;
2455
18.5k
                            Ok ( Some(self.nav_node_adjust(s, mathml)) )
2456
                        }
2457
                    },
2458
9
                    Err(e) => Err( e.context(
2459
9
                        format!(
2460
9
                            "attempting replacement pattern: \"{}\" for \"{}\".\n\
2461
9
                            Replacement\n{}\n...due to matching the MathML\n{} with the pattern\n\
2462
9
                            {}\n\
2463
9
                            The patterns are in {}.\n",
2464
9
                            pattern.pattern_name, pattern.tag_name,
2465
9
                            pattern.replacements.pretty_print_replacements(),
2466
9
                            mml_to_string(mathml), pattern.pattern,
2467
9
                            pattern.file_name
2468
9
                        )
2469
9
                    ))
2470
                }
2471
749k
            } else if pattern.match_uses_var_defs {
2472
5.60k
                self.context_stack.pop();
2473
743k
            }
2474
        };
2475
127k
        return Ok(None);    // no matches
2476
2477
0
        fn error_string(pattern: &SpeechPattern, mathml: Element) -> String {
2478
0
            return format!(
2479
                "error during pattern match using: \"{}\" for \"{}\".\n\
2480
                Pattern is \n{}\nMathML for the match:\n\
2481
                {}\
2482
                The patterns are in {}.\n",
2483
                pattern.pattern_name, pattern.tag_name,
2484
                pattern.pattern,
2485
0
                mml_to_string(mathml),
2486
                pattern.file_name
2487
            );
2488
0
        }
2489
2490
248k
    }
2491
2492
18.5k
    fn nav_node_adjust<T:TreeOrString<'c, 'm, T>>(&self, speech: T, mathml: Element<'c>) -> T {
2493
18.5k
      if let Some(id) = mathml.attribute_value("id") &&
2494
18.5k
         self.nav_node_id == id {
2495
990
        let offset = mathml.attribute_value(crate::navigate::ID_OFFSET).unwrap_or("0");
2496
990
        debug!("nav_node_adjust: id/name='{}/{}' offset?='{}'", id, 
name0
(
mathml0
),
2497
0
               self.nav_node_offset.to_string().as_str() == offset
2498
        );
2499
990
        if is_leaf(mathml) || 
self.nav_node_offset.to_string().as_str() == offset527
{
2500
990
          if self.speech_rules.name == RulesFor::Braille {
2501
469
            let highlight_style =  self.speech_rules.pref_manager.borrow().pref_to_string("BrailleNavHighlight");
2502
469
            return T::highlight_braille(speech, highlight_style);
2503
          } else {
2504
521
            debug!("nav_node_adjust: id='{}' offset='{}/{}'", id, self.nav_node_offset, offset);
2505
521
            return T::mark_nav_speech(speech)
2506
          }
2507
0
        }
2508
17.5k
      }
2509
17.5k
      return speech;
2510
18.5k
    }
2511
    
2512
469
    fn highlight_braille_string(braille: String, highlight_style: String) -> String {
2513
        // add dots 7 & 8 to the Unicode braille (28xx)
2514
469
        if &highlight_style == "Off" || braille.is_empty() {
2515
6
            return braille;
2516
463
        }
2517
        
2518
        // FIX: this seems needlessly complex. It is much simpler if the char can be changed in place...
2519
        // find first char that can get the dots and add them
2520
463
        let mut chars = braille.chars().collect::<Vec<char>>();
2521
2522
        // the 'b' for baseline indicator is really part of the previous token, so it needs to be highlighted but isn't because it is not Unicode braille
2523
463
        let baseline_indicator_hack = PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth";
2524
        // debug!("highlight_braille_string: highlight_style={}\n braille={}", highlight_style, braille);
2525
463
        let mut i_first_modified = 0;
2526
760
        for (i, ch) in 
chars.iter_mut()463
.
enumerate463
() {
2527
760
            let modified_ch = add_dots_to_braille_char(*ch, baseline_indicator_hack);
2528
760
            if *ch != modified_ch {
2529
463
                *ch = modified_ch; 
2530
463
                i_first_modified = i;
2531
463
                break;
2532
297
            };
2533
        };
2534
2535
463
        let mut i_last_modified = i_first_modified;
2536
463
        if &highlight_style != "FirstChar" {
2537
            // find last char so that we know when to modify the char
2538
491
            for i in (
i_first_modified463
..chars.len()).
rev463
(){
2539
491
                let ch = chars[i];
2540
491
                let modified_ch = add_dots_to_braille_char(ch, baseline_indicator_hack);
2541
491
                chars[i] = modified_ch;
2542
491
                if ch !=  modified_ch {
2543
390
                    i_last_modified = i;
2544
390
                    break;
2545
101
                }
2546
            }
2547
0
        }
2548
2549
463
        if &highlight_style == "All" {
2550
            // finish going through the string
2551
      #[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
2552
4
            for 
i0
in i_first_modified+1..i_last_modified {
2553
0
                chars[i] = add_dots_to_braille_char(chars[i], baseline_indicator_hack);
2554
0
            };
2555
459
        }
2556
2557
463
        let result = chars.into_iter().collect::<String>(); 
2558
        // debug!("    result={}", result);
2559
463
        return result;
2560
2561
1.25k
        fn add_dots_to_braille_char(ch: char, baseline_indicator_hack: bool) -> char {
2562
1.25k
            let as_u32 = ch as u32;
2563
1.25k
            if (0x2800..0x28FF).contains(&as_u32) {
2564
919
                return unsafe {char::from_u32_unchecked(as_u32 | 0xC0)};  // safe because we have checked the range
2565
332
            } else if baseline_indicator_hack && 
ch == 'b'89
{
2566
7
                return '𝑏'
2567
            } else {
2568
325
                return ch;
2569
            }
2570
1.25k
        }
2571
469
    }
2572
2573
521
    fn mark_nav_speech(speech: String) -> String {
2574
        // add unique markers (since speech is mostly ascii letters and digits, most any symbol will do)
2575
        // it's a bug (but happened during intent generation), we might have identical id's, choose innermost one
2576
521
        debug!("mark_nav_speech: adding [[ {} ]] ", 
&speech0
);
2577
521
        if !speech.contains("[[") {
2578
521
            return "[[".to_string() + &speech + "]]";
2579
        } else {
2580
0
            return speech
2581
        }
2582
521
    }
2583
2584
456k
    fn replace<T:TreeOrString<'c, 'm, T>>(&'r mut self, replacement: &Replacement, mathml: Element<'c>) -> Result<T> {
2585
        return Ok(
2586
456k
            match replacement {
2587
63.6k
                Replacement::Text(t) => T::from_string(t.clone(), self.doc)
?0
,
2588
151k
                Replacement::XPath(xpath) => xpath.replace(self, mathml)
?9
,
2589
60.7k
                Replacement::TTS(tts) => {
2590
60.7k
                    T::from_string(
2591
60.7k
                        self.speech_rules.pref_manager.borrow().get_tts().replace(tts, &self.speech_rules.pref_manager.borrow(), self, mathml)
?0
,
2592
60.7k
                        self.doc
2593
0
                    )?
2594
                },
2595
45.5k
                Replacement::Intent(intent) => {
2596
45.5k
                    intent.replace(self, mathml)
?0
2597
                },
2598
115k
                Replacement::Test(test) => {
2599
115k
                    test.replace(self, mathml)
?0
2600
                },
2601
7.28k
                Replacement::With(with) => {
2602
7.28k
                    with.replace(self, mathml)
?0
2603
                },
2604
3.78k
                Replacement::SetVariables(vars) => {
2605
3.78k
                    vars.replace(self, mathml)
?0
2606
                },
2607
7.45k
                Replacement::Insert(ic) => {
2608
7.45k
                    ic.replace(self, mathml)
?0
2609
                },
2610
2
                Replacement::Translate(id) => {
2611
2
                    id.replace(self, mathml)
?0
2612
                },
2613
            }
2614
        )
2615
456k
    }
2616
2617
    /// Iterate over all the nodes, concatenating the result strings together with a ' ' between them
2618
    /// If the node is an element, pattern match it
2619
    /// For 'Text' and 'Attribute' nodes, convert them to strings
2620
121k
    fn replace_nodes<T:TreeOrString<'c, 'm, T>>(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T> {
2621
121k
        return T::replace_nodes(self, nodes, mathml);
2622
121k
    }
2623
2624
    /// Iterate over all the nodes finding matches for the elements
2625
    /// For this case of returning MathML, everything else is an error
2626
48.6k
    fn replace_nodes_tree(&'r mut self, nodes: Vec<Node<'c>>, _mathml: Element<'c>) -> Result<Element<'m>> {
2627
48.6k
        let mut children = Vec::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2628
69.6k
        for node in 
nodes48.6k
{
2629
69.6k
            let matched = match node {
2630
41.9k
                Node::Element(n) => self.match_pattern::<Element<'m>>(n)
?0
,
2631
27.5k
                Node::Text(t) =>  {
2632
27.5k
                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2633
27.5k
                    leaf.set_text(t.text());
2634
27.5k
                    leaf
2635
                },
2636
32
                Node::Attribute(attr) => {
2637
                    // debug!("  from attr with text '{}'", attr.value());
2638
32
                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2639
32
                    leaf.set_text(attr.value());
2640
32
                    leaf
2641
                },
2642
                _ => {
2643
0
                    bail!("replace_nodes: found unexpected node type!!!");
2644
                },
2645
            };
2646
69.6k
            children.push(matched);
2647
        }
2648
2649
48.6k
        let result = create_mathml_element(&self.doc, "TEMP_NAME");    // FIX: what name should be used?
2650
48.6k
        result.append_children(children);
2651
        // debug!("replace_nodes_tree\n{}\n====>>>>>\n", mml_to_string(result));
2652
48.6k
        return Ok( result );
2653
48.6k
    }
2654
2655
72.9k
    fn replace_nodes_string(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
2656
        // debug!("replace_nodes: working on {} nodes", nodes.len());
2657
72.9k
        let mut result = String::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2658
72.9k
        let mut first_time = true;
2659
85.7k
        for node in 
nodes72.9k
{
2660
85.7k
            if first_time {
2661
72.9k
                first_time = false;
2662
72.9k
            } else {
2663
12.8k
                result.push(' ');
2664
12.8k
            };
2665
85.7k
            let matched = match node {
2666
66.5k
                Node::Element(n) => self.match_pattern::<String>(n)
?0
,
2667
19.2k
                Node::Text(t) =>  self.replace_chars(t.text(), mathml)
?0
,
2668
14
                Node::Attribute(attr) => self.replace_chars(attr.value(), mathml)
?0
,
2669
0
                _ => bail!("replace_nodes: found unexpected node type!!!"),
2670
            };
2671
85.7k
            result += &matched;
2672
        }
2673
72.9k
        return Ok( result );
2674
72.9k
    }
2675
2676
    /// Lookup unicode "pronunciation" of char.
2677
    /// Note: TTS is not supported here (not needed and a little less efficient)
2678
58.0k
    pub fn replace_chars(&'r mut self, str: &str, mathml: Element<'c>) -> Result<String> {
2679
58.0k
        let chars = str.chars().collect::<Vec<char>>();
2680
58.0k
        let rules = self.speech_rules;
2681
        // handled in match_pattern -- temporarily leaving as comments in case something is missed and needed here
2682
        // if self.nav_node_offset > 0 && chars.len() > 1 {
2683
        //     if self.nav_node_offset > chars.len() {
2684
        //         debug!("replace_chars: nav_node_offset {} is larger than string length {}", self.nav_node_offset, chars.len());
2685
        //         self.nav_node_offset = chars.len();
2686
        //     }
2687
        //     let ch = chars[self.nav_node_offset-1];
2688
        //     debug!("replace_chars: adjusted string to '{}' based on nav_node_offset {}", ch, self.nav_node_offset);
2689
        //     if rules.translate_single_chars_only {
2690
        //         return self.replace_single_char(ch, mathml);
2691
        //     } else {
2692
        //         return Ok( ch.to_string() );
2693
        //     }
2694
        // }
2695
58.0k
        if is_quoted_string(str) {  // quoted string -- already translated (set in get_braille_chars)
2696
12.5k
            return Ok(unquote_string(str).to_string());
2697
45.5k
        }
2698
        // in a string, avoid "a" -> "eigh", "." -> "point", etc
2699
45.5k
        if rules.translate_single_chars_only {
2700
30.0k
            if chars.len() == 1 {
2701
27.3k
                return self.replace_single_char(chars[0], mathml)
2702
            } else {
2703
                // more than one char -- fix up non-breaking space
2704
2.69k
                return Ok(str.replace('\u{00A0}', " ").replace(['\u{2061}', '\u{2062}', '\u{2063}', '\u{2064}'], ""))
2705
            }
2706
15.5k
        };
2707
2708
15.5k
        let result = chars.iter()
2709
18.2k
            .
map15.5k
(|&ch| self.replace_single_char(ch, mathml))
2710
15.5k
            .collect::<Result<Vec<String>>>()
?0
2711
15.5k
            .join("");
2712
15.5k
        return Ok( result );
2713
58.0k
    }
2714
2715
45.6k
    fn replace_single_char(&'r mut self, ch: char, mathml: Element<'c>) -> Result<String> {
2716
45.6k
        let ch_as_u32 = ch as u32;
2717
45.6k
        let rules =  self.speech_rules;
2718
45.6k
        let mut unicode = rules.unicode_short.borrow();
2719
45.6k
        let mut replacements = unicode.get( &ch_as_u32 );
2720
        // debug!("replace_single_char: looking for unicode {} for char '{}'/{:#06x}, found: {:?}", rules.name, ch, ch_as_u32, replacements);
2721
45.6k
        if replacements.is_none() {
2722
            // see if it in the full unicode table (if it isn't loaded already)
2723
1.64k
            let pref_manager = rules.pref_manager.borrow();
2724
1.64k
            let unicode_pref_files = if rules.name == RulesFor::Braille {
pref_manager.get_braille_unicode_file()525
} else {
pref_manager.get_speech_unicode_file()1.12k
};
2725
1.64k
            let should_ignore_file_time = pref_manager.pref_to_string("CheckRuleFiles") == "All";
2726
1.64k
            if rules.unicode_full.borrow().is_empty() || 
!1.18k
rules.unicode_full_files.borrow()1.18k
.
is_file_up_to_date1.18k
(unicode_pref_files.1, should_ignore_file_time) {
2727
468
                info!("*** Loading full unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2728
468
                rules.unicode_full.borrow_mut().clear();
2729
468
                rules.unicode_full_files.borrow_mut().set_files_and_times(rules.read_unicode(None, false)
?0
);
2730
468
                info!("# Unicode defs = {}/{}", 
rules.unicode_short.borrow().len()0
,
rules.unicode_full.borrow().len()0
);
2731
1.18k
            }
2732
1.64k
            unicode = rules.unicode_full.borrow();
2733
1.64k
            replacements = unicode.get( &ch_as_u32 );
2734
1.64k
            if replacements.is_none() {
2735
269
              self.translate_count = 0;     // not in loop
2736
              // debug!("*** Did not find unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2737
269
              if rules.translate_single_chars_only || 
ch247
.
is_ascii247
() { // speech or if braille, avoid loop (ASCII remains ASCII if not found)
2738
269
                return Ok(String::from(ch));   // no replacement, so just return the char and hope for the best
2739
              } else { // braille -- must turn into braille dots
2740
                // Emulate what NVDA does: generate (including single quotes) '\xhhhh' or '\yhhhhhh'
2741
0
                let ch_as_int = ch as u32;
2742
0
                let prefix_indicator = if ch_as_int < 1<<16 {'x'} else {'y'};
2743
0
                return self.replace_chars( &format!("'\\{prefix_indicator}{:06x}'", ch_as_int), mathml);
2744
              }
2745
1.37k
            }
2746
43.9k
        };
2747
2748
        // map across all the parts of the replacement, collect them up into a Vec, and then concat them together
2749
45.3k
        let result = replacements.unwrap()
2750
45.3k
                    .iter()
2751
45.3k
                    .map(|replacement|
2752
49.1k
                         self.replace(replacement, mathml)
2753
49.1k
                                .with_context(|| 
format!0
("Unicode replacement error: {replacement}")) )
2754
45.3k
                    .collect::<Result<Vec<String>>>()
?0
2755
45.3k
                    .join(" ");
2756
45.3k
         self.translate_count = 0;     // found a replacement, so not in a loop
2757
45.3k
        return Ok(result);
2758
45.6k
    }
2759
}
2760
2761
/// Hack to allow replacement of `str` with braille chars.
2762
12.5k
pub fn braille_replace_chars(str: &str, mathml: Element) -> Result<String> {
2763
12.5k
    return BRAILLE_RULES.with(|rules| {
2764
12.5k
        let rules = rules.borrow();
2765
12.5k
        let new_package = Package::new();
2766
12.5k
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), "", 0);
2767
12.5k
        return rules_with_context.replace_chars(str, mathml);
2768
12.5k
    })
2769
12.5k
}
2770
2771
2772
2773
#[cfg(test)]
2774
mod tests {
2775
    #[allow(unused_imports)]
2776
    use crate::init_logger;
2777
2778
    use super::*;
2779
2780
    #[test]
2781
1
    fn test_read_statement() {
2782
1
        let str = r#"---
2783
1
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;
2784
1
        let doc = YamlLoader::load_from_str(str).unwrap();
2785
1
        assert_eq!(doc.len(), 1);
2786
1
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
2787
2788
1
        SpeechPattern::build(&doc[0], Path::new("testing"), &mut rules).unwrap();
2789
1
        assert_eq!(rules.rules["math"].len(), 1, "\nshould only be one rule");
2790
2791
1
        let speech_pattern = &rules.rules["math"][0];
2792
1
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
2793
1
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
2794
1
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
2795
1
        assert_eq!(speech_pattern.replacements.replacements.len(), 1, "\nreplacement failure");
2796
1
        assert_eq!(speech_pattern.replacements.replacements[0].to_string(), r#""./*""#, "\nreplacement failure");
2797
1
    }
2798
2799
    #[test]
2800
1
    fn test_read_statements_with_replace() {
2801
1
        let str = r#"---
2802
1
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;
2803
1
        let doc = YamlLoader::load_from_str(str).unwrap();
2804
1
        assert_eq!(doc.len(), 1);
2805
1
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
2806
1
        SpeechPattern::build(&doc[0], Path::new("testing"), &mut rules).unwrap();
2807
2808
1
        let str = r#"---
2809
1
        {name: default, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;
2810
1
        let doc2 = YamlLoader::load_from_str(str).unwrap();
2811
1
        assert_eq!(doc2.len(), 1);
2812
1
        SpeechPattern::build(&doc2[0], Path::new("testing"), &mut rules).unwrap();
2813
1
        assert_eq!(rules.rules["math"].len(), 1, "\nfirst rule not replaced");
2814
2815
1
        let speech_pattern = &rules.rules["math"][0];
2816
1
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
2817
1
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
2818
1
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
2819
1
        assert_eq!(speech_pattern.replacements.replacements.len(), 2, "\nreplacement failure");
2820
1
    }
2821
2822
    #[test]
2823
1
    fn test_read_statements_with_add() {
2824
1
        let str = r#"---
2825
1
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;
2826
1
        let doc = YamlLoader::load_from_str(str).unwrap();
2827
1
        assert_eq!(doc.len(), 1);
2828
1
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
2829
1
        SpeechPattern::build(&doc[0], Path::new("testing"), &mut rules).unwrap();
2830
2831
1
        let str = r#"---
2832
1
        {name: another-rule, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;
2833
1
        let doc2 = YamlLoader::load_from_str(str).unwrap();
2834
1
        assert_eq!(doc2.len(), 1);
2835
1
        SpeechPattern::build(&doc2[0], Path::new("testing"), &mut rules).unwrap();
2836
1
        assert_eq!(rules.rules["math"].len(), 2, "\nsecond rule not added");
2837
2838
1
        let speech_pattern = &rules.rules["math"][0];
2839
1
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
2840
1
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
2841
1
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
2842
1
        assert_eq!(speech_pattern.replacements.replacements.len(), 1, "\nreplacement failure");
2843
1
    }
2844
2845
    #[test]
2846
1
    fn test_debug_no_debug() {
2847
1
        let str = r#"*[2]/*[3][text()='3']"#;
2848
1
        let result = MyXPath::add_debug_string_arg(str);
2849
1
        assert!(result.is_ok());
2850
1
        assert_eq!(result.unwrap(), str);
2851
1
    }
2852
2853
    #[test]
2854
1
    fn test_debug_no_debug_with_quote() {
2855
1
        let str = r#"*[2]/*[3][text()='(']"#;
2856
1
        let result = MyXPath::add_debug_string_arg(str);
2857
1
        assert!(result.is_ok());
2858
1
        assert_eq!(result.unwrap(), str);
2859
1
    }
2860
2861
    #[test]
2862
1
    fn test_debug_no_quoted_paren() {
2863
1
        let str = r#"DEBUG(*[2]/*[3][text()='3'])"#;
2864
1
        let result = MyXPath::add_debug_string_arg(str);
2865
1
        assert!(result.is_ok());
2866
1
        assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][text()='3'], "*[2]/*[3][text()='3']")"#);
2867
1
    }
2868
2869
    #[test]
2870
1
    fn test_debug_quoted_paren() {
2871
1
        let str = r#"DEBUG(*[2]/*[3][text()='('])"#;
2872
1
        let result = MyXPath::add_debug_string_arg(str);
2873
1
        assert!(result.is_ok());
2874
1
        assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][text()='('], "*[2]/*[3][text()='(']")"#);
2875
1
    }
2876
2877
    #[test]
2878
1
    fn test_debug_quoted_paren_before_paren() {
2879
1
        let str = r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics') and IsBracketed(., '(', ')')"#;
2880
1
        let result = MyXPath::add_debug_string_arg(str);
2881
1
        assert!(result.is_ok());
2882
1
        assert_eq!(result.unwrap(), r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics', "ClearSpeak_Matrix = 'Combinatorics'") and IsBracketed(., '(', ')')"#);
2883
1
    }
2884
2885
2886
// zipped files do NOT include "zz", hence we need to exclude this test
2887
cfg_if::cfg_if! {if #[cfg(not(feature = "include-zip"))] {  
2888
    #[test]
2889
1
    fn test_up_to_date() {
2890
        use crate::interface::*;
2891
        // initialize and move to a directory where making a time change doesn't really matter
2892
1
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
2893
1
        set_preference("Language", "zz-aa").unwrap();
2894
        // not much is support in zz
2895
1
        if let Err(
e0
) = set_mathml("<math><mi>x</mi></math>") {
2896
0
            error!("{}", crate::errors_to_string(&e));
2897
0
            panic!("Should not be an error in setting MathML")
2898
1
        }
2899
2900
1
        set_preference("CheckRuleFiles", "All").unwrap();
2901
1
        assert!(!is_file_time_same(), "file's time did not get updated");
2902
1
        set_preference("CheckRuleFiles", "None").unwrap();
2903
1
        assert!(is_file_time_same(), "file's time was wrongly updated (preference 'CheckRuleFiles' should have prevented updating)");
2904
2905
        // change a file, cause read_files to be called, and return if MathCAT noticed the change and updated its time
2906
2
        fn is_file_time_same() -> bool {
2907
            // read and write a unicode file in a test dir
2908
            // files are read in due to setting the MathML
2909
2910
            use std::time::Duration;
2911
2
            return SPEECH_RULES.with(|rules| {
2912
2
                let start_main_file = rules.borrow().unicode_short_files.borrow().ft[0].clone();
2913
2914
                // open the file, read all the contents, then write them back so the time changes
2915
2
                let contents = std::fs::read(&start_main_file.file).expect(&format!("Failed to read file {} during test", &start_main_file.file.to_string_lossy()));
2916
2
                std::fs::write(start_main_file.file, contents).unwrap();
2917
2
                std::thread::sleep(Duration::from_millis(5));       // pause a little to make sure the time changes
2918
2919
                // speak should cause the file stored to have a new time
2920
2
                if let Err(
e0
) = get_spoken_text() {
2921
0
                    error!("{}", crate::errors_to_string(&e));
2922
0
                    panic!("Should not be an error in speech")
2923
2
                }
2924
2
                return rules.borrow().unicode_short_files.borrow().ft[0].time == start_main_file.time;
2925
2
            });
2926
2
        }    
2927
1
    }
2928
}}
2929
2930
    // #[test]
2931
    // fn test_nested_debug_quoted_paren() {
2932
    //     let str = r#"DEBUG(*[2]/*[3][DEBUG(text()='(')])"#;
2933
    //     let result = MyXPath::add_debug_string_arg(str);
2934
    //     assert!(result.is_ok());
2935
    //     assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][DEBUG(text()='(')], "DEBUG(*[2]/*[3][DEBUG(text()='(')], \"text()='(')]\")"#);
2936
    // }
2937
2938
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/tts.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/tts.rs.html index 805aec71..b37af4f2 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/tts.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/tts.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/tts.rs
Line
Count
Source
1
//! #Speech Engine Information
2
//!
3
//! ## Pitch (default 140hz)
4
//! ### SAPI4: Relative pitch
5
//! * Number is relative to the default/current pitch.
6
//! * 50 is 1/2 of the default/current pitch, 200 is 2 times the default/current pitch.
7
//!
8
//!  Note: no range is specified by the spec
9
//! ### SAPI5: Relative pitch
10
//! From https://documentation.help/SAPI-5/sapi.xsd
11
//! * A value of +10 sets a voice to speak at four-thirds (or 4/3) of its default pitch.
12
//! * Each increment between –10 and +10 is logarithmically distributed such that
13
//!   incrementing/decrementing by 1 is multiplying/dividing the pitch by the 24th root of 2 (about 1.03).
14
//! * Values more extreme than –10 and 10 will be passed to an engine but SAPI 5compliant engines may not support
15
//!   such extremes and instead may clip the pitch to the maximum or minimum pitch it supports.
16
//! * Values of –24 and +24 must lower and raise pitch by 1 octave respectively.
17
//!   All incrementing/decrementing by 1 must multiply/divide the pitch by the 24th root of 2.
18
//! 
19
//! Note: an octave is a doubling of frequency, so pitch change of 100% should turn into +/- 24
20
//! ### SSML: Relative pitch
21
//! * pitch in hertz (default/current man's voice is about 100hz, woman's 180hz)
22
//!
23
//! Note: other legal values for SSML are not supported, and all numbers are interpreted as relative changes
24
//! ### Eloquence: Absolute pitch (relative pitch not supported by Eloquence)
25
//! * Range is 0 - 100.  Guess is that 0 ~= 42hz, 100 ~= 422hz based on supported \"sapi\" values
26
//! ## Rate (default 180 words/min)
27
//! ### SAPI4: Absolute rate
28
//! * Number is relative to the default/current rate
29
//! * 50 is 1/2 of the default/current rate, 200 is 2 times the default/current rate
30
//!
31
//! Note: no range is specified by the spec
32
//! ### SAPI5: Relative rate
33
//! * Number is in range -10 to 10
34
//! * -10 is 1/3 of the default/current speed; 10 3 times the default/current speech
35
//! * changes are logarithmic -- a change of +/-1 corresponds to multiplying/dividing by 10th root of 3 (10*log_3(change))
36
//! ### SSML: Relative rate %
37
//! * 100% is no change, 50% is half the current rate, 200% is doubling the rate
38
//!
39
//!  Note:  other legal values for SSML are not supported, and all numbers are interpreted as relative changes
40
//! ### Eloquence: Absolute rate (relative rate not supported by Eloquence)
41
//! * Range is 0 - 250, which manual seems to indicate corresponds to 70 - 1297 words/min.
42
//! * * Window-Eyes only seems to give values in range 1 - 150.
43
//! * On the low end, 1 ~= 72words/min
44
//! * On the high end, I can't tell, but 80 seems to be a bit over twice normal (~400 words/min?)
45
//!   250 ~= 1297 words/min based on supported "sapi" values
46
//!
47
//! Note: this means words/min = 4.18 * Eloquence rate + 66
48
//! So the relative pause rate is 180/computed value
49
//!
50
//!
51
//! ## Volume (default 100 \[full])
52
//! ### SAPI4: Relative volume
53
//! * Number is relative to the default/current rate
54
//! * Range is 0 - 065535
55
//! ### SAPI5: Relative volume
56
//! * Number is in range 0 to 100
57
//! ### SSML: Relative volume
58
//! * Number is in range 0 to 100
59
//!
60
//! Note:  other legal values for SSML are not supported, and all numbers are interpreted as relative changes
61
//! ### Eloquence: Absolute volume (relative volume not supported by Eloquence)
62
//! * Range is 0 - 100
63
//!
64
//! ## Pause
65
//! * All systems -- pauses are given in milliseconds
66
//!
67
//! Note: Pauses on output are scaled based on the ratio of the current rate to the default rate (180 wpm)
68
#![allow(clippy::needless_return)]
69
70
use crate::{errors::*, prefs::PreferenceManager, speech::ReplacementArray};
71
use sxd_document::dom::Element;
72
use yaml_rust::Yaml;
73
74
use std::fmt;
75
use crate::speech::{SpeechRulesWithContext, MyXPath, TreeOrString};
76
use std::string::ToString;
77
use std::str::FromStr;
78
use strum_macros::{Display, EnumString};
79
use regex::Regex;
80
use std::sync::LazyLock;
81
use sxd_xpath::Value;
82
use html_escape::encode_safe;
83
84
const MIN_PAUSE:f64 = 50.0;         // ms -- avoids clutter of putting out pauses that probably can't be heard
85
const PAUSE_SHORT:f64 = 200.0;  // ms
86
const PAUSE_MEDIUM:f64 = 400.0; // ms
87
const PAUSE_LONG:f64 = 800.0;   // ms
88
const PAUSE_XLONG:f64 = 1600.0;   // ms
89
const PAUSE_AUTO:f64 = 987654321.5;   // ms -- hopefully unique
90
pub const PAUSE_AUTO_STR: &str = "\u{F8FA}\u{F8FA}";
91
const RATE_FROM_CONTEXT:f64 = 987654321.5;   // hopefully unique
92
93
const MAX_TRANSLATE_RECURSION: usize = 5;   // probably never more than three -- prevents infinite loop/stack overflows bugs
94
95
/// TTSCommand are the supported TTS commands
96
/// When parsing the YAML rule files, they are converted to these enums
97
#[derive(Debug, Clone, PartialEq, Eq, Display, EnumString)]
98
#[strum(serialize_all = "snake_case")]  // allows lower case
99
pub enum TTSCommand {
100
    Pause,
101
    Rate,
102
    Volume,
103
    Pitch,
104
    Audio,
105
    Gender,
106
    Voice,
107
    Spell,
108
    Bookmark,
109
    Pronounce,
110
}
111
112
#[derive(Debug, Clone)]
113
pub struct Pronounce {
114
    text: String,       // plain text
115
    ipa: String,        // ipa 
116
    sapi5: String,
117
    eloquence: String,
118
}
119
120
121
impl fmt::Display for Pronounce {
122
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
123
1
        let mut comma = "";     // comma separator so it looks right
124
1
        write!(f, "pronounce: [")
?0
;
125
1
        if !self.text.is_empty() {
126
1
            write!(f, "text: '{}'", self.text)
?0
;
127
1
            comma = ",";
128
0
        }
129
1
        write!(f, "pronounce: [")
?0
;
130
1
        if !self.ipa.is_empty() {
131
1
            write!(f, "{}ipa: '{}'", comma, self.ipa)
?0
;
132
1
            comma = ",";
133
0
        }
134
1
        write!(f, "pronounce: [")
?0
;
135
1
        if !self.sapi5.is_empty() {
136
1
            write!(f, "{}sapi5: '{}'", comma, self.sapi5)
?0
;
137
1
            comma = ",";
138
0
        }
139
1
        write!(f, "pronounce: [")
?0
;
140
1
        if !self.eloquence.is_empty() {
141
1
            write!(f, "{}eloquence: '{}'", comma, self.eloquence)
?0
;
142
0
        }
143
1
        return writeln!(f, "]");
144
1
    }
145
}
146
147
impl Pronounce {
148
5.02k
    fn build(values: &Yaml) -> Result<Pronounce> {
149
        use crate::speech::{as_str_checked, yaml_to_type};
150
        use crate::pretty_print::yaml_to_string;
151
152
5.02k
        let mut text = "";
153
5.02k
        let mut ipa = "";
154
5.02k
        let mut sapi5 = "";
155
5.02k
        let mut eloquence = "";
156
        // values should be an array with potential values for Pronounce
157
5.02k
        let values = values.as_vec().ok_or_else(||
158
0
                                        anyhow!("'pronounce' value '{}' is not an array", yaml_to_type(values)))?;
159
20.0k
        for key_value in 
values5.02k
{
160
20.0k
            let key_value_hash = key_value.as_hash().ok_or_else(||
161
0
                                        anyhow!("pronounce value '{}' is not key/value pair", yaml_to_string(key_value, 0)))?;
162
20.0k
            if key_value_hash.len() != 1 {
163
0
                bail!("pronounce value {:?} is not a single key/value pair", key_value_hash);
164
20.0k
            }
165
        
166
20.0k
            for (key, value) in key_value_hash {
167
20.0k
                match as_str_checked(key)
?0
{
168
20.0k
                    "text" => text = 
as_str_checked5.02k
(
value5.02k
)
?0
,
169
15.0k
                    "ipa" => ipa = 
as_str_checked5.02k
(
value5.02k
)
?0
,
170
10.0k
                    "sapi5" => sapi5 = 
as_str_checked5.02k
(
value5.02k
)
?0
,
171
5.02k
                    "eloquence" => eloquence = as_str_checked(value)
?0
,
172
0
                    _ => bail!("unknown pronounce type: {} with value {}", yaml_to_string(key, 0), yaml_to_string(value, 0)),
173
                }
174
            }
175
        }
176
5.02k
        if text.is_empty() {
177
1
            bail!("'text' key/value is required for 'pronounce' -- it is used is the speech engine is unknown.")
178
5.02k
        }
179
5.02k
        return Ok( Pronounce{
180
5.02k
            text: text.to_string(),
181
5.02k
            ipa: ipa.to_string(),
182
5.02k
            sapi5: sapi5.to_string(),
183
5.02k
            eloquence: eloquence.to_string()
184
5.02k
        } );
185
    
186
187
5.02k
    }
188
}
189
/// TTSCommands are either numbers (f64 because of YAML) or strings
190
#[derive(Debug, Clone)]
191
pub enum TTSCommandValue {
192
    Number(f64),
193
    String(String),
194
    XPath(MyXPath),
195
    Pronounce(Box<Pronounce>),
196
}
197
198
impl TTSCommandValue {
199
77.2k
    fn get_num(&self) -> f64 {
200
77.2k
        match self {
201
77.2k
            TTSCommandValue::Number(n) => return *n,
202
0
            _                               => panic!("Internal error: TTSCommandValue is not a number"),
203
        }
204
77.2k
    }
205
206
0
    fn get_string(&self) -> &String {
207
0
        match self {
208
0
            TTSCommandValue::String(s) => return s,
209
0
            _                                  => panic!("Internal error: TTSCommandValue is not a string"),
210
        }
211
0
    }
212
213
0
    fn get_pronounce(&self) -> &Pronounce {
214
0
        match self {
215
0
            TTSCommandValue::Pronounce(p) => return p,
216
0
            _                               => panic!("Internal error: TTSCommandValue is not a 'pronounce' command'"),
217
        }
218
        
219
0
    }
220
}
221
222
/// A TTS rule consists of the command, the value, and its replacement
223
#[derive(Debug, Clone)]
224
pub struct TTSCommandRule {
225
    command: TTSCommand,
226
    value: TTSCommandValue,
227
    replacements: ReplacementArray
228
}
229
230
impl fmt::Display for TTSCommandRule {
231
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
232
1
        let value = match &self.value {
233
0
            TTSCommandValue::String(s) => s.to_string(),
234
0
            TTSCommandValue::Number(f) => f.to_string(),
235
0
            TTSCommandValue::XPath(p) => p.to_string(),
236
1
            TTSCommandValue::Pronounce(p) => p.to_string(),
237
        };
238
1
        if self.command == TTSCommand::Pause {
239
0
            return write!(f, "pause: {value}");
240
        } else {
241
1
            return write!(f, "{}: {}{}", self.command, value, self.replacements);
242
        };
243
1
    }
244
}
245
246
247
impl TTSCommandRule {
248
2.43M
    pub fn new(command: TTSCommand, value: TTSCommandValue, replacements: ReplacementArray) -> TTSCommandRule {
249
2.43M
        return TTSCommandRule{
250
2.43M
            command,
251
2.43M
            value,
252
2.43M
            replacements
253
2.43M
        }
254
2.43M
    }
255
}
256
257
/// Supported TTS engines
258
/// These types should do something for all the TTSCommands
259
#[allow(clippy::upper_case_acronyms)]
260
#[allow(dead_code)]
261
#[derive(Debug, Clone, PartialEq, Eq)]
262
pub enum TTS {
263
    None,
264
    SSML,
265
    SAPI5,
266
//    Eloquence,
267
//    Mac,
268
}
269
270
impl TTS {
271
    /// Given the tts command ("pause", "rate", etc) and its value, build the TTS data structure for it.
272
    ///
273
    /// `tts_command`: one of "pause", "rate", etc
274
    ///
275
    /// `value`: keyword 'value' or dict with 'value' and 'replace' (optional) keys
276
2.41M
    pub fn build(tts_command: &str, values: &Yaml) -> Result<Box<TTSCommandRule>> {
277
        use crate::pretty_print::yaml_to_string;
278
2.41M
        let hashmap = values.as_hash();
279
        let tts_value;
280
        let replacements;
281
2.41M
        if hashmap.is_some() {
282
446k
            tts_value = &values["value"];
283
446k
            if tts_value.is_badvalue() {
284
0
                bail!("{} TTS command is missing a 'value' sub-key. Found\n{}", tts_command, yaml_to_string(values, 1));
285
446k
            };
286
446k
            replacements = ReplacementArray::build(&values["replace"])
?0
;
287
1.96M
        } else {
288
1.96M
            tts_value = values;
289
1.96M
            replacements = ReplacementArray::build_empty();
290
1.96M
        }
291
2.41M
        let tts_str_value = yaml_to_string(tts_value, 0);
292
2.41M
        let tts_str_value = tts_str_value.trim();
293
2.41M
        let tts_enum = match TTSCommand::from_str(tts_command) {
294
2.41M
            Ok(t) => t,
295
0
            Err(_) => bail!("Internal error in build_tts: unexpected rule ({:?}) encountered", tts_command),
296
        };
297
    
298
2.41M
        let 
tts_command_value2.41M
= match tts_enum {
299
            TTSCommand::Pause | TTSCommand::Rate | TTSCommand::Volume | TTSCommand::Pitch => {
300
                // these strings are almost always what the value will be, so we try them first
301
1.05M
                let val = match tts_str_value {
302
1.05M
                    "auto" => 
Ok( PAUSE_AUTO )71.1k
,
303
982k
                    "short" => 
Ok( PAUSE_SHORT )555k
,
304
426k
                    "medium" => 
Ok( PAUSE_MEDIUM )112k
,
305
314k
                    "long" => 
Ok( PAUSE_LONG )84.7k
,
306
229k
                    "xlong" => 
Ok( PAUSE_XLONG )3.89k
,
307
225k
                    "$MathRate" => 
Ok( RATE_FROM_CONTEXT )4.32k
, // special case hack -- value determined in replace
308
221k
                    _ => tts_str_value.parse::<f64>()
309
                };
310
311
1.05M
                match val {
312
832k
                    Ok(num) => TTSCommandValue::Number(num),
313
                    Err(_) => {
314
                        // let's try as an xpath (e.g., could be '$CapitalLetters_Pitch')
315
                        TTSCommandValue::XPath(
316
221k
                            MyXPath::build(tts_value).with_context(|| 
format!0
("while trying to evaluate value of '{tts_enum}:'"))
?0
317
                        )
318
                    }
319
                }
320
            },
321
            TTSCommand::Bookmark | TTSCommand::Spell => {
322
                TTSCommandValue::XPath(
323
1.13M
                    MyXPath::build(values).with_context(|| 
format!0
("while trying to evaluate value of '{tts_enum}:'"))
?0
324
                )
325
            },
326
            TTSCommand::Pronounce => {
327
5.02k
                TTSCommandValue::Pronounce( 
Box::new5.02k
( Pronounce::build(values)
?1
) )
328
            },
329
            _ => {
330
220k
                TTSCommandValue::String(tts_str_value.to_string())
331
            },
332
        };
333
2.41M
        return Ok( Box::new( TTSCommandRule::new(tts_enum, tts_command_value, replacements) ) );
334
2.41M
    }
335
    
336
    /// The rule called to execute the TTSCommand `command`
337
    /// `prefs` are used for scaling the speech rate
338
    /// some rules have MathML nested inside, so we need to do replacements on them (hence `rules` and `mathml` are needed)
339
    ///
340
    /// A string is returned for the speech engine.
341
    ///
342
    /// `auto` pausing is handled at a later phase and a special char is used for it
343
60.7k
    pub fn replace<'c, 's:'c, 'm:'c, 'r, T:TreeOrString<'c, 'm, T>>(&self, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<T> {
344
60.7k
        return T::replace_tts(self, command, prefs, rules_with_context, mathml);
345
60.7k
    }
346
347
60.7k
    pub fn replace_string<'c, 's:'c, 'm, 'r>(&self, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
348
        // The general idea is we handle the begin tag, the contents, and then the end tag
349
        // For the begin/end tag, we dispatch off to specialized code for each TTS engine
350
351
        // 'bookmark' is special in that we need to eval the xpath
352
        // rather than pass a bunch of extra info into the generic handling routines, we just deal with them here
353
60.7k
        if command.command == TTSCommand::Bookmark {
354
            // if we aren't suppose to generate bookmarks, short circuit and just return
355
26.7k
            if prefs.pref_to_string("Bookmark") != "true"{
356
26.7k
                return Ok("".to_string());
357
0
            }
358
0
            return Ok( match self {
359
0
                TTS::None  => "".to_string(),
360
0
                TTS::SSML => compute_bookmark_element(&command.value, "mark name", rules_with_context, mathml)?,
361
0
                TTS::SAPI5 => compute_bookmark_element(&command.value, "bookmark mark", rules_with_context, mathml)?,
362
            } );
363
33.9k
        }
364
365
33.9k
        let mut command = command.clone();
366
33.9k
        if command.command == TTSCommand::Spell {
367
            // spell is also special because we need to eval the xpath to get the string to spell (typically the text content of an mi)
368
2.77k
            match command.value {
369
2.77k
                TTSCommandValue::XPath(xpath) => {
370
2.77k
                    let value = xpath.evaluate(rules_with_context.get_context(), mathml)
371
2.77k
                        .with_context(|| 
format!0
("in 'spell': can't evaluate xpath \"{}\"",
&xpath.to_string()0
) )
?0
;
372
2.77k
                    let value_string = match 
value527
{
373
2.24k
                        Value::String(s) => s,
374
527
                        Value::Nodeset(nodes) if nodes.size() == 1 => {
375
527
                            let node = nodes.iter().next().unwrap();
376
527
                            if let Some(text) = node.text() {
377
527
                                text.text().to_string()
378
0
                            } else if let Some(el) = node.element() {
379
0
                                if crate::xpath_functions::is_leaf(el) {
380
0
                                    crate::canonicalize::as_text(el).to_string()
381
                                } else {
382
0
                                    bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string",  &xpath.to_string());
383
                                }
384
                            } else {
385
0
                                bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string, it is {} nodes",
386
0
                                        &xpath.to_string(), nodes.size());
387
                            }
388
                        },
389
0
                        _ => bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string",  &xpath.to_string()),
390
                    };
391
                    // Chemistry wants to spell elements like "Na". But we also have the issue of capitalization (SpeechOverrides_CapitalLetters)
392
                    //   so the "N" need to use that. The logic for that is already in unicode.yaml. We could replicate that here.
393
                    // Rather than duplicate the logic (we would need to handle 'a', and who knows what in other languages),
394
                    //   we split the token into each letter and call the replacement on each letter.
395
                    // That in turns calls spell again. We end up in an infinite loop. To prevent this we set a flag that says don't recurse.
396
                    // The only structure to put that in is SpeechRulesWithContext. A bit of a hack to put it there, but better than a static var.
397
                    // Also, to avoid repeating the code for "cap" over and over, "spell" with "translate" is used. So keep going until no "translate"
398
2.77k
                    let xpath_str = xpath.to_string();
399
2.77k
                    if rules_with_context.inside_spell && 
!xpath_str.contains("translate")848
{
400
0
                        command.value = TTSCommandValue::String(value_string);
401
0
                        rules_with_context.translate_count  = 0;
402
2.77k
                    } else if rules_with_context.translate_count > MAX_TRANSLATE_RECURSION {
403
0
                        bail!("Rule error: potential infinite recursion found in translate: {}", xpath_str);
404
                    } else {
405
                        // let the call to replace call spell on the individual chars -- that lets an "cap" be outside "spell"
406
2.77k
                        rules_with_context.translate_count += 1;
407
2.77k
                        let str_with_spaces = value_string.chars()
408
2.94k
                                .
map2.77k
(|ch| {
409
2.94k
                                    rules_with_context.inside_spell = true;
410
2.94k
                                    let spelled_char = rules_with_context.replace_chars(ch.to_string().as_str(), mathml);
411
2.94k
                                    rules_with_context.inside_spell = false;
412
2.94k
                                    spelled_char
413
2.94k
                                })
414
2.77k
                                .collect::<Result<Vec<String>>>()
?0
415
2.77k
                                .join(" ");
416
2.77k
                        return Ok(str_with_spaces);
417
                    }             
418
                },
419
0
                _ => bail!("Implementation error: found non-xpath value for spell"),
420
            }
421
31.1k
        } else if command.command == TTSCommand::Rate && 
self != &TTS::None0
&&
422
0
                  let TTSCommandValue::Number(number_value) = command.value &&
423
0
                  number_value == RATE_FROM_CONTEXT {
424
                    // handle hack for $Rate -- need to look up in context
425
0
                    let rate_from_context = crate::navigate::context_get_variable(rules_with_context.get_context(), "MathRate", mathml)?.parse::<usize>().unwrap_or(100);
426
0
                    command.value = TTSCommandValue::Number(rate_from_context as f64);
427
31.1k
                }
428
429
        // evaluate any xpath value now to simplify later code
430
31.1k
        if let TTSCommandValue::XPath(
xpath1.31k
) = command.value {
431
1.31k
            let eval_str = xpath.replace::<String>(rules_with_context, mathml)
?0
;
432
            // can it be a number?
433
1.31k
            command.value = match eval_str.parse::<f64>() {
434
1.31k
                Ok(num) => TTSCommandValue::Number(num),
435
0
                Err(_) => TTSCommandValue::String(eval_str),
436
            }
437
29.8k
        };
438
439
440
        // small optimization to avoid generating tags that do nothing
441
31.1k
        if ((command.command == TTSCommand::Pitch || 
command.command == TTSCommand::Volume29.8k
||
command.command == TTSCommand::Pause29.8k
) &&
command.value.get_num() == 0.031.1k
) ||
442
29.8k
           (command.command == TTSCommand::Rate && 
command.value.get_num() == 100.00
) {
443
1.31k
            return command.replacements.replace::<String>(rules_with_context, mathml);
444
29.8k
        }
445
446
29.8k
        let mut result = String::with_capacity(255);
447
29.8k
        result += &match self {
448
29.8k
            TTS::None  => self.get_string_none(&command, prefs, true),
449
0
            TTS::SSML  => self.get_string_ssml(&command, prefs, true),
450
0
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, true),
451
        };
452
453
454
29.8k
        if !command.replacements.is_empty()  {
455
0
            if result.is_empty() {
456
0
                result += " ";
457
0
            }
458
            // need to sanitize string so that SSML is not injected into it via mtext, etc.
459
0
            let speech = command.replacements.replace::<String>(rules_with_context, mathml)?;  
460
0
            result += &encode_safe(&speech);
461
29.8k
        }
462
463
29.8k
        let end_tag = match self {
464
29.8k
            TTS::None  => self.get_string_none(&command, prefs, false),
465
0
            TTS::SSML  => self.get_string_ssml(&command, prefs, false),
466
0
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, false),
467
        };
468
469
29.8k
        if end_tag.is_empty() {
470
29.8k
            return Ok( result ); // avoids adding in " "
471
        } else {
472
0
            return Ok( result + &end_tag );
473
        }
474
475
476
0
        fn compute_bookmark_element<'c, 's:'c, 'm, 'r>(value: &TTSCommandValue, tag_and_attr: &str, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
477
0
            match value {
478
0
                TTSCommandValue::XPath(xpath) => {
479
0
                    let id = xpath.replace::<String>(rules_with_context, mathml)?;
480
0
                    return Ok( format!("<{tag_and_attr}='{id}'/>") );
481
                },
482
0
                _ => bail!("Implementation error: found bookmark value that did not evaluate to a string"),
483
            }
484
0
        }
485
    
486
60.7k
    }
487
488
    // auto pausing can't be known until neighboring strings are computed
489
    // we create a unique string in this case and compute the real value later 
490
75.9k
    fn get_string_none(&self, command: &TTSCommandRule,  prefs: &PreferenceManager, is_start_tag: bool) -> String  {
491
        // they only thing to do is handle "pause" with some punctuation hacks along with 'spell'        
492
75.9k
        if is_start_tag {
493
46.1k
            if command.command == TTSCommand::Pause {
494
46.1k
                let amount = command.value.get_num();
495
                // only ',' and ';' are used as '.' didn't seem to reliably generate pauses in tests
496
46.1k
                return crate::speech::CONCAT_INDICATOR.to_string() + (
497
46.1k
                    if amount == PAUSE_AUTO {
498
19.5k
                        PAUSE_AUTO_STR
499
                    } else {
500
26.5k
                        let amount  =  amount * TTS::get_pause_multiplier(prefs);
501
26.5k
                        if amount <= MIN_PAUSE {
502
11.3k
                            ""
503
15.1k
                        } else if amount <= 250.0 {
504
9.93k
                            ","
505
                        } else  {
506
5.21k
                            ";"
507
                        }
508
                    }
509
                );
510
32
            } else if command.command == TTSCommand::Spell {
511
                // debug!("spell rule: {}", command.value.get_string());
512
0
                return command.value.get_string().to_string();
513
32
            } else if let TTSCommandValue::Pronounce(p) = &command.value {
514
32
                return crate::speech::CONCAT_INDICATOR.to_string() + &p.text;
515
0
            }
516
29.8k
        };
517
29.8k
        return "".to_string();
518
75.9k
    }
519
    
520
0
    fn get_string_sapi5(&self, command: &TTSCommandRule, prefs: &PreferenceManager, is_start_tag: bool) -> String  {
521
0
        return match &command.command {
522
0
            TTSCommand::Pause => if is_start_tag {
523
0
                let amount = command.value.get_num();
524
0
                if amount == PAUSE_AUTO {
525
0
                    PAUSE_AUTO_STR.to_string()
526
                } else {
527
0
                    let amount = amount * TTS::get_pause_multiplier(prefs);
528
0
                    if amount > MIN_PAUSE {
529
0
                        format!("<silence msec=='{}ms'/>", (amount * 180.0/prefs.get_rate()).round())
530
                    } else {
531
0
                        "".to_string()
532
                    }
533
                }
534
            } else {
535
0
                "".to_string()
536
            },
537
            // pitch must be in [-10, 10], logarithmic based on octaves
538
            // note MathPlayer uses 'absmiddle' (requires keeping a stack) -- could be 'middle' is not well supported
539
0
            TTSCommand::Pitch => if is_start_tag {format!("<pitch middle=\"{}\">", (24.0*(1.0+command.value.get_num()/100.0).log2()).round())} else {String::from("</prosody>")},
540
            // rate must be in [-10, 10], but we get relative %s. 300% => 10 (see comments at top of file)
541
0
            TTSCommand::Rate =>  if is_start_tag {format!("<rate speed='{:.1}'>", 10.0*(0.01*command.value.get_num()).log(3.0))} else {String::from("</rate>")},
542
0
            TTSCommand::Volume =>if is_start_tag {format!("<volume level='{}'>", command.value.get_num())} else {String::from("</volume>")},
543
0
            TTSCommand::Audio => "".to_string(),    // SAPI5 doesn't support audio
544
0
            TTSCommand::Gender =>if is_start_tag {format!("<voice required=\"Gender={}\">", command.value.get_string())} else {String::from("</prosody>")},
545
0
            TTSCommand::Voice =>if is_start_tag {format!("<voice required=\"Name={}\">", command.value.get_string())} else {String::from("</prosody>")},
546
0
            TTSCommand::Spell =>if is_start_tag {format!("<spell>{}", command.value.get_string())} else {String::from("</spell>")},
547
0
            TTSCommand::Pronounce =>if is_start_tag {
548
0
                    format!("<pron sym='{}'>{}", &command.value.get_pronounce().sapi5, &command.value.get_pronounce().text)
549
                } else {
550
0
                    String::from("</pron>")
551
                },
552
0
            TTSCommand::Bookmark => panic!("Internal error: bookmarks should have been handled earlier"),
553
        };
554
0
    }
555
556
0
    fn get_string_ssml(&self, command: &TTSCommandRule, prefs: &PreferenceManager, is_start_tag: bool) -> String  {
557
0
        return match &command.command {
558
            TTSCommand::Pause => {
559
0
                if is_start_tag {
560
0
                    let amount = command.value.get_num();
561
0
                    if amount == PAUSE_AUTO {
562
0
                        PAUSE_AUTO_STR.to_string()
563
                    } else {
564
0
                        let amount = amount * TTS::get_pause_multiplier(prefs);
565
0
                        if amount > MIN_PAUSE {
566
0
                            format!("<break time='{}ms'/>", (amount * 180.0/prefs.get_rate()).round())
567
                        } else {
568
0
                            "".to_string()
569
                        }
570
                    }
571
                } else {
572
0
                    "".to_string()
573
                }
574
            },
575
0
            TTSCommand::Pitch => if is_start_tag {format!("<prosody pitch='{}%'>", command.value.get_num())} else {String::from("</prosody>")},
576
0
            TTSCommand::Rate =>  if is_start_tag {format!("<prosody rate='{}%'>", command.value.get_num())} else {String::from("</prosody>")},
577
0
            TTSCommand::Volume =>if is_start_tag {format!("<prosody volume='{}db'>", command.value.get_num())} else {String::from("</prosody>")},
578
0
            TTSCommand::Audio =>if is_start_tag {format!("<audio src='{}'>", command.value.get_string())} else {String::from("</audio>")}, // only 'beep' is supported for now
579
0
            TTSCommand::Gender =>if is_start_tag {format!("<voice required='gender=\"{}\"'>", command.value.get_string())} else {String::from("</voice>")},
580
0
            TTSCommand::Voice =>if is_start_tag {format!("<voice required='{}'>", command.value.get_string())} else {String::from("</voice>")},
581
0
            TTSCommand::Spell =>if is_start_tag {format!("<say-as interpret-as='characters'>{}", command.value.get_string())} else {String::from("</say-as>")},
582
0
            TTSCommand::Pronounce =>if is_start_tag {
583
0
                format!("<phoneme alphabet='ipa' ph='{}'>{}", &command.value.get_pronounce().ipa, &command.value.get_pronounce().text)
584
            } else {
585
0
                String::from("</phoneme>")
586
            },
587
0
        TTSCommand::Bookmark => panic!("Internal error: bookmarks should have been handled earlier"),
588
        }
589
0
    }
590
591
26.5k
    fn get_pause_multiplier(prefs: &PreferenceManager) -> f64 {
592
26.5k
        return prefs.pref_to_string("PauseFactor").parse::<f64>().unwrap_or(100.)/100.0;
593
26.5k
    }
594
595
    /// Compute the length of the pause to use.
596
    ///
597
    /// The computation is based on the length of the speech strings (after removing tagging).
598
    /// There is a bias towards pausing more _after_ longer strings.
599
19.5k
    pub fn compute_auto_pause(&self, prefs: &PreferenceManager, before: &str, after: &str) -> String {
600
0
        static REMOVE_XML: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<.+?>").unwrap()); // punctuation ending with a '.'
601
        let before_len;
602
        let after_len;
603
19.5k
        match self {
604
0
            TTS::SSML | TTS::SAPI5 => {
605
0
                before_len = REMOVE_XML.replace_all(before, "").len();
606
0
                after_len = REMOVE_XML.replace_all(after, "").len();
607
0
            },
608
19.5k
            _ => {
609
19.5k
                before_len = before.len();
610
19.5k
                after_len = after.len();
611
19.5k
            },
612
        }
613
614
        // pause values are not cut in stone
615
        // the calculation bias to 'previous' is based on MathPlayer which used '30 * #-of-descendants-on-left
616
        // I think I did this as a sort of "take a breath" after saying something long although one might want to do that
617
        //   before speaking something long.
618
19.5k
        if after_len < 3 {
619
            // hack to prevent pausing before "of" in exprs like "the fourth power of secant, of x"
620
            // if it should pause anywhere, it should be after the "of"
621
3.31k
            return "".to_string(); 
622
16.2k
        }
623
16.2k
        let pause = std::cmp::min(3000, ((2 * before_len + after_len)/48) * 128);
624
        // create a TTSCommandRule so we reuse code
625
16.2k
        let command = TTSCommandRule::new(
626
16.2k
            TTSCommand::Pause,
627
16.2k
            TTSCommandValue::Number(pause as f64),
628
16.2k
            ReplacementArray::build_empty(),
629
        );
630
16.2k
        return match self {
631
16.2k
            TTS::None  => self.get_string_none(&command, prefs, true),
632
0
            TTS::SSML  => self.get_string_ssml(&command, prefs, true),
633
0
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, true),
634
        };
635
636
19.5k
    }
637
638
    /// Take the longest of the pauses
639
    ///
640
    /// Two other options are:
641
    /// 1. average the pauses
642
    /// 2. add the pauses together.
643
    ///
644
    /// Until evidence points otherwise, use 'longest'.
645
5.10k
    pub fn merge_pauses(&self, str: &str) -> String {
646
        // we need specialized merges for each TTS engine because we need to know the format of the commands
647
5.10k
        return match self {
648
5.10k
            TTS::None  => self.merge_pauses_none(str),
649
1
            TTS::SSML  => self.merge_pauses_ssml(str),
650
1
            TTS::SAPI5 => self.merge_pauses_sapi5(str),
651
        };        
652
5.10k
    }
653
654
5.10k
    fn merge_pauses_none(&self, str: &str) -> String {
655
        // punctuation used for pauses is ",", ";" 
656
2
        static SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+([;,])").unwrap()); // two or more pauses
657
2
        static MULTIPLE_PAUSES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([,;][,;]+)").unwrap()); // two or more pauses
658
        // we reduce all sequences of two or more pauses to a single medium pause
659
5.10k
        let merges_string = SPACES.replace_all(str, "$1").to_string();
660
5.10k
        let merges_string = MULTIPLE_PAUSES.replace_all(&merges_string, ";").to_string();
661
5.10k
        return merges_string;
662
5.10k
    }
663
664
2
    fn merge_pauses_xml<F>(str: &str, full_attr_re: &Regex, sub_attr_re: &Regex, replace_with: F) -> String 
665
2
            where F: Fn(usize) -> String {
666
        // we reduce all sequences of two or more pauses to the max pause amount
667
        // other options would be the sum or an average
668
        // maybe some amount a little longer than the max would be best???
669
2
        let mut merges_string = str.to_string();
670
2
        for cap in full_attr_re.captures_iter(str) {
671
2
            let mut amount = 0;
672
4
            for c in 
sub_attr_re2
.
captures_iter2
(
&cap[0]2
) {
673
4
                amount = std::cmp::max(amount, c[1].parse::<usize>().unwrap());
674
4
            };
675
2
            merges_string = merges_string.replace(&cap[0], &replace_with(amount));
676
        }
677
2
        return merges_string;
678
2
    }
679
680
1
    fn merge_pauses_sapi5(&self, str: &str) -> String {
681
1
        static CONSECUTIVE_BREAKS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<silence msec[^>]+?> *){2,}").unwrap()); // two or more pauses
682
1
        static PAUSE_AMOUNT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"msec=.*?(\d+)").unwrap()); // amount after 'time'
683
1
        let replacement = |amount: usize| format!("<silence msec=='{amount}ms'/>");
684
1
        return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement);
685
1
    }
686
687
1
    fn merge_pauses_ssml(&self, str: &str) -> String {
688
1
        static CONSECUTIVE_BREAKS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<break time=[^>]+?> *){2,}").unwrap()); // two or more pauses
689
1
        static PAUSE_AMOUNT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"time=.*?(\d+)").unwrap()); // amount after 'time'
690
1
        let replacement = |amount: usize| format!("<break time='{amount}ms'/>");
691
1
        return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement);
692
1
    }
693
}
694
695
#[cfg(test)]
696
mod tests {
697
    use super::*;
698
    use yaml_rust::YamlLoader;
699
700
    #[test]
701
    /// Verifies pronounce YAML builds and renders all supported fields.
702
1
    fn pronounce_build_and_display() {
703
1
        let yaml = YamlLoader::load_from_str(
704
1
            r#"
705
1
- text: "alpha"
706
1
- ipa: "a"
707
1
- sapi5: "b"
708
1
- eloquence: "c"
709
1
"#,
710
        )
711
1
        .unwrap();
712
1
        let values = &yaml[0];
713
1
        let rule = TTS::build("pronounce", values).unwrap();
714
1
        let rendered = format!("{rule}");
715
716
1
        assert!(rendered.contains("text: 'alpha'"));
717
1
        assert!(rendered.contains("ipa: 'a'"));
718
1
        assert!(rendered.contains("sapi5: 'b'"));
719
1
        assert!(rendered.contains("eloquence: 'c'"));
720
1
    }
721
722
    #[test]
723
    /// Ensures pronounce requires a text entry and rejects missing text.
724
1
    fn pronounce_requires_text() {
725
1
        let yaml = YamlLoader::load_from_str(
726
1
            r#"
727
1
- ipa: "a"
728
1
"#,
729
        )
730
1
        .unwrap();
731
1
        let values = &yaml[0];
732
1
        let err = TTS::build("pronounce", values).unwrap_err();
733
1
        assert!(err.to_string().contains("'text' key/value is required"));
734
1
    }
735
736
    #[test]
737
    /// Coalesces adjacent punctuation pauses for the None engine.
738
1
    fn merge_pauses_none_coalesces() {
739
1
        let input = "a,,;b";
740
1
        let output = TTS::None.merge_pauses(input);
741
1
        assert!(!output.contains(",,"));
742
1
        assert!(output.contains(";"));
743
1
    }
744
745
    #[test]
746
    /// Uses the maximum pause when merging consecutive SSML breaks.
747
1
    fn merge_pauses_ssml_keeps_max() {
748
1
        let input = "<break time='100ms'/><break time='300ms'/>";
749
1
        let output = TTS::SSML.merge_pauses(input);
750
1
        assert!(!output.contains("100ms"));
751
1
        assert!(output.contains("300ms"));
752
1
    }
753
754
    #[test]
755
    /// Uses the maximum pause when merging consecutive SAPI5 breaks.
756
1
    fn merge_pauses_sapi5_keeps_max() {
757
1
        let input = "<silence msec=='100ms'/><silence msec=='300ms'/>";
758
1
        let output = TTS::SAPI5.merge_pauses(input);
759
1
        assert!(!output.contains("100ms"));
760
1
        assert!(output.contains("300ms"));
761
1
    }
762
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/tts.rs
Line
Count
Source
1
//! #Speech Engine Information
2
//!
3
//! ## Pitch (default 140hz)
4
//! ### SAPI4: Relative pitch
5
//! * Number is relative to the default/current pitch.
6
//! * 50 is 1/2 of the default/current pitch, 200 is 2 times the default/current pitch.
7
//!
8
//!  Note: no range is specified by the spec
9
//! ### SAPI5: Relative pitch
10
//! From https://documentation.help/SAPI-5/sapi.xsd
11
//! * A value of +10 sets a voice to speak at four-thirds (or 4/3) of its default pitch.
12
//! * Each increment between –10 and +10 is logarithmically distributed such that
13
//!   incrementing/decrementing by 1 is multiplying/dividing the pitch by the 24th root of 2 (about 1.03).
14
//! * Values more extreme than –10 and 10 will be passed to an engine but SAPI 5compliant engines may not support
15
//!   such extremes and instead may clip the pitch to the maximum or minimum pitch it supports.
16
//! * Values of –24 and +24 must lower and raise pitch by 1 octave respectively.
17
//!   All incrementing/decrementing by 1 must multiply/divide the pitch by the 24th root of 2.
18
//! 
19
//! Note: an octave is a doubling of frequency, so pitch change of 100% should turn into +/- 24
20
//! ### SSML: Relative pitch
21
//! * pitch in hertz (default/current man's voice is about 100hz, woman's 180hz)
22
//!
23
//! Note: other legal values for SSML are not supported, and all numbers are interpreted as relative changes
24
//! ### Eloquence: Absolute pitch (relative pitch not supported by Eloquence)
25
//! * Range is 0 - 100.  Guess is that 0 ~= 42hz, 100 ~= 422hz based on supported \"sapi\" values
26
//! ## Rate (default 180 words/min)
27
//! ### SAPI4: Absolute rate
28
//! * Number is relative to the default/current rate
29
//! * 50 is 1/2 of the default/current rate, 200 is 2 times the default/current rate
30
//!
31
//! Note: no range is specified by the spec
32
//! ### SAPI5: Relative rate
33
//! * Number is in range -10 to 10
34
//! * -10 is 1/3 of the default/current speed; 10 3 times the default/current speech
35
//! * changes are logarithmic -- a change of +/-1 corresponds to multiplying/dividing by 10th root of 3 (10*log_3(change))
36
//! ### SSML: Relative rate %
37
//! * 100% is no change, 50% is half the current rate, 200% is doubling the rate
38
//!
39
//!  Note:  other legal values for SSML are not supported, and all numbers are interpreted as relative changes
40
//! ### Eloquence: Absolute rate (relative rate not supported by Eloquence)
41
//! * Range is 0 - 250, which manual seems to indicate corresponds to 70 - 1297 words/min.
42
//! * * Window-Eyes only seems to give values in range 1 - 150.
43
//! * On the low end, 1 ~= 72words/min
44
//! * On the high end, I can't tell, but 80 seems to be a bit over twice normal (~400 words/min?)
45
//!   250 ~= 1297 words/min based on supported "sapi" values
46
//!
47
//! Note: this means words/min = 4.18 * Eloquence rate + 66
48
//! So the relative pause rate is 180/computed value
49
//!
50
//!
51
//! ## Volume (default 100 \[full])
52
//! ### SAPI4: Relative volume
53
//! * Number is relative to the default/current volume
54
//! * Range is 0 - 65535
55
//! ### SAPI5: Relative volume
56
//! * Number is in range 0 to 100
57
//! ### SSML: Relative volume
58
//! * Number is in range 0 to 100
59
//!
60
//! Note:  other legal values for SSML are not supported, and all numbers are interpreted as relative changes
61
//! ### Eloquence: Absolute volume (relative volume not supported by Eloquence)
62
//! * Range is 0 - 100
63
//!
64
//! ## Pause
65
//! * All systems -- pauses are given in milliseconds
66
//!
67
//! Note: Pauses on output are scaled based on the ratio of the current rate to the default rate (180 wpm)
68
#![allow(clippy::needless_return)]
69
70
use crate::{errors::*, prefs::PreferenceManager, speech::ReplacementArray};
71
use sxd_document::dom::Element;
72
use yaml_rust::Yaml;
73
74
use std::fmt;
75
use crate::speech::{SpeechRulesWithContext, MyXPath, TreeOrString};
76
use std::string::ToString;
77
use std::str::FromStr;
78
use strum_macros::{Display, EnumString};
79
use regex::Regex;
80
use std::sync::LazyLock;
81
use sxd_xpath::Value;
82
use html_escape::encode_safe;
83
84
/// Pauses (in ms, after scaling) at or below this value are not emitted --
/// avoids the clutter of putting out pauses that probably can't be heard.
const MIN_PAUSE: f64 = 50.0;
/// Length in ms of the `short` pause keyword.
const PAUSE_SHORT: f64 = 200.0;
/// Length in ms of the `medium` pause keyword.
const PAUSE_MEDIUM: f64 = 400.0;
/// Length in ms of the `long` pause keyword.
const PAUSE_LONG: f64 = 800.0;
/// Length in ms of the `xlong` pause keyword.
const PAUSE_XLONG: f64 = 1600.0;
/// Sentinel (hopefully unique) number stored for the `auto` pause keyword.
const PAUSE_AUTO: f64 = 987654321.5;
/// Placeholder written to the output for an `auto` pause; the real pause is computed later.
pub const PAUSE_AUTO_STR: &str = "\u{F8FA}\u{F8FA}";
/// Sentinel (hopefully unique) number stored for the `$MathRate` keyword.
/// It has the same numeric value as `PAUSE_AUTO`; each is only tested together with its own command.
const RATE_FROM_CONTEXT: f64 = 987654321.5;

/// Upper bound on nested `translate` handling inside `spell`.
/// Probably never more than three -- prevents infinite loop/stack overflow bugs.
const MAX_TRANSLATE_RECURSION: usize = 5;
94
95
/// TTSCommand are the supported TTS commands
96
/// When parsing the YAML rule files, they are converted to these enums
97
#[derive(Debug, Clone, PartialEq, Eq, Display, EnumString)]
98
#[strum(serialize_all = "snake_case")]  // allows lower case
99
pub enum TTSCommand {
100
    Pause,
101
    Rate,
102
    Volume,
103
    Pitch,
104
    Audio,
105
    Gender,
106
    Voice,
107
    Spell,
108
    Bookmark,
109
    Pronounce,
110
}
111
112
/// The alternative pronunciations given by a `pronounce` TTS command.
///
/// Only `text` is required; a field is the empty string when no value was given for it.
#[derive(Debug, Clone)]
pub struct Pronounce {
    text: String,       // plain text -- what is spoken when no engine-specific form applies
    ipa: String,        // ipa -- used for the SSML `phoneme` element
    sapi5: String,      // used for the SAPI5 `pron` element
    eloquence: String,
}


impl fmt::Display for Pronounce {
    /// Writes `pronounce: [key: 'value',...]` followed by a newline.
    ///
    /// Only the non-empty fields are listed, in the order text, ipa, sapi5, eloquence.
    /// The opening `pronounce: [` is written exactly once (it used to be repeated before every field).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let fields = [
            ("text", &self.text),
            ("ipa", &self.ipa),
            ("sapi5", &self.sapi5),
            ("eloquence", &self.eloquence),
        ];

        write!(f, "pronounce: [")?;
        let mut comma = "";     // comma separator so it looks right -- empty until the first field is written
        for (name, value) in fields {
            if !value.is_empty() {
                write!(f, "{comma}{name}: '{value}'")?;
                comma = ",";
            }
        }
        return writeln!(f, "]");
    }
}
146
147
impl Pronounce {
148
5.02k
    fn build(values: &Yaml) -> Result<Pronounce> {
149
        use crate::speech::{as_str_checked, yaml_to_type};
150
        use crate::pretty_print::yaml_to_string;
151
152
5.02k
        let mut text = "";
153
5.02k
        let mut ipa = "";
154
5.02k
        let mut sapi5 = "";
155
5.02k
        let mut eloquence = "";
156
        // values should be an array with potential values for Pronounce
157
5.02k
        let values = values.as_vec().ok_or_else(||
158
0
                                        anyhow!("'pronounce' value '{}' is not an array", yaml_to_type(values)))?;
159
20.0k
        for key_value in 
values5.02k
{
160
20.0k
            let key_value_hash = key_value.as_hash().ok_or_else(||
161
0
                                        anyhow!("pronounce value '{}' is not key/value pair", yaml_to_string(key_value, 0)))?;
162
20.0k
            if key_value_hash.len() != 1 {
163
0
                bail!("pronounce value {:?} is not a single key/value pair", key_value_hash);
164
20.0k
            }
165
        
166
20.0k
            for (key, value) in key_value_hash {
167
20.0k
                match as_str_checked(key)
?0
{
168
20.0k
                    "text" => text = 
as_str_checked5.02k
(
value5.02k
)
?0
,
169
15.0k
                    "ipa" => ipa = 
as_str_checked5.02k
(
value5.02k
)
?0
,
170
10.0k
                    "sapi5" => sapi5 = 
as_str_checked5.02k
(
value5.02k
)
?0
,
171
5.02k
                    "eloquence" => eloquence = as_str_checked(value)
?0
,
172
0
                    _ => bail!("unknown pronounce type: {} with value {}", yaml_to_string(key, 0), yaml_to_string(value, 0)),
173
                }
174
            }
175
        }
176
5.02k
        if text.is_empty() {
177
1
            bail!("'text' key/value is required for 'pronounce' -- it is used is the speech engine is unknown.")
178
5.02k
        }
179
5.02k
        return Ok( Pronounce{
180
5.02k
            text: text.to_string(),
181
5.02k
            ipa: ipa.to_string(),
182
5.02k
            sapi5: sapi5.to_string(),
183
5.02k
            eloquence: eloquence.to_string()
184
5.02k
        } );
185
    
186
187
5.02k
    }
188
}
189
/// TTSCommands are either numbers (f64 because of YAML) or strings
190
#[derive(Debug, Clone)]
191
pub enum TTSCommandValue {
192
    Number(f64),
193
    String(String),
194
    XPath(MyXPath),
195
    Pronounce(Box<Pronounce>),
196
}
197
198
impl TTSCommandValue {
199
77.2k
    fn get_num(&self) -> f64 {
200
77.2k
        match self {
201
77.2k
            TTSCommandValue::Number(n) => return *n,
202
0
            _                               => panic!("Internal error: TTSCommandValue is not a number"),
203
        }
204
77.2k
    }
205
206
0
    fn get_string(&self) -> &String {
207
0
        match self {
208
0
            TTSCommandValue::String(s) => return s,
209
0
            _                                  => panic!("Internal error: TTSCommandValue is not a string"),
210
        }
211
0
    }
212
213
0
    fn get_pronounce(&self) -> &Pronounce {
214
0
        match self {
215
0
            TTSCommandValue::Pronounce(p) => return p,
216
0
            _                               => panic!("Internal error: TTSCommandValue is not a 'pronounce' command'"),
217
        }
218
        
219
0
    }
220
}
221
222
/// A TTS rule consists of the command, the value, and its replacement
223
#[derive(Debug, Clone)]
224
pub struct TTSCommandRule {
225
    command: TTSCommand,
226
    value: TTSCommandValue,
227
    replacements: ReplacementArray
228
}
229
230
impl fmt::Display for TTSCommandRule {
231
1
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
232
1
        let value = match &self.value {
233
0
            TTSCommandValue::String(s) => s.to_string(),
234
0
            TTSCommandValue::Number(f) => f.to_string(),
235
0
            TTSCommandValue::XPath(p) => p.to_string(),
236
1
            TTSCommandValue::Pronounce(p) => p.to_string(),
237
        };
238
1
        if self.command == TTSCommand::Pause {
239
0
            return write!(f, "pause: {value}");
240
        } else {
241
1
            return write!(f, "{}: {}{}", self.command, value, self.replacements);
242
        };
243
1
    }
244
}
245
246
247
impl TTSCommandRule {
248
2.43M
    pub fn new(command: TTSCommand, value: TTSCommandValue, replacements: ReplacementArray) -> TTSCommandRule {
249
2.43M
        return TTSCommandRule{
250
2.43M
            command,
251
2.43M
            value,
252
2.43M
            replacements
253
2.43M
        }
254
2.43M
    }
255
}
256
257
/// The supported TTS engines/markup targets.
///
/// Each of these should do something for all of the `TTSCommand`s.
#[allow(clippy::upper_case_acronyms)]
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TTS {
    /// No markup is generated.
    None,
    /// SSML markup is generated.
    SSML,
    /// SAPI5 markup is generated.
    SAPI5,
//    Eloquence,
//    Mac,
}
269
270
impl TTS {
271
    /// Given the tts command ("pause", "rate", etc) and its value, build the TTS data structure for it.
272
    ///
273
    /// `tts_command`: one of "pause", "rate", etc
274
    ///
275
    /// `value`: keyword 'value' or dict with 'value' and 'replace' (optional) keys
276
2.41M
    pub fn build(tts_command: &str, values: &Yaml) -> Result<Box<TTSCommandRule>> {
277
        use crate::pretty_print::yaml_to_string;
278
2.41M
        let hashmap = values.as_hash();
279
        let tts_value;
280
        let replacements;
281
2.41M
        if hashmap.is_some() {
282
446k
            tts_value = &values["value"];
283
446k
            if tts_value.is_badvalue() {
284
0
                bail!("{} TTS command is missing a 'value' sub-key. Found\n{}", tts_command, yaml_to_string(values, 1));
285
446k
            };
286
446k
            replacements = ReplacementArray::build(&values["replace"])
?0
;
287
1.96M
        } else {
288
1.96M
            tts_value = values;
289
1.96M
            replacements = ReplacementArray::build_empty();
290
1.96M
        }
291
2.41M
        let tts_str_value = yaml_to_string(tts_value, 0);
292
2.41M
        let tts_str_value = tts_str_value.trim();
293
2.41M
        let tts_enum = match TTSCommand::from_str(tts_command) {
294
2.41M
            Ok(t) => t,
295
0
            Err(_) => bail!("Internal error in build_tts: unexpected rule ({:?}) encountered", tts_command),
296
        };
297
    
298
2.41M
        let 
tts_command_value2.41M
= match tts_enum {
299
            TTSCommand::Pause | TTSCommand::Rate | TTSCommand::Volume | TTSCommand::Pitch => {
300
                // these strings are almost always what the value will be, so we try them first
301
1.05M
                let val = match tts_str_value {
302
1.05M
                    "auto" => 
Ok( PAUSE_AUTO )71.1k
,
303
982k
                    "short" => 
Ok( PAUSE_SHORT )555k
,
304
426k
                    "medium" => 
Ok( PAUSE_MEDIUM )112k
,
305
314k
                    "long" => 
Ok( PAUSE_LONG )84.7k
,
306
229k
                    "xlong" => 
Ok( PAUSE_XLONG )3.89k
,
307
225k
                    "$MathRate" => 
Ok( RATE_FROM_CONTEXT )4.32k
, // special case hack -- value determined in replace
308
221k
                    _ => tts_str_value.parse::<f64>()
309
                };
310
311
1.05M
                match val {
312
832k
                    Ok(num) => TTSCommandValue::Number(num),
313
                    Err(_) => {
314
                        // let's try as an xpath (e.g., could be '$CapitalLetters_Pitch')
315
                        TTSCommandValue::XPath(
316
221k
                            MyXPath::build(tts_value).with_context(|| 
format!0
("while trying to evaluate value of '{tts_enum}:'"))
?0
317
                        )
318
                    }
319
                }
320
            },
321
            TTSCommand::Bookmark | TTSCommand::Spell => {
322
                TTSCommandValue::XPath(
323
1.13M
                    MyXPath::build(values).with_context(|| 
format!0
("while trying to evaluate value of '{tts_enum}:'"))
?0
324
                )
325
            },
326
            TTSCommand::Pronounce => {
327
5.02k
                TTSCommandValue::Pronounce( 
Box::new5.02k
( Pronounce::build(values)
?1
) )
328
            },
329
            _ => {
330
220k
                TTSCommandValue::String(tts_str_value.to_string())
331
            },
332
        };
333
2.41M
        return Ok( Box::new( TTSCommandRule::new(tts_enum, tts_command_value, replacements) ) );
334
2.41M
    }
335
    
336
    /// The rule called to execute the TTSCommand `command`
337
    /// `prefs` are used for scaling the speech rate
338
    /// some rules have MathML nested inside, so we need to do replacements on them (hence `rules` and `mathml` are needed)
339
    ///
340
    /// A string is returned for the speech engine.
341
    ///
342
    /// `auto` pausing is handled at a later phase and a special char is used for it
343
60.7k
    pub fn replace<'c, 's:'c, 'm:'c, 'r, T:TreeOrString<'c, 'm, T>>(&self, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<T> {
344
60.7k
        return T::replace_tts(self, command, prefs, rules_with_context, mathml);
345
60.7k
    }
346
347
60.7k
    pub fn replace_string<'c, 's:'c, 'm, 'r>(&self, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
348
        // The general idea is we handle the begin tag, the contents, and then the end tag
349
        // For the begin/end tag, we dispatch off to specialized code for each TTS engine
350
351
        // 'bookmark' is special in that we need to eval the xpath
352
        // rather than pass a bunch of extra info into the generic handling routines, we just deal with them here
353
60.7k
        if command.command == TTSCommand::Bookmark {
354
            // if we aren't suppose to generate bookmarks, short circuit and just return
355
26.7k
            if prefs.pref_to_string("Bookmark") != "true"{
356
26.7k
                return Ok("".to_string());
357
0
            }
358
0
            return Ok( match self {
359
0
                TTS::None  => "".to_string(),
360
0
                TTS::SSML => compute_bookmark_element(&command.value, "mark name", rules_with_context, mathml)?,
361
0
                TTS::SAPI5 => compute_bookmark_element(&command.value, "bookmark mark", rules_with_context, mathml)?,
362
            } );
363
33.9k
        }
364
365
33.9k
        let mut command = command.clone();
366
33.9k
        if command.command == TTSCommand::Spell {
367
            // spell is also special because we need to eval the xpath to get the string to spell (typically the text content of an mi)
368
2.77k
            match command.value {
369
2.77k
                TTSCommandValue::XPath(xpath) => {
370
2.77k
                    let value = xpath.evaluate(rules_with_context.get_context(), mathml)
371
2.77k
                        .with_context(|| 
format!0
("in 'spell': can't evaluate xpath \"{}\"",
&xpath.to_string()0
) )
?0
;
372
2.77k
                    let value_string = match 
value527
{
373
2.24k
                        Value::String(s) => s,
374
527
                        Value::Nodeset(nodes) if nodes.size() == 1 => {
375
527
                            let node = nodes.iter().next().unwrap();
376
527
                            if let Some(text) = node.text() {
377
527
                                text.text().to_string()
378
0
                            } else if let Some(el) = node.element() {
379
0
                                if crate::xpath_functions::is_leaf(el) {
380
0
                                    crate::canonicalize::as_text(el).to_string()
381
                                } else {
382
0
                                    bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string",  &xpath.to_string());
383
                                }
384
                            } else {
385
0
                                bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string, it is {} nodes",
386
0
                                        &xpath.to_string(), nodes.size());
387
                            }
388
                        },
389
0
                        _ => bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string",  &xpath.to_string()),
390
                    };
391
                    // Chemistry wants to spell elements like "Na". But we also have the issue of capitalization (SpeechOverrides_CapitalLetters)
392
                    //   so the "N" need to use that. The logic for that is already in unicode.yaml. We could replicate that here.
393
                    // Rather than duplicate the logic (we would need to handle 'a', and who knows what in other languages),
394
                    //   we split the token into each letter and call the replacement on each letter.
395
                    // That in turns calls spell again. We end up in an infinite loop. To prevent this we set a flag that says don't recurse.
396
                    // The only structure to put that in is SpeechRulesWithContext. A bit of a hack to put it there, but better than a static var.
397
                    // Also, to avoid repeating the code for "cap" over and over, "spell" with "translate" is used. So keep going until no "translate"
398
2.77k
                    let xpath_str = xpath.to_string();
399
2.77k
                    if rules_with_context.inside_spell && 
!xpath_str.contains("translate")848
{
400
0
                        command.value = TTSCommandValue::String(value_string);
401
0
                        rules_with_context.translate_count  = 0;
402
2.77k
                    } else if rules_with_context.translate_count > MAX_TRANSLATE_RECURSION {
403
0
                        bail!("Rule error: potential infinite recursion found in translate: {}", xpath_str);
404
                    } else {
405
                        // let the call to replace call spell on the individual chars -- that lets an "cap" be outside "spell"
406
2.77k
                        rules_with_context.translate_count += 1;
407
2.77k
                        let str_with_spaces = value_string.chars()
408
2.94k
                                .
map2.77k
(|ch| {
409
2.94k
                                    rules_with_context.inside_spell = true;
410
2.94k
                                    let spelled_char = rules_with_context.replace_chars(ch.to_string().as_str(), mathml);
411
2.94k
                                    rules_with_context.inside_spell = false;
412
2.94k
                                    spelled_char
413
2.94k
                                })
414
2.77k
                                .collect::<Result<Vec<String>>>()
?0
415
2.77k
                                .join(" ");
416
2.77k
                        return Ok(str_with_spaces);
417
                    }             
418
                },
419
0
                _ => bail!("Implementation error: found non-xpath value for spell"),
420
            }
421
31.1k
        } else if command.command == TTSCommand::Rate && 
self != &TTS::None0
&&
422
0
                  let TTSCommandValue::Number(number_value) = command.value &&
423
0
                  number_value == RATE_FROM_CONTEXT {
424
                    // handle hack for $Rate -- need to look up in context
425
0
                    let rate_from_context = crate::navigate::context_get_variable(rules_with_context.get_context(), "MathRate", mathml)?.parse::<usize>().unwrap_or(100);
426
0
                    command.value = TTSCommandValue::Number(rate_from_context as f64);
427
31.1k
                }
428
429
        // evaluate any xpath value now to simplify later code
430
31.1k
        if let TTSCommandValue::XPath(
xpath1.31k
) = command.value {
431
1.31k
            let eval_str = xpath.replace::<String>(rules_with_context, mathml)
?0
;
432
            // can it be a number?
433
1.31k
            command.value = match eval_str.parse::<f64>() {
434
1.31k
                Ok(num) => TTSCommandValue::Number(num),
435
0
                Err(_) => TTSCommandValue::String(eval_str),
436
            }
437
29.8k
        };
438
439
440
        // small optimization to avoid generating tags that do nothing
441
31.1k
        if ((command.command == TTSCommand::Pitch || 
command.command == TTSCommand::Volume29.8k
||
command.command == TTSCommand::Pause29.8k
) &&
command.value.get_num() == 0.031.1k
) ||
442
29.8k
           (command.command == TTSCommand::Rate && 
command.value.get_num() == 100.00
) {
443
1.31k
            return command.replacements.replace::<String>(rules_with_context, mathml);
444
29.8k
        }
445
446
29.8k
        let mut result = String::with_capacity(255);
447
29.8k
        result += &match self {
448
29.8k
            TTS::None  => self.get_string_none(&command, prefs, true),
449
0
            TTS::SSML  => self.get_string_ssml(&command, prefs, true),
450
0
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, true),
451
        };
452
453
454
29.8k
        if !command.replacements.is_empty()  {
455
0
            if result.is_empty() {
456
0
                result += " ";
457
0
            }
458
            // need to sanitize string so that SSML is not injected into it via mtext, etc.
459
0
            let speech = command.replacements.replace::<String>(rules_with_context, mathml)?;  
460
0
            result += &encode_safe(&speech);
461
29.8k
        }
462
463
29.8k
        let end_tag = match self {
464
29.8k
            TTS::None  => self.get_string_none(&command, prefs, false),
465
0
            TTS::SSML  => self.get_string_ssml(&command, prefs, false),
466
0
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, false),
467
        };
468
469
29.8k
        if end_tag.is_empty() {
470
29.8k
            return Ok( result ); // avoids adding in " "
471
        } else {
472
0
            return Ok( result + &end_tag );
473
        }
474
475
476
0
        fn compute_bookmark_element<'c, 's:'c, 'm, 'r>(value: &TTSCommandValue, tag_and_attr: &str, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
477
0
            match value {
478
0
                TTSCommandValue::XPath(xpath) => {
479
0
                    let id = xpath.replace::<String>(rules_with_context, mathml)?;
480
0
                    return Ok( format!("<{tag_and_attr}='{id}'/>") );
481
                },
482
0
                _ => bail!("Implementation error: found bookmark value that did not evaluate to a string"),
483
            }
484
0
        }
485
    
486
60.7k
    }
487
488
    // auto pausing can't be known until neighboring strings are computed
489
    // we create a unique string in this case and compute the real value later 
490
75.9k
    fn get_string_none(&self, command: &TTSCommandRule,  prefs: &PreferenceManager, is_start_tag: bool) -> String  {
491
        // they only thing to do is handle "pause" with some punctuation hacks along with 'spell'        
492
75.9k
        if is_start_tag {
493
46.1k
            if command.command == TTSCommand::Pause {
494
46.1k
                let amount = command.value.get_num();
495
                // only ',' and ';' are used as '.' didn't seem to reliably generate pauses in tests
496
46.1k
                return crate::speech::CONCAT_INDICATOR.to_string() + (
497
46.1k
                    if amount == PAUSE_AUTO {
498
19.5k
                        PAUSE_AUTO_STR
499
                    } else {
500
26.5k
                        let amount  =  amount * TTS::get_pause_multiplier(prefs);
501
26.5k
                        if amount <= MIN_PAUSE {
502
11.3k
                            ""
503
15.1k
                        } else if amount <= 250.0 {
504
9.93k
                            ","
505
                        } else  {
506
5.21k
                            ";"
507
                        }
508
                    }
509
                );
510
32
            } else if command.command == TTSCommand::Spell {
511
                // debug!("spell rule: {}", command.value.get_string());
512
0
                return command.value.get_string().to_string();
513
32
            } else if let TTSCommandValue::Pronounce(p) = &command.value {
514
32
                return crate::speech::CONCAT_INDICATOR.to_string() + &p.text;
515
0
            }
516
29.8k
        };
517
29.8k
        return "".to_string();
518
75.9k
    }
519
    
520
0
    fn get_string_sapi5(&self, command: &TTSCommandRule, prefs: &PreferenceManager, is_start_tag: bool) -> String  {
521
0
        return match &command.command {
522
0
            TTSCommand::Pause => if is_start_tag {
523
0
                let amount = command.value.get_num();
524
0
                if amount == PAUSE_AUTO {
525
0
                    PAUSE_AUTO_STR.to_string()
526
                } else {
527
0
                    let amount = amount * TTS::get_pause_multiplier(prefs);
528
0
                    if amount > MIN_PAUSE {
529
0
                        format!("<silence msec=='{}ms'/>", (amount * 180.0/prefs.get_rate()).round())
530
                    } else {
531
0
                        "".to_string()
532
                    }
533
                }
534
            } else {
535
0
                "".to_string()
536
            },
537
            // pitch must be in [-10, 10], logarithmic based on octaves
538
            // note MathPlayer uses 'absmiddle' (requires keeping a stack) -- could be 'middle' is not well supported
539
0
            TTSCommand::Pitch => if is_start_tag {format!("<pitch middle=\"{}\">", (24.0*(1.0+command.value.get_num()/100.0).log2()).round())} else {String::from("</prosody>")},
540
            // rate must be in [-10, 10], but we get relative %s. 300% => 10 (see comments at top of file)
541
0
            TTSCommand::Rate =>  if is_start_tag {format!("<rate speed='{:.1}'>", 10.0*(0.01*command.value.get_num()).log(3.0))} else {String::from("</rate>")},
542
0
            TTSCommand::Volume =>if is_start_tag {format!("<volume level='{}'>", command.value.get_num())} else {String::from("</volume>")},
543
0
            TTSCommand::Audio => "".to_string(),    // SAPI5 doesn't support audio
544
0
            TTSCommand::Gender =>if is_start_tag {format!("<voice required=\"Gender={}\">", command.value.get_string())} else {String::from("</prosody>")},
545
0
            TTSCommand::Voice =>if is_start_tag {format!("<voice required=\"Name={}\">", command.value.get_string())} else {String::from("</prosody>")},
546
0
            TTSCommand::Spell =>if is_start_tag {format!("<spell>{}", command.value.get_string())} else {String::from("</spell>")},
547
0
            TTSCommand::Pronounce =>if is_start_tag {
548
0
                    format!("<pron sym='{}'>{}", &command.value.get_pronounce().sapi5, &command.value.get_pronounce().text)
549
                } else {
550
0
                    String::from("</pron>")
551
                },
552
0
            TTSCommand::Bookmark => panic!("Internal error: bookmarks should have been handled earlier"),
553
        };
554
0
    }
555
556
0
    fn get_string_ssml(&self, command: &TTSCommandRule, prefs: &PreferenceManager, is_start_tag: bool) -> String  {
557
0
        return match &command.command {
558
            TTSCommand::Pause => {
559
0
                if is_start_tag {
560
0
                    let amount = command.value.get_num();
561
0
                    if amount == PAUSE_AUTO {
562
0
                        PAUSE_AUTO_STR.to_string()
563
                    } else {
564
0
                        let amount = amount * TTS::get_pause_multiplier(prefs);
565
0
                        if amount > MIN_PAUSE {
566
0
                            format!("<break time='{}ms'/>", (amount * 180.0/prefs.get_rate()).round())
567
                        } else {
568
0
                            "".to_string()
569
                        }
570
                    }
571
                } else {
572
0
                    "".to_string()
573
                }
574
            },
575
0
            TTSCommand::Pitch => if is_start_tag {format!("<prosody pitch='{}%'>", command.value.get_num())} else {String::from("</prosody>")},
576
0
            TTSCommand::Rate =>  if is_start_tag {format!("<prosody rate='{}%'>", command.value.get_num())} else {String::from("</prosody>")},
577
0
            TTSCommand::Volume =>if is_start_tag {format!("<prosody volume='{}db'>", command.value.get_num())} else {String::from("</prosody>")},
578
0
            TTSCommand::Audio =>if is_start_tag {format!("<audio src='{}'>", command.value.get_string())} else {String::from("</audio>")}, // only 'beep' is supported for now
579
0
            TTSCommand::Gender =>if is_start_tag {format!("<voice required='gender=\"{}\"'>", command.value.get_string())} else {String::from("</voice>")},
580
0
            TTSCommand::Voice =>if is_start_tag {format!("<voice required='{}'>", command.value.get_string())} else {String::from("</voice>")},
581
0
            TTSCommand::Spell =>if is_start_tag {format!("<say-as interpret-as='characters'>{}", command.value.get_string())} else {String::from("</say-as>")},
582
0
            TTSCommand::Pronounce =>if is_start_tag {
583
0
                format!("<phoneme alphabet='ipa' ph='{}'>{}", &command.value.get_pronounce().ipa, &command.value.get_pronounce().text)
584
            } else {
585
0
                String::from("</phoneme>")
586
            },
587
0
        TTSCommand::Bookmark => panic!("Internal error: bookmarks should have been handled earlier"),
588
        }
589
0
    }
590
591
26.5k
    fn get_pause_multiplier(prefs: &PreferenceManager) -> f64 {
592
26.5k
        return prefs.pref_to_string("PauseFactor").parse::<f64>().unwrap_or(100.)/100.0;
593
26.5k
    }
594
595
    /// Compute the length of the pause to use.
596
    ///
597
    /// The computation is based on the length of the speech strings (after removing tagging).
598
    /// There is a bias towards pausing more _after_ longer strings.
599
19.5k
    pub fn compute_auto_pause(&self, prefs: &PreferenceManager, before: &str, after: &str) -> String {
600
0
        static REMOVE_XML: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<.+?>").unwrap()); // punctuation ending with a '.'
601
        let before_len;
602
        let after_len;
603
19.5k
        match self {
604
0
            TTS::SSML | TTS::SAPI5 => {
605
0
                before_len = REMOVE_XML.replace_all(before, "").len();
606
0
                after_len = REMOVE_XML.replace_all(after, "").len();
607
0
            },
608
19.5k
            _ => {
609
19.5k
                before_len = before.len();
610
19.5k
                after_len = after.len();
611
19.5k
            },
612
        }
613
614
        // pause values are not cut in stone
615
        // the calculation bias to 'previous' is based on MathPlayer which used '30 * #-of-descendants-on-left
616
        // I think I did this as a sort of "take a breath" after saying something long although one might want to do that
617
        //   before speaking something long.
618
19.5k
        if after_len < 3 {
619
            // hack to prevent pausing before "of" in exprs like "the fourth power of secant, of x"
620
            // if it should pause anywhere, it should be after the "of"
621
3.31k
            return "".to_string(); 
622
16.2k
        }
623
16.2k
        let pause = std::cmp::min(3000, ((2 * before_len + after_len)/48) * 128);
624
        // create a TTSCommandRule so we reuse code
625
16.2k
        let command = TTSCommandRule::new(
626
16.2k
            TTSCommand::Pause,
627
16.2k
            TTSCommandValue::Number(pause as f64),
628
16.2k
            ReplacementArray::build_empty(),
629
        );
630
16.2k
        return match self {
631
16.2k
            TTS::None  => self.get_string_none(&command, prefs, true),
632
0
            TTS::SSML  => self.get_string_ssml(&command, prefs, true),
633
0
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, true),
634
        };
635
636
19.5k
    }
637
638
    /// Take the longest of the pauses
639
    ///
640
    /// Two other options are:
641
    /// 1. average the pauses
642
    /// 2. add the pauses together.
643
    ///
644
    /// Until evidence points otherwise, use 'longest'.
645
5.10k
    pub fn merge_pauses(&self, str: &str) -> String {
646
        // we need specialized merges for each TTS engine because we need to know the format of the commands
647
5.10k
        return match self {
648
5.10k
            TTS::None  => self.merge_pauses_none(str),
649
1
            TTS::SSML  => self.merge_pauses_ssml(str),
650
1
            TTS::SAPI5 => self.merge_pauses_sapi5(str),
651
        };        
652
5.10k
    }
653
654
5.10k
    fn merge_pauses_none(&self, str: &str) -> String {
655
        // punctuation used for pauses is ",", ";" 
656
2
        static SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+([;,])").unwrap()); // two or more pauses
657
2
        static MULTIPLE_PAUSES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"([,;][,;]+)").unwrap()); // two or more pauses
658
        // we reduce all sequences of two or more pauses to a single medium pause
659
5.10k
        let merges_string = SPACES.replace_all(str, "$1").to_string();
660
5.10k
        let merges_string = MULTIPLE_PAUSES.replace_all(&merges_string, ";").to_string();
661
5.10k
        return merges_string;
662
5.10k
    }
663
664
2
    fn merge_pauses_xml<F>(str: &str, full_attr_re: &Regex, sub_attr_re: &Regex, replace_with: F) -> String 
665
2
            where F: Fn(usize) -> String {
666
        // we reduce all sequences of two or more pauses to the max pause amount
667
        // other options would be the sum or an average
668
        // maybe some amount a little longer than the max would be best???
669
2
        let mut merges_string = str.to_string();
670
2
        for cap in full_attr_re.captures_iter(str) {
671
2
            let mut amount = 0;
672
4
            for c in 
sub_attr_re2
.
captures_iter2
(
&cap[0]2
) {
673
4
                amount = std::cmp::max(amount, c[1].parse::<usize>().unwrap());
674
4
            };
675
2
            merges_string = merges_string.replace(&cap[0], &replace_with(amount));
676
        }
677
2
        return merges_string;
678
2
    }
679
680
1
    fn merge_pauses_sapi5(&self, str: &str) -> String {
681
1
        static CONSECUTIVE_BREAKS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<silence msec[^>]+?> *){2,}").unwrap()); // two or more pauses
682
1
        static PAUSE_AMOUNT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"msec=.*?(\d+)").unwrap()); // amount after 'time'
683
1
        let replacement = |amount: usize| format!("<silence msec=='{amount}ms'/>");
684
1
        return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement);
685
1
    }
686
687
1
    fn merge_pauses_ssml(&self, str: &str) -> String {
688
1
        static CONSECUTIVE_BREAKS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(<break time=[^>]+?> *){2,}").unwrap()); // two or more pauses
689
1
        static PAUSE_AMOUNT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"time=.*?(\d+)").unwrap()); // amount after 'time'
690
1
        let replacement = |amount: usize| format!("<break time='{amount}ms'/>");
691
1
        return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement);
692
1
    }
693
}
694
695
#[cfg(test)]
696
mod tests {
697
    use super::*;
698
    use yaml_rust::YamlLoader;
699
700
    #[test]
701
    /// Verifies pronounce YAML builds and renders all supported fields.
702
1
    fn pronounce_build_and_display() {
703
1
        let yaml = YamlLoader::load_from_str(
704
1
            r#"
705
1
- text: "alpha"
706
1
- ipa: "a"
707
1
- sapi5: "b"
708
1
- eloquence: "c"
709
1
"#,
710
        )
711
1
        .unwrap();
712
1
        let values = &yaml[0];
713
1
        let rule = TTS::build("pronounce", values).unwrap();
714
1
        let rendered = format!("{rule}");
715
716
1
        assert!(rendered.contains("text: 'alpha'"));
717
1
        assert!(rendered.contains("ipa: 'a'"));
718
1
        assert!(rendered.contains("sapi5: 'b'"));
719
1
        assert!(rendered.contains("eloquence: 'c'"));
720
1
    }
721
722
    #[test]
723
    /// Ensures pronounce requires a text entry and rejects missing text.
724
1
    fn pronounce_requires_text() {
725
1
        let yaml = YamlLoader::load_from_str(
726
1
            r#"
727
1
- ipa: "a"
728
1
"#,
729
        )
730
1
        .unwrap();
731
1
        let values = &yaml[0];
732
1
        let err = TTS::build("pronounce", values).unwrap_err();
733
1
        assert!(err.to_string().contains("'text' key/value is required"));
734
1
    }
735
736
    #[test]
737
    /// Coalesces adjacent punctuation pauses for the None engine.
738
1
    fn merge_pauses_none_coalesces() {
739
1
        let input = "a,,;b";
740
1
        let output = TTS::None.merge_pauses(input);
741
1
        assert!(!output.contains(",,"));
742
1
        assert!(output.contains(";"));
743
1
    }
744
745
    #[test]
746
    /// Uses the maximum pause when merging consecutive SSML breaks.
747
1
    fn merge_pauses_ssml_keeps_max() {
748
1
        let input = "<break time='100ms'/><break time='300ms'/>";
749
1
        let output = TTS::SSML.merge_pauses(input);
750
1
        assert!(!output.contains("100ms"));
751
1
        assert!(output.contains("300ms"));
752
1
    }
753
754
    #[test]
755
    /// Uses the maximum pause when merging consecutive SAPI5 breaks.
756
1
    fn merge_pauses_sapi5_keeps_max() {
757
1
        let input = "<silence msec=='100ms'/><silence msec=='300ms'/>";
758
1
        let output = TTS::SAPI5.merge_pauses(input);
759
1
        assert!(!output.contains("100ms"));
760
1
        assert!(output.contains("300ms"));
761
1
    }
762
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/xpath_functions.rs.html b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/xpath_functions.rs.html index f7172e75..58f85f6c 100644 --- a/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/xpath_functions.rs.html +++ b/docs/llvm-cov/html/coverage/home/runner/work/MathCAT/MathCAT/src/xpath_functions.rs.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/xpath_functions.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
//! XPath underlies rule matching and speech generation. The version of xpath used is based on xpath 1.0
3
//! and includes the ability to define functions and variables.
4
//! The variables defined are all the preferences and also variables set in speech rules via the `variables` keyword.
5
//! The function defined here are:
6
//! * `IsNode(node, kind)`:  returns true if the node matches the "kind".
7
//!   Valid values are "leaf", "2D", "simple", "common_fraction", "trig_name".
8
//! * `ToOrdinal(number, fractional, plural)`: converts the number to an ordinal (e.g, third)
9
//!   * `number` -- the number to translate
10
//!   * `fractional` -- true if this is a fractional ordinal (e.g, "half")
11
//!   * `plural` -- true if answer should be plural
12
//! * `ToCommonFraction(mfrac)` -- converts the fraction to an ordinal version (e.g, 2 thirds)
13
//! * `IsLargeOp(node)` -- returns true if the node is a large operator (e.g, integral or sum)
14
//! * `IsBracketed(node, left, right, requires_comma)` -- returns true if the first/last element in the mrow match `left`/`right`.
15
//!   If the optional `requires_comma` argument is given and is `true`, then there also must be a "," in the mrow (e.g., "f(x,y)")
16
//! * `DEBUG(xpath)` -- _Very_ useful function for debugging speech rules.
17
//!   This can be used to surround a whole or part of an xpath expression in a match or output.
18
//!   The result will be printed to standard output and the result returned so that `DEBUG` does not affect the computation.    
19
20
use sxd_document::dom::{Element, ChildOfElement};
21
use sxd_xpath::{Value, Context, context, function::*, nodeset::*};
22
use crate::definitions::{Definitions, SPEECH_DEFINITIONS, BRAILLE_DEFINITIONS};
23
use regex::Regex;
24
use crate::pretty_print::mml_to_string;
25
use std::cell::{Ref, RefCell};
26
use log::{debug, error, warn};
27
use std::sync::LazyLock;
28
use std::thread::LocalKey;
29
use phf::phf_set;
30
use sxd_xpath::function::Error as XPathError;
31
use crate::canonicalize::{as_element, name, get_parent, MATHML_FROM_NAME_ATTR};
32
33
// useful utility functions
34
// note: child of an element is a ChildOfElement, so sometimes it is useful to have parallel functions,
35
//   one for Element and one for ChildOfElement.
36
37
// @returns {String} -- the text of the (leaf) element otherwise an empty string
38
126k
fn get_text_from_element(e: Element) -> String {
39
126k
    if e.children().len() == 1 &&
40
103k
       let ChildOfElement::Text(
t102k
) = e.children()[0] {
41
102k
            return t.text().to_string();
42
23.4k
        }
43
23.4k
    return "".to_string();
44
126k
}
45
46
#[allow(non_snake_case)]
47
// Same as 'is_tag', but for ChildOfElement
48
110k
fn get_text_from_COE(coe: &ChildOfElement) -> String {
49
110k
    let element = coe.element();
50
110k
    return match element {
51
110k
        Some(e) => get_text_from_element(e),
52
0
        None => "".to_string(),
53
    };
54
110k
}
55
56
// make sure that there is only one node in the NodeSet
57
// Returns the node or an Error
58
147k
pub fn validate_one_node<'n>(nodes: Nodeset<'n>, func_name: &str) -> Result<Node<'n>, Error> {
59
147k
    if nodes.size() == 0 {
60
0
        return Err(Error::Other(format!("Missing argument for {func_name}")));
61
147k
    } else if nodes.size() > 1 {
62
0
        return Err( Error::Other(format!("{} arguments for {}; expected 1 argument", nodes.size(), func_name)) );
63
147k
    }
64
147k
    return Ok( nodes.iter().next().unwrap() );
65
147k
}
66
67
// Return true if the element's name is 'name'
68
157k
fn is_tag(e: Element, name: &str) -> bool {
69
    // need to check name before the fallback of where the name came from
70
157k
    return e.name().local_part() == name || 
e47.8k
.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or_default() == name;
71
157k
}
72
73
#[allow(non_snake_case)]
74
// Same as 'is_tag', but for ChildOfElement
75
1.40k
fn is_COE_tag(coe: ChildOfElement, name: &str) -> bool {
76
1.40k
    let element = coe.element();
77
1.40k
    return element.is_some() && is_tag(element.unwrap(), name)
78
1.40k
}
79
80
/// Should be an internal structure for implementation of the IsNode, but it was useful in one place in a separate module.
81
/// This should probably be restructured slightly.
82
pub struct IsNode;
83
84
impl IsNode {
85
    /// implements ClearSpeak's definition of "simple"
86
    /// this is fairly detailed, so we define a few local functions (at end) to help out
87
    /// Also, it doesn't help that the structure is a bit complicated Elements->ChildOfElement->Element/Text
88
7.44k
    pub fn is_simple(elem: Element) -> bool {
89
7.44k
        if is_trivially_simple(elem) {
90
3.63k
            return true;
91
3.81k
        }
92
93
3.81k
        if is_negative_of_trivially_simple(elem) {
94
            // -3 or -x
95
41
            return true;
96
3.76k
        }
97
98
3.76k
        if !is_tag(elem, "mrow") || 
elem.children()867
.
is_empty867
() {
99
2.90k
            return false;
100
867
        }
101
102
        // x y or -x or -3 x or -x y or -3 x y or x° or n° or -x° or -n°
103
        #[allow(clippy::if_same_then_else)]
104
867
        if is_times_mi(elem) {
105
42
            return true;    // x y
106
825
        } else if is_degrees(elem) {
107
0
            return true;    // x° or n°
108
825
        } else if is_function(elem) {
109
44
            return true;
110
781
        }
111
112
781
        return false;
113
114
115
        // returns the element's text value
116
5.72k
        fn to_str(e: Element<'_>) -> &str {
117
            // typically usage assumes 'e' is a leaf
118
            // bad MathML is the following isn't true
119
5.72k
            if e.children().len() == 1 {
120
5.72k
                let text_node = e.children()[0];
121
5.72k
                if let Some(t) = text_node.text() {
122
5.72k
                    return t.text();
123
0
                }
124
0
            }               
125
0
            return "";
126
5.72k
        }
127
128
        // same as 'to_str' but for ChildOfElement
129
1.01k
        fn coe_to_str(coe: ChildOfElement<'_>) -> &str {
130
            // typically usage assumes 'coe' is a leaf
131
1.01k
            let element_node = coe.element();
132
1.01k
            if let Some(e) = element_node {
133
                // bad MathML is the following isn't true
134
1.01k
                if e.children().len() == 1 {
135
1.01k
                    let text_node = e.children()[0];
136
1.01k
                    if let Some(t) = text_node.text() {
137
1.01k
                        return t.text();
138
0
                    }
139
8
                }
140
0
            }               
141
8
            return "";
142
1.01k
        }
143
144
        // returns true if the string is just a single *char* (which can be multiple bytes)
145
5.72k
        fn is_single_char(str: &str) -> bool {
146
5.72k
            let mut chars =  str.chars();
147
5.72k
            return chars.next().is_some() && chars.next().is_none();
148
5.72k
        }
149
150
        // checks the single element to see if it is simple (mn, mi that is a single char, common fraction)
151
8.33k
        fn is_trivially_simple(elem: Element) -> bool {
152
8.33k
            if is_tag(elem, "mn")  {
153
914
                return true;
154
7.42k
            }
155
7.42k
            if is_tag(elem, "mi") && 
is_single_char5.72k
(
to_str(elem)5.72k
) {
156
                // "simple" only if it is a single char (which can be multiple bytes)
157
3.14k
                return true;
158
4.27k
            }
159
160
            // FIX: need to consult preference Fraction_Ordinal
161
4.27k
            if IsNode::is_common_fraction(elem, 10, 19) {
162
66
                return true;
163
4.21k
            }
164
4.21k
            return false;
165
8.33k
        }
166
167
        // true if the negative of a single element that is simple
168
4.20k
        fn is_negative_of_trivially_simple(elem: Element) -> bool {
169
4.20k
            if is_tag(elem, "mrow") && 
elem.children().len() == 2933
{
170
38
                let children = elem.children();
171
                // better be negative of something at this point...
172
38
                if is_COE_tag(children[0], "mo") && 
is_equal11
(
children[0]11
, '-') &&
173
6
                   children[1].element().is_some() && is_trivially_simple(children[1].element().unwrap()) {
174
6
                    return true;
175
32
                }
176
4.16k
            }
177
4.20k
            if is_tag(elem, "minus") && 
elem.children().len() == 154
{
178
54
                let child = elem.children()[0];
179
54
                if let Some(e) = child.element() {
180
54
                    return is_trivially_simple(e);
181
0
                }
182
4.14k
            }
183
184
4.14k
            return false;
185
4.20k
        }
186
187
        // return true if ChildOfElement has exactly text 'ch'
188
967
        fn is_equal(coe: ChildOfElement, ch: char) -> bool {
189
967
            return coe_to_str(coe).starts_with(ch);
190
967
        }
191
192
        // true if mrow(xxx, &it;, mi) or mrow(xxx, &it; mi, &it;, mi) where mi's have len==1
193
867
        fn is_times_mi(mrow: Element) -> bool {
194
867
            assert!( is_tag(mrow, "mrow") );
195
867
            let children = mrow.children();
196
867
            if !(children.len() == 3 || 
children.len() == 541
) {
197
34
                return false;
198
833
            }
199
833
            if children[0].element().is_none() {
200
0
                return false;
201
833
            }
202
203
833
            let first_child = children[0].element().unwrap();
204
833
            if !is_trivially_simple(first_child) {
205
396
                if !is_negative_of_trivially_simple(first_child) {
206
382
                    return false;
207
14
                }
208
14
                if children.len() == 5 && 
209
2
                   ( (name(first_child) == "minus" && 
first_child.children().len() == 10
&&
!0
is_COE_tag0
(first_child.children()[0], "mn")) ||
210
2
                     (name(first_child) == "mrow"  && !is_COE_tag(first_child.children()[1], "mn")) ) {
211
1
                    return false;      // '-x y z' is too complicated () -- -2 x y is ok
212
13
                }
213
437
            }
214
215
450
            if !(is_COE_tag(children[1], "mo") && 
216
450
                    is_equal(children[1], '\u{2062}') &&
217
63
                 is_COE_tag(children[2], "mi") &&
218
51
                    coe_to_str(children[2]).len()==1 ) {
219
408
                return false;
220
42
            }
221
222
42
            if children.len() == 3 {
223
41
                return true;
224
1
            }
225
226
            // len == 5
227
1
            return  is_COE_tag(children[3], "mo") && 
228
1
                        is_equal(children[3], '\u{2062}') &&       // invisible times
229
1
                    is_COE_tag(children[4], "mi") &&
230
1
                        coe_to_str(children[4]).len()==1 ;
231
867
        }
232
233
        // return true if the mrow is var° or num°
234
825
        fn is_degrees(mrow: Element) -> bool {
235
825
            assert!( is_tag(mrow, "mrow") );
236
825
            let children = mrow.children();
237
825
            return children.len() == 2 &&
238
32
                is_equal(children[1], '°') &&
239
0
                (is_COE_tag(children[0], "mi") ||
240
0
                 is_COE_tag(children[0], "mn") );
241
825
        }
242
243
        // fn_name &af; [simple arg or (simple arg)]
244
825
        fn is_function(mrow: Element) -> bool {
245
825
            assert!( is_tag(mrow, "mrow") );
246
825
            let children = mrow.children();
247
825
            if children.len() != 3 {
248
40
                return false;
249
785
            }
250
785
            if !(is_COE_tag(children[1], "mo") && 
251
473
                 is_equal(children[1], '\u{2061}') ) {    // invisible function application
252
717
                return false;
253
68
            }
254
68
            if !is_COE_tag(children[0], "mi") {
255
0
                return false;
256
68
            }
257
68
            let function_arg = children[2].element().unwrap();
258
68
            if IsBracketed::is_bracketed(function_arg, "(", ")", false, false) {
259
60
                return IsNode::is_simple(function_arg.children()[1].element().unwrap());
260
            } else {
261
8
                return IsNode::is_simple(function_arg);
262
            }
263
825
        }
264
7.44k
    }
265
266
    // Returns true if 'frac' is a common fraction
267
    // In this case, the numerator and denominator can be no larger than 'num_limit' and 'denom_limit'
268
4.31k
    fn is_common_fraction(frac: Element, num_limit: usize, denom_limit: usize) -> bool {
269
2
        static ALL_DIGITS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\d+").unwrap()); // match one or more digits
270
271
4.31k
        if !is_tag(frac, "mfrac") &&  
!4.12k
is_tag4.12k
(frac, "fraction"){
272
4.12k
            return false;
273
188
        }
274
188
        let children = frac.children();
275
188
        if children.len() != 2 {
276
0
            return false;
277
188
        }
278
279
188
        let num = children[0].element();
280
188
        let denom = children[1].element();
281
188
        if num.is_none() || denom.is_none() {
282
0
            return false;
283
188
        };
284
285
188
        let num = num.unwrap();
286
188
        let denom = denom.unwrap();
287
188
        if !is_tag(num, "mn") || 
!115
is_tag115
(denom, "mn") {
288
87
            return false
289
101
        };
290
291
101
        let num = get_text_from_element(num);
292
101
        let denom = get_text_from_element(denom);
293
101
        if num.is_empty() || denom.is_empty() {
294
0
            return false;
295
101
        }
296
297
101
        return ALL_DIGITS.is_match(&num)   && is_small_enough(&num, num_limit) &&
298
100
               ALL_DIGITS.is_match(&denom) && is_small_enough(&denom, denom_limit);
299
300
201
        fn is_small_enough(val: &str, upper_bound: usize) -> bool {
301
201
            return if let Ok(value) = val.parse::<usize>() { value <= upper_bound } else { 
false0
};
302
201
        }
303
4.31k
    }
304
305
14.2k
    pub fn is_mathml(elem: Element) -> bool {
306
        // doesn't check MATHML_FROM_NAME_ATTR because we are interested in if it is an intent.
307
14.2k
        return ALL_MATHML_ELEMENTS.contains(name(elem));
308
14.2k
    }
309
310
    #[allow(non_snake_case)]
311
14.3k
    pub fn is_2D(elem: Element) -> bool {
312
14.3k
        return MATHML_2D_NODES.contains(elem.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(elem)));
313
14.3k
    }
314
315
37.8k
    pub fn is_scripted(elem: Element) -> bool {
316
37.8k
        return MATHML_SCRIPTED_NODES.contains(elem.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(elem)));
317
37.8k
    }
318
319
138k
    pub fn is_modified(elem: Element) -> bool {
320
138k
        return MATHML_MODIFIED_NODES.contains(elem.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(elem)));
321
138k
    }
322
    }
323
324
/// All MathML elements, including a few that get cleaned away
325
/// "semantics", "annotation-xml", "annotation" and Content MathML are not included
326
static ALL_MATHML_ELEMENTS: phf::Set<&str> = phf_set!{
327
    "mi", "mo", "mn", "mtext", "ms", "mspace", "mglyph",
328
    "mfrac", "mroot", "msub", "msup", "msubsup","munder", "mover", "munderover", "mmultiscripts",
329
    "mstack", "mlongdiv", "msgroup", "msrow", "mscarries", "mscarry", "msline",
330
    "none", "mprescripts", "malignmark", "maligngroup",
331
    "math", "msqrt", "merror", "mpadded", "mphantom", "menclose", "mtd", "mstyle",
332
    "mrow", "a", "mfenced", "mtable", "mtr", "mlabeledtr",
333
};
334
335
static MATHML_LEAF_NODES: phf::Set<&str> = phf_set! {
336
  "mi", "mo", "mn", "mtext", "ms", "mspace", "mglyph",
337
    "none", "annotation", "ci", "cn", "csymbol",    // content could be inside an annotation-xml (faster to allow here than to check lots of places)
338
};
339
340
341
// Should mstack and mlongdiv be included here?
342
static MATHML_2D_NODES: phf::Set<&str> = phf_set! {
343
    "mfrac", "msqrt", "mroot", "menclose",
344
    "msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts",
345
    "mtable", "mtr", "mlabeledtr", "mtd",
346
};
347
348
// Should mstack and mlongdiv be included here?
349
static MATHML_MODIFIED_NODES: phf::Set<&str> = phf_set! {
350
    "msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts",
351
};
352
353
// Should mstack and mlongdiv be included here?
354
static MATHML_SCRIPTED_NODES: phf::Set<&str> = phf_set! {
355
    "msub", "msup", "msubsup", "mmultiscripts",
356
};
357
358
1.07M
pub fn is_leaf(element: Element) -> bool {
359
1.07M
    return MATHML_LEAF_NODES.contains(name(element));
360
1.07M
}
361
362
impl Function for IsNode {
363
    // eval function for IsNode
364
    // errors happen for wrong number/kind of arg
365
5.12k
    fn evaluate<'d>(&self,
366
5.12k
                        _context: &context::Evaluation<'_, 'd>,
367
5.12k
                        args: Vec<Value<'d>>)
368
5.12k
                        -> Result<Value<'d>, Error>
369
    {
370
371
5.12k
        let mut args = Args(args);
372
5.12k
        args.exactly(2)
?0
;
373
5.12k
        let kind = args.pop_string()
?0
;
374
        // FIX: there is some conflict problem with xpath errors and error-chain
375
        //                .chain_err(|e| format!("Second arg to is_leaf is not a string: {}", e.to_string()))?;
376
5.12k
        match kind.as_str() {
377
5.12k
            "simple" | 
"leaf"3.09k
|
"common_fraction"849
|
"2D"849
|
"modified"162
|
"scripted"140
|
"mathml"49
=> (),
378
0
            _ => return Err( Error::Other(format!("Unknown argument value '{}' for IsNode",  kind.as_str())) ),
379
        };
380
381
5.12k
        let nodes = args.pop_nodeset()
?0
;
382
5.12k
        if nodes.size() == 0 {
383
0
            return Ok (Value::Boolean(false));  // like xpath, don't make this an error
384
5.12k
        };
385
        return Ok(
386
            Value::Boolean( 
387
5.12k
                nodes.iter()
388
5.12k
                    .all(|node|
389
5.39k
                        if let Node::Element(e) = node {
390
5.39k
                            match kind.as_str() {
391
5.39k
                                "simple" => 
IsNode::is_simple2.29k
(
e2.29k
),
392
3.09k
                                "leaf"   => 
is_leaf_any_name2.25k
(
e2.25k
),
393
849
                                "2D" => 
IsNode::is_2D687
(
e687
),
394
162
                                "modified" => 
IsNode::is_modified22
(
e22
),
395
140
                                "scripted" => 
IsNode::is_scripted91
(
e91
),
396
49
                                "mathml" => IsNode::is_mathml(e),
397
0
                                "common_fraction" => IsNode::is_common_fraction(e, usize::MAX, usize::MAX), 
398
0
                                _        => true,       // can't happen due to check above
399
                            }    
400
                        } else {
401
                            // xpath is something besides an element, so no match
402
0
                            false
403
5.39k
                        }
404
                    )
405
            )
406
        );
407
408
2.25k
        fn is_leaf_any_name(e: Element) -> bool {
409
2.25k
            let children = e.children();
410
2.25k
            if children.is_empty() {
411
0
                return true;
412
2.25k
            } else if children.len() == 1 &&
413
1.24k
                      let ChildOfElement::Text(_) = children[0] {
414
1.17k
                    return true;
415
1.07k
                }
416
1.07k
            return false
417
2.25k
        }
418
5.12k
    }
419
}
420
421
struct ToOrdinal;
422
impl ToOrdinal {
423
    // ordinals often have an irregular start (e.g., "half") before becoming regular.
424
    // if the number is irregular, return the ordinal form, otherwise return 'None'.
425
353
    fn compute_irregular_fractional_speech(number: &str, plural: bool) -> Option<String> {
426
353
        SPEECH_DEFINITIONS.with(|definitions| {
427
353
            let definitions = definitions.borrow();
428
353
            let words = if plural {
429
208
                definitions.get_vec("NumbersOrdinalFractionalPluralOnes")
?0
430
            } else {
431
145
                definitions.get_vec("NumbersOrdinalFractionalOnes")
?0
432
            };
433
353
            let number_as_int: usize = number.parse().unwrap(); // already verified it is only digits
434
353
            if number_as_int < words.len() {
435
                // use the words associated with this irregular pattern.
436
291
                return Some( words[number_as_int].clone() );
437
62
            };
438
62
            return None;
439
353
        })
440
353
    }
441
442
    /**
443
     * Translates a number of up to twelve digits into a string representation.
444
     *   number -- the number to translate
445
     *   fractional -- true if this is a fractional ordinal (e.g, "half")
446
     *   plural -- true if answer should be plural
447
     * Returns the string representation of that number or an error message
448
     */
449
416
    fn convert(number: &str, fractional: bool, plural: bool) -> Option<String> {
450
2
        static NO_DIGIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[^\d]").unwrap()); // match anything except a digit
451
416
        return SPEECH_DEFINITIONS.with(|definitions| {
452
416
            let definitions = definitions.borrow();
453
416
            let numbers_large = definitions.get_vec("NumbersLarge")
?0
;
454
455
416
            let pref_manager = crate::prefs::PreferenceManager::get();
456
416
            let pref_manager = pref_manager.borrow();
457
416
            let block_separators = pref_manager.pref_to_string("BlockSeparators");
458
416
            let decimal_separator = pref_manager.pref_to_string("DecimalSeparators");
459
460
            // check number validity (has digits, not a decimal)
461
416
            if number.is_empty() ||  number.contains(&decimal_separator) {
462
0
                return Some(String::from(number));
463
416
            }
464
            // remove any block separators
465
416
            let number = match clean_number(number, &block_separators) {
466
0
                None => return Some(String::from(number)),
467
416
                Some(num) => num,
468
            };
469
    
470
            // check to see if the number is too big or is not an integer or has non-digits
471
416
            if number.len() > 3*numbers_large.len() {
472
0
                return Some(number);
473
416
            }
474
416
            if NO_DIGIT.is_match(&number) {
475
                // this shouldn't have been part of an mn, so likely an error. Log a warning
476
                // FIX: log a warning that a non-number was passed to convert()
477
0
                return Some(number);
478
416
            }
479
480
            // first deal with the abnormalities of fractional ordinals (one half, etc). That simplifies what remains
481
416
            if fractional &&
482
353
               let Some(
string291
) = ToOrdinal::compute_irregular_fractional_speech(&number, plural) {
483
291
                    return Some(string);
484
125
                }
485
486
            // at this point, we only need to worry about singular/plural distinction
487
488
            // break into groups of three digits and add 10^3 word (thousands, millions, ...) after each chunk
489
            // FIX: add a pause between groups of three -- need to use TTS-specific pause
490
491
            // handle special case of trailing zeros
492
            // num_thousands_at_end represents the amount to shift NumbersLarge... (e.g., millions->thousands)
493
243
            let 
num_thousands_at_end125
= match
number125
.
rfind125
(|ch| ch > '0') { // last non-0 on right
494
122
                Some(n) => (number.len() - 1 - n) / 3 ,
495
3
                None => 0
496
            };
497
125
            let (number,_) = number.split_at(number.len() - 3 * num_thousands_at_end); // drop the 0s
498
499
            // everything is simplified if we add zeros at the start so that block size is a factor of 3
500
125
            let number = match number.len() % 3 {
501
18
                0 => "".to_string() + number,
502
69
                1 => "00".to_string() + number,
503
38
                _ => "0".to_string() + number, // can only be "2" -- compiler doesn't know there aren't other options
504
            };
505
506
            // At this point we have at least three "digits", and length is a multiple of 3
507
            // We have already verified that there are only ASCII digits, so we can subtract '0' to get an index
508
            const ASCII_0: usize = 48;
509
125
            let digits = number.as_bytes()
510
125
                        .iter()
511
411
                        .
map125
(|&byte| byte as usize - ASCII_0)
512
125
                        .collect::<Vec<usize>>();
513
514
125
            let mut answer = String::with_capacity(255);  // reasonable max most of the time
515
125
            let large_words = numbers_large;
516
125
            if digits.len() > 3 { 
517
                // speak this first groups as cardinal numbers
518
7
                let words = [
519
7
                    definitions.get_vec("NumbersHundreds")
?0
,
520
7
                    definitions.get_vec("NumbersTens")
?0
,
521
7
                    definitions.get_vec("NumbersOnes")
?0
,
522
                ];
523
7
                answer = digits[0..digits.len()-3]
524
7
                            .chunks(3)
525
7
                            .enumerate()
526
12
                            .
map7
(|(i, chunk)| {
527
12
                                if chunk[0] != 0 || 
chunk[1] != 08
||
chunk[2] != 08
{
528
7
                                    Some(ToOrdinal::hundreds_to_words(chunk, &words)
?0
+ " " +
529
7
                                        &large_words[num_thousands_at_end + digits.len()/3 - 1 - i] + " ")
530
                                } else {
531
5
                                    Some("".to_string())
532
                                }
533
12
                            })
534
7
                            .collect::<Option<Vec<String>>>()
?0
535
7
                            .join("");  // can't use " " because 1000567 would get extra space in the middle
536
7
                if num_thousands_at_end > 0 {
537
                    // add on "billionths", etc and we are done
538
0
                    let large_words = if plural {
539
0
                        definitions.get_vec("NumbersOrdinalPluralLarge")
540
                    } else {
541
0
                        definitions.get_vec("NumbersOrdinalLarge")
542
                    };
543
0
                    return Some(answer + &large_words?[num_thousands_at_end]);
544
7
                }
545
118
            };
546
547
            // all that is left is to speak the hundreds part, possibly followed by "thousands", "billions", etc
548
125
            let words = match (num_thousands_at_end > 0, plural) {
549
                (true, _) => [
550
10
                    definitions.get_vec("NumbersHundreds")
?0
,
551
10
                    definitions.get_vec("NumbersTens")
?0
,
552
10
                    definitions.get_vec("NumbersOnes")
?0
,
553
                ],
554
                (false, true) => [
555
54
                    definitions.get_vec("NumbersOrdinalPluralHundreds")
?0
,
556
54
                    definitions.get_vec("NumbersOrdinalPluralTens")
?0
,
557
54
                    definitions.get_vec("NumbersOrdinalPluralOnes")
?0
,
558
                ],
559
                (false, false) => [
560
61
                    definitions.get_vec("NumbersOrdinalHundreds")
?0
,
561
61
                    definitions.get_vec("NumbersOrdinalTens")
?0
,
562
61
                    definitions.get_vec("NumbersOrdinalOnes")
?0
,
563
                ],
564
            };
565
125
            answer += &ToOrdinal::hundreds_to_words(&digits[digits.len()-3..], &words)
?0
;
566
125
            if num_thousands_at_end > 0 {
567
10
                let large_words = if plural {
568
3
                    definitions.get_vec("NumbersOrdinalPluralLarge")
?0
569
                } else {
570
7
                    definitions.get_vec("NumbersOrdinalLarge")
?0
571
                };
572
10
                answer = answer + " " + &large_words[num_thousands_at_end];
573
115
            }
574
125
            return Some(answer);
575
416
        });
576
577
        /// Remove block separators and convert alphanumeric digits to ascii digits
578
416
        fn clean_number(number: &str, block_separators: &str) -> Option<String> {
579
416
            let mut answer = String::with_capacity(number.len());
580
617
            for ch in 
number416
.
chars416
() {
581
617
                if block_separators.contains(ch) {
582
0
                    continue;
583
617
                }
584
617
                if ch.is_ascii_digit() {
585
615
                    answer.push(ch);
586
615
                } else {
587
2
                    let shifted_ch = match ch {
588
2
                        '𝟎'..='𝟗' => ch as u32 -'𝟎' as u32 + '0' as u32,
589
0
                        '𝟘'..='𝟡' => ch as u32 -'𝟘' as u32 + '0' as u32,
590
0
                        '𝟢'..='𝟫' => ch as u32 -'𝟢' as u32 + '0' as u32,
591
0
                        '𝟬'..='𝟵' => ch as u32 -'𝟬' as u32 + '0' as u32,
592
0
                        '𝟶'..='𝟿' => ch as u32 -'𝟶' as u32 + '0' as u32,
593
0
                        _ => return None,
594
                    };
595
2
                    answer.push(char::from_u32(shifted_ch).unwrap());
596
                }
597
            }
598
416
            return Some(answer);
599
416
        }
600
416
    }
601
602
603
132
    fn hundreds_to_words(number: &[usize], words: &[Ref<Vec<String>>; 3]) -> Option<String> {
604
132
        assert!( number.len() == 3 );
605
132
        return SPEECH_DEFINITIONS.with(|definitions| {
606
132
            let definitions = definitions.borrow();
607
132
            if number[0] != 0 && 
number[1] == 024
&&
number[2] == 012
{
608
6
                return Some(words[0][number[0]].clone());
609
126
            }
610
611
126
            let mut hundreds = definitions.get_vec("NumbersHundreds")
?0
[number[0]].clone();
612
126
            if !hundreds.is_empty() {
613
18
                hundreds += " ";
614
108
            }
615
616
126
            if number[1] != 0 && 
number[2] == 049
{
617
26
                return Some(hundreds + &words[1][number[1]]);
618
100
            }
619
620
100
            if 10*number[1] < words[2].len() {
621
                // usurp regular ordering to handle something like '14'
622
85
                return Some(hundreds + &words[2][10*number[1] + number[2]]);
623
            } else {
624
15
                return Some(hundreds + &definitions.get_vec("NumbersTens")
?0
[number[1]] + " " + &words[2][number[2]]);
625
            }
626
132
        });
627
132
    }
628
}
629
630
impl Function for ToOrdinal {
631
    // convert a node to an ordinal number
632
320
    fn evaluate<'d>(&self,
633
320
                        _context: &context::Evaluation<'_, 'd>,
634
320
                        args: Vec<Value<'d>>)
635
320
                        -> Result<Value<'d>, Error>
636
    {
637
320
        let mut args = Args(args);
638
320
        if let Err(
e0
) = args.exactly(1).or_else(|_|
args288
.
exactly288
(3)) {
639
0
            return Err( XPathError::Other(format!("ToOrdinal requires 1 or 3 args: {e}")));
640
320
        };
641
320
        let mut fractional = false;
642
320
        let mut plural = false;
643
320
        if args.len() == 3 {
644
288
            plural = args.pop_boolean()
?0
;
645
288
            fractional = args.pop_boolean()
?0
;
646
32
        }
647
320
        let node = validate_one_node(args.pop_nodeset()
?0
, "ToOrdinal")
?0
;
648
320
        return match node {
649
0
            Node::Text(t) =>  Ok( Value::String(
650
0
                match ToOrdinal::convert(t.text(), fractional, plural) {
651
0
                    None => t.text().to_string(),
652
0
                    Some(ord) => ord,
653
                } ) ),
654
320
            Node::Element(e) => Ok( Value::String(
655
320
                match ToOrdinal::convert(&get_text_from_element(e), fractional, plural) {
656
0
                    None => get_text_from_element(e).to_string(),
657
320
                    Some(ord) => ord,
658
                } ) ),
659
0
            _   =>  Err( Error::ArgumentNotANodeset{actual: ArgumentType::String} ),
660
        }
661
320
    }
662
}
663
664
665
struct ToCommonFraction;
666
667
impl Function for ToCommonFraction {
668
    // convert a node to a common fraction (if the numerator and denominator are within given limits)
669
34
    fn evaluate<'d>(&self,
670
34
                        _context: &context::Evaluation<'_, 'd>,
671
34
                        args: Vec<Value<'d>>)
672
34
                        -> Result<Value<'d>, Error>
673
    {
674
34
        let mut args = Args(args);
675
34
        args.exactly(1)
?0
;
676
677
        // FIX: should probably handle errors by logging them and then trying to evaluate any children
678
34
        let node = validate_one_node(args.pop_nodeset()
?0
, "ToCommonFraction")
?0
;
679
34
        if let Node::Element(frac) = node {
680
34
            if !IsNode::is_common_fraction(frac, usize::MAX, usize::MAX) {
681
0
                return Err( Error::Other( format!("ToCommonFraction -- argument is not an 'mfrac': {}': ", mml_to_string(frac))) );
682
34
            }
683
    
684
            // everything has been verified, so we can just get the pieces and ignore potential error results
685
34
            let children = frac.children();
686
34
            let num = children[0].element().unwrap();
687
34
            let num =   get_text_from_element( num );
688
34
            let denom = children[1].element().unwrap();
689
34
            let denom = get_text_from_element( denom );
690
34
            let mut answer = num.clone() + " ";
691
34
            answer += &match ToOrdinal::convert(&denom, true, num!="1") {
692
0
                None => denom,
693
34
                Some(ord) => ord,
694
            };
695
696
34
            return Ok( Value::String( answer ) )
697
        } else {
698
0
            return Err( Error::Other( "ToCommonFraction -- argument is not an element".to_string()) );
699
        }
700
34
    }
701
}
702
703
struct Min;
704
/**
705
 * Returns true the smallest of the two args
706
 * @param(num1) 
707
 * @param(num2)
708
 */
709
 impl Function for Min {
710
711
0
    fn evaluate<'d>(&self,
712
0
                        _context: &context::Evaluation<'_, 'd>,
713
0
                        args: Vec<Value<'d>>)
714
0
                        -> Result<Value<'d>, Error>
715
    {
716
0
        let mut args = Args(args);
717
0
        args.exactly(2)?;
718
0
        let num1 = args.pop_number()?;
719
0
        let num2 = args.pop_number()?;
720
0
        return Ok( Value::Number( num1.min(num2) ) );
721
0
    }
722
}
723
724
struct Max;
725
726
impl Function for Max {
727
728
0
    fn evaluate<'d>(&self,
729
0
                        _context: &context::Evaluation<'_, 'd>,
730
0
                        args: Vec<Value<'d>>)
731
0
                        -> Result<Value<'d>, Error>
732
    {
733
0
        let mut args = Args(args);
734
0
        args.exactly(2)?;
735
0
        let num1 = args.pop_number()?;
736
0
        let num2 = args.pop_number()?;
737
0
        return Ok( Value::Number( num1.max(num2) ) );
738
0
    }
739
}
740
741
742
struct BaseNode;
743
/**
744
 * Returns true if the node is a large op
745
 * @param(node)     -- node(s) to test -- should be an <mo>
746
 */
747
 impl BaseNode {
748
    /// Recursively find the base node
749
    /// The base node of a non scripted element is the element itself
750
1.26k
    fn base_node(node: Element) -> Element {
751
1.26k
        let name = node.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(node));
752
1.26k
        if ["msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts"].contains(&name) {
753
97
            return BaseNode::base_node(as_element(node.children()[0]));
754
        } else {
755
1.16k
            return node;
756
        }
757
1.26k
    }
758
 }
759
 impl Function for BaseNode {
760
761
1.16k
    fn evaluate<'d>(&self,
762
1.16k
                        _context: &context::Evaluation<'_, 'd>,
763
1.16k
                        args: Vec<Value<'d>>)
764
1.16k
                        -> Result<Value<'d>, Error>
765
    {
766
1.16k
        let mut args = Args(args);
767
1.16k
        args.exactly(1)
?0
;
768
1.16k
        let node = validate_one_node(args.pop_nodeset()
?0
, "BaseNode")
?0
;
769
1.16k
        if let Node::Element(e) = node {
770
1.16k
            let mut node_set = Nodeset::new();
771
1.16k
            node_set.add(BaseNode::base_node(e));
772
1.16k
            return Ok( Value::Nodeset(node_set) );
773
        } else {
774
            // xpath is something besides an element, so no match
775
0
            return Err( Error::Other("Argument other than a node given to BaseNode".to_string()) );
776
        }
777
1.16k
    }
778
}
779
780
781
struct IfThenElse;
782
 impl Function for IfThenElse {
783
36.2k
    fn evaluate<'d>(&self,
784
36.2k
                        _context: &context::Evaluation<'_, 'd>,
785
36.2k
                        args: Vec<Value<'d>>)
786
36.2k
                        -> Result<Value<'d>, Error>
787
    {
788
36.2k
        let args = Args(args);
789
36.2k
        args.exactly(3)
?0
;
790
36.2k
        let if_val = &args[0];
791
36.2k
        let then_val = &args[1];
792
36.2k
        let else_val = &args[2];
793
36.2k
        let is_true = match if_val {
794
14.5k
            Value::Nodeset(nodes) => nodes.size() > 0,
795
21.7k
            Value::Boolean(b) => *b,
796
0
            Value::Number(f) => *f != 0.0,
797
0
            Value::String(s) => !s.is_empty(),
798
        };
799
36.2k
        return Ok( if is_true {
then_val4.13k
.
clone4.13k
()} else {
else_val32.1k
.
clone32.1k
()});
800
36.2k
    }
801
}
802
803
804
struct Debug;
805
/**
806
 * Prints it's argument along with the string that was evaluated
807
 * @param(node)     -- node(s) to be evaluated/printed
808
 * @param(string)   -- string showing what is being evaluated
809
 */
810
 impl Function for Debug {
811
812
348
    fn evaluate<'d>(&self,
813
348
                        _context: &context::Evaluation<'_, 'd>,
814
348
                        args: Vec<Value<'d>>)
815
348
                        -> Result<Value<'d>, Error>
816
    {
817
348
        let mut args = Args(args);
818
348
        args.exactly(2)
?0
;
819
348
        let xpath_str = args.pop_string()
?0
;
820
348
        let eval_result = &args[0];
821
348
        debug!("  -- Debug: value of '{xpath_str}' is ");
822
348
        match eval_result {
823
78
            Value::Nodeset(nodes) => {
824
78
                if nodes.size() == 0 {
825
0
                    debug!("0 nodes (false)");
826
                } else {
827
78
                    let singular = nodes.size()==1;
828
78
                    debug!("{} node{}. {}:", 
nodes0
.
size0
(),
829
0
                        if singular {""} else {"s"},
830
0
                        if singular {"Node is"} else {"Nodes are"});
831
78
                    nodes.document_order()
832
78
                        .iter()
833
78
                        .enumerate()
834
78
                        .for_each(|(i, node)| {
835
78
                            match node {
836
78
                                Node::Element(mathml) => debug!("#{}:\n{}",
837
0
                                        i, mml_to_string(*mathml)),
838
0
                                _ => debug!("'{node:?}'"),
839
                            }   
840
78
                        })    
841
                }
842
            },
843
270
            _ => debug!("'{eval_result:?}'"),
844
        }
845
348
        return Ok( eval_result.clone() );
846
348
    }
847
}
848
849
850
/// Should be an internal structure for implementation of the IsBracketed, but it was useful in one place in a separate module.
851
/// This should probably be restructured slightly.
852
pub struct IsBracketed;
853
impl IsBracketed {
854
139k
    pub fn is_bracketed(element: Element, left: &str, right: &str, requires_comma: bool, requires_mrow: bool) -> bool {
855
        use crate::canonicalize::is_fence;
856
139k
        if requires_mrow && 
!116k
is_tag116k
(element, "mrow") {
857
18.8k
            return false;
858
120k
        }
859
120k
        let children = element.children();
860
120k
        let n_children = children.len();
861
120k
        if (n_children == 0 ||
862
120k
            !left.is_empty() && 
!right.is_empty()108k
&&
n_children < 2108k
) ||
863
116k
            requires_comma && 
element.children().len() < 34.04k
{
864
            // not enough argument for there to be a match
865
4.44k
            return false;
866
115k
        }
867
868
115k
        let first_child = as_element(children[0]);
869
115k
        let last_child = as_element(children[children.len()-1]);
870
        // debug!("first_child: {}", crate::pretty_print::mml_to_string(first_child));
871
        // debug!("last_child: {}", crate::pretty_print::mml_to_string(last_child));
872
115k
        if (left.is_empty()  && (
name(first_child) != "mo"11.2k
||
!is_fence(first_child)2.26k
)) ||
873
106k
           (right.is_empty() && (
name(last_child) != "mo"639
||
!is_fence(last_child)629
)) {
874
9.61k
            return false;
875
106k
        }
876
877
106k
        if !left.is_empty() && 
get_text_from_COE104k
(&children[0]) != left ||
878
6.14k
           !right.is_empty() && 
get_text_from_COE5.51k
(&
children5.51k
[children.len()-1]) != right {
879
            // left or right don't match
880
101k
            return false;
881
5.12k
        }
882
883
5.12k
        if requires_comma {
884
445
            if let ChildOfElement::Element(contents) = children[1] {
885
445
                let children = contents.children();
886
445
                if !is_tag(contents, "mrow") || 
children.len() <= 1248
{
887
197
                    return false;
888
248
                }
889
                // finally, we can check for a comma -- we might not have operands, so we to check first and second entry
890
248
                if get_text_from_COE(&children[0]).as_str() == "," {
891
1
                    return true;
892
247
                }
893
247
                if children.len() > 1 && get_text_from_COE(&children[1]).as_str() == "," {
894
133
                    return true;
895
114
                }
896
0
            }
897
114
            return false;
898
        } else {
899
4.67k
            return true;
900
        }
901
139k
    }
902
}
903
904
/**
905
 * Returns true if the node is a bracketed expr with the indicated left/right chars
906
 * node -- node(s) to test
907
 * left -- string (like "[") or empty
908
 * right -- string (like "]") or empty
909
 * requires_comma - boolean, optional (check the top level of 'node' for commas)
910
 */
911
// 'requiresComma' is useful for checking parenthesized expressions vs function arg lists and other lists
912
 impl Function for IsBracketed {
913
115k
    fn evaluate<'d>(&self,
914
115k
                        _context: &context::Evaluation<'_, 'd>,
915
115k
                        args: Vec<Value<'d>>)
916
115k
                        -> Result<Value<'d>, Error>
917
    {
918
115k
        let mut args = Args(args);
919
115k
        args.at_least(3)
?0
;
920
115k
        args.at_most(5)
?0
;
921
115k
        let mut requires_comma = false;
922
115k
        let mut requires_mrow = true;
923
115k
        if args.len() == 5 {
924
0
            requires_mrow = args.pop_boolean()?;
925
115k
        }
926
115k
        if args.len() >= 4 {
927
15
            requires_comma = args.pop_boolean()
?0
;
928
115k
        }
929
115k
        let right = args.pop_string()
?0
;
930
115k
        let left = args.pop_string()
?0
;
931
        return Ok( Value::Boolean(
932
115k
            match validate_one_node(args.pop_nodeset()
?0
, "IsBracketed") {
933
0
                Err(_) => false,  // be fault tolerant, like xpath,
934
115k
                Ok(node) => {
935
115k
                    if let Node::Element(e) = node {
936
115k
                        IsBracketed::is_bracketed(e, &left, &right, requires_comma, requires_mrow)
937
                    } else {
938
0
                        false
939
                    }
940
                }
941
            }) );
942
115k
        }
943
}
944
945
pub struct IsInDefinition;
946
impl IsInDefinition {
947
    /// Returns true if `test_str` is in `set_name`
948
    /// Returns an error if `set_name` is not defined
949
11.0k
    pub fn is_defined_in(test_str: &str, defs: &'static LocalKey<RefCell<Definitions>>, set_name: &str) -> Result<bool, Error> {
950
11.0k
        return defs.with(|definitions| {
951
11.0k
            if let Some(
set11.0k
) = definitions.borrow().get_hashset(set_name) {
952
11.0k
                return Ok( set.contains(test_str) );
953
12
            }
954
12
            if let Some(hashmap) = definitions.borrow().get_hashmap(set_name) {
955
12
                return Ok( hashmap.contains_key(test_str) );
956
0
            }
957
0
            return Err( Error::Other( format!("\n  IsInDefinition: '{set_name}' is not defined in definitions.yaml") ) );
958
11.0k
        });
959
11.0k
    }
960
}
961
962
/**
963
 * Returns true if the text is contained in the set defined in Speech or Braille.
964
 * element/string -- element (converted to string)/string to test
965
 * speech or braille
966
 * set_name -- the set in which the string is to be searched
967
 */
968
// 'requiresComma' is useful for checking parenthesized expressions vs function arg lists and other lists
969
 impl Function for IsInDefinition {
970
12.0k
    fn evaluate<'d>(&self,
971
12.0k
                        _context: &context::Evaluation<'_, 'd>,
972
12.0k
                        args: Vec<Value<'d>>)
973
12.0k
                        -> Result<Value<'d>, Error>
974
    {
975
12.0k
        let mut args = Args(args);
976
        // FIX: temporarily accept two args as assume SPEECH_DEFINITIONS until the Rule files are fixed
977
12.0k
        args.at_least(2)
?0
;
978
12.0k
        args.at_most(3)
?0
;
979
12.0k
        let set_name = args.pop_string()
?0
;
980
        // FIX: this (len == 1) is temporary until all the usages are switched to the (new) 3-arg form
981
12.0k
        let definitions = if args.len() == 2 {
982
10.4k
            match args.pop_string()
?0
.as_str() {
983
10.4k
                "Speech" => 
&SPEECH_DEFINITIONS1.35k
,
984
9.09k
                "Braille" => &BRAILLE_DEFINITIONS,
985
0
                _ => return Err( Error::Other("IsInDefinition:: second argument must be either 'Speech' or 'Braille'".to_string()) )
986
            }
987
        } else {
988
1.61k
            &SPEECH_DEFINITIONS
989
        };
990
12.0k
        match &args[0] {
991
5.04k
            Value::String(str) => return match IsInDefinition::is_defined_in(str, definitions, &set_name) {
992
5.04k
                Ok(result) => Ok( Value::Boolean( result ) ),
993
0
                Err(e) => Err(e),
994
            },
995
7.02k
            Value::Nodeset(nodes) => {
996
7.02k
                return if nodes.size() == 0 {
997
0
                    Ok( Value::Boolean(false) )    // trivially not in definition
998
                } else {
999
7.02k
                    let node = validate_one_node(nodes.clone(), "IsInDefinition")
?0
;
1000
7.02k
                    if let Node::Element(e) = node {
1001
7.02k
                        let text = get_text_from_element(e);
1002
7.02k
                        if text.is_empty() {
1003
979
                            Ok( Value::Boolean(false) )
1004
                        } else {
1005
6.04k
                            match IsInDefinition::is_defined_in(&text, definitions, &set_name) {
1006
6.04k
                                Ok(result) => Ok( Value::Boolean( result ) ),
1007
0
                                Err(e) => Err(e),
1008
                            }          
1009
                        }
1010
                    } else {
1011
0
                        Ok( Value::Boolean(false))       // trivially not in definition                    }
1012
                    }
1013
                }
1014
            },
1015
0
            _ => Err( Error::Other("IsInDefinition:: neither a node nor a string is passed for first argument".to_string()) ),
1016
        }
1017
12.0k
    }
1018
}
1019
1020
1021
pub struct DefinitionValue;
1022
impl DefinitionValue {
1023
    /// Returns the value associated with `key` in `set_name`. If `key` is not in `set_name`, an empty string is returned
1024
    /// Returns an error if `set_name` is not defined
1025
12.7k
    pub fn definition_value(key: &str, defs: &'static LocalKey<RefCell<Definitions>>, set_name: &str) -> Result<String, Error> {
1026
12.7k
        return defs.with(|definitions| {
1027
12.7k
            if let Some(map) = definitions.borrow().get_hashmap(set_name) {
1028
12.7k
                return Ok( match map.get(key) {
1029
5.64k
                    None => "".to_string(),
1030
7.09k
                    Some(str) => str.clone(),
1031
                });
1032
0
            }
1033
0
            return Err( Error::Other( format!("\n  DefinitionValue: '{set_name}' is not defined in definitions.yaml") ) );
1034
12.7k
        });
1035
12.7k
    }
1036
}
1037
1038
/**
1039
 * Returns true if the node is a bracketed expr with the indicated left/right chars
1040
 * element/string -- element (converted to string)/string to test
1041
 * left -- string (like "[") or empty
1042
 * right -- string (like "]") or empty
1043
 * requires_comma - boolean, optional (check the top level of 'node' for commas
1044
 */
1045
// 'requiresComma' is useful for checking parenthesized expressions vs function arg lists and other lists
1046
 impl Function for DefinitionValue {
1047
13.1k
    fn evaluate<'d>(&self,
1048
13.1k
                        _context: &context::Evaluation<'_, 'd>,
1049
13.1k
                        args: Vec<Value<'d>>)
1050
13.1k
                        -> Result<Value<'d>, Error>
1051
    {
1052
13.1k
        let mut args = Args(args);
1053
13.1k
        args.exactly(3)
?0
;
1054
13.1k
        let set_name = args.pop_string()
?0
;
1055
13.1k
        let definitions = match args.pop_string()
?0
.as_str() {
1056
13.1k
            "Speech" => 
&SPEECH_DEFINITIONS13.1k
,
1057
12
            "Braille" => &BRAILLE_DEFINITIONS,
1058
0
            _ => return Err( Error::Other("IsInDefinition:: second argument must be either 'Speech' or 'Braille'".to_string()) )
1059
        };
1060
13.1k
        match &args[0] {
1061
5.04k
            Value::String(str) => return match DefinitionValue::definition_value(str, definitions, &set_name) {
1062
5.04k
                Ok(result) => Ok( Value::String( result ) ),
1063
0
                Err(e) => Err(e),
1064
            },
1065
8.10k
            Value::Nodeset(nodes) => {
1066
8.10k
                return if nodes.size() == 0 {
1067
0
                    Ok( Value::String("".to_string()) )    // trivially not in definition
1068
                } else {
1069
8.10k
                    let node = validate_one_node(nodes.clone(), "DefinitionValue")
?0
;
1070
8.10k
                    if let Node::Element(
e8.10k
) = node {
1071
8.10k
                        let text = get_text_from_element(e);
1072
8.10k
                        if text.is_empty() {
1073
410
                            Ok( Value::String("".to_string()) )
1074
                        } else {
1075
7.69k
                            match DefinitionValue::definition_value(&text, definitions, &set_name) {
1076
7.69k
                                Ok(result) => Ok( Value::String( result ) ),
1077
0
                                Err(e) => Err(e),
1078
                            }          
1079
                        }
1080
                    } else {
1081
3
                        Ok( Value::String("".to_string()) )       // trivially not in definition                    }
1082
                    }
1083
                }
1084
            },
1085
0
            _ => Err( Error::Other("DefinitionValue:: neither a node nor a string is passed for first argument".to_string()) ),
1086
        }
1087
13.1k
    }
1088
}
1089
1090
pub struct DistanceFromLeaf;
1091
impl DistanceFromLeaf {
1092
240
    fn distance(element: Element, use_left_side: bool, treat_2d_elements_as_tokens: bool) -> usize {
1093
        // FIX: need to handle char level (i.e., chars in a leaf element)
1094
240
        let mut element = element;
1095
240
        let mut distance = 1;
1096
        loop {
1097
            // debug!("distance={} -- element: {}", distance, mml_to_string(element));
1098
361
            if MATHML_LEAF_NODES.contains(element.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(element))) {
1099
199
                return distance;
1100
162
            }
1101
162
            if treat_2d_elements_as_tokens && (
IsNode::is_2D60
(
element60
) ||
!IsNode::is_mathml(element)20
) {
1102
41
                return distance;
1103
121
            }
1104
121
            let children = element.children();
1105
121
            assert!(!children.is_empty());
1106
121
            element = as_element( if use_left_side {
children[0]0
} else {children[children.len()-1]} );
1107
121
            distance += 1;
1108
        }
1109
240
    }
1110
}
1111
1112
/**
1113
 * Returns distance from the current node to the leftmost/rightmost leaf (if char, then = 0, if token, then 1).
1114
 * If the node is a bracketed expr with the indicated left/right chars
1115
 * node -- node(s) to test
1116
 * left_side -- (bool) traverse leftmost child to leaf
1117
 * treat2D_elements_as_tokens -- (bool) 2D notations such as fractions are treated like leaves 
1118
 */
1119
impl Function for DistanceFromLeaf {
1120
240
    fn evaluate<'d>(&self,
1121
240
                        _context: &context::Evaluation<'_, 'd>,
1122
240
                        args: Vec<Value<'d>>)
1123
240
                        -> Result<Value<'d>, Error>
1124
    {
1125
240
        let mut args = Args(args);
1126
240
        args.exactly(3)
?0
;
1127
240
        let treat_2d_elements_as_tokens = args.pop_boolean()
?0
;
1128
240
        let use_left_side = args.pop_boolean()
?0
;
1129
240
        let node = validate_one_node(args.pop_nodeset()
?0
, "DistanceFromLeaf")
?0
;
1130
240
        if let Node::Element(e) = node {
1131
240
            return Ok( Value::Number( DistanceFromLeaf::distance(e, use_left_side, treat_2d_elements_as_tokens) as f64) );
1132
0
        }
1133
1134
        // FIX: should having a non-element be an error instead??
1135
0
        return Err(Error::Other(format!("DistanceFromLeaf: first arg '{node:?}' is not a node")));
1136
240
    }
1137
}
1138
1139
1140
1141
pub struct EdgeNode;
1142
impl EdgeNode {
1143
    // Return the root of the ancestor tree if we are at the left/right side of a path from that to 'element'
1144
2.09k
    fn edge_node<'a>(element: Element<'a>, use_left_side: bool, stop_node_name: &str) -> Option<Element<'a>> {
1145
2.09k
        let element_name = element.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(element));
1146
2.09k
        if element_name == "math" {
1147
86
            return Some(element);
1148
2.00k
        };
1149
1150
2.00k
        let parent = get_parent(element);   // there is always a "math" node
1151
2.00k
        let parent_name = parent.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(parent));
1152
1153
        // first check to see if we have the special case of punctuation as last child of math/mrow element
1154
        // it only matters if we are looking at the right edge
1155
1156
        // debug!("EdgeNode: there are {} preceding siblings",element.preceding_siblings().len() );
1157
2.00k
        if use_left_side  && 
!element.preceding_siblings().is_empty()1.15k
{// not at left side
1158
587
            return None;
1159
1.41k
        };
1160
1161
1.41k
        if !use_left_side && 
!element.following_siblings().is_empty()848
{ // not at right side
1162
            // check for the special case that the parent is an mrow and the grandparent is <math> and we have punctuation
1163
574
            let grandparent = get_parent(parent);
1164
574
            let grandparent_name = grandparent.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(grandparent));
1165
574
            if grandparent_name == "math" &&
1166
105
               parent_name == "mrow" && 
parent.children().len() == 289
{ // right kind of mrow
1167
11
                let text = get_text_from_element( as_element(parent.children()[1]) );
1168
11
                if text == "," || text == "." || 
text == ";"10
||
text == "?"10
{
1169
1
                    return Some(grandparent);
1170
10
                }
1171
563
            }
1172
573
             return None;
1173
843
        };
1174
1175
        // at an edge -- check to see the parent is desired root
1176
843
        if parent_name == stop_node_name || 
1177
735
           (stop_node_name == "2D" && 
IsNode::is_2D338
(
parent338
)) {
1178
176
            return Some(parent);
1179
667
        };
1180
        
1181
        // debug!("EdgeNode: recurse to {}", parent_name);
1182
667
        return EdgeNode::edge_node(parent, use_left_side, stop_node_name)
1183
2.09k
    }
1184
}
1185
1186
// EdgeNode(node, "left"/"right", stopNodeName)
1187
//    -- returns the stopNode if at left/right edge of named ancestor node. "stopNodeName' can also be "2D'
1188
//       returns original node match isn't found
1189
//  Note: if stopNodeName=="math", then punctuation is taken into account since it isn't really part of the math
1190
impl Function for EdgeNode {
1191
1.41k
    fn evaluate<'d>(&self,
1192
1.41k
                        _context: &context::Evaluation<'_, 'd>,
1193
1.41k
                        args: Vec<Value<'d>>)
1194
1.41k
                        -> Result<Value<'d>, Error>
1195
    {
1196
1.41k
        let mut args = Args(args);
1197
1.41k
        args.exactly(3)
?0
;
1198
1.41k
        let stop_node_name = args.pop_string()
?0
;
1199
1.41k
        let use_left_side = args.pop_string()
?0
.to_lowercase() == "left";
1200
1.41k
        let node = validate_one_node(args.pop_nodeset()
?0
, "EdgeNode")
?0
;
1201
1.41k
        if let Node::Element(e) = node {
1202
1.41k
            let result = match EdgeNode::edge_node(e, use_left_side, &stop_node_name) {
1203
260
                Some(found) => found,
1204
1.15k
                None => e,
1205
            };
1206
1.41k
            let mut node_set = Nodeset::new();
1207
1.41k
            node_set.add(result);
1208
1.41k
            return Ok( Value::Nodeset(node_set) );
1209
0
        }
1210
1211
        // FIX: should having a non-element be an error instead??
1212
0
        return Err(Error::Other(format!("EdgeNode: first arg '{node:?}' is not a node")));
1213
1.41k
    }
1214
}
1215
1216
pub struct SpeakIntentName;
1217
/// SpeakIntentName(intent, verbosity)
1218
///   Returns a string corresponding to the intent name with the indicated verbosity
1219
impl Function for SpeakIntentName {
1220
340
    fn evaluate<'d>(&self,
1221
340
                        _context: &context::Evaluation<'_, 'd>,
1222
340
                        args: Vec<Value<'d>>)
1223
340
                        -> Result<Value<'d>, Error>
1224
    {
1225
340
        let mut args = Args(args);
1226
340
        args.exactly(3)
?0
;
1227
340
        let fixity = args.pop_string()
?0
;
1228
340
        let verbosity = args.pop_string()
?0
;
1229
340
        let intent_name = args.pop_string()
?0
;
1230
340
        return Ok( Value::String(crate::infer_intent::intent_speech_for_name(&intent_name, &verbosity, &fixity)) );
1231
340
    }
1232
}
1233
1234
pub struct GetBracketingIntentName;
1235
/// GetBracketingIntentName(name, verbosity, at_start_or_end)
1236
///   Returns a potentially empty string to use to bracket an intent expression (start foo... end foo)
1237
/// 
1238
impl GetBracketingIntentName {
1239
61
    fn bracketing_words(intent_name: &str, verbosity: &str, fixity: &str, at_start: bool) -> String {
1240
61
        crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
1241
61
            let definitions = definitions.borrow();
1242
61
            if let Some(
intent_name_pattern57
) = definitions.get_hashmap("IntentMappings").unwrap().get(intent_name) {
1243
                // Split the pattern is: fixity-def [|| fixity-def]*
1244
                //   fixity-def := fixity=open; verbosity; close
1245
                //   verbosity := terse | medium | verbose
1246
68
                if let Some(
matched_intent57
) =
intent_name_pattern.split("||")57
.
find57
(|&entry| entry.trim().starts_with(fixity)) {
1247
57
                    let (_, matched_intent) = matched_intent.split_once("=").unwrap_or_default();
1248
57
                    let parts = matched_intent.trim().split(";").collect::<Vec<&str>>();
1249
57
                    if parts.len() == 1 {
1250
30
                        return "".to_string();
1251
27
                    }
1252
27
                    if parts.len() != 3 {
1253
0
                        error!("Intent '{}' has {} ';' separated parts, should have 3", intent_name, parts.len());
1254
0
                        return "".to_string();
1255
27
                    }
1256
27
                    let mut speech = (if at_start {
parts[0]4
} else {
parts[2]23
}).split(":").collect::<Vec<&str>>();
1257
27
                    match speech.len() {
1258
20
                        1 => return speech[0].to_string(),
1259
                        2 | 3 => {
1260
7
                            if speech.len() == 2 {
1261
0
                                warn!("Intent '{intent_name}'  has only two ':' separated parts, but should have three");
1262
0
                                speech.push(speech[1]);
1263
7
                            }
1264
7
                            let bracketing_words = match verbosity {
1265
7
                                "Terse" => 
speech[0]0
,
1266
7
                                "Medium" => speech[1],
1267
0
                                _ => speech[2],
1268
                            };
1269
7
                            return bracketing_words.to_string();
1270
                        },
1271
                        _ => {
1272
0
                            error!("Intent '{}' has too many ({}) operator names, should only have 2", intent_name, speech.len());
1273
                        },
1274
                    }
1275
0
                }   
1276
4
            };
1277
4
            return "".to_string();
1278
61
        })
1279
61
    }
1280
}
1281
1282
impl Function for GetBracketingIntentName {
1283
61
    fn evaluate<'d>(&self,
1284
61
                        _context: &context::Evaluation<'_, 'd>,
1285
61
                        args: Vec<Value<'d>>)
1286
61
                        -> Result<Value<'d>, Error>
1287
    {
1288
61
        let mut args = Args(args);
1289
61
        args.exactly(4)
?0
;
1290
61
        let start_or_end = args.pop_string()
?0
;
1291
61
        if start_or_end != "start" && 
start_or_end != "end"57
{
1292
0
            return Err( Error::Other("GetBracketingIntentName: first argument must be either 'start' or 'end'".to_string()) );
1293
61
        }
1294
61
        let fixity = args.pop_string()
?0
;
1295
61
        let verbosity = args.pop_string()
?0
;
1296
61
        let name = args.pop_string()
?0
;
1297
61
        return Ok( Value::String(GetBracketingIntentName:: bracketing_words(&name, &verbosity, &fixity, start_or_end == "start")) );
1298
61
    }
1299
}
1300
1301
pub struct GetNavigationPartName;
1302
/// GetNavigationPartName(name, index)
1303
/// Returns the name to use to speak the part of a navigation expression (e.g., 'numerator', 'denominator', 'base', 'exponent', ...).
1304
/// If there is no match, an empty string is returned.
1305
/// 'index' is 0-based
1306
/// 
1307
impl GetNavigationPartName {
1308
129
    fn navigation_part_name(intent_name: &str, index: usize) -> String {
1309
129
        crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
1310
129
            let definitions = definitions.borrow();
1311
129
            if let Some(navigation_names) = definitions.get_hashmap("NavigationParts") &&
1312
129
               let Some(
nav_part_names105
) = navigation_names.get(intent_name) {
1313
                    // Split the pattern is: part [; part]*
1314
105
                    if let Some(part_name) = nav_part_names.trim().split(";").nth(index) {
1315
105
                        return part_name.trim().to_string();
1316
0
                    }
1317
24
                }
1318
24
            return "".to_string();
1319
129
        })
1320
129
    }
1321
}
1322
1323
impl Function for GetNavigationPartName {
1324
129
    fn evaluate<'d>(&self,
1325
129
                        _context: &context::Evaluation<'_, 'd>,
1326
129
                        args: Vec<Value<'d>>)
1327
129
                        -> Result<Value<'d>, Error>
1328
    {
1329
129
        let mut args = Args(args);
1330
129
        args.exactly(2)
?0
;
1331
129
        let index = args.pop_number()
?0
as usize;
1332
129
        let name = args.pop_string()
?0
;
1333
129
        return Ok( Value::String(GetNavigationPartName:: navigation_part_name(&name, index)) );
1334
129
    }
1335
}
1336
1337
pub struct FontSizeGuess;
1338
/// FontSizeGuess(size_string)
1339
///   returns a guess of the size in "ems"
1340
/// Examples:
1341
///    "0.278em" -> 0.278
1342
///    ""
1343
//       returns original node match isn't found
1344
impl FontSizeGuess {
1345
224
    pub fn em_from_value(value_with_unit: &str) -> f64 {
1346
        // match one or more digits followed by a unit -- there are many more units, but they tend to be large and rarer(?)
1347
3
        static FONT_VALUE: LazyLock<Regex> = LazyLock::new(|| { Regex::new(r"(-?[0-9]*\.?[0-9]*)(px|cm|mm|Q|in|ppc|pt|ex|em|rem)").unwrap() });
1348
224
        let cap = FONT_VALUE.captures(value_with_unit);
1349
224
        if let Some(
cap200
) = cap {
1350
200
            if cap.len() == 3 {
1351
200
                let multiplier = match &cap[2] {    // guess based on 12pt font to convert to ems
1352
200
                    "px" => 
1.0/12.00
,
1353
200
                    "cm" => 
2.370
,
1354
200
                    "mm" => 
0.2370
,
1355
200
                    "Q" => 
0.0590
, // 1/4 mm
1356
200
                    "in" => 
6.0223
,
1357
177
                    "pc" => 
1.00
,
1358
177
                    "pt" => 
1.0/12.06
,
1359
171
                    "ex" => 
0.50
,
1360
171
                    "em" => 1.0,
1361
0
                    "rem" => 16.0/12.0,
1362
0
                    default => {debug!("unit='{default}'"); 10.0}
1363
                };
1364
                // debug!("FontSizeGuess: {}->{}, val={}, multiplier={}", value_with_unit, value*multiplier, value, multiplier);
1365
200
                return cap[1].parse::<f64>().unwrap_or(0.0) * multiplier;
1366
            }  else {
1367
0
                return 0.0;             // something bad happened
1368
            }
1369
        }else {
1370
24
            let multiplier = match value_with_unit {    // guess based on 12pt font to convert to ems
1371
24
                "veryverythinspace" => 
1.0/18.00
,
1372
24
                "verythinspace" => 
2.0/18.00
,
1373
24
                "thinspace" => 
3.0/18.00
,
1374
24
                "mediumspace" => 
4.0/18.00
,
1375
24
                "thickspace" => 
5.0/18.00
,
1376
24
                "verythickspace" => 
6.0/18.00
,
1377
24
                "veryverythickspace" => 
7.0/18.00
,
1378
24
                _ => 0.0,
1379
            };
1380
24
            return multiplier;
1381
        }
1382
224
    }
1383
}
1384
impl Function for FontSizeGuess {
1385
0
    fn evaluate<'d>(&self,
1386
0
                        _context: &context::Evaluation<'_, 'd>,
1387
0
                        args: Vec<Value<'d>>)
1388
0
                        -> Result<Value<'d>, Error>
1389
    {
1390
0
        let mut args = Args(args);
1391
0
        args.exactly(1)?;
1392
0
        let value_with_unit = args.pop_string()?;
1393
0
        let em_value = FontSizeGuess::em_from_value(&value_with_unit);
1394
0
        return Ok( Value::Number(em_value) );
1395
0
    }
1396
}
1397
1398
pub struct ReplaceAll;
1399
/// ReplaceAll(haystack, needle, replacement)
1400
///   Returns a string with all occurrences of 'needle' replaced with 'replacement'
1401
impl Function for ReplaceAll {
1402
0
    fn evaluate<'d>(&self,
1403
0
                        _context: &context::Evaluation<'_, 'd>,
1404
0
                        args: Vec<Value<'d>>)
1405
0
                        -> Result<Value<'d>, Error>
1406
    {
1407
0
        let mut args = Args(args);
1408
0
        args.exactly(3)?;
1409
0
        let replacement = args.pop_string()?;
1410
0
        let needle = args.pop_string()?;
1411
0
        let haystack = args.pop_string()?;
1412
0
        return Ok( Value::String(haystack.replace(&needle, &replacement)) );
1413
0
    }
1414
}
1415
1416
/// Add all the functions defined in this module to `context`.
1417
22.7k
pub fn add_builtin_functions(context: &mut Context) {
1418
22.7k
    context.set_function("NestingChars", crate::braille::NemethNestingChars);
1419
22.7k
    context.set_function("BrailleChars", crate::braille::BrailleChars);
1420
22.7k
    context.set_function("NeedsToBeGrouped", crate::braille::NeedsToBeGrouped);
1421
22.7k
    context.set_function("IsNode", IsNode);
1422
22.7k
    context.set_function("ToOrdinal", ToOrdinal);
1423
22.7k
    context.set_function("ToCommonFraction", ToCommonFraction);
1424
22.7k
    context.set_function("IsBracketed", IsBracketed);
1425
22.7k
    context.set_function("IsInDefinition", IsInDefinition);
1426
22.7k
    context.set_function("DefinitionValue", DefinitionValue);
1427
22.7k
    context.set_function("BaseNode", BaseNode);
1428
22.7k
    context.set_function("IfThenElse", IfThenElse);
1429
22.7k
    context.set_function("IFTHENELSE", IfThenElse);
1430
22.7k
    context.set_function("DistanceFromLeaf", DistanceFromLeaf);
1431
22.7k
    context.set_function("EdgeNode", EdgeNode);
1432
22.7k
    context.set_function("SpeakIntentName", SpeakIntentName);
1433
22.7k
    context.set_function("GetBracketingIntentName", GetBracketingIntentName);
1434
22.7k
    context.set_function("GetNavigationPartName", GetNavigationPartName);
1435
22.7k
    context.set_function("DEBUG", Debug);
1436
1437
    // Not used: remove??
1438
22.7k
    context.set_function("min", Min);       // missing in xpath 1.0
1439
22.7k
    context.set_function("max", Max);       // missing in xpath 1.0
1440
22.7k
    context.set_function("FontSizeGuess", FontSizeGuess);
1441
22.7k
    context.set_function("ReplaceAll", ReplaceAll);
1442
22.7k
}
1443
1444
1445
#[cfg(test)]
1446
mod tests {
1447
    use super::*;
1448
    use sxd_document::parser;
1449
    use crate::interface::{trim_element, get_element};
1450
1451
1452
4
    fn init_word_list() {
1453
4
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1454
4
        let result = crate::definitions::read_definitions_file(true);
1455
4
        if let Err(
e0
) = result {
1456
0
            panic!("unable to read 'Rules/Languages/en/definitions.yaml\n{e}");
1457
4
        }
1458
4
    }
1459
1460
    #[test]
1461
1
    fn ordinal_one_digit() {
1462
1
        init_word_list();
1463
1
        assert_eq!("zeroth", ToOrdinal::convert("0", false, false).unwrap());
1464
1
        assert_eq!("second", ToOrdinal::convert("2", false, false).unwrap());
1465
1
        assert_eq!("ninth", ToOrdinal::convert("9", false, false).unwrap());
1466
1467
1
        assert_eq!("zeroth", ToOrdinal::convert("0", false, true).unwrap());
1468
1
        assert_eq!("seconds", ToOrdinal::convert("2", false, true).unwrap());
1469
1
        assert_eq!("ninths", ToOrdinal::convert("9", false, true).unwrap());
1470
1471
1
        assert_eq!("first", ToOrdinal::convert("1", true, false).unwrap());
1472
1
        assert_eq!("half", ToOrdinal::convert("2", true, false).unwrap());
1473
1
        assert_eq!("half", ToOrdinal::convert("02", true, false).unwrap());
1474
1
        assert_eq!("ninth", ToOrdinal::convert("9", true, false).unwrap());
1475
1476
1
        assert_eq!("halves", ToOrdinal::convert("2", true, true).unwrap());
1477
1
        assert_eq!("halves", ToOrdinal::convert("002", true, true).unwrap());
1478
1
        assert_eq!("ninths", ToOrdinal::convert("9", true, true).unwrap());
1479
1
    }
1480
1481
    #[test]
1482
1
    fn ordinal_two_digit() {
1483
1
        init_word_list();
1484
1
        assert_eq!("tenth", ToOrdinal::convert("10", false, false).unwrap());
1485
1
        assert_eq!("seventeenth", ToOrdinal::convert("17", false, false).unwrap());
1486
1
        assert_eq!("thirty second", ToOrdinal::convert("32", false, false).unwrap());
1487
1
        assert_eq!("fortieth", ToOrdinal::convert("40", false, false).unwrap());
1488
1489
1
        assert_eq!("tenths", ToOrdinal::convert("10", false, true).unwrap());
1490
1
        assert_eq!("sixteenths", ToOrdinal::convert("16", false, true).unwrap());
1491
1
        assert_eq!("eighty eighths", ToOrdinal::convert("88", false, true).unwrap());
1492
1
        assert_eq!("fiftieths", ToOrdinal::convert("50", false, true).unwrap());
1493
1494
1
        assert_eq!("eleventh", ToOrdinal::convert("11", true, false).unwrap());
1495
1
        assert_eq!("forty fourth", ToOrdinal::convert("44", true, false).unwrap());
1496
1
        assert_eq!("ninth", ToOrdinal::convert("9", true, false).unwrap());
1497
1
        assert_eq!("ninth", ToOrdinal::convert("00000009", true, false).unwrap());
1498
1
        assert_eq!("sixtieth", ToOrdinal::convert("60", true, false).unwrap());
1499
1500
1
        assert_eq!("tenths", ToOrdinal::convert("10", true, true).unwrap());
1501
1
        assert_eq!("tenths", ToOrdinal::convert("0010", true, true).unwrap());
1502
1
        assert_eq!("elevenths", ToOrdinal::convert("11", true, true).unwrap());
1503
1
        assert_eq!("nineteenths", ToOrdinal::convert("19", true, true).unwrap());
1504
1
        assert_eq!("twentieths", ToOrdinal::convert("20", true, true).unwrap());
1505
1
        assert_eq!("nineteenths", ToOrdinal::convert("𝟏𝟗", true, true).unwrap());
1506
1
    }
1507
1508
    #[test]
1509
1
    fn ordinal_three_digit() {
1510
1
        init_word_list();
1511
1
        assert_eq!("one hundred first", ToOrdinal::convert("101", false, false).unwrap());
1512
1
        assert_eq!("two hundred tenth", ToOrdinal::convert("210", false, false).unwrap());
1513
1
        assert_eq!("four hundred thirty second", ToOrdinal::convert("432", false, false).unwrap());
1514
1
        assert_eq!("four hundred second", ToOrdinal::convert("402", false, false).unwrap());
1515
1516
1
        assert_eq!("one hundred first", ToOrdinal::convert("101", true, false).unwrap());
1517
1
        assert_eq!("two hundred second", ToOrdinal::convert("202", true, false).unwrap());
1518
1
        assert_eq!("four hundred thirty second", ToOrdinal::convert("432", true, false).unwrap());
1519
1
        assert_eq!("five hundred third", ToOrdinal::convert("503", true, false).unwrap());
1520
1521
1
        assert_eq!("three hundred elevenths", ToOrdinal::convert("311", false, true).unwrap());
1522
1
        assert_eq!("four hundred ninety ninths", ToOrdinal::convert("499", false, true).unwrap());
1523
1
        assert_eq!("nine hundred ninetieths", ToOrdinal::convert("990", false, true).unwrap());
1524
1
        assert_eq!("six hundred seconds", ToOrdinal::convert("602", false, true).unwrap());
1525
1526
1
        assert_eq!("seven hundredths", ToOrdinal::convert("700", true, true).unwrap());
1527
1
        assert_eq!("one hundredths", ToOrdinal::convert("100", true, true).unwrap());
1528
1
        assert_eq!("eight hundred seventeenths", ToOrdinal::convert("817", true, true).unwrap());
1529
1
    }
1530
    #[test]
1531
1
    fn ordinal_large() {
1532
1
        init_word_list();
1533
1
        assert_eq!("one thousandth", ToOrdinal::convert("1000", false, false).unwrap());
1534
1
        assert_eq!("two thousand one hundredth", ToOrdinal::convert("2100", false, false).unwrap());
1535
1
        assert_eq!("thirty thousandth", ToOrdinal::convert("30000", false, false).unwrap());
1536
1
        assert_eq!("four hundred thousandth", ToOrdinal::convert("400000", false, false).unwrap());
1537
1538
1
        assert_eq!("four hundred thousandth", ToOrdinal::convert("400000", true, false).unwrap());
1539
1
        assert_eq!("five hundred thousand second", ToOrdinal::convert("500002", true, false).unwrap());
1540
1
        assert_eq!("six millionth", ToOrdinal::convert("6000000", true, false).unwrap());
1541
1
        assert_eq!("sixty millionth", ToOrdinal::convert("60000000", true, false).unwrap());
1542
1543
1
        assert_eq!("seven billionths", ToOrdinal::convert("7000000000", false, true).unwrap());
1544
1
        assert_eq!("eight trillionths", ToOrdinal::convert("8000000000000", false, true).unwrap());
1545
1
        assert_eq!("nine quadrillionths", ToOrdinal::convert("9000000000000000", false, true).unwrap());
1546
1
        assert_eq!("one quintillionth", ToOrdinal::convert("1000000000000000000", false, false).unwrap());
1547
1548
1
        assert_eq!("nine billion eight hundred seventy six million five hundred forty three thousand two hundred tenths", ToOrdinal::convert("9876543210", true, true).unwrap());
1549
1
        assert_eq!("nine billion five hundred forty three thousand two hundred tenths", ToOrdinal::convert("9000543210", true, true).unwrap());
1550
1
        assert_eq!("zeroth", ToOrdinal::convert("00000", false, false).unwrap());
1551
1
    }
1552
1553
1554
11
    fn test_is_simple(message: &'static str, mathml_str: &'static str) {
1555
    // this forces initialization
1556
11
    crate::speech::SPEECH_RULES.with(|_| true);
1557
11
        let package = parser::parse(mathml_str)
1558
11
        .expect("failed to parse XML");
1559
11
        let mathml = get_element(&package);
1560
11
        trim_element(mathml, false);
1561
11
        assert!(IsNode::is_simple(mathml), "{}", message);
1562
11
    }
1563
1564
7
    fn test_is_not_simple(message: &'static str, mathml_str: &'static str) {
1565
    // this forces initialization
1566
7
    crate::speech::SPEECH_RULES.with(|_| true);
1567
7
        let package = parser::parse(mathml_str)
1568
7
        .expect("failed to parse XML");
1569
7
        let mathml = get_element(&package);
1570
7
        trim_element(mathml, false);
1571
7
        assert!(!IsNode::is_simple(mathml), "{}", message);
1572
7
    }
1573
    #[test]
1574
1
    fn is_simple() {
1575
1
        test_is_simple("single variable", "<mi>x</mi>");
1576
1
        test_is_simple("single number", "<mn>1.2</mn>");
1577
1
        test_is_simple("negative number", "<mrow><mo>-</mo><mn>10</mn></mrow>");
1578
1
        test_is_simple("negative variable", "<mrow><mo>-</mo><mi>x</mi></mrow>");
1579
1
        test_is_simple("ordinal fraction", "<mfrac><mn>3</mn><mn>4</mn></mfrac>");
1580
1
        test_is_simple("x y", "<mrow><mi>x</mi><mo>&#x2062;</mo><mi>y</mi></mrow>");
1581
1
        test_is_simple("negative two vars", 
1582
                "<mrow><mrow><mo>-</mo><mi>x</mi></mrow><mo>&#x2062;</mo><mi>y</mi></mrow>");
1583
1
        test_is_simple("-2 x y", 
1584
                "<mrow><mrow><mo>-</mo><mn>2</mn></mrow>
1585
                             <mo>&#x2062;</mo><mi>x</mi><mo>&#x2062;</mo><mi>z</mi></mrow>");
1586
1
        test_is_simple("sin x", "<mrow><mi>sin</mi><mo>&#x2061;</mo><mi>x</mi></mrow>");
1587
1
        test_is_simple("f(x)", "<mrow><mi>f</mi><mo>&#x2061;</mo><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow>");
1588
1
        test_is_simple("f(x+y)",
1589
         "<mrow><mi>f</mi><mo>&#x2061;</mo>\
1590
            <mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></mrow>");
1591
        
1592
1
    }
1593
1594
    #[test]
1595
1
    fn is_not_simple() {
1596
1
        test_is_not_simple("multi-char variable", "<mi>rise</mi>");
1597
1
        test_is_not_simple("large ordinal fraction", "<mfrac><mn>30</mn><mn>4</mn></mfrac>");
1598
1
        test_is_not_simple("fraction with var in numerator", "<mfrac><mi>x</mi><mn>4</mn></mfrac>");
1599
1
        test_is_not_simple("square root", "<msqrt><mi>x</mi></msqrt>");
1600
1
        test_is_not_simple("subscript", "<msub><mi>x</mi><mn>4</mn></msub>");
1601
1
        test_is_not_simple("-x y z", 
1602
                "<mrow><mrow><mo>-</mo><mi>x</mi></mrow>
1603
                            <mo>&#x2062;</mo><mi>y</mi><mo>&#x2062;</mo><mi>z</mi></mrow>");
1604
1
        test_is_not_simple("C(-2,1,4)",             // github.com/NSoiffer/MathCAT/issues/199
1605
                    "<mrow><mi>C</mi><mrow><mo>(</mo><mo>−</mo><mn>2</mn><mo>,</mo><mn>1</mn><mo>,</mo><mn>4</mn><mo>)</mo></mrow></mrow>");
1606
                   
1607
1
    }
1608
1609
    #[test]
1610
1
    fn at_left_edge() {
1611
1
        let mathml = "<math><mfrac><mrow><mn>30</mn><mi>x</mi></mrow><mn>4</mn></mfrac></math>";
1612
1
        let package = parser::parse(mathml).expect("failed to parse XML");
1613
1
        let mathml = get_element(&package);
1614
1
        trim_element(mathml, false);
1615
1
        let fraction = as_element(mathml.children()[0]);
1616
1
        let mn = as_element(as_element(fraction.children()[0]).children()[0]);
1617
1
        assert_eq!(EdgeNode::edge_node(mn, true, "2D"), Some(fraction));
1618
1
        assert_eq!(EdgeNode::edge_node(mn, false, "2D"), None);
1619
1620
1
        let mi = as_element(as_element(fraction.children()[0]).children()[1]);
1621
1
        assert_eq!(EdgeNode::edge_node(mi, true, "2D"), None);
1622
1
    }
1623
1624
    #[test]
1625
1
    fn at_right_edge() {
1626
1
        let mathml = "<math><mrow><mfrac><mn>4</mn><mrow><mn>30</mn><mi>x</mi></mrow></mfrac><mo>.</mo></mrow></math>";
1627
1
        let package = parser::parse(mathml).expect("failed to parse XML");
1628
1
        let mathml = get_element(&package);
1629
1
        trim_element(mathml, false);
1630
1
        let fraction = as_element(as_element(mathml.children()[0]).children()[0]);
1631
1
        let mi = as_element(as_element(fraction.children()[1]).children()[1]);
1632
1
        assert_eq!(EdgeNode::edge_node(mi, true, "2D"), None);
1633
1
        assert_eq!(EdgeNode::edge_node(mi, false, "2D"), Some(fraction));
1634
1
        assert_eq!(EdgeNode::edge_node(mi, false, "math"), Some(mathml));
1635
1636
1
        let mn = as_element(as_element(fraction.children()[1]).children()[0]);
1637
1
        assert_eq!(EdgeNode::edge_node(mn, true, "2D"), None);
1638
1
    }
1639
}
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/runner/work/MathCAT/MathCAT/src/xpath_functions.rs
Line
Count
Source
1
#![allow(clippy::needless_return)]
2
//! XPath underlies rule matching and speech generation. The version of xpath used is based on xpath 1.0
3
//! and includes the ability to define functions and variables.
4
//! The variables defined are all the preferences and also variables set in speech rules via the `variables` keyword.
5
//! The function defined here are:
6
//! * `IsNode(node, kind)`:  returns true if the node matches the "kind".
7
//!   Valid values are "leaf", "2D", "simple", "common_fraction", "trig_name".
8
//! * `ToOrdinal(number, fractional, plural)`: converts the number to an ordinal (e.g, third)
9
//!   * `number` -- the number to translate
10
//!   * `fractional` -- true if this is a fractional ordinal (e.g, "half")
11
//!   * `plural` -- true if answer should be plural
12
//! * `ToCommonFraction(mfrac)` -- converts the fraction to an ordinal version (e.g, 2 thirds)
13
//! * `IsLargeOp(node)` -- returns true if the node is a large operator (e.g, integral or sum)
14
//! * `IsBracketed(node, left, right, requires_comma)` -- returns true if the first/last element in the mrow match `left`/`right`.
15
//!   If the optional `requires_comma` argument is given and is `true`, then there also must be a "," in the mrow (e.g., "f(x,y)")
16
//! * `DEBUG(xpath)` -- _Very_ useful function for debugging speech rules.
17
//!   This can be used to surround a whole or part of an xpath expression in a match or output.
18
//!   The result will be printed to standard output and the result returned so that `DEBUG` does not affect the computation.    
19
20
use sxd_document::dom::{Element, ChildOfElement};
21
use sxd_xpath::{Value, Context, context, function::*, nodeset::*};
22
use crate::definitions::{Definitions, SPEECH_DEFINITIONS, BRAILLE_DEFINITIONS};
23
use regex::Regex;
24
use crate::pretty_print::mml_to_string;
25
use std::cell::{Ref, RefCell};
26
use log::{debug, error, warn};
27
use std::sync::LazyLock;
28
use std::thread::LocalKey;
29
use phf::phf_set;
30
use sxd_xpath::function::Error as XPathError;
31
use crate::canonicalize::{as_element, name, get_parent, MATHML_FROM_NAME_ATTR};
32
33
// useful utility functions
34
// note: child of an element is a ChildOfElement, so sometimes it is useful to have parallel functions,
35
//   one for Element and one for ChildOfElement.
36
37
// @returns {String} -- the text of the (leaf) element otherwise an empty string
38
126k
fn get_text_from_element(e: Element) -> String {
39
126k
    if e.children().len() == 1 &&
40
103k
       let ChildOfElement::Text(
t102k
) = e.children()[0] {
41
102k
            return t.text().to_string();
42
23.4k
        }
43
23.4k
    return "".to_string();
44
126k
}
45
46
#[allow(non_snake_case)]
47
// Same as 'is_tag', but for ChildOfElement
48
110k
fn get_text_from_COE(coe: &ChildOfElement) -> String {
49
110k
    let element = coe.element();
50
110k
    return match element {
51
110k
        Some(e) => get_text_from_element(e),
52
0
        None => "".to_string(),
53
    };
54
110k
}
55
56
// make sure that there is only one node in the NodeSet
57
// Returns the node or an Error
58
147k
pub fn validate_one_node<'n>(nodes: Nodeset<'n>, func_name: &str) -> Result<Node<'n>, Error> {
59
147k
    if nodes.size() == 0 {
60
0
        return Err(Error::Other(format!("Missing argument for {func_name}")));
61
147k
    } else if nodes.size() > 1 {
62
0
        return Err( Error::Other(format!("{} arguments for {}; expected 1 argument", nodes.size(), func_name)) );
63
147k
    }
64
147k
    return Ok( nodes.iter().next().unwrap() );
65
147k
}
66
67
// Return true if the element's name is 'name'
68
157k
fn is_tag(e: Element, name: &str) -> bool {
69
    // need to check name before the fallback of where the name came from
70
157k
    return e.name().local_part() == name || 
e47.8k
.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or_default() == name;
71
157k
}
72
73
#[allow(non_snake_case)]
74
// Same as 'is_tag', but for ChildOfElement
75
1.40k
fn is_COE_tag(coe: ChildOfElement, name: &str) -> bool {
76
1.40k
    let element = coe.element();
77
1.40k
    return element.is_some() && is_tag(element.unwrap(), name)
78
1.40k
}
79
80
/// Should be an internal structure for implementation of the IsNode, but it was useful in one place in a separate module.
81
/// This should probably be restructured slightly.
82
pub struct IsNode;
83
84
impl IsNode {
85
    /// implements ClearSpeak's definition of "simple"
86
    /// this is fairly detailed, so we define a few local functions (at end) to help out
87
    /// Also, it doesn't help that the structure is a bit complicated Elements->ChildOfElement->Element/Text
88
7.43k
    pub fn is_simple(elem: Element) -> bool {
89
7.43k
        if is_trivially_simple(elem) {
90
3.62k
            return true;
91
3.81k
        }
92
93
3.81k
        if is_negative_of_trivially_simple(elem) {
94
            // -3 or -x
95
41
            return true;
96
3.76k
        }
97
98
3.76k
        if !is_tag(elem, "mrow") || 
elem.children()867
.
is_empty867
() {
99
2.90k
            return false;
100
867
        }
101
102
        // x y or -x or -3 x or -x y or -3 x y or x° or n° or -x° or -n°
103
        #[allow(clippy::if_same_then_else)]
104
867
        if is_times_mi(elem) {
105
42
            return true;    // x y
106
825
        } else if is_degrees(elem) {
107
0
            return true;    // x° or n°
108
825
        } else if is_function(elem) {
109
44
            return true;
110
781
        }
111
112
781
        return false;
113
114
115
        // returns the element's text value
116
5.71k
        fn to_str(e: Element<'_>) -> &str {
117
            // typically usage assumes 'e' is a leaf
118
            // bad MathML is the following isn't true
119
5.71k
            if e.children().len() == 1 {
120
5.71k
                let text_node = e.children()[0];
121
5.71k
                if let Some(t) = text_node.text() {
122
5.71k
                    return t.text();
123
0
                }
124
0
            }               
125
0
            return "";
126
5.71k
        }
127
128
        // same as 'to_str' but for ChildOfElement
129
1.01k
        fn coe_to_str(coe: ChildOfElement<'_>) -> &str {
130
            // typically usage assumes 'coe' is a leaf
131
1.01k
            let element_node = coe.element();
132
1.01k
            if let Some(e) = element_node {
133
                // bad MathML is the following isn't true
134
1.01k
                if e.children().len() == 1 {
135
1.01k
                    let text_node = e.children()[0];
136
1.01k
                    if let Some(t) = text_node.text() {
137
1.01k
                        return t.text();
138
0
                    }
139
8
                }
140
0
            }               
141
8
            return "";
142
1.01k
        }
143
144
        // returns true if the string is just a single *char* (which can be multiple bytes)
145
5.71k
        fn is_single_char(str: &str) -> bool {
146
5.71k
            let mut chars =  str.chars();
147
5.71k
            return chars.next().is_some() && chars.next().is_none();
148
5.71k
        }
149
150
        // checks the single element to see if it is simple (mn, mi that is a single char, common fraction)
151
8.33k
        fn is_trivially_simple(elem: Element) -> bool {
152
8.33k
            if is_tag(elem, "mn")  {
153
914
                return true;
154
7.41k
            }
155
7.41k
            if is_tag(elem, "mi") && 
is_single_char5.71k
(
to_str(elem)5.71k
) {
156
                // "simple" only if it is a single char (which can be multiple bytes)
157
3.14k
                return true;
158
4.27k
            }
159
160
            // FIX: need to consult preference Fraction_Ordinal
161
4.27k
            if IsNode::is_common_fraction(elem, 10, 19) {
162
66
                return true;
163
4.21k
            }
164
4.21k
            return false;
165
8.33k
        }
166
167
        // true if the negative of a single element that is simple
168
4.20k
        fn is_negative_of_trivially_simple(elem: Element) -> bool {
169
4.20k
            if is_tag(elem, "mrow") && 
elem.children().len() == 2933
{
170
38
                let children = elem.children();
171
                // better be negative of something at this point...
172
38
                if is_COE_tag(children[0], "mo") && 
is_equal11
(
children[0]11
, '-') &&
173
6
                   children[1].element().is_some() && is_trivially_simple(children[1].element().unwrap()) {
174
6
                    return true;
175
32
                }
176
4.16k
            }
177
4.20k
            if is_tag(elem, "minus") && 
elem.children().len() == 154
{
178
54
                let child = elem.children()[0];
179
54
                if let Some(e) = child.element() {
180
54
                    return is_trivially_simple(e);
181
0
                }
182
4.14k
            }
183
184
4.14k
            return false;
185
4.20k
        }
186
187
        // return true if ChildOfElement has exactly text 'ch'
188
967
        fn is_equal(coe: ChildOfElement, ch: char) -> bool {
189
967
            return coe_to_str(coe).starts_with(ch);
190
967
        }
191
192
        // true if mrow(xxx, &it;, mi) or mrow(xxx, &it; mi, &it;, mi) where mi's have len==1
193
867
        fn is_times_mi(mrow: Element) -> bool {
194
867
            assert!( is_tag(mrow, "mrow") );
195
867
            let children = mrow.children();
196
867
            if !(children.len() == 3 || 
children.len() == 541
) {
197
34
                return false;
198
833
            }
199
833
            if children[0].element().is_none() {
200
0
                return false;
201
833
            }
202
203
833
            let first_child = children[0].element().unwrap();
204
833
            if !is_trivially_simple(first_child) {
205
396
                if !is_negative_of_trivially_simple(first_child) {
206
382
                    return false;
207
14
                }
208
14
                if children.len() == 5 && 
209
2
                   ( (name(first_child) == "minus" && 
first_child.children().len() == 10
&&
!0
is_COE_tag0
(first_child.children()[0], "mn")) ||
210
2
                     (name(first_child) == "mrow"  && !is_COE_tag(first_child.children()[1], "mn")) ) {
211
1
                    return false;      // '-x y z' is too complicated () -- -2 x y is ok
212
13
                }
213
437
            }
214
215
450
            if !(is_COE_tag(children[1], "mo") && 
216
450
                    is_equal(children[1], '\u{2062}') &&
217
63
                 is_COE_tag(children[2], "mi") &&
218
51
                    coe_to_str(children[2]).len()==1 ) {
219
408
                return false;
220
42
            }
221
222
42
            if children.len() == 3 {
223
41
                return true;
224
1
            }
225
226
            // len == 5
227
1
            return  is_COE_tag(children[3], "mo") && 
228
1
                        is_equal(children[3], '\u{2062}') &&       // invisible times
229
1
                    is_COE_tag(children[4], "mi") &&
230
1
                        coe_to_str(children[4]).len()==1 ;
231
867
        }
232
233
        // return true if the mrow is var° or num°
234
825
        fn is_degrees(mrow: Element) -> bool {
235
825
            assert!( is_tag(mrow, "mrow") );
236
825
            let children = mrow.children();
237
825
            return children.len() == 2 &&
238
32
                is_equal(children[1], '°') &&
239
0
                (is_COE_tag(children[0], "mi") ||
240
0
                 is_COE_tag(children[0], "mn") );
241
825
        }
242
243
        // fn_name &af; [simple arg or (simple arg)]
244
825
        fn is_function(mrow: Element) -> bool {
245
825
            assert!( is_tag(mrow, "mrow") );
246
825
            let children = mrow.children();
247
825
            if children.len() != 3 {
248
40
                return false;
249
785
            }
250
785
            if !(is_COE_tag(children[1], "mo") && 
251
473
                 is_equal(children[1], '\u{2061}') ) {    // invisible function application
252
717
                return false;
253
68
            }
254
68
            if !is_COE_tag(children[0], "mi") {
255
0
                return false;
256
68
            }
257
68
            let function_arg = children[2].element().unwrap();
258
68
            if IsBracketed::is_bracketed(function_arg, "(", ")", false, false) {
259
60
                return IsNode::is_simple(function_arg.children()[1].element().unwrap());
260
            } else {
261
8
                return IsNode::is_simple(function_arg);
262
            }
263
825
        }
264
7.43k
    }
265
266
    // Returns true if 'frac' is a common fraction
267
    // In this case, the numerator and denominator can be no larger than 'num_limit' and 'denom_limit'
268
4.31k
    fn is_common_fraction(frac: Element, num_limit: usize, denom_limit: usize) -> bool {
269
2
        static ALL_DIGITS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\d+").unwrap()); // match one or more digits
270
271
4.31k
        if !is_tag(frac, "mfrac") &&  
!4.12k
is_tag4.12k
(frac, "fraction"){
272
4.12k
            return false;
273
188
        }
274
188
        let children = frac.children();
275
188
        if children.len() != 2 {
276
0
            return false;
277
188
        }
278
279
188
        let num = children[0].element();
280
188
        let denom = children[1].element();
281
188
        if num.is_none() || denom.is_none() {
282
0
            return false;
283
188
        };
284
285
188
        let num = num.unwrap();
286
188
        let denom = denom.unwrap();
287
188
        if !is_tag(num, "mn") || 
!115
is_tag115
(denom, "mn") {
288
87
            return false
289
101
        };
290
291
101
        let num = get_text_from_element(num);
292
101
        let denom = get_text_from_element(denom);
293
101
        if num.is_empty() || denom.is_empty() {
294
0
            return false;
295
101
        }
296
297
101
        return ALL_DIGITS.is_match(&num)   && is_small_enough(&num, num_limit) &&
298
100
               ALL_DIGITS.is_match(&denom) && is_small_enough(&denom, denom_limit);
299
300
201
        fn is_small_enough(val: &str, upper_bound: usize) -> bool {
301
201
            return if let Ok(value) = val.parse::<usize>() { value <= upper_bound } else { 
false0
};
302
201
        }
303
4.31k
    }
304
305
14.2k
    pub fn is_mathml(elem: Element) -> bool {
306
        // doesn't check MATHML_FROM_NAME_ATTR because we are interested in if it is an intent.
307
14.2k
        return ALL_MATHML_ELEMENTS.contains(name(elem));
308
14.2k
    }
309
310
    #[allow(non_snake_case)]
311
14.3k
    pub fn is_2D(elem: Element) -> bool {
312
14.3k
        return MATHML_2D_NODES.contains(elem.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(elem)));
313
14.3k
    }
314
315
37.8k
    pub fn is_scripted(elem: Element) -> bool {
316
37.8k
        return MATHML_SCRIPTED_NODES.contains(elem.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(elem)));
317
37.8k
    }
318
319
138k
    pub fn is_modified(elem: Element) -> bool {
320
138k
        return MATHML_MODIFIED_NODES.contains(elem.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(elem)));
321
138k
    }
322
    }
323
324
/// All MathML elements, including a few that get cleaned away
325
/// "semantics", "annotation-xml", "annotation" and Content MathML are not included
326
static ALL_MATHML_ELEMENTS: phf::Set<&str> = phf_set!{
327
    "mi", "mo", "mn", "mtext", "ms", "mspace", "mglyph",
328
    "mfrac", "mroot", "msub", "msup", "msubsup","munder", "mover", "munderover", "mmultiscripts",
329
    "mstack", "mlongdiv", "msgroup", "msrow", "mscarries", "mscarry", "msline",
330
    "none", "mprescripts", "malignmark", "maligngroup",
331
    "math", "msqrt", "merror", "mpadded", "mphantom", "menclose", "mtd", "mstyle",
332
    "mrow", "a", "mfenced", "mtable", "mtr", "mlabeledtr",
333
};
334
335
static MATHML_LEAF_NODES: phf::Set<&str> = phf_set! {
336
  "mi", "mo", "mn", "mtext", "ms", "mspace", "mglyph",
337
    "none", "annotation", "ci", "cn", "csymbol",    // content could be inside an annotation-xml (faster to allow here than to check lots of places)
338
};
339
340
341
// Should mstack and mlongdiv be included here?
342
static MATHML_2D_NODES: phf::Set<&str> = phf_set! {
343
    "mfrac", "msqrt", "mroot", "menclose",
344
    "msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts",
345
    "mtable", "mtr", "mlabeledtr", "mtd",
346
};
347
348
// Should mstack and mlongdiv be included here?
349
static MATHML_MODIFIED_NODES: phf::Set<&str> = phf_set! {
350
    "msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts",
351
};
352
353
// Should mstack and mlongdiv be included here?
354
static MATHML_SCRIPTED_NODES: phf::Set<&str> = phf_set! {
355
    "msub", "msup", "msubsup", "mmultiscripts",
356
};
357
358
1.07M
pub fn is_leaf(element: Element) -> bool {
359
1.07M
    return MATHML_LEAF_NODES.contains(name(element));
360
1.07M
}
361
362
impl Function for IsNode {
363
    // eval function for IsNode
364
    // errors happen for wrong number/kind of arg
365
5.12k
    fn evaluate<'d>(&self,
366
5.12k
                        _context: &context::Evaluation<'_, 'd>,
367
5.12k
                        args: Vec<Value<'d>>)
368
5.12k
                        -> Result<Value<'d>, Error>
369
    {
370
371
5.12k
        let mut args = Args(args);
372
5.12k
        args.exactly(2)
?0
;
373
5.12k
        let kind = args.pop_string()
?0
;
374
        // FIX: there is some conflict problem with xpath errors and error-chain
375
        //                .chain_err(|e| format!("Second arg to is_leaf is not a string: {}", e.to_string()))?;
376
5.12k
        match kind.as_str() {
377
5.12k
            "simple" | 
"leaf"3.09k
|
"common_fraction"849
|
"2D"849
|
"modified"162
|
"scripted"140
|
"mathml"49
=> (),
378
0
            _ => return Err( Error::Other(format!("Unknown argument value '{}' for IsNode",  kind.as_str())) ),
379
        };
380
381
5.12k
        let nodes = args.pop_nodeset()
?0
;
382
5.12k
        if nodes.size() == 0 {
383
0
            return Ok (Value::Boolean(false));  // like xpath, don't make this an error
384
5.12k
        };
385
        return Ok(
386
            Value::Boolean( 
387
5.12k
                nodes.iter()
388
5.12k
                    .all(|node|
389
5.39k
                        if let Node::Element(e) = node {
390
5.39k
                            match kind.as_str() {
391
5.39k
                                "simple" => 
IsNode::is_simple2.29k
(
e2.29k
),
392
3.09k
                                "leaf"   => 
is_leaf_any_name2.25k
(
e2.25k
),
393
849
                                "2D" => 
IsNode::is_2D687
(
e687
),
394
162
                                "modified" => 
IsNode::is_modified22
(
e22
),
395
140
                                "scripted" => 
IsNode::is_scripted91
(
e91
),
396
49
                                "mathml" => IsNode::is_mathml(e),
397
0
                                "common_fraction" => IsNode::is_common_fraction(e, usize::MAX, usize::MAX), 
398
0
                                _        => true,       // can't happen due to check above
399
                            }    
400
                        } else {
401
                            // xpath is something besides an element, so no match
402
0
                            false
403
5.39k
                        }
404
                    )
405
            )
406
        );
407
408
2.25k
        fn is_leaf_any_name(e: Element) -> bool {
409
2.25k
            let children = e.children();
410
2.25k
            if children.is_empty() {
411
0
                return true;
412
2.25k
            } else if children.len() == 1 &&
413
1.24k
                      let ChildOfElement::Text(_) = children[0] {
414
1.17k
                    return true;
415
1.07k
                }
416
1.07k
            return false
417
2.25k
        }
418
5.12k
    }
419
}
420
421
struct ToOrdinal;
422
impl ToOrdinal {
423
    // ordinals often have an irregular start (e.g., "half") before becoming regular.
424
    // if the number is irregular, return the ordinal form, otherwise return 'None'.
425
353
    fn compute_irregular_fractional_speech(number: &str, plural: bool) -> Option<String> {
426
353
        SPEECH_DEFINITIONS.with(|definitions| {
427
353
            let definitions = definitions.borrow();
428
353
            let words = if plural {
429
208
                definitions.get_vec("NumbersOrdinalFractionalPluralOnes")
?0
430
            } else {
431
145
                definitions.get_vec("NumbersOrdinalFractionalOnes")
?0
432
            };
433
353
            let number_as_int: usize = number.parse().unwrap(); // already verified it is only digits
434
353
            if number_as_int < words.len() {
435
                // use the words associated with this irregular pattern.
436
291
                return Some( words[number_as_int].clone() );
437
62
            };
438
62
            return None;
439
353
        })
440
353
    }
441
442
    /**
443
     * Translates a number of up to twelve digits into a string representation.
444
     *   number -- the number to translate
445
     *   fractional -- true if this is a fractional ordinal (e.g, "half")
446
     *   plural -- true if answer should be plural
447
     * Returns the string representation of that number or an error message
448
     */
449
416
    fn convert(number: &str, fractional: bool, plural: bool) -> Option<String> {
450
2
        static NO_DIGIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[^\d]").unwrap()); // match anything except a digit
451
416
        return SPEECH_DEFINITIONS.with(|definitions| {
452
416
            let definitions = definitions.borrow();
453
416
            let numbers_large = definitions.get_vec("NumbersLarge")
?0
;
454
455
416
            let pref_manager = crate::prefs::PreferenceManager::get();
456
416
            let pref_manager = pref_manager.borrow();
457
416
            let block_separators = pref_manager.pref_to_string("BlockSeparators");
458
416
            let decimal_separator = pref_manager.pref_to_string("DecimalSeparators");
459
460
            // check number validity (has digits, not a decimal)
461
416
            if number.is_empty() ||  number.contains(&decimal_separator) {
462
0
                return Some(String::from(number));
463
416
            }
464
            // remove any block separators
465
416
            let number = match clean_number(number, &block_separators) {
466
0
                None => return Some(String::from(number)),
467
416
                Some(num) => num,
468
            };
469
    
470
            // check to see if the number is too big or is not an integer or has non-digits
471
416
            if number.len() > 3*numbers_large.len() {
472
0
                return Some(number);
473
416
            }
474
416
            if NO_DIGIT.is_match(&number) {
475
                // this shouldn't have been part of an mn, so likely an error. Log a warning
476
                // FIX: log a warning that a non-number was passed to convert()
477
0
                return Some(number);
478
416
            }
479
480
            // first deal with the abnormalities of fractional ordinals (one half, etc). That simplifies what remains
481
416
            if fractional &&
482
353
               let Some(
string291
) = ToOrdinal::compute_irregular_fractional_speech(&number, plural) {
483
291
                    return Some(string);
484
125
                }
485
486
            // at this point, we only need to worry about singular/plural distinction
487
488
            // break into groups of three digits and add 10^3 word (thousands, millions, ...) after each chunk
489
            // FIX: add a pause between groups of three -- need to use TTS-specific pause
490
491
            // handle special case of trailing zeros
492
            // num_thousands_at_end represents the amount to shift NumbersLarge... (e.g., millions->thousands)
493
243
            let 
num_thousands_at_end125
= match
number125
.
rfind125
(|ch| ch > '0') { // last non-0 on right
494
122
                Some(n) => (number.len() - 1 - n) / 3 ,
495
3
                None => 0
496
            };
497
125
            let (number,_) = number.split_at(number.len() - 3 * num_thousands_at_end); // drop the 0s
498
499
            // everything is simplified if we add zeros at the start so that block size is a factor of 3
500
125
            let number = match number.len() % 3 {
501
18
                0 => "".to_string() + number,
502
69
                1 => "00".to_string() + number,
503
38
                _ => "0".to_string() + number, // can only be "2" -- compiler doesn't know there aren't other options
504
            };
505
506
            // At this point we have at least three "digits", and length is a multiple of 3
507
            // We have already verified that there are only ASCII digits, so we can subtract '0' to get an index
508
            const ASCII_0: usize = 48;
509
125
            let digits = number.as_bytes()
510
125
                        .iter()
511
411
                        .
map125
(|&byte| byte as usize - ASCII_0)
512
125
                        .collect::<Vec<usize>>();
513
514
125
            let mut answer = String::with_capacity(255);  // reasonable max most of the time
515
125
            let large_words = numbers_large;
516
125
            if digits.len() > 3 { 
517
                // speak this first groups as cardinal numbers
518
7
                let words = [
519
7
                    definitions.get_vec("NumbersHundreds")
?0
,
520
7
                    definitions.get_vec("NumbersTens")
?0
,
521
7
                    definitions.get_vec("NumbersOnes")
?0
,
522
                ];
523
7
                answer = digits[0..digits.len()-3]
524
7
                            .chunks(3)
525
7
                            .enumerate()
526
12
                            .
map7
(|(i, chunk)| {
527
12
                                if chunk[0] != 0 || 
chunk[1] != 08
||
chunk[2] != 08
{
528
7
                                    Some(ToOrdinal::hundreds_to_words(chunk, &words)
?0
+ " " +
529
7
                                        &large_words[num_thousands_at_end + digits.len()/3 - 1 - i] + " ")
530
                                } else {
531
5
                                    Some("".to_string())
532
                                }
533
12
                            })
534
7
                            .collect::<Option<Vec<String>>>()
?0
535
7
                            .join("");  // can't use " " because 1000567 would get extra space in the middle
536
7
                if num_thousands_at_end > 0 {
537
                    // add on "billionths", etc and we are done
538
0
                    let large_words = if plural {
539
0
                        definitions.get_vec("NumbersOrdinalPluralLarge")
540
                    } else {
541
0
                        definitions.get_vec("NumbersOrdinalLarge")
542
                    };
543
0
                    return Some(answer + &large_words?[num_thousands_at_end]);
544
7
                }
545
118
            };
546
547
            // all that is left is to speak the hundreds part, possibly followed by "thousands", "billions", etc
548
125
            let words = match (num_thousands_at_end > 0, plural) {
549
                (true, _) => [
550
10
                    definitions.get_vec("NumbersHundreds")
?0
,
551
10
                    definitions.get_vec("NumbersTens")
?0
,
552
10
                    definitions.get_vec("NumbersOnes")
?0
,
553
                ],
554
                (false, true) => [
555
54
                    definitions.get_vec("NumbersOrdinalPluralHundreds")
?0
,
556
54
                    definitions.get_vec("NumbersOrdinalPluralTens")
?0
,
557
54
                    definitions.get_vec("NumbersOrdinalPluralOnes")
?0
,
558
                ],
559
                (false, false) => [
560
61
                    definitions.get_vec("NumbersOrdinalHundreds")
?0
,
561
61
                    definitions.get_vec("NumbersOrdinalTens")
?0
,
562
61
                    definitions.get_vec("NumbersOrdinalOnes")
?0
,
563
                ],
564
            };
565
125
            answer += &ToOrdinal::hundreds_to_words(&digits[digits.len()-3..], &words)
?0
;
566
125
            if num_thousands_at_end > 0 {
567
10
                let large_words = if plural {
568
3
                    definitions.get_vec("NumbersOrdinalPluralLarge")
?0
569
                } else {
570
7
                    definitions.get_vec("NumbersOrdinalLarge")
?0
571
                };
572
10
                answer = answer + " " + &large_words[num_thousands_at_end];
573
115
            }
574
125
            return Some(answer);
575
416
        });
576
577
        /// Remove block separators and convert alphanumeric digits to ascii digits
578
416
        fn clean_number(number: &str, block_separators: &str) -> Option<String> {
579
416
            let mut answer = String::with_capacity(number.len());
580
617
            for ch in 
number416
.
chars416
() {
581
617
                if block_separators.contains(ch) {
582
0
                    continue;
583
617
                }
584
617
                if ch.is_ascii_digit() {
585
615
                    answer.push(ch);
586
615
                } else {
587
2
                    let shifted_ch = match ch {
588
2
                        '𝟎'..='𝟗' => ch as u32 -'𝟎' as u32 + '0' as u32,
589
0
                        '𝟘'..='𝟡' => ch as u32 -'𝟘' as u32 + '0' as u32,
590
0
                        '𝟢'..='𝟫' => ch as u32 -'𝟢' as u32 + '0' as u32,
591
0
                        '𝟬'..='𝟵' => ch as u32 -'𝟬' as u32 + '0' as u32,
592
0
                        '𝟶'..='𝟿' => ch as u32 -'𝟶' as u32 + '0' as u32,
593
0
                        _ => return None,
594
                    };
595
2
                    answer.push(char::from_u32(shifted_ch).unwrap());
596
                }
597
            }
598
416
            return Some(answer);
599
416
        }
600
416
    }
601
602
603
132
    fn hundreds_to_words(number: &[usize], words: &[Ref<Vec<String>>; 3]) -> Option<String> {
604
132
        assert!( number.len() == 3 );
605
132
        return SPEECH_DEFINITIONS.with(|definitions| {
606
132
            let definitions = definitions.borrow();
607
132
            if number[0] != 0 && 
number[1] == 024
&&
number[2] == 012
{
608
6
                return Some(words[0][number[0]].clone());
609
126
            }
610
611
126
            let mut hundreds = definitions.get_vec("NumbersHundreds")
?0
[number[0]].clone();
612
126
            if !hundreds.is_empty() {
613
18
                hundreds += " ";
614
108
            }
615
616
126
            if number[1] != 0 && 
number[2] == 049
{
617
26
                return Some(hundreds + &words[1][number[1]]);
618
100
            }
619
620
100
            if 10*number[1] < words[2].len() {
621
                // usurp regular ordering to handle something like '14'
622
85
                return Some(hundreds + &words[2][10*number[1] + number[2]]);
623
            } else {
624
15
                return Some(hundreds + &definitions.get_vec("NumbersTens")
?0
[number[1]] + " " + &words[2][number[2]]);
625
            }
626
132
        });
627
132
    }
628
}
629
630
impl Function for ToOrdinal {
631
    // convert a node to an ordinal number
632
320
    fn evaluate<'d>(&self,
633
320
                        _context: &context::Evaluation<'_, 'd>,
634
320
                        args: Vec<Value<'d>>)
635
320
                        -> Result<Value<'d>, Error>
636
    {
637
320
        let mut args = Args(args);
638
320
        if let Err(
e0
) = args.exactly(1).or_else(|_|
args288
.
exactly288
(3)) {
639
0
            return Err( XPathError::Other(format!("ToOrdinal requires 1 or 3 args: {e}")));
640
320
        };
641
320
        let mut fractional = false;
642
320
        let mut plural = false;
643
320
        if args.len() == 3 {
644
288
            plural = args.pop_boolean()
?0
;
645
288
            fractional = args.pop_boolean()
?0
;
646
32
        }
647
320
        let node = validate_one_node(args.pop_nodeset()
?0
, "ToOrdinal")
?0
;
648
320
        return match node {
649
0
            Node::Text(t) =>  Ok( Value::String(
650
0
                match ToOrdinal::convert(t.text(), fractional, plural) {
651
0
                    None => t.text().to_string(),
652
0
                    Some(ord) => ord,
653
                } ) ),
654
320
            Node::Element(e) => Ok( Value::String(
655
320
                match ToOrdinal::convert(&get_text_from_element(e), fractional, plural) {
656
0
                    None => get_text_from_element(e).to_string(),
657
320
                    Some(ord) => ord,
658
                } ) ),
659
0
            _   =>  Err( Error::ArgumentNotANodeset{actual: ArgumentType::String} ),
660
        }
661
320
    }
662
}
663
664
665
struct ToCommonFraction;
666
667
impl Function for ToCommonFraction {
668
    // convert a node to a common fraction (if the numerator and denominator are within given limits)
669
34
    fn evaluate<'d>(&self,
670
34
                        _context: &context::Evaluation<'_, 'd>,
671
34
                        args: Vec<Value<'d>>)
672
34
                        -> Result<Value<'d>, Error>
673
    {
674
34
        let mut args = Args(args);
675
34
        args.exactly(1)
?0
;
676
677
        // FIX: should probably handle errors by logging them and then trying to evaluate any children
678
34
        let node = validate_one_node(args.pop_nodeset()
?0
, "ToCommonFraction")
?0
;
679
34
        if let Node::Element(frac) = node {
680
34
            if !IsNode::is_common_fraction(frac, usize::MAX, usize::MAX) {
681
0
                return Err( Error::Other( format!("ToCommonFraction -- argument is not an 'mfrac': {}': ", mml_to_string(frac))) );
682
34
            }
683
    
684
            // everything has been verified, so we can just get the pieces and ignore potential error results
685
34
            let children = frac.children();
686
34
            let num = children[0].element().unwrap();
687
34
            let num =   get_text_from_element( num );
688
34
            let denom = children[1].element().unwrap();
689
34
            let denom = get_text_from_element( denom );
690
34
            let mut answer = num.clone() + " ";
691
34
            answer += &match ToOrdinal::convert(&denom, true, num!="1") {
692
0
                None => denom,
693
34
                Some(ord) => ord,
694
            };
695
696
34
            return Ok( Value::String( answer ) )
697
        } else {
698
0
            return Err( Error::Other( "ToCommonFraction -- argument is not an element".to_string()) );
699
        }
700
34
    }
701
}
702
703
struct Min;
704
/**
705
 * Returns true the smallest of the two args
706
 * @param(num1) 
707
 * @param(num2)
708
 */
709
 impl Function for Min {
710
711
0
    fn evaluate<'d>(&self,
712
0
                        _context: &context::Evaluation<'_, 'd>,
713
0
                        args: Vec<Value<'d>>)
714
0
                        -> Result<Value<'d>, Error>
715
    {
716
0
        let mut args = Args(args);
717
0
        args.exactly(2)?;
718
0
        let num1 = args.pop_number()?;
719
0
        let num2 = args.pop_number()?;
720
0
        return Ok( Value::Number( num1.min(num2) ) );
721
0
    }
722
}
723
724
struct Max;
725
726
impl Function for Max {
727
728
0
    fn evaluate<'d>(&self,
729
0
                        _context: &context::Evaluation<'_, 'd>,
730
0
                        args: Vec<Value<'d>>)
731
0
                        -> Result<Value<'d>, Error>
732
    {
733
0
        let mut args = Args(args);
734
0
        args.exactly(2)?;
735
0
        let num1 = args.pop_number()?;
736
0
        let num2 = args.pop_number()?;
737
0
        return Ok( Value::Number( num1.max(num2) ) );
738
0
    }
739
}
740
741
742
struct BaseNode;
743
/**
744
 * Returns true if the node is a large op
745
 * @param(node)     -- node(s) to test -- should be an <mo>
746
 */
747
 impl BaseNode {
748
    /// Recursively find the base node
749
    /// The base node of a non scripted element is the element itself
750
1.26k
    fn base_node(node: Element) -> Element {
751
1.26k
        let name = node.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(node));
752
1.26k
        if ["msub", "msup", "msubsup", "munder", "mover", "munderover", "mmultiscripts"].contains(&name) {
753
97
            return BaseNode::base_node(as_element(node.children()[0]));
754
        } else {
755
1.16k
            return node;
756
        }
757
1.26k
    }
758
 }
759
 impl Function for BaseNode {
760
761
1.16k
    fn evaluate<'d>(&self,
762
1.16k
                        _context: &context::Evaluation<'_, 'd>,
763
1.16k
                        args: Vec<Value<'d>>)
764
1.16k
                        -> Result<Value<'d>, Error>
765
    {
766
1.16k
        let mut args = Args(args);
767
1.16k
        args.exactly(1)
?0
;
768
1.16k
        let node = validate_one_node(args.pop_nodeset()
?0
, "BaseNode")
?0
;
769
1.16k
        if let Node::Element(e) = node {
770
1.16k
            let mut node_set = Nodeset::new();
771
1.16k
            node_set.add(BaseNode::base_node(e));
772
1.16k
            return Ok( Value::Nodeset(node_set) );
773
        } else {
774
            // xpath is something besides an element, so no match
775
0
            return Err( Error::Other("Argument other than a node given to BaseNode".to_string()) );
776
        }
777
1.16k
    }
778
}
779
780
781
struct IfThenElse;
782
 impl Function for IfThenElse {
783
36.2k
    fn evaluate<'d>(&self,
784
36.2k
                        _context: &context::Evaluation<'_, 'd>,
785
36.2k
                        args: Vec<Value<'d>>)
786
36.2k
                        -> Result<Value<'d>, Error>
787
    {
788
36.2k
        let args = Args(args);
789
36.2k
        args.exactly(3)
?0
;
790
36.2k
        let if_val = &args[0];
791
36.2k
        let then_val = &args[1];
792
36.2k
        let else_val = &args[2];
793
36.2k
        let is_true = match if_val {
794
14.5k
            Value::Nodeset(nodes) => nodes.size() > 0,
795
21.7k
            Value::Boolean(b) => *b,
796
0
            Value::Number(f) => *f != 0.0,
797
0
            Value::String(s) => !s.is_empty(),
798
        };
799
36.2k
        return Ok( if is_true {
then_val4.13k
.
clone4.13k
()} else {
else_val32.1k
.
clone32.1k
()});
800
36.2k
    }
801
}
802
803
804
struct Debug;
805
/**
806
 * Prints it's argument along with the string that was evaluated
807
 * @param(node)     -- node(s) to be evaluated/printed
808
 * @param(string)   -- string showing what is being evaluated
809
 */
810
 impl Function for Debug {
811
812
348
    fn evaluate<'d>(&self,
813
348
                        _context: &context::Evaluation<'_, 'd>,
814
348
                        args: Vec<Value<'d>>)
815
348
                        -> Result<Value<'d>, Error>
816
    {
817
348
        let mut args = Args(args);
818
348
        args.exactly(2)
?0
;
819
348
        let xpath_str = args.pop_string()
?0
;
820
348
        let eval_result = &args[0];
821
348
        debug!("  -- Debug: value of '{xpath_str}' is ");
822
348
        match eval_result {
823
78
            Value::Nodeset(nodes) => {
824
78
                if nodes.size() == 0 {
825
0
                    debug!("0 nodes (false)");
826
                } else {
827
78
                    let singular = nodes.size()==1;
828
78
                    debug!("{} node{}. {}:", 
nodes0
.
size0
(),
829
0
                        if singular {""} else {"s"},
830
0
                        if singular {"Node is"} else {"Nodes are"});
831
78
                    nodes.document_order()
832
78
                        .iter()
833
78
                        .enumerate()
834
78
                        .for_each(|(i, node)| {
835
78
                            match node {
836
78
                                Node::Element(mathml) => debug!("#{}:\n{}",
837
0
                                        i, mml_to_string(*mathml)),
838
0
                                _ => debug!("'{node:?}'"),
839
                            }   
840
78
                        })    
841
                }
842
            },
843
270
            _ => debug!("'{eval_result:?}'"),
844
        }
845
348
        return Ok( eval_result.clone() );
846
348
    }
847
}
848
849
850
/// Should be an internal structure for implementation of the IsBracketed, but it was useful in one place in a separate module.
851
/// This should probably be restructured slightly.
852
pub struct IsBracketed;
853
impl IsBracketed {
854
139k
    pub fn is_bracketed(element: Element, left: &str, right: &str, requires_comma: bool, requires_mrow: bool) -> bool {
855
        use crate::canonicalize::is_fence;
856
139k
        if requires_mrow && 
!116k
is_tag116k
(element, "mrow") {
857
18.8k
            return false;
858
120k
        }
859
120k
        let children = element.children();
860
120k
        let n_children = children.len();
861
120k
        if (n_children == 0 ||
862
120k
            !left.is_empty() && 
!right.is_empty()108k
&&
n_children < 2108k
) ||
863
116k
            requires_comma && 
element.children().len() < 34.04k
{
864
            // not enough argument for there to be a match
865
4.44k
            return false;
866
115k
        }
867
868
115k
        let first_child = as_element(children[0]);
869
115k
        let last_child = as_element(children[children.len()-1]);
870
        // debug!("first_child: {}", crate::pretty_print::mml_to_string(first_child));
871
        // debug!("last_child: {}", crate::pretty_print::mml_to_string(last_child));
872
115k
        if (left.is_empty()  && (
name(first_child) != "mo"11.2k
||
!is_fence(first_child)2.26k
)) ||
873
106k
           (right.is_empty() && (
name(last_child) != "mo"639
||
!is_fence(last_child)629
)) {
874
9.61k
            return false;
875
106k
        }
876
877
106k
        if !left.is_empty() && 
get_text_from_COE104k
(&children[0]) != left ||
878
6.14k
           !right.is_empty() && 
get_text_from_COE5.51k
(&
children5.51k
[children.len()-1]) != right {
879
            // left or right don't match
880
101k
            return false;
881
5.12k
        }
882
883
5.12k
        if requires_comma {
884
445
            if let ChildOfElement::Element(contents) = children[1] {
885
445
                let children = contents.children();
886
445
                if !is_tag(contents, "mrow") || 
children.len() <= 1248
{
887
197
                    return false;
888
248
                }
889
                // finally, we can check for a comma -- we might not have operands, so we to check first and second entry
890
248
                if get_text_from_COE(&children[0]).as_str() == "," {
891
1
                    return true;
892
247
                }
893
247
                if children.len() > 1 && get_text_from_COE(&children[1]).as_str() == "," {
894
133
                    return true;
895
114
                }
896
0
            }
897
114
            return false;
898
        } else {
899
4.67k
            return true;
900
        }
901
139k
    }
902
}
903
904
/**
905
 * Returns true if the node is a bracketed expr with the indicated left/right chars
906
 * node -- node(s) to test
907
 * left -- string (like "[") or empty
908
 * right -- string (like "]") or empty
909
 * requires_comma - boolean, optional (check the top level of 'node' for commas)
910
 */
911
// 'requiresComma' is useful for checking parenthesized expressions vs function arg lists and other lists
912
 impl Function for IsBracketed {
913
115k
    fn evaluate<'d>(&self,
914
115k
                        _context: &context::Evaluation<'_, 'd>,
915
115k
                        args: Vec<Value<'d>>)
916
115k
                        -> Result<Value<'d>, Error>
917
    {
918
115k
        let mut args = Args(args);
919
115k
        args.at_least(3)
?0
;
920
115k
        args.at_most(5)
?0
;
921
115k
        let mut requires_comma = false;
922
115k
        let mut requires_mrow = true;
923
115k
        if args.len() == 5 {
924
0
            requires_mrow = args.pop_boolean()?;
925
115k
        }
926
115k
        if args.len() >= 4 {
927
15
            requires_comma = args.pop_boolean()
?0
;
928
115k
        }
929
115k
        let right = args.pop_string()
?0
;
930
115k
        let left = args.pop_string()
?0
;
931
        return Ok( Value::Boolean(
932
115k
            match validate_one_node(args.pop_nodeset()
?0
, "IsBracketed") {
933
0
                Err(_) => false,  // be fault tolerant, like xpath,
934
115k
                Ok(node) => {
935
115k
                    if let Node::Element(e) = node {
936
115k
                        IsBracketed::is_bracketed(e, &left, &right, requires_comma, requires_mrow)
937
                    } else {
938
0
                        false
939
                    }
940
                }
941
            }) );
942
115k
        }
943
}
944
945
pub struct IsInDefinition;
946
impl IsInDefinition {
947
    /// Returns true if `test_str` is in `set_name`
948
    /// Returns an error if `set_name` is not defined
949
11.0k
    pub fn is_defined_in(test_str: &str, defs: &'static LocalKey<RefCell<Definitions>>, set_name: &str) -> Result<bool, Error> {
950
11.0k
        return defs.with(|definitions| {
951
11.0k
            if let Some(
set11.0k
) = definitions.borrow().get_hashset(set_name) {
952
11.0k
                return Ok( set.contains(test_str) );
953
12
            }
954
12
            if let Some(hashmap) = definitions.borrow().get_hashmap(set_name) {
955
12
                return Ok( hashmap.contains_key(test_str) );
956
0
            }
957
0
            return Err( Error::Other( format!("\n  IsInDefinition: '{set_name}' is not defined in definitions.yaml") ) );
958
11.0k
        });
959
11.0k
    }
960
}
961
962
/**
963
 * Returns true if the text is contained in the set defined in Speech or Braille.
964
 * element/string -- element (converted to string)/string to test
965
 * speech or braille
966
 * set_name -- the set in which the string is to be searched
967
 */
968
// 'requiresComma' is useful for checking parenthesized expressions vs function arg lists and other lists
969
 impl Function for IsInDefinition {
970
12.0k
    fn evaluate<'d>(&self,
971
12.0k
                        _context: &context::Evaluation<'_, 'd>,
972
12.0k
                        args: Vec<Value<'d>>)
973
12.0k
                        -> Result<Value<'d>, Error>
974
    {
975
12.0k
        let mut args = Args(args);
976
        // FIX: temporarily accept two args as assume SPEECH_DEFINITIONS until the Rule files are fixed
977
12.0k
        args.at_least(2)
?0
;
978
12.0k
        args.at_most(3)
?0
;
979
12.0k
        let set_name = args.pop_string()
?0
;
980
        // FIX: this (len == 1) is temporary until all the usages are switched to the (new) 3-arg form
981
12.0k
        let definitions = if args.len() == 2 {
982
10.4k
            match args.pop_string()
?0
.as_str() {
983
10.4k
                "Speech" => 
&SPEECH_DEFINITIONS1.35k
,
984
9.09k
                "Braille" => &BRAILLE_DEFINITIONS,
985
0
                _ => return Err( Error::Other("IsInDefinition:: second argument must be either 'Speech' or 'Braille'".to_string()) )
986
            }
987
        } else {
988
1.61k
            &SPEECH_DEFINITIONS
989
        };
990
12.0k
        match &args[0] {
991
5.04k
            Value::String(str) => return match IsInDefinition::is_defined_in(str, definitions, &set_name) {
992
5.04k
                Ok(result) => Ok( Value::Boolean( result ) ),
993
0
                Err(e) => Err(e),
994
            },
995
7.02k
            Value::Nodeset(nodes) => {
996
7.02k
                return if nodes.size() == 0 {
997
0
                    Ok( Value::Boolean(false) )    // trivially not in definition
998
                } else {
999
7.02k
                    let node = validate_one_node(nodes.clone(), "IsInDefinition")
?0
;
1000
7.02k
                    if let Node::Element(e) = node {
1001
7.02k
                        let text = get_text_from_element(e);
1002
7.02k
                        if text.is_empty() {
1003
979
                            Ok( Value::Boolean(false) )
1004
                        } else {
1005
6.04k
                            match IsInDefinition::is_defined_in(&text, definitions, &set_name) {
1006
6.04k
                                Ok(result) => Ok( Value::Boolean( result ) ),
1007
0
                                Err(e) => Err(e),
1008
                            }          
1009
                        }
1010
                    } else {
1011
0
                        Ok( Value::Boolean(false))       // trivially not in definition                    }
1012
                    }
1013
                }
1014
            },
1015
0
            _ => Err( Error::Other("IsInDefinition:: neither a node nor a string is passed for first argument".to_string()) ),
1016
        }
1017
12.0k
    }
1018
}
1019
1020
1021
pub struct DefinitionValue;
1022
impl DefinitionValue {
1023
    /// Returns the value associated with `key` in `set_name`. If `key` is not in `set_name`, an empty string is returned
1024
    /// Returns an error if `set_name` is not defined
1025
12.7k
    pub fn definition_value(key: &str, defs: &'static LocalKey<RefCell<Definitions>>, set_name: &str) -> Result<String, Error> {
1026
12.7k
        return defs.with(|definitions| {
1027
12.7k
            if let Some(map) = definitions.borrow().get_hashmap(set_name) {
1028
12.7k
                return Ok( match map.get(key) {
1029
5.64k
                    None => "".to_string(),
1030
7.09k
                    Some(str) => str.clone(),
1031
                });
1032
0
            }
1033
0
            return Err( Error::Other( format!("\n  DefinitionValue: '{set_name}' is not defined in definitions.yaml") ) );
1034
12.7k
        });
1035
12.7k
    }
1036
}
1037
1038
/**
1039
 * Returns true if the node is a bracketed expr with the indicated left/right chars
1040
 * element/string -- element (converted to string)/string to test
1041
 * left -- string (like "[") or empty
1042
 * right -- string (like "]") or empty
1043
 * requires_comma - boolean, optional (check the top level of 'node' for commas
1044
 */
1045
// 'requiresComma' is useful for checking parenthesized expressions vs function arg lists and other lists
1046
 impl Function for DefinitionValue {
1047
13.1k
    fn evaluate<'d>(&self,
1048
13.1k
                        _context: &context::Evaluation<'_, 'd>,
1049
13.1k
                        args: Vec<Value<'d>>)
1050
13.1k
                        -> Result<Value<'d>, Error>
1051
    {
1052
13.1k
        let mut args = Args(args);
1053
13.1k
        args.exactly(3)
?0
;
1054
13.1k
        let set_name = args.pop_string()
?0
;
1055
13.1k
        let definitions = match args.pop_string()
?0
.as_str() {
1056
13.1k
            "Speech" => 
&SPEECH_DEFINITIONS13.1k
,
1057
12
            "Braille" => &BRAILLE_DEFINITIONS,
1058
0
            _ => return Err( Error::Other("IsInDefinition:: second argument must be either 'Speech' or 'Braille'".to_string()) )
1059
        };
1060
13.1k
        match &args[0] {
1061
5.04k
            Value::String(str) => return match DefinitionValue::definition_value(str, definitions, &set_name) {
1062
5.04k
                Ok(result) => Ok( Value::String( result ) ),
1063
0
                Err(e) => Err(e),
1064
            },
1065
8.10k
            Value::Nodeset(nodes) => {
1066
8.10k
                return if nodes.size() == 0 {
1067
0
                    Ok( Value::String("".to_string()) )    // trivially not in definition
1068
                } else {
1069
8.10k
                    let node = validate_one_node(nodes.clone(), "DefinitionValue")
?0
;
1070
8.10k
                    if let Node::Element(
e8.10k
) = node {
1071
8.10k
                        let text = get_text_from_element(e);
1072
8.10k
                        if text.is_empty() {
1073
410
                            Ok( Value::String("".to_string()) )
1074
                        } else {
1075
7.69k
                            match DefinitionValue::definition_value(&text, definitions, &set_name) {
1076
7.69k
                                Ok(result) => Ok( Value::String( result ) ),
1077
0
                                Err(e) => Err(e),
1078
                            }          
1079
                        }
1080
                    } else {
1081
3
                        Ok( Value::String("".to_string()) )       // trivially not in definition                    }
1082
                    }
1083
                }
1084
            },
1085
0
            _ => Err( Error::Other("DefinitionValue:: neither a node nor a string is passed for first argument".to_string()) ),
1086
        }
1087
13.1k
    }
1088
}
1089
1090
pub struct DistanceFromLeaf;
1091
impl DistanceFromLeaf {
1092
240
    fn distance(element: Element, use_left_side: bool, treat_2d_elements_as_tokens: bool) -> usize {
1093
        // FIX: need to handle char level (i.e., chars in a leaf element)
1094
240
        let mut element = element;
1095
240
        let mut distance = 1;
1096
        loop {
1097
            // debug!("distance={} -- element: {}", distance, mml_to_string(element));
1098
361
            if MATHML_LEAF_NODES.contains(element.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(element))) {
1099
199
                return distance;
1100
162
            }
1101
162
            if treat_2d_elements_as_tokens && (
IsNode::is_2D60
(
element60
) ||
!IsNode::is_mathml(element)20
) {
1102
41
                return distance;
1103
121
            }
1104
121
            let children = element.children();
1105
121
            assert!(!children.is_empty());
1106
121
            element = as_element( if use_left_side {
children[0]0
} else {children[children.len()-1]} );
1107
121
            distance += 1;
1108
        }
1109
240
    }
1110
}
1111
1112
/**
1113
 * Returns distance from the current node to the leftmost/rightmost leaf (if char, then = 0, if token, then 1).
1114
 * If the node is a bracketed expr with the indicated left/right chars
1115
 * node -- node(s) to test
1116
 * left_side -- (bool) traverse leftmost child to leaf
1117
 * treat2D_elements_as_tokens -- (bool) 2D notations such as fractions are treated like leaves 
1118
 */
1119
impl Function for DistanceFromLeaf {
1120
240
    fn evaluate<'d>(&self,
1121
240
                        _context: &context::Evaluation<'_, 'd>,
1122
240
                        args: Vec<Value<'d>>)
1123
240
                        -> Result<Value<'d>, Error>
1124
    {
1125
240
        let mut args = Args(args);
1126
240
        args.exactly(3)
?0
;
1127
240
        let treat_2d_elements_as_tokens = args.pop_boolean()
?0
;
1128
240
        let use_left_side = args.pop_boolean()
?0
;
1129
240
        let node = validate_one_node(args.pop_nodeset()
?0
, "DistanceFromLeaf")
?0
;
1130
240
        if let Node::Element(e) = node {
1131
240
            return Ok( Value::Number( DistanceFromLeaf::distance(e, use_left_side, treat_2d_elements_as_tokens) as f64) );
1132
0
        }
1133
1134
        // FIX: should having a non-element be an error instead??
1135
0
        return Err(Error::Other(format!("DistanceFromLeaf: first arg '{node:?}' is not a node")));
1136
240
    }
1137
}
1138
1139
1140
1141
pub struct EdgeNode;
1142
impl EdgeNode {
1143
    // Return the root of the ancestor tree if we are at the left/right side of a path from that to 'element'
1144
2.09k
    fn edge_node<'a>(element: Element<'a>, use_left_side: bool, stop_node_name: &str) -> Option<Element<'a>> {
1145
2.09k
        let element_name = element.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(element));
1146
2.09k
        if element_name == "math" {
1147
86
            return Some(element);
1148
2.00k
        };
1149
1150
2.00k
        let parent = get_parent(element);   // there is always a "math" node
1151
2.00k
        let parent_name = parent.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(parent));
1152
1153
        // first check to see if we have the special case of punctuation as last child of math/mrow element
1154
        // it only matters if we are looking at the right edge
1155
1156
        // debug!("EdgeNode: there are {} preceding siblings",element.preceding_siblings().len() );
1157
2.00k
        if use_left_side  && 
!element.preceding_siblings().is_empty()1.15k
{// not at left side
1158
587
            return None;
1159
1.41k
        };
1160
1161
1.41k
        if !use_left_side && 
!element.following_siblings().is_empty()848
{ // not at right side
1162
            // check for the special case that the parent is an mrow and the grandparent is <math> and we have punctuation
1163
574
            let grandparent = get_parent(parent);
1164
574
            let grandparent_name = grandparent.attribute_value(MATHML_FROM_NAME_ATTR).unwrap_or(name(grandparent));
1165
574
            if grandparent_name == "math" &&
1166
105
               parent_name == "mrow" && 
parent.children().len() == 289
{ // right kind of mrow
1167
11
                let text = get_text_from_element( as_element(parent.children()[1]) );
1168
11
                if text == "," || text == "." || 
text == ";"10
||
text == "?"10
{
1169
1
                    return Some(grandparent);
1170
10
                }
1171
563
            }
1172
573
             return None;
1173
843
        };
1174
1175
        // at an edge -- check to see the parent is desired root
1176
843
        if parent_name == stop_node_name || 
1177
735
           (stop_node_name == "2D" && 
IsNode::is_2D338
(
parent338
)) {
1178
176
            return Some(parent);
1179
667
        };
1180
        
1181
        // debug!("EdgeNode: recurse to {}", parent_name);
1182
667
        return EdgeNode::edge_node(parent, use_left_side, stop_node_name)
1183
2.09k
    }
1184
}
1185
1186
// EdgeNode(node, "left"/"right", stopNodeName)
1187
//    -- returns the stopNode if at left/right edge of named ancestor node. "stopNodeName' can also be "2D'
1188
//       returns original node match isn't found
1189
//  Note: if stopNodeName=="math", then punctuation is taken into account since it isn't really part of the math
1190
impl Function for EdgeNode {
1191
1.41k
    fn evaluate<'d>(&self,
1192
1.41k
                        _context: &context::Evaluation<'_, 'd>,
1193
1.41k
                        args: Vec<Value<'d>>)
1194
1.41k
                        -> Result<Value<'d>, Error>
1195
    {
1196
1.41k
        let mut args = Args(args);
1197
1.41k
        args.exactly(3)
?0
;
1198
1.41k
        let stop_node_name = args.pop_string()
?0
;
1199
1.41k
        let use_left_side = args.pop_string()
?0
.to_lowercase() == "left";
1200
1.41k
        let node = validate_one_node(args.pop_nodeset()
?0
, "EdgeNode")
?0
;
1201
1.41k
        if let Node::Element(e) = node {
1202
1.41k
            let result = match EdgeNode::edge_node(e, use_left_side, &stop_node_name) {
1203
260
                Some(found) => found,
1204
1.15k
                None => e,
1205
            };
1206
1.41k
            let mut node_set = Nodeset::new();
1207
1.41k
            node_set.add(result);
1208
1.41k
            return Ok( Value::Nodeset(node_set) );
1209
0
        }
1210
1211
        // FIX: should having a non-element be an error instead??
1212
0
        return Err(Error::Other(format!("EdgeNode: first arg '{node:?}' is not a node")));
1213
1.41k
    }
1214
}
1215
1216
pub struct SpeakIntentName;
1217
/// SpeakIntentName(intent, verbosity)
1218
///   Returns a string corresponding to the intent name with the indicated verbosity
1219
impl Function for SpeakIntentName {
1220
340
    fn evaluate<'d>(&self,
1221
340
                        _context: &context::Evaluation<'_, 'd>,
1222
340
                        args: Vec<Value<'d>>)
1223
340
                        -> Result<Value<'d>, Error>
1224
    {
1225
340
        let mut args = Args(args);
1226
340
        args.exactly(3)
?0
;
1227
340
        let fixity = args.pop_string()
?0
;
1228
340
        let verbosity = args.pop_string()
?0
;
1229
340
        let intent_name = args.pop_string()
?0
;
1230
340
        return Ok( Value::String(crate::infer_intent::intent_speech_for_name(&intent_name, &verbosity, &fixity)) );
1231
340
    }
1232
}
1233
1234
pub struct GetBracketingIntentName;
1235
/// GetBracketingIntentName(name, verbosity, at_start_or_end)
1236
///   Returns a potentially empty string to use to bracket an intent expression (start foo... end foo)
1237
/// 
1238
impl GetBracketingIntentName {
1239
61
    fn bracketing_words(intent_name: &str, verbosity: &str, fixity: &str, at_start: bool) -> String {
1240
61
        crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
1241
61
            let definitions = definitions.borrow();
1242
61
            if let Some(
intent_name_pattern57
) = definitions.get_hashmap("IntentMappings").unwrap().get(intent_name) {
1243
                // Split the pattern is: fixity-def [|| fixity-def]*
1244
                //   fixity-def := fixity=open; verbosity; close
1245
                //   verbosity := terse | medium | verbose
1246
68
                if let Some(
matched_intent57
) =
intent_name_pattern.split("||")57
.
find57
(|&entry| entry.trim().starts_with(fixity)) {
1247
57
                    let (_, matched_intent) = matched_intent.split_once("=").unwrap_or_default();
1248
57
                    let parts = matched_intent.trim().split(";").collect::<Vec<&str>>();
1249
57
                    if parts.len() == 1 {
1250
30
                        return "".to_string();
1251
27
                    }
1252
27
                    if parts.len() != 3 {
1253
0
                        error!("Intent '{}' has {} ';' separated parts, should have 3", intent_name, parts.len());
1254
0
                        return "".to_string();
1255
27
                    }
1256
27
                    let mut speech = (if at_start {
parts[0]4
} else {
parts[2]23
}).split(":").collect::<Vec<&str>>();
1257
27
                    match speech.len() {
1258
20
                        1 => return speech[0].to_string(),
1259
                        2 | 3 => {
1260
7
                            if speech.len() == 2 {
1261
0
                                warn!("Intent '{intent_name}'  has only two ':' separated parts, but should have three");
1262
0
                                speech.push(speech[1]);
1263
7
                            }
1264
7
                            let bracketing_words = match verbosity {
1265
7
                                "Terse" => 
speech[0]0
,
1266
7
                                "Medium" => speech[1],
1267
0
                                _ => speech[2],
1268
                            };
1269
7
                            return bracketing_words.to_string();
1270
                        },
1271
                        _ => {
1272
0
                            error!("Intent '{}' has too many ({}) operator names, should only have 2", intent_name, speech.len());
1273
                        },
1274
                    }
1275
0
                }   
1276
4
            };
1277
4
            return "".to_string();
1278
61
        })
1279
61
    }
1280
}
1281
1282
impl Function for GetBracketingIntentName {
1283
61
    fn evaluate<'d>(&self,
1284
61
                        _context: &context::Evaluation<'_, 'd>,
1285
61
                        args: Vec<Value<'d>>)
1286
61
                        -> Result<Value<'d>, Error>
1287
    {
1288
61
        let mut args = Args(args);
1289
61
        args.exactly(4)
?0
;
1290
61
        let start_or_end = args.pop_string()
?0
;
1291
61
        if start_or_end != "start" && 
start_or_end != "end"57
{
1292
0
            return Err( Error::Other("GetBracketingIntentName: first argument must be either 'start' or 'end'".to_string()) );
1293
61
        }
1294
61
        let fixity = args.pop_string()
?0
;
1295
61
        let verbosity = args.pop_string()
?0
;
1296
61
        let name = args.pop_string()
?0
;
1297
61
        return Ok( Value::String(GetBracketingIntentName:: bracketing_words(&name, &verbosity, &fixity, start_or_end == "start")) );
1298
61
    }
1299
}
1300
1301
pub struct GetNavigationPartName;
1302
/// GetNavigationPartName(name, index)
1303
/// Returns the name to use to speak the part of a navigation expression (e.g., 'numerator', 'denominator', 'base', 'exponent', ...).
1304
/// If there is no match, an empty string is returned.
1305
/// 'index' is 0-based
1306
/// 
1307
impl GetNavigationPartName {
1308
129
    fn navigation_part_name(intent_name: &str, index: usize) -> String {
1309
129
        crate::definitions::SPEECH_DEFINITIONS.with(|definitions| {
1310
129
            let definitions = definitions.borrow();
1311
129
            if let Some(navigation_names) = definitions.get_hashmap("NavigationParts") &&
1312
129
               let Some(
nav_part_names105
) = navigation_names.get(intent_name) {
1313
                    // Split the pattern is: part [; part]*
1314
105
                    if let Some(part_name) = nav_part_names.trim().split(";").nth(index) {
1315
105
                        return part_name.trim().to_string();
1316
0
                    }
1317
24
                }
1318
24
            return "".to_string();
1319
129
        })
1320
129
    }
1321
}
1322
1323
impl Function for GetNavigationPartName {
1324
129
    fn evaluate<'d>(&self,
1325
129
                        _context: &context::Evaluation<'_, 'd>,
1326
129
                        args: Vec<Value<'d>>)
1327
129
                        -> Result<Value<'d>, Error>
1328
    {
1329
129
        let mut args = Args(args);
1330
129
        args.exactly(2)
?0
;
1331
129
        let index = args.pop_number()
?0
as usize;
1332
129
        let name = args.pop_string()
?0
;
1333
129
        return Ok( Value::String(GetNavigationPartName:: navigation_part_name(&name, index)) );
1334
129
    }
1335
}
1336
1337
pub struct FontSizeGuess;
1338
/// FontSizeGuess(size_string)
1339
///   returns a guess of the size in "ems"
1340
/// Examples:
1341
///    "0.278em" -> 0.278
1342
///    ""
1343
//       returns original node match isn't found
1344
impl FontSizeGuess {
1345
224
    pub fn em_from_value(value_with_unit: &str) -> f64 {
1346
        // match one or more digits followed by a unit -- there are many more units, but they tend to be large and rarer(?)
1347
3
        static FONT_VALUE: LazyLock<Regex> = LazyLock::new(|| { Regex::new(r"(-?[0-9]*\.?[0-9]*)(px|cm|mm|Q|in|ppc|pt|ex|em|rem)").unwrap() });
1348
224
        let cap = FONT_VALUE.captures(value_with_unit);
1349
224
        if let Some(
cap200
) = cap {
1350
200
            if cap.len() == 3 {
1351
200
                let multiplier = match &cap[2] {    // guess based on 12pt font to convert to ems
1352
200
                    "px" => 
1.0/12.00
,
1353
200
                    "cm" => 
2.370
,
1354
200
                    "mm" => 
0.2370
,
1355
200
                    "Q" => 
0.0590
, // 1/4 mm
1356
200
                    "in" => 
6.0223
,
1357
177
                    "pc" => 
1.00
,
1358
177
                    "pt" => 
1.0/12.06
,
1359
171
                    "ex" => 
0.50
,
1360
171
                    "em" => 1.0,
1361
0
                    "rem" => 16.0/12.0,
1362
0
                    default => {debug!("unit='{default}'"); 10.0}
1363
                };
1364
                // debug!("FontSizeGuess: {}->{}, val={}, multiplier={}", value_with_unit, value*multiplier, value, multiplier);
1365
200
                return cap[1].parse::<f64>().unwrap_or(0.0) * multiplier;
1366
            }  else {
1367
0
                return 0.0;             // something bad happened
1368
            }
1369
        }else {
1370
24
            let multiplier = match value_with_unit {    // guess based on 12pt font to convert to ems
1371
24
                "veryverythinspace" => 
1.0/18.00
,
1372
24
                "verythinspace" => 
2.0/18.00
,
1373
24
                "thinspace" => 
3.0/18.00
,
1374
24
                "mediumspace" => 
4.0/18.00
,
1375
24
                "thickspace" => 
5.0/18.00
,
1376
24
                "verythickspace" => 
6.0/18.00
,
1377
24
                "veryverythickspace" => 
7.0/18.00
,
1378
24
                _ => 0.0,
1379
            };
1380
24
            return multiplier;
1381
        }
1382
224
    }
1383
}
1384
impl Function for FontSizeGuess {
1385
0
    fn evaluate<'d>(&self,
1386
0
                        _context: &context::Evaluation<'_, 'd>,
1387
0
                        args: Vec<Value<'d>>)
1388
0
                        -> Result<Value<'d>, Error>
1389
    {
1390
0
        let mut args = Args(args);
1391
0
        args.exactly(1)?;
1392
0
        let value_with_unit = args.pop_string()?;
1393
0
        let em_value = FontSizeGuess::em_from_value(&value_with_unit);
1394
0
        return Ok( Value::Number(em_value) );
1395
0
    }
1396
}
1397
1398
pub struct ReplaceAll;
1399
/// ReplaceAll(haystack, needle, replacement)
1400
///   Returns a string with all occurrences of 'needle' replaced with 'replacement'
1401
impl Function for ReplaceAll {
1402
0
    fn evaluate<'d>(&self,
1403
0
                        _context: &context::Evaluation<'_, 'd>,
1404
0
                        args: Vec<Value<'d>>)
1405
0
                        -> Result<Value<'d>, Error>
1406
    {
1407
0
        let mut args = Args(args);
1408
0
        args.exactly(3)?;
1409
0
        let replacement = args.pop_string()?;
1410
0
        let needle = args.pop_string()?;
1411
0
        let haystack = args.pop_string()?;
1412
0
        return Ok( Value::String(haystack.replace(&needle, &replacement)) );
1413
0
    }
1414
}
1415
1416
/// Add all the functions defined in this module to `context`.
1417
22.7k
pub fn add_builtin_functions(context: &mut Context) {
1418
22.7k
    context.set_function("NestingChars", crate::braille::NemethNestingChars);
1419
22.7k
    context.set_function("BrailleChars", crate::braille::BrailleChars);
1420
22.7k
    context.set_function("NeedsToBeGrouped", crate::braille::NeedsToBeGrouped);
1421
22.7k
    context.set_function("IsNode", IsNode);
1422
22.7k
    context.set_function("ToOrdinal", ToOrdinal);
1423
22.7k
    context.set_function("ToCommonFraction", ToCommonFraction);
1424
22.7k
    context.set_function("IsBracketed", IsBracketed);
1425
22.7k
    context.set_function("IsInDefinition", IsInDefinition);
1426
22.7k
    context.set_function("DefinitionValue", DefinitionValue);
1427
22.7k
    context.set_function("BaseNode", BaseNode);
1428
22.7k
    context.set_function("IfThenElse", IfThenElse);
1429
22.7k
    context.set_function("IFTHENELSE", IfThenElse);
1430
22.7k
    context.set_function("DistanceFromLeaf", DistanceFromLeaf);
1431
22.7k
    context.set_function("EdgeNode", EdgeNode);
1432
22.7k
    context.set_function("SpeakIntentName", SpeakIntentName);
1433
22.7k
    context.set_function("GetBracketingIntentName", GetBracketingIntentName);
1434
22.7k
    context.set_function("GetNavigationPartName", GetNavigationPartName);
1435
22.7k
    context.set_function("DEBUG", Debug);
1436
1437
    // Not used: remove??
1438
22.7k
    context.set_function("min", Min);       // missing in xpath 1.0
1439
22.7k
    context.set_function("max", Max);       // missing in xpath 1.0
1440
22.7k
    context.set_function("FontSizeGuess", FontSizeGuess);
1441
22.7k
    context.set_function("ReplaceAll", ReplaceAll);
1442
22.7k
}
1443
1444
1445
#[cfg(test)]
1446
mod tests {
1447
    use super::*;
1448
    use sxd_document::parser;
1449
    use crate::interface::{trim_element, get_element};
1450
1451
1452
4
    fn init_word_list() {
1453
4
        crate::interface::set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1454
4
        let result = crate::definitions::read_definitions_file(true);
1455
4
        if let Err(
e0
) = result {
1456
0
            panic!("unable to read 'Rules/Languages/en/definitions.yaml\n{e}");
1457
4
        }
1458
4
    }
1459
1460
    #[test]
1461
1
    fn ordinal_one_digit() {
1462
1
        init_word_list();
1463
1
        assert_eq!("zeroth", ToOrdinal::convert("0", false, false).unwrap());
1464
1
        assert_eq!("second", ToOrdinal::convert("2", false, false).unwrap());
1465
1
        assert_eq!("ninth", ToOrdinal::convert("9", false, false).unwrap());
1466
1467
1
        assert_eq!("zeroth", ToOrdinal::convert("0", false, true).unwrap());
1468
1
        assert_eq!("seconds", ToOrdinal::convert("2", false, true).unwrap());
1469
1
        assert_eq!("ninths", ToOrdinal::convert("9", false, true).unwrap());
1470
1471
1
        assert_eq!("first", ToOrdinal::convert("1", true, false).unwrap());
1472
1
        assert_eq!("half", ToOrdinal::convert("2", true, false).unwrap());
1473
1
        assert_eq!("half", ToOrdinal::convert("02", true, false).unwrap());
1474
1
        assert_eq!("ninth", ToOrdinal::convert("9", true, false).unwrap());
1475
1476
1
        assert_eq!("halves", ToOrdinal::convert("2", true, true).unwrap());
1477
1
        assert_eq!("halves", ToOrdinal::convert("002", true, true).unwrap());
1478
1
        assert_eq!("ninths", ToOrdinal::convert("9", true, true).unwrap());
1479
1
    }
1480
1481
    #[test]
1482
1
    fn ordinal_two_digit() {
1483
1
        init_word_list();
1484
1
        assert_eq!("tenth", ToOrdinal::convert("10", false, false).unwrap());
1485
1
        assert_eq!("seventeenth", ToOrdinal::convert("17", false, false).unwrap());
1486
1
        assert_eq!("thirty second", ToOrdinal::convert("32", false, false).unwrap());
1487
1
        assert_eq!("fortieth", ToOrdinal::convert("40", false, false).unwrap());
1488
1489
1
        assert_eq!("tenths", ToOrdinal::convert("10", false, true).unwrap());
1490
1
        assert_eq!("sixteenths", ToOrdinal::convert("16", false, true).unwrap());
1491
1
        assert_eq!("eighty eighths", ToOrdinal::convert("88", false, true).unwrap());
1492
1
        assert_eq!("fiftieths", ToOrdinal::convert("50", false, true).unwrap());
1493
1494
1
        assert_eq!("eleventh", ToOrdinal::convert("11", true, false).unwrap());
1495
1
        assert_eq!("forty fourth", ToOrdinal::convert("44", true, false).unwrap());
1496
1
        assert_eq!("ninth", ToOrdinal::convert("9", true, false).unwrap());
1497
1
        assert_eq!("ninth", ToOrdinal::convert("00000009", true, false).unwrap());
1498
1
        assert_eq!("sixtieth", ToOrdinal::convert("60", true, false).unwrap());
1499
1500
1
        assert_eq!("tenths", ToOrdinal::convert("10", true, true).unwrap());
1501
1
        assert_eq!("tenths", ToOrdinal::convert("0010", true, true).unwrap());
1502
1
        assert_eq!("elevenths", ToOrdinal::convert("11", true, true).unwrap());
1503
1
        assert_eq!("nineteenths", ToOrdinal::convert("19", true, true).unwrap());
1504
1
        assert_eq!("twentieths", ToOrdinal::convert("20", true, true).unwrap());
1505
1
        assert_eq!("nineteenths", ToOrdinal::convert("𝟏𝟗", true, true).unwrap());
1506
1
    }
1507
1508
    #[test]
1509
1
    fn ordinal_three_digit() {
1510
1
        init_word_list();
1511
1
        assert_eq!("one hundred first", ToOrdinal::convert("101", false, false).unwrap());
1512
1
        assert_eq!("two hundred tenth", ToOrdinal::convert("210", false, false).unwrap());
1513
1
        assert_eq!("four hundred thirty second", ToOrdinal::convert("432", false, false).unwrap());
1514
1
        assert_eq!("four hundred second", ToOrdinal::convert("402", false, false).unwrap());
1515
1516
1
        assert_eq!("one hundred first", ToOrdinal::convert("101", true, false).unwrap());
1517
1
        assert_eq!("two hundred second", ToOrdinal::convert("202", true, false).unwrap());
1518
1
        assert_eq!("four hundred thirty second", ToOrdinal::convert("432", true, false).unwrap());
1519
1
        assert_eq!("five hundred third", ToOrdinal::convert("503", true, false).unwrap());
1520
1521
1
        assert_eq!("three hundred elevenths", ToOrdinal::convert("311", false, true).unwrap());
1522
1
        assert_eq!("four hundred ninety ninths", ToOrdinal::convert("499", false, true).unwrap());
1523
1
        assert_eq!("nine hundred ninetieths", ToOrdinal::convert("990", false, true).unwrap());
1524
1
        assert_eq!("six hundred seconds", ToOrdinal::convert("602", false, true).unwrap());
1525
1526
1
        assert_eq!("seven hundredths", ToOrdinal::convert("700", true, true).unwrap());
1527
1
        assert_eq!("one hundredths", ToOrdinal::convert("100", true, true).unwrap());
1528
1
        assert_eq!("eight hundred seventeenths", ToOrdinal::convert("817", true, true).unwrap());
1529
1
    }
1530
    #[test]
1531
1
    fn ordinal_large() {
1532
1
        init_word_list();
1533
1
        assert_eq!("one thousandth", ToOrdinal::convert("1000", false, false).unwrap());
1534
1
        assert_eq!("two thousand one hundredth", ToOrdinal::convert("2100", false, false).unwrap());
1535
1
        assert_eq!("thirty thousandth", ToOrdinal::convert("30000", false, false).unwrap());
1536
1
        assert_eq!("four hundred thousandth", ToOrdinal::convert("400000", false, false).unwrap());
1537
1538
1
        assert_eq!("four hundred thousandth", ToOrdinal::convert("400000", true, false).unwrap());
1539
1
        assert_eq!("five hundred thousand second", ToOrdinal::convert("500002", true, false).unwrap());
1540
1
        assert_eq!("six millionth", ToOrdinal::convert("6000000", true, false).unwrap());
1541
1
        assert_eq!("sixty millionth", ToOrdinal::convert("60000000", true, false).unwrap());
1542
1543
1
        assert_eq!("seven billionths", ToOrdinal::convert("7000000000", false, true).unwrap());
1544
1
        assert_eq!("eight trillionths", ToOrdinal::convert("8000000000000", false, true).unwrap());
1545
1
        assert_eq!("nine quadrillionths", ToOrdinal::convert("9000000000000000", false, true).unwrap());
1546
1
        assert_eq!("one quintillionth", ToOrdinal::convert("1000000000000000000", false, false).unwrap());
1547
1548
1
        assert_eq!("nine billion eight hundred seventy six million five hundred forty three thousand two hundred tenths", ToOrdinal::convert("9876543210", true, true).unwrap());
1549
1
        assert_eq!("nine billion five hundred forty three thousand two hundred tenths", ToOrdinal::convert("9000543210", true, true).unwrap());
1550
1
        assert_eq!("zeroth", ToOrdinal::convert("00000", false, false).unwrap());
1551
1
    }
1552
1553
1554
11
    fn test_is_simple(message: &'static str, mathml_str: &'static str) {
1555
    // this forces initialization
1556
11
    crate::speech::SPEECH_RULES.with(|_| true);
1557
11
        let package = parser::parse(mathml_str)
1558
11
        .expect("failed to parse XML");
1559
11
        let mathml = get_element(&package);
1560
11
        trim_element(mathml, false);
1561
11
        assert!(IsNode::is_simple(mathml), "{}", message);
1562
11
    }
1563
1564
7
    fn test_is_not_simple(message: &'static str, mathml_str: &'static str) {
1565
    // this forces initialization
1566
7
    crate::speech::SPEECH_RULES.with(|_| true);
1567
7
        let package = parser::parse(mathml_str)
1568
7
        .expect("failed to parse XML");
1569
7
        let mathml = get_element(&package);
1570
7
        trim_element(mathml, false);
1571
7
        assert!(!IsNode::is_simple(mathml), "{}", message);
1572
7
    }
1573
    #[test]
1574
1
    fn is_simple() {
1575
1
        test_is_simple("single variable", "<mi>x</mi>");
1576
1
        test_is_simple("single number", "<mn>1.2</mn>");
1577
1
        test_is_simple("negative number", "<mrow><mo>-</mo><mn>10</mn></mrow>");
1578
1
        test_is_simple("negative variable", "<mrow><mo>-</mo><mi>x</mi></mrow>");
1579
1
        test_is_simple("ordinal fraction", "<mfrac><mn>3</mn><mn>4</mn></mfrac>");
1580
1
        test_is_simple("x y", "<mrow><mi>x</mi><mo>&#x2062;</mo><mi>y</mi></mrow>");
1581
1
        test_is_simple("negative two vars", 
1582
                "<mrow><mrow><mo>-</mo><mi>x</mi></mrow><mo>&#x2062;</mo><mi>y</mi></mrow>");
1583
1
        test_is_simple("-2 x y", 
1584
                "<mrow><mrow><mo>-</mo><mn>2</mn></mrow>
1585
                             <mo>&#x2062;</mo><mi>x</mi><mo>&#x2062;</mo><mi>z</mi></mrow>");
1586
1
        test_is_simple("sin x", "<mrow><mi>sin</mi><mo>&#x2061;</mo><mi>x</mi></mrow>");
1587
1
        test_is_simple("f(x)", "<mrow><mi>f</mi><mo>&#x2061;</mo><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow>");
1588
1
        test_is_simple("f(x+y)",
1589
         "<mrow><mi>f</mi><mo>&#x2061;</mo>\
1590
            <mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></mrow>");
1591
        
1592
1
    }
1593
1594
    #[test]
1595
1
    fn is_not_simple() {
1596
1
        test_is_not_simple("multi-char variable", "<mi>rise</mi>");
1597
1
        test_is_not_simple("large ordinal fraction", "<mfrac><mn>30</mn><mn>4</mn></mfrac>");
1598
1
        test_is_not_simple("fraction with var in numerator", "<mfrac><mi>x</mi><mn>4</mn></mfrac>");
1599
1
        test_is_not_simple("square root", "<msqrt><mi>x</mi></msqrt>");
1600
1
        test_is_not_simple("subscript", "<msub><mi>x</mi><mn>4</mn></msub>");
1601
1
        test_is_not_simple("-x y z", 
1602
                "<mrow><mrow><mo>-</mo><mi>x</mi></mrow>
1603
                            <mo>&#x2062;</mo><mi>y</mi><mo>&#x2062;</mo><mi>z</mi></mrow>");
1604
1
        test_is_not_simple("C(-2,1,4)",             // github.com/NSoiffer/MathCAT/issues/199
1605
                    "<mrow><mi>C</mi><mrow><mo>(</mo><mo>−</mo><mn>2</mn><mo>,</mo><mn>1</mn><mo>,</mo><mn>4</mn><mo>)</mo></mrow></mrow>");
1606
                   
1607
1
    }
1608
1609
    #[test]
1610
1
    fn at_left_edge() {
1611
1
        let mathml = "<math><mfrac><mrow><mn>30</mn><mi>x</mi></mrow><mn>4</mn></mfrac></math>";
1612
1
        let package = parser::parse(mathml).expect("failed to parse XML");
1613
1
        let mathml = get_element(&package);
1614
1
        trim_element(mathml, false);
1615
1
        let fraction = as_element(mathml.children()[0]);
1616
1
        let mn = as_element(as_element(fraction.children()[0]).children()[0]);
1617
1
        assert_eq!(EdgeNode::edge_node(mn, true, "2D"), Some(fraction));
1618
1
        assert_eq!(EdgeNode::edge_node(mn, false, "2D"), None);
1619
1620
1
        let mi = as_element(as_element(fraction.children()[0]).children()[1]);
1621
1
        assert_eq!(EdgeNode::edge_node(mi, true, "2D"), None);
1622
1
    }
1623
1624
    #[test]
1625
1
    fn at_right_edge() {
1626
1
        let mathml = "<math><mrow><mfrac><mn>4</mn><mrow><mn>30</mn><mi>x</mi></mrow></mfrac><mo>.</mo></mrow></math>";
1627
1
        let package = parser::parse(mathml).expect("failed to parse XML");
1628
1
        let mathml = get_element(&package);
1629
1
        trim_element(mathml, false);
1630
1
        let fraction = as_element(as_element(mathml.children()[0]).children()[0]);
1631
1
        let mi = as_element(as_element(fraction.children()[1]).children()[1]);
1632
1
        assert_eq!(EdgeNode::edge_node(mi, true, "2D"), None);
1633
1
        assert_eq!(EdgeNode::edge_node(mi, false, "2D"), Some(fraction));
1634
1
        assert_eq!(EdgeNode::edge_node(mi, false, "math"), Some(mathml));
1635
1636
1
        let mn = as_element(as_element(fraction.children()[1]).children()[0]);
1637
1
        assert_eq!(EdgeNode::edge_node(mn, true, "2D"), None);
1638
1
    }
1639
}
\ No newline at end of file diff --git a/docs/llvm-cov/html/index.html b/docs/llvm-cov/html/index.html index 38c150e8..7d6bd1a4 100644 --- a/docs/llvm-cov/html/index.html +++ b/docs/llvm-cov/html/index.html @@ -1 +1 @@ -

Coverage Report

Created: 2026-04-30 05:51

Click here for information about interpreting this report.

FilenameFunction CoverageLine CoverageRegion CoverageBranch Coverage
bin/mathml2text.rs
   0.00% (0/3)
   0.00% (0/60)
   0.00% (0/129)
- (0/0)
braille.rs
  89.63% (147/164)
  84.01% (1608/1914)
  81.11% (2955/3643)
- (0/0)
canonicalize.rs
  97.40% (300/308)
  94.43% (4747/5027)
  93.14% (6712/7206)
- (0/0)
chemistry.rs
  98.68% (149/151)
  94.93% (2453/2584)
  90.78% (2933/3231)
- (0/0)
definitions.rs
  61.90% (26/42)
  86.97% (207/238)
  82.48% (433/525)
- (0/0)
infer_intent.rs
  91.94% (57/62)
  87.67% (654/746)
  79.38% (916/1154)
- (0/0)
interface.rs
  76.03% (92/121)
  73.62% (681/925)
  72.60% (1224/1686)
- (0/0)
lib.rs
  85.71% (6/7)
  78.00% (39/50)
  85.06% (74/87)
- (0/0)
main.rs
   0.00% (0/3)
   0.00% (0/108)
   0.00% (0/209)
- (0/0)
navigate.rs
  93.96% (140/149)
  87.98% (1881/2138)
  86.18% (2826/3279)
- (0/0)
prefs.rs
  83.33% (65/78)
  79.12% (610/771)
  79.49% (1601/2014)
- (0/0)
pretty_print.rs
  89.74% (35/39)
  70.12% (298/425)
  71.20% (576/809)
- (0/0)
shim_filesystem.rs
 100.00% (7/7)
  81.69% (58/71)
  75.86% (110/145)
- (0/0)
speech.rs
  67.33% (136/202)
  76.21% (1371/1799)
  74.80% (2662/3559)
- (0/0)
tts.rs
  73.17% (30/41)
  68.86% (272/395)
  61.54% (480/780)
- (0/0)
xpath_functions.rs
  95.18% (79/83)
  87.34% (911/1043)
  85.58% (1845/2156)
- (0/0)
Totals
  86.92% (1269/1460)
  86.31% (15790/18294)
  82.80% (25347/30612)
- (0/0)
Generated by llvm-cov -- llvm version 22.1.2-rust-1.95.0-stable
\ No newline at end of file +

Coverage Report

Created: 2026-05-04 09:37

Click here for information about interpreting this report.

FilenameFunction CoverageLine CoverageRegion CoverageBranch Coverage
bin/mathml2text.rs
   0.00% (0/3)
   0.00% (0/60)
   0.00% (0/129)
- (0/0)
braille.rs
  89.63% (147/164)
  84.01% (1608/1914)
  81.11% (2955/3643)
- (0/0)
canonicalize.rs
  97.40% (300/308)
  94.43% (4747/5027)
  93.14% (6712/7206)
- (0/0)
chemistry.rs
  98.68% (149/151)
  94.93% (2453/2584)
  90.78% (2933/3231)
- (0/0)
definitions.rs
  61.90% (26/42)
  86.97% (207/238)
  82.48% (433/525)
- (0/0)
infer_intent.rs
  91.94% (57/62)
  87.67% (654/746)
  79.38% (916/1154)
- (0/0)
interface.rs
  76.03% (92/121)
  73.62% (681/925)
  72.60% (1224/1686)
- (0/0)
lib.rs
  85.71% (6/7)
  78.00% (39/50)
  85.06% (74/87)
- (0/0)
main.rs
   0.00% (0/3)
   0.00% (0/108)
   0.00% (0/209)
- (0/0)
navigate.rs
  93.96% (140/149)
  87.98% (1881/2138)
  86.18% (2826/3279)
- (0/0)
prefs.rs
  83.33% (65/78)
  79.25% (615/776)
  79.66% (1606/2016)
- (0/0)
pretty_print.rs
  89.74% (35/39)
  70.12% (298/425)
  71.20% (576/809)
- (0/0)
shim_filesystem.rs
 100.00% (7/7)
  78.67% (59/75)
  74.50% (111/149)
- (0/0)
speech.rs
  67.33% (136/202)
  76.21% (1371/1799)
  74.80% (2662/3559)
- (0/0)
tts.rs
  73.17% (30/41)
  68.86% (272/395)
  61.54% (480/780)
- (0/0)
xpath_functions.rs
  95.18% (79/83)
  87.34% (911/1043)
  85.58% (1845/2156)
- (0/0)
Totals
  86.92% (1269/1460)
  86.30% (15796/18303)
  82.80% (25353/30618)
- (0/0)
Generated by llvm-cov -- llvm version 22.1.2-rust-1.95.0-stable
\ No newline at end of file