From efc26ce5df2cdbc103cee9016460f4605f6b9ff6 Mon Sep 17 00:00:00 2001 From: Brady Fomegne Date: Wed, 24 Jun 2026 21:13:03 +0100 Subject: [PATCH] perf(config): reduce memory allocation This commit reduces the memory allocation to increase the performance of reading a configuration file. It inroduces the following changes: - replaced the `insert_with_auto_capitalize` macro with a `capitalize` function - fixed a bug in the `capitalize` logic - split the `read_from_filesystem` function in `read_config` and `process_config` - reduced excessive borrowing and cloning --- config/src/lib.rs | 367 +++++++++++++++++++++++++++------------------- 1 file changed, 215 insertions(+), 152 deletions(-) diff --git a/config/src/lib.rs b/config/src/lib.rs index 6af4a8b..c816297 100644 --- a/config/src/lib.rs +++ b/config/src/lib.rs @@ -60,7 +60,7 @@ //! // Note that the auto_capitalize is enabled by default. //! assert_eq!( //! Vec::from_iter(config.extract_data().into_iter()), -//! vec![("n*".to_owned(), "ŋ".to_owned()), ("N*".to_owned(), "Ŋ".to_owned())] +//! vec![("N*".to_owned(), "Ŋ".to_owned()), ("n*".to_owned(), "ŋ".to_owned())] //! ); //! ``` @@ -222,16 +222,19 @@ struct MoreDetailedData { alias: Vec, } -macro_rules! insert_with_auto_capitalize { - ( $data: expr, $auto_capitalize: expr, $key: expr, $value: expr ) => { - $data.insert($key.to_owned(), Data::Simple($value.to_owned())); - - if $auto_capitalize && !$key.is_empty() && $key.chars().next().unwrap().is_lowercase() { - $data - .entry($key[0..1].to_uppercase() + &$key[1..]) - .or_insert(Data::Simple($value.to_uppercase())); - } - }; +// Helper function to capitalize a string. +fn capitalize(value: &str) -> Option { + let mut chars = value.chars(); + let first_char = chars.next()?; + if !first_char.is_lowercase() { + return None; + } + let mut cap_key = String::with_capacity(value.len()); + for c in first_char.to_uppercase() { + cap_key.push(c); + } + cap_key.push_str(chars.as_str()); + Some(cap_key) } impl Config { @@ -242,11 +245,79 @@ impl Config { /// Loads the configuration from a file in using a specified filesystem. pub fn from_filesystem(filepath: &Path, fs: &impl FileSystem) -> Result { + let mut data = IndexMap::new(); + #[cfg(feature = "rhai")] + let mut translators = IndexMap::new(); + let mut translation = IndexMap::new(); + let content = fs .read_to_string(filepath) .with_context(|| format!("Couldn't open file {filepath:?}."))?; - let mut config: Self = toml::from_str(&content) + let root_config: Self = toml::from_str(&content).with_context(|| { + format!("Failed to parse the root configuration file {filepath:?}.") + })?; + + let root_core = root_config.core.clone(); + + // Pass the already-parsed config directly instead of calling + // read_config, which would re-read and re-parse the root same file. + Self::process_config( + root_config, + filepath, + fs, + &mut data, + #[cfg(feature = "rhai")] + &mut translators, + &mut translation, + )?; + + Ok(Config { + core: root_core, + data: Some(data), + #[cfg(feature = "rhai")] + translators: Some(translators), + translation: Some(translation), + }) + } + + /// Reads and parses the file at `filepath`, then delegates to + /// [`Self::process_config`]. Only called for *nested* includes the root + /// file is handled by `from_filesystem` without a second I/O round-trip. + fn read_config( + filepath: &Path, + fs: &impl FileSystem, + data: &mut IndexMap, + #[cfg(feature = "rhai")] translators: &mut IndexMap, + translation: &mut IndexMap, + ) -> Result<()> { + let content = fs + .read_to_string(filepath) + .with_context(|| format!("Couldn't open file {filepath:?}."))?; + let config: Self = toml::from_str(&content) .with_context(|| format!("Failed to parse configuration file {filepath:?}."))?; + + Self::process_config( + config, + filepath, + fs, + data, + #[cfg(feature = "rhai")] + translators, + translation, + ) + } + + /// Populates `data`, `translators`, and `translation` from an + /// already-parsed [`Config`], recursively following path-valued entries + /// via [`Self::read_config`]. + fn process_config( + config: Self, + filepath: &Path, + fs: &impl FileSystem, + data: &mut IndexMap, + #[cfg(feature = "rhai")] translators: &mut IndexMap, + translation: &mut IndexMap, + ) -> Result<()> { let config_path = filepath.parent().unwrap(); let auto_capitalize = config .core @@ -254,115 +325,112 @@ impl Config { .and_then(|c| c.auto_capitalize) .unwrap_or(true); - // Data - let mut data = IndexMap::new(); - - config - .data - .unwrap_or_default() - .iter() - .try_for_each(|(key, value)| -> Result<()> { - match value { - Data::File(DataFile { path }) => { - let filepath = config_path.join(path); - let conf = Config::from_filesystem(&filepath, fs)?; - data.extend(conf.data.unwrap_or_default()); - } - Data::Simple(value) => { - insert_with_auto_capitalize!(data, auto_capitalize, key, value); - } - Data::Detailed(DetailedData { value, alias }) => { - alias.iter().chain([key.to_owned()].iter()).for_each(|key| { - insert_with_auto_capitalize!(data, auto_capitalize, key, value); - }); - } - _ => Err(anyhow!("{value:?} not allowed in the data table.")) - .with_context(|| format!("Invalid configuration file {filepath:?}."))?, - }; - Ok(()) - })?; - config.data = Some(data); - - // Translators - #[cfg(feature = "rhai")] - { - let mut translators = IndexMap::new(); - - config - .translators - .unwrap_or_default() - .into_iter() - .try_for_each(|(key, value)| -> Result<()> { - match value { - Data::File(DataFile { path }) => { - let filepath = config_path.join(path); - let conf = Config::from_filesystem(&filepath, fs)?; - translators.extend(conf.translators.unwrap_or_default()); + for (key, value) in config.data.unwrap_or_default() { + match value { + Data::File(DataFile { path }) => { + let nested = config_path.join(&path); + Self::read_config( + &nested, + fs, + data, + #[cfg(feature = "rhai")] + translators, + translation, + )?; + } + Data::Simple(value) => { + // Borrow key/value for the capitalized entry before moving + // them into the main insert below. + if auto_capitalize && !key.is_empty() { + if let Some(cap_key) = capitalize(&key) { + data.entry(cap_key) + .or_insert_with(|| Data::Simple(value.to_uppercase())); } - Data::Simple(value) => { - let filepath = config_path.join(value).to_str().unwrap().to_string(); - translators.insert(key, Data::Simple(filepath)); + } + data.insert(key, Data::Simple(value)); + } + Data::Detailed(DetailedData { value, alias }) => { + for k in alias.iter().chain(std::iter::once(&key)) { + data.insert(k.clone(), Data::Simple(value.clone())); + if auto_capitalize { + if let Some(cap_key) = capitalize(k) { + data.entry(cap_key) + .or_insert_with(|| Data::Simple(value.to_uppercase())); + } } - _ => Err(anyhow!("{value:?} not allowed in the translator table")) - .with_context(|| format!("Invalid configuration file {filepath:?}."))?, - }; - Ok(()) - })?; - config.translators = Some(translators); + } + } + _ => Err(anyhow!("{value:?} not allowed in the data table.")) + .with_context(|| format!("Invalid configuration file {filepath:?}."))?, + } } // Translation - let mut translation = IndexMap::new(); + #[cfg(feature = "rhai")] + for (key, value) in config.translators.unwrap_or_default() { + match value { + Data::File(DataFile { path }) => { + let nested = config_path.join(&path); + Self::read_config(&nested, fs, data, translators, translation)?; + } + Data::Simple(path) => { + let abs_path = config_path + .join(&path) + .into_os_string() + .into_string() + .unwrap(); + translators.insert(key, Data::Simple(abs_path)); + } + _ => Err(anyhow!("{value:?} not allowed in the translator table")) + .with_context(|| format!("Invalid configuration file {filepath:?}."))?, + } + } - config - .translation - .unwrap_or_default() - .into_iter() - .try_for_each(|(key, value)| -> Result<()> { - match value { - Data::File(DataFile { path }) => { - let filepath = config_path.join(path); - let conf = Config::from_filesystem(&filepath, fs)?; - translation.extend(conf.translation.unwrap_or_default()); - } - Data::Simple(_) | Data::Multi(_) => { - translation.insert(key, value); + for (key, value) in config.translation.unwrap_or_default() { + match value { + Data::File(DataFile { path }) => { + let nested = config_path.join(&path); + Self::read_config( + &nested, + fs, + data, + #[cfg(feature = "rhai")] + translators, + translation, + )?; + } + Data::Simple(_) | Data::Multi(_) => { + translation.insert(key, value); + } + Data::Detailed(DetailedData { value, alias }) => { + for e in alias.iter().chain(std::iter::once(&key)) { + translation.insert(e.clone(), Data::Simple(value.clone())); } - Data::Detailed(DetailedData { value, alias }) => { - alias.iter().chain([key].iter()).for_each(|e| { - translation.insert(e.to_owned(), Data::Simple(value.to_owned())); - }); + } + Data::MoreDetailed(MoreDetailedData { values, alias }) => { + for k in alias.iter().chain(std::iter::once(&key)) { + translation.insert(k.clone(), Data::Multi(values.clone())); } - Data::MoreDetailed(MoreDetailedData { values, alias }) => { - alias.iter().chain([key].iter()).for_each(|key| { - translation.insert(key.to_owned(), Data::Multi(values.to_owned())); - }); - } - }; - Ok(()) - })?; - - config.translation = Some(translation); + } + } + } - Ok(config) + Ok(()) } /// Extracts the data from the configuration. pub fn extract_data(&self) -> IndexMap { - let empty = IndexMap::default(); - - self.data - .as_ref() - .unwrap_or(&empty) - .iter() - .filter_map(|(key, value)| { - let value = match value { - Data::Simple(value) => Some(value), - _ => None, - }; - value.map(|value| (key.to_owned(), value.to_owned())) - }) - .collect() + // with_capacity avoids incremental reallocations during iteration. + let Some(data) = &self.data else { + return IndexMap::new(); + }; + let mut result = IndexMap::with_capacity(data.len()); + for (key, value) in data { + if let Data::Simple(v) = value { + result.insert(key.clone(), v.clone()); + } + } + result } /// Extracts the translators from the configuration. @@ -371,61 +439,56 @@ impl Config { self.extract_translators_using_filesystem(&StdFileSystem {}) } - /// Extracts the translators from the configuration using the specified filesystem.. + /// Extracts the translators from the configuration using the specified + /// filesystem. #[cfg(feature = "rhai")] pub fn extract_translators_using_filesystem( &self, fs: &impl FileSystem, ) -> Result> { - let empty = IndexMap::default(); + // with_capacity avoids incremental reallocations during iteration. + let Some(translators) = &self.translators else { + return Ok(IndexMap::new()); + }; let engine = Engine::new(); + let mut result = IndexMap::with_capacity(translators.len()); + + for (name, file_path) in translators { + let Data::Simple(file_path) = file_path else { + continue; + }; + let file_path = Path::new(file_path.as_str()); + let parent = file_path.parent().unwrap().to_str().unwrap(); + let header = format!(r#"const DIR = {parent:?};"#); + let body = fs + .read_to_string(file_path) + .with_context(|| format!("Couldn't open script file {file_path:?}."))?; + let ast = engine + .compile(body) + .with_context(|| format!("Failed to parse script file {file_path:?}."))?; + let ast = engine.compile(header).unwrap().merge(&ast); + result.insert(name.clone(), ast); + } - self.translators - .as_ref() - .unwrap_or(&empty) - .iter() - .filter_map(|(name, file_path)| { - let file_path = match file_path { - Data::Simple(file_path) => Some(file_path), - _ => None, - }; - - file_path.map(|file_path| { - let file_path = Path::new(&file_path); - let parent = file_path.parent().unwrap().to_str().unwrap(); - let header = format!(r#"const DIR = {parent:?};"#); - let body = fs - .read_to_string(file_path) - .with_context(|| format!("Couldn't open script file {file_path:?}."))?; - let ast = engine - .compile(body) - .with_context(|| format!("Failed to parse script file {file_path:?}."))?; - let ast = engine.compile(header).unwrap().merge(&ast); - - Ok((name.to_owned(), ast)) - }) - }) - .collect() + Ok(result) } /// Extracts the translation from the configuration. pub fn extract_translation(&self) -> IndexMap> { - let empty = IndexMap::new(); - - self.translation - .as_ref() - .unwrap_or(&empty) - .iter() - .filter_map(|(key, value)| { - let value = match value { - Data::Simple(value) => Some(vec![value.to_owned()]), - Data::Multi(value) => Some(value.to_owned()), - _ => None, - }; - - value.map(|value| (key.to_owned(), value)) - }) - .collect() + // with_capacity avoids incremental reallocations during iteration. + let Some(translation) = &self.translation else { + return IndexMap::new(); + }; + let mut result = IndexMap::with_capacity(translation.len()); + for (key, value) in translation { + let value = match value { + Data::Simple(v) => vec![v.clone()], + Data::Multi(v) => v.clone(), + _ => continue, + }; + result.insert(key.clone(), value); + } + result } }