diff --git a/src/cached_source.rs b/src/cached_source.rs index 7c758f22..9b18c0ae 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -8,8 +8,10 @@ use rustc_hash::FxHasher; use crate::{ helpers::{ - stream_and_get_source_and_map, stream_chunks_of_raw_source, - stream_chunks_of_source_map, Chunks, GeneratedInfo, StreamChunks, + stream_and_get_source_and_map, + stream_chunks_of_raw_source_with_known_ascii, + stream_chunks_of_source_map_with_known_ascii, Chunks, GeneratedInfo, + StreamChunks, }, object_pool::ObjectPool, source::SourceValue, @@ -20,6 +22,7 @@ use crate::{ struct CachedData { hash: OnceLock, size: OnceLock, + is_ascii: OnceLock, chunks: OnceLock>, columns_map: OnceLock>, line_only_map: OnceLock>, @@ -100,6 +103,15 @@ impl CachedSource { } }) } + + fn is_ascii(&self) -> bool { + *self.cache.is_ascii.get_or_init(|| { + if let Some(chunks) = self.cache.chunks.get() { + return chunks.iter().all(|chunk| chunk.is_ascii()); + } + self.inner.source().as_bytes().is_ascii() + }) + } } impl Source for CachedSource { @@ -169,16 +181,19 @@ struct CachedSourceChunks<'source> { chunks: Box, cache: Arc, source: Cow<'source, str>, + source_is_ascii: bool, } impl<'a> CachedSourceChunks<'a> { fn new(cache_source: &'a CachedSource) -> Self { let source = cache_source.source().into_string_lossy(); + let source_is_ascii = cache_source.is_ascii(); Self { chunks: cache_source.inner.stream_chunks(), cache: cache_source.cache.clone(), source, + source_is_ascii, } } } @@ -200,19 +215,21 @@ impl Chunks for CachedSourceChunks<'_> { match cell.get() { Some(map) => { if let Some(map) = map { - stream_chunks_of_source_map( + stream_chunks_of_source_map_with_known_ascii( options, object_pool, self.source.as_ref(), + self.source_is_ascii, map, on_chunk, on_source, on_name, ) } else { - stream_chunks_of_raw_source( + stream_chunks_of_raw_source_with_known_ascii( self.source.as_ref(), options, + self.source_is_ascii, on_chunk, on_source, on_name, diff --git a/src/encoder.rs b/src/encoder.rs index e435c135..6d06112a 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -5,12 +5,22 @@ const B64_CHARS: &[u8] = #[inline(always)] pub fn encode_vlq(out: &mut Vec, a: u32, b: u32) { + if a == b { + out.push(b'A'); + return; + } + let mut num = if a >= b { (a - b) << 1 } else { ((b - a) << 1) + 1 }; + if num < 0b100000 { + out.push(B64_CHARS[num as usize]); + return; + } + loop { let mut digit = num & 0b11111; num >>= 5; @@ -30,7 +40,7 @@ pub(crate) enum MappingsEncoder { } impl MappingsEncoder { - #[inline] + #[inline(always)] pub fn encode(&mut self, mapping: &Mapping) { match self { MappingsEncoder::Full(enc) => enc.encode(mapping), @@ -86,6 +96,7 @@ impl FullMappingsEncoder { } impl FullMappingsEncoder { + #[inline(always)] fn encode(&mut self, mapping: &Mapping) { if self.active_mapping && self.current_line == mapping.generated_line { // A mapping is still active @@ -137,12 +148,16 @@ impl FullMappingsEncoder { ); self.current_source_index = original.source_index; } - encode_vlq( - &mut self.mappings, - original.original_line, - self.current_original_line, - ); - self.current_original_line = original.original_line; + if original.original_line == self.current_original_line { + self.mappings.push(b'A'); + } else { + encode_vlq( + &mut self.mappings, + original.original_line, + self.current_original_line, + ); + self.current_original_line = original.original_line; + } if original.original_column == self.current_original_column { self.mappings.push(b'A'); } else { @@ -166,6 +181,7 @@ impl FullMappingsEncoder { } #[allow(unsafe_code)] + #[inline] fn drain(&mut self) -> String { unsafe { // SAFETY: The `mappings` field in the source map consists solely of ASCII characters. @@ -195,6 +211,7 @@ impl LinesOnlyMappingsEncoder { } impl LinesOnlyMappingsEncoder { + #[inline(always)] fn encode(&mut self, mapping: &Mapping) { if let Some(original) = &mapping.original { if self.last_written_line == mapping.generated_line { @@ -246,6 +263,7 @@ impl LinesOnlyMappingsEncoder { } #[allow(unsafe_code)] + #[inline] fn drain(&mut self) -> String { unsafe { // SAFETY: The `mappings` field in the source map consists solely of ASCII characters. diff --git a/src/helpers.rs b/src/helpers.rs index 4e57cd68..2e2b6996 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -94,8 +94,116 @@ pub trait StreamChunks { fn stream_chunks<'a>(&'a self) -> Box; } +/// A streamed source chunk with precomputed text metadata. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct StreamChunk<'a> { + text: &'a str, + is_ascii: bool, +} + +impl<'a> StreamChunk<'a> { + /// Create a chunk and compute its ASCII status. + #[inline] + pub fn new(text: &'a str) -> Self { + Self { + text, + is_ascii: text.is_ascii(), + } + } + + /// Create a chunk from known ASCII status. + #[inline] + pub fn with_ascii(text: &'a str, is_ascii: bool) -> Self { + debug_assert!(!is_ascii || text.is_ascii()); + Self { text, is_ascii } + } + + /// Return the chunk text. + #[inline] + pub fn as_str(&self) -> &'a str { + self.text + } + + /// Return the byte length of the chunk text. + #[inline] + pub fn len(&self) -> usize { + self.text.len() + } + + /// Return whether the chunk text is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.text.is_empty() + } + + /// Return whether the chunk text ends with a character. + #[inline] + pub fn ends_with(&self, ch: char) -> bool { + self.text.ends_with(ch) + } + + /// Return whether this chunk is ASCII. + #[inline] + pub fn is_ascii(&self) -> bool { + self.is_ascii + } + + /// Return the UTF-16 length of the chunk. + #[inline] + pub fn utf16_len(&self) -> usize { + if self.is_ascii { + self.text.len() + } else { + simd_utf16_len::utf16_len(self.text) + } + } + + /// Slice this chunk by byte offsets. + #[inline] + pub fn slice(&self, start: usize, end: usize) -> Self { + Self { + text: &self.text[start..end], + is_ascii: self.is_ascii, + } + } + + /// Slice this chunk from the start to a byte offset. + #[inline] + pub fn slice_to(&self, end: usize) -> Self { + Self { + text: &self.text[..end], + is_ascii: self.is_ascii, + } + } + + /// Slice this chunk from a byte offset to the end. + #[inline] + pub fn slice_from(&self, start: usize) -> Self { + Self { + text: &self.text[start..], + is_ascii: self.is_ascii, + } + } +} + +impl AsRef for StreamChunk<'_> { + #[inline] + fn as_ref(&self) -> &str { + self.text + } +} + +impl std::ops::Deref for StreamChunk<'_> { + type Target = str; + + #[inline] + fn deref(&self) -> &Self::Target { + self.text + } +} + /// [OnChunk] abstraction, see [webpack-sources onChunk](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13). -pub type OnChunk<'a, 'b> = &'a mut dyn FnMut(Option<&'b str>, Mapping); +pub type OnChunk<'a, 'b> = &'a mut dyn FnMut(Option>, Mapping); /// [OnSource] abstraction, see [webpack-sources onSource](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13). /// @@ -158,7 +266,32 @@ pub fn encode_mappings(mappings: impl Iterator) -> String { /// Formula: `utf16_len = byte_length - continuation_bytes + four_byte_leaders` #[inline] pub fn utf16_len(s: &str) -> usize { - simd_utf16_len::utf16_len(s) + if s.is_ascii() { + s.len() + } else { + simd_utf16_len::utf16_len(s) + } +} + +#[inline] +fn utf16_len_with_known_ascii(is_ascii: bool, s: &str) -> usize { + if is_ascii { + s.len() + } else { + simd_utf16_len::utf16_len(s) + } +} + +#[inline] +fn stream_chunk_with_known_ascii( + is_ascii: bool, + chunk: &str, +) -> StreamChunk<'_> { + if is_ascii { + StreamChunk::with_ascii(chunk, true) + } else { + StreamChunk::new(chunk) + } } pub struct PotentialTokens<'a> { @@ -242,7 +375,10 @@ pub fn split_into_lines(source: &str) -> impl Iterator { split(source, b'\n') } -pub fn get_generated_source_info(source: &str) -> GeneratedInfo { +pub(crate) fn get_generated_source_info_with_known_ascii( + source: &str, + is_ascii: bool, +) -> GeneratedInfo { let (generated_line, generated_column) = if source.ends_with('\n') { (split_into_lines(source).count() + 1, 0) } else { @@ -254,7 +390,10 @@ pub fn get_generated_source_info(source: &str) -> GeneratedInfo { last_line = line; } - (line_count.max(1), utf16_len(last_line)) + ( + line_count.max(1), + utf16_len_with_known_ascii(is_ascii, last_line), + ) }; GeneratedInfo { generated_line: generated_line as u32, @@ -268,16 +407,34 @@ pub fn stream_chunks_of_raw_source<'a>( on_chunk: OnChunk<'_, 'a>, _on_source: OnSource<'_, 'a>, _on_name: OnName<'_, 'a>, +) -> GeneratedInfo { + stream_chunks_of_raw_source_with_known_ascii( + source, + options, + source.is_ascii(), + on_chunk, + _on_source, + _on_name, + ) +} + +pub(crate) fn stream_chunks_of_raw_source_with_known_ascii<'a>( + source: &'a str, + options: &MapOptions, + is_ascii: bool, + on_chunk: OnChunk<'_, 'a>, + _on_source: OnSource<'_, 'a>, + _on_name: OnName<'_, 'a>, ) -> GeneratedInfo { if options.final_source { - return get_generated_source_info(source); + return get_generated_source_info_with_known_ascii(source, is_ascii); } let mut line = 1; let mut last_line = None; for l in split_into_lines(source) { on_chunk( - Some(l), + Some(stream_chunk_with_known_ascii(is_ascii, l)), Mapping { generated_line: line, generated_column: 0, @@ -310,6 +467,29 @@ pub fn stream_chunks_of_source_map<'a>( on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, +) -> GeneratedInfo { + stream_chunks_of_source_map_with_known_ascii( + options, + object_pool, + source, + source.is_ascii(), + source_map, + on_chunk, + on_source, + on_name, + ) +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn stream_chunks_of_source_map_with_known_ascii<'a>( + options: &MapOptions, + object_pool: &'a ObjectPool, + source: &'a str, + is_ascii: bool, + source_map: &'a SourceMap, + on_chunk: OnChunk<'_, 'a>, + on_source: OnSource<'_, 'a>, + on_name: OnName<'_, 'a>, ) -> GeneratedInfo { match options { MapOptions { @@ -317,7 +497,7 @@ pub fn stream_chunks_of_source_map<'a>( final_source: true, .. } => stream_chunks_of_source_map_final( - source, source_map, on_chunk, on_source, on_name, + source, is_ascii, source_map, on_chunk, on_source, on_name, ), MapOptions { columns: true, @@ -326,6 +506,7 @@ pub fn stream_chunks_of_source_map<'a>( } => stream_chunks_of_source_map_full( object_pool, source, + is_ascii, source_map, on_chunk, on_source, @@ -336,14 +517,14 @@ pub fn stream_chunks_of_source_map<'a>( final_source: true, .. } => stream_chunks_of_source_map_lines_final( - source, source_map, on_chunk, on_source, on_name, + source, is_ascii, source_map, on_chunk, on_source, on_name, ), MapOptions { columns: false, final_source: false, .. } => stream_chunks_of_source_map_lines_full( - source, source_map, on_chunk, on_source, on_name, + source, is_ascii, source_map, on_chunk, on_source, on_name, ), } } @@ -360,12 +541,14 @@ fn get_source<'a>(source_map: &SourceMap, source: &'a str) -> Cow<'a, str> { fn stream_chunks_of_source_map_final<'a>( source: &'a str, + source_is_ascii: bool, source_map: &'a SourceMap, on_chunk: OnChunk, on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, ) -> GeneratedInfo { - let result = get_generated_source_info(source); + let result = + get_generated_source_info_with_known_ascii(source, source_is_ascii); if result.generated_line == 1 && result.generated_column == 0 { return result; } @@ -417,6 +600,7 @@ fn stream_chunks_of_source_map_final<'a>( fn stream_chunks_of_source_map_full<'a>( object_pool: &'a ObjectPool, source: &'a str, + source_is_ascii: bool, source_map: &'a SourceMap, on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, @@ -452,7 +636,7 @@ fn stream_chunks_of_source_map_full<'a>( let final_column: u32 = if last_new_line { 0 } else { - utf16_len(last_line) + utf16_len_with_known_ascii(source_is_ascii, last_line) } as u32; let mut current_generated_line: u32 = 1; let mut current_generated_column: u32 = 0; @@ -478,11 +662,11 @@ fn stream_chunks_of_source_map_full<'a>( } if !chunk.is_empty() { on_chunk( - Some(chunk), + Some(stream_chunk_with_known_ascii(source_is_ascii, chunk)), Mapping { generated_line: mapping_line, generated_column: mapping_column, - original: active_mapping_original.clone(), + original: active_mapping_original, }, ) } @@ -495,7 +679,7 @@ fn stream_chunks_of_source_map_full<'a>( let chunk = lines[(current_generated_line - 1) as usize] .substring(current_generated_column as usize, usize::MAX); on_chunk( - Some(chunk), + Some(stream_chunk_with_known_ascii(source_is_ascii, chunk)), Mapping { generated_line: current_generated_line, generated_column: current_generated_column, @@ -510,7 +694,7 @@ fn stream_chunks_of_source_map_full<'a>( if current_generated_line as usize <= lines.len() { let chunk = &lines[(current_generated_line as usize) - 1].line; on_chunk( - Some(chunk), + Some(stream_chunk_with_known_ascii(source_is_ascii, chunk)), Mapping { generated_line: current_generated_line, generated_column: 0, @@ -527,7 +711,7 @@ fn stream_chunks_of_source_map_full<'a>( mapping.generated_column as usize, ); on_chunk( - Some(chunk), + Some(stream_chunk_with_known_ascii(source_is_ascii, chunk)), Mapping { generated_line: current_generated_line, generated_column: current_generated_column, @@ -563,12 +747,14 @@ fn stream_chunks_of_source_map_full<'a>( fn stream_chunks_of_source_map_lines_final<'a>( source: &'a str, + source_is_ascii: bool, source_map: &'a SourceMap, on_chunk: OnChunk, on_source: OnSource<'_, 'a>, _on_name: OnName, ) -> GeneratedInfo { - let result = get_generated_source_info(source); + let result = + get_generated_source_info_with_known_ascii(source, source_is_ascii); if result.generated_line == 1 && result.generated_column == 0 { return GeneratedInfo { generated_line: 1, @@ -608,6 +794,7 @@ fn stream_chunks_of_source_map_lines_final<'a>( fn stream_chunks_of_source_map_lines_full<'a>( source: &'a str, + source_is_ascii: bool, source_map: &'a SourceMap, on_chunk: OnChunk<'_, 'a>, on_source: OnSource<'_, 'a>, @@ -639,7 +826,7 @@ fn stream_chunks_of_source_map_lines_full<'a>( if current_generated_line as usize <= lines.len() { let chunk = &lines[current_generated_line as usize - 1]; on_chunk( - Some(chunk), + Some(stream_chunk_with_known_ascii(source_is_ascii, chunk)), Mapping { generated_line: current_generated_line, generated_column: 0, @@ -657,7 +844,10 @@ fn stream_chunks_of_source_map_lines_full<'a>( let chunk = &lines[current_generated_line as usize - 1]; mapping.generated_column = 0; original.name_index = None; - on_chunk(Some(chunk), mapping); + on_chunk( + Some(stream_chunk_with_known_ascii(source_is_ascii, chunk)), + mapping, + ); current_generated_line += 1; } }; @@ -667,7 +857,7 @@ fn stream_chunks_of_source_map_lines_full<'a>( while current_generated_line as usize <= lines.len() { let chunk = &lines[current_generated_line as usize - 1]; on_chunk( - Some(chunk), + Some(stream_chunk_with_known_ascii(source_is_ascii, chunk)), Mapping { generated_line: current_generated_line, generated_column: 0, @@ -686,7 +876,7 @@ fn stream_chunks_of_source_map_lines_full<'a>( let final_column = if last_new_line { 0 } else { - utf16_len(last_line) + utf16_len_with_known_ascii(source_is_ascii, last_line) } as u32; GeneratedInfo { generated_line: final_line, @@ -697,7 +887,7 @@ fn stream_chunks_of_source_map_lines_full<'a>( #[derive(Debug)] struct SourceMapLineData<'a> { pub mappings_data: Vec, - pub chunks: Vec<&'a str>, + pub chunks: Vec>, } type InnerSourceIndexValueMapping<'a> = @@ -813,7 +1003,7 @@ pub fn stream_chunks_of_combined_source_map<'a>( let inner_source_index = inner_source_index as u32; // Check for an identity mapping // where we are allowed to adjust the original column - let inner_chunk = &chunks[idx]; + let inner_chunk = chunks[idx]; let inner_generated_column = mappings_data[mi]; let location_in_chunk = original_column - inner_generated_column; if location_in_chunk > 0 { @@ -846,6 +1036,7 @@ pub fn stream_chunks_of_combined_source_map<'a>( if let Some(original_chunk) = original_chunk { if original_chunk.len() <= inner_chunk.len() && inner_chunk + .as_str() .get(..original_chunk.len()) .is_some_and(|slice| slice == original_chunk) { @@ -1250,7 +1441,7 @@ mod tests { use super::{ split_into_potential_tokens, stream_chunks_of_source_map_final, stream_chunks_of_source_map_full, stream_chunks_of_source_map_lines_final, - stream_chunks_of_source_map_lines_full, GeneratedInfo, + stream_chunks_of_source_map_lines_full, utf16_len, GeneratedInfo, }; use crate::{Mapping, ObjectPool, OriginalLocation, SourceMap}; @@ -1260,6 +1451,13 @@ mod tests { SourceMap::from_json("{\"version\":3,\"sources\":[\"i18.js\"],\"sourcesContent\":[\"var i18n = JSON.parse('{\\\"魑魅魍魉\\\":{\\\"en-US\\\":\\\"Evil spirits\\\",\\\"zh-CN\\\":\\\"魑魅魍魉\\\"}}');\\nvar __webpack_exports___ = i18n[\\\"魑魅魍魉\\\"];\\nexport { __webpack_exports___ as 魑魅魍魉 };\\n\"],\"names\":[\"i18n\",\"JSON\",\"__webpack_exports___\",\"魑魅魍魉\"],\"mappings\":\"AAAA,IAAIA,OAAOC,KAAK,KAAK,CAAC;AACtB,IAAIC,uBAAuBF,IAAI,CAAC,OAAO;AACvC,SAASE,wBAAwBC,IAAI,GAAG\"}").unwrap() }); + #[test] + fn test_utf16_len_fast_path_and_unicode() { + let ascii = "let answer = 42;"; + assert_eq!(utf16_len(ascii), ascii.len()); + assert_eq!(utf16_len("a😋b"), 4); + } + #[test] fn test_stream_chunks_of_source_map_full_handles_multi_unit_utf16() { let source = UTF16_SOURCE; @@ -1271,9 +1469,10 @@ mod tests { let generated_info = stream_chunks_of_source_map_full( &object_pool, source, + source.is_ascii(), source_map, &mut |chunk, mapping| { - chunks.push((chunk.unwrap(), mapping)); + chunks.push((chunk.unwrap().as_str(), mapping)); }, &mut |_i, _source, _source_content| {}, &mut |_i, _name| {}, @@ -1317,6 +1516,7 @@ mod tests { let generated_info = stream_chunks_of_source_map_final( source, + source.is_ascii(), source_map, &mut |_chunk, _mapping| {}, &mut |_i, _source, _source_content| {}, @@ -1339,6 +1539,7 @@ mod tests { let generated_info = stream_chunks_of_source_map_lines_final( source, + source.is_ascii(), source_map, &mut |_chunk, _mapping| {}, &mut |_i, _source, _source_content| {}, @@ -1361,6 +1562,7 @@ mod tests { let generated_info = stream_chunks_of_source_map_lines_full( source, + source.is_ascii(), source_map, &mut |_chunk, _mapping| {}, &mut |_i, _source, _source_content| {}, diff --git a/src/lib.rs b/src/lib.rs index c3cf863f..48fe0091 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,7 +38,7 @@ pub use source_map_source::{ pub mod stream_chunks { pub use super::helpers::{ stream_chunks_default, Chunks, GeneratedInfo, OnChunk, OnName, OnSource, - StreamChunks, + StreamChunk, StreamChunks, }; } diff --git a/src/original_source.rs b/src/original_source.rs index 64506388..1d407270 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -6,8 +6,8 @@ use std::{ use crate::{ helpers::{ - get_generated_source_info, get_map, split_into_lines, - split_into_potential_tokens, utf16_len, Chunks, GeneratedInfo, + get_generated_source_info_with_known_ascii, get_map, split_into_lines, + split_into_potential_tokens, utf16_len, Chunks, GeneratedInfo, StreamChunk, StreamChunks, }, object_pool::ObjectPool, @@ -130,6 +130,27 @@ impl<'source> OriginalSourceChunks<'source> { } } +#[inline] +fn utf16_len_with_known_ascii(is_ascii: bool, value: &str) -> usize { + if is_ascii { + value.len() + } else { + utf16_len(value) + } +} + +#[inline] +fn stream_chunk_with_known_ascii( + is_ascii: bool, + value: &str, +) -> StreamChunk<'_> { + if is_ascii { + StreamChunk::with_ascii(value, true) + } else { + StreamChunk::new(value) + } +} + impl Chunks for OriginalSourceChunks<'_> { fn stream<'b>( &'b self, @@ -140,6 +161,7 @@ impl Chunks for OriginalSourceChunks<'_> { _on_name: crate::helpers::OnName<'_, 'b>, ) -> GeneratedInfo { on_source(0, Cow::Borrowed(&self.0.name), Some(&self.0.value)); + let is_ascii = self.0.value.as_ref().is_ascii(); if options.columns { // With column info we need to read all lines and split them let mut line = 1; @@ -149,7 +171,7 @@ impl Chunks for OriginalSourceChunks<'_> { if is_end_of_line && token.len() == 1 { if !options.final_source { on_chunk( - Some(token), + Some(stream_chunk_with_known_ascii(is_ascii, token)), Mapping { generated_line: line, generated_column: column, @@ -159,7 +181,8 @@ impl Chunks for OriginalSourceChunks<'_> { } } else { on_chunk( - (!options.final_source).then_some(token), + (!options.final_source) + .then_some(stream_chunk_with_known_ascii(is_ascii, token)), Mapping { generated_line: line, generated_column: column, @@ -176,7 +199,7 @@ impl Chunks for OriginalSourceChunks<'_> { line += 1; column = 0; } else { - column += utf16_len(token) as u32; + column += utf16_len_with_known_ascii(is_ascii, token) as u32; } } GeneratedInfo { @@ -186,7 +209,10 @@ impl Chunks for OriginalSourceChunks<'_> { } else if options.final_source { // Without column info and with final source we only // need meta info to generate mapping - let result = get_generated_source_info(self.0.value.as_ref()); + let result = get_generated_source_info_with_known_ascii( + self.0.value.as_ref(), + is_ascii, + ); if result.generated_column == 0 { for line in 1..result.generated_line { on_chunk( @@ -228,7 +254,8 @@ impl Chunks for OriginalSourceChunks<'_> { let mut last_line = None; for l in split_into_lines(self.0.value.as_ref()) { on_chunk( - (!options.final_source).then_some(l), + (!options.final_source) + .then_some(stream_chunk_with_known_ascii(is_ascii, l)), Mapping { generated_line: line, generated_column: 0, @@ -248,7 +275,8 @@ impl Chunks for OriginalSourceChunks<'_> { { GeneratedInfo { generated_line: line - 1, - generated_column: utf16_len(last_line) as u32, + generated_column: utf16_len_with_known_ascii(is_ascii, last_line) + as u32, } } else { GeneratedInfo { @@ -332,6 +360,29 @@ mod tests { assert_eq!(source.size(), 4); } + #[test] + fn stream_chunks_expose_ascii_metadata() { + let source = OriginalSource::new("let answer = 42;\nanswer;", "file.js"); + let object_pool = ObjectPool::default(); + let chunks = source.stream_chunks(); + let mut seen_chunk = false; + + chunks.stream( + &object_pool, + &MapOptions::default(), + &mut |chunk, _mapping| { + let chunk = chunk.unwrap(); + seen_chunk = true; + assert!(chunk.is_ascii()); + assert_eq!(chunk.utf16_len(), chunk.len()); + }, + &mut |_source_index, _source, _source_content| {}, + &mut |_name_index, _name| {}, + ); + + assert!(seen_chunk); + } + #[test] fn should_split_code_into_statements() { let input = "if (hello()) { world(); hi(); there(); } done();\nif (hello()) { world(); hi(); there(); } done();"; @@ -381,7 +432,7 @@ mod tests { &object_pool, &MapOptions::default(), &mut |chunk, mapping| { - chunks.push((chunk.unwrap(), mapping)); + chunks.push((chunk.unwrap().as_str(), mapping)); }, &mut |_source_index, _source, _source_content| {}, &mut |_name_index, _name| {}, diff --git a/src/raw_source.rs b/src/raw_source.rs index c7fb29c9..9dfb13a4 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -6,8 +6,9 @@ use std::{ use crate::{ helpers::{ - get_generated_source_info, stream_chunks_of_raw_source, Chunks, - GeneratedInfo, StreamChunks, + get_generated_source_info_with_known_ascii, + stream_chunks_of_raw_source_with_known_ascii, Chunks, GeneratedInfo, + StreamChunks, }, object_pool::ObjectPool, MapOptions, Source, SourceMap, SourceValue, @@ -124,10 +125,18 @@ impl Chunks for RawStringChunks<'_> { on_source: crate::helpers::OnSource<'_, 'a>, on_name: crate::helpers::OnName<'_, 'a>, ) -> crate::helpers::GeneratedInfo { + let source_is_ascii = self.0.is_ascii(); if options.final_source { - get_generated_source_info(self.0) + get_generated_source_info_with_known_ascii(self.0, source_is_ascii) } else { - stream_chunks_of_raw_source(self.0, options, on_chunk, on_source, on_name) + stream_chunks_of_raw_source_with_known_ascii( + self.0, + options, + source_is_ascii, + on_chunk, + on_source, + on_name, + ) } } } @@ -265,10 +274,18 @@ impl Chunks for RawBufferSourceChunks<'_> { on_name: crate::helpers::OnName<'_, 'a>, ) -> GeneratedInfo { let code = self.0.get_or_init_value_as_string(); + let source_is_ascii = code.is_ascii(); if options.final_source { - get_generated_source_info(code) + get_generated_source_info_with_known_ascii(code, source_is_ascii) } else { - stream_chunks_of_raw_source(code, options, on_chunk, on_source, on_name) + stream_chunks_of_raw_source_with_known_ascii( + code, + options, + source_is_ascii, + on_chunk, + on_source, + on_name, + ) } } } diff --git a/src/replace_source.rs b/src/replace_source.rs index e39f8100..d4892be2 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -8,9 +8,7 @@ use std::{ use rustc_hash::FxHashMap as HashMap; use crate::{ - helpers::{ - get_map, split_into_lines, utf16_len, Chunks, GeneratedInfo, StreamChunks, - }, + helpers::{get_map, Chunks, GeneratedInfo, StreamChunk, StreamChunks}, linear_map::LinearMap, object_pool::ObjectPool, source_content_lines::SourceContentLines, @@ -127,9 +125,7 @@ impl ReplaceSource { pub fn replacements(&self) -> &[Replacement] { &self.replacements } -} -impl ReplaceSource { /// Insert a content at start. pub fn insert(&mut self, start: u32, content: String, name: Option) { self.replace(start, start, content, name) @@ -502,15 +498,15 @@ impl Source for ReplaceSource { fn map( &self, - _: &ObjectPool, + object_pool: &ObjectPool, options: &crate::MapOptions, ) -> Option { let replacements = &self.replacements; if replacements.is_empty() { - return self.inner.map(&ObjectPool::default(), options); + return self.inner.map(object_pool, options); } let chunks = self.stream_chunks(); - get_map(&ObjectPool::default(), chunks.as_ref(), options) + get_map(object_pool, chunks.as_ref(), options) } fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { @@ -587,6 +583,32 @@ fn check_content_at_position( } } +#[inline] +fn for_each_line<'a>(source: &'a str, mut on_line: impl FnMut(&'a str, bool)) { + let mut remaining = source; + while !remaining.is_empty() { + match memchr::memchr(b'\n', remaining.as_bytes()) { + Some(pos) => { + #[allow(unsafe_code)] + // SAFETY: `pos` points to an ASCII newline found in a valid UTF-8 str, + // so both byte ranges are valid UTF-8 boundaries. + let (line, next) = unsafe { + ( + remaining.get_unchecked(..pos + 1), + remaining.get_unchecked(pos + 1..), + ) + }; + remaining = next; + on_line(line, true); + } + None => { + on_line(remaining, false); + break; + } + } + } +} + struct ReplaceSourceChunks<'a> { is_original_source: bool, chunks: Box, @@ -658,13 +680,6 @@ impl Chunks for ReplaceSourceChunks<'_> { // webpack-sources also have this function, refer https://github.com/webpack/webpack-sources/blob/main/lib/ReplaceSource.js#L158 let check_original_content = |source_index: u32, line: u32, column: u32, expected_chunk: &str| { - // Performance optimization: Skip content validation for OriginalSourceChunks. - // Since OriginalSourceChunks guarantees that the source content matches the actual source, - // we can safely bypass the expensive content checking process. - if self.is_original_source { - return true; - } - if let Some(Some(source_content)) = source_content_lines.borrow_mut().get_mut(&source_index) { @@ -710,9 +725,9 @@ impl Chunks for ReplaceSourceChunks<'_> { generated_column_offset += mapping.generated_column as i64; } } else if generated_column_offset_line == line { - generated_column_offset -= utf16_len(chunk) as i64; + generated_column_offset -= chunk.utf16_len() as i64; } else { - generated_column_offset = -(utf16_len(chunk) as i64); + generated_column_offset = -(chunk.utf16_len() as i64); generated_column_offset_line = line; } pos = end_pos; @@ -720,18 +735,20 @@ impl Chunks for ReplaceSourceChunks<'_> { } // Partially skip over chunk chunk_pos = replacement_end - pos; - if let Some(original) = mapping.original.as_mut().filter(|original| { - check_original_content( - original.source_index, - original.original_line, - original.original_column, - &chunk[0..chunk_pos as usize], - ) - }) { - original.original_column += chunk_pos; + if let Some(original) = mapping.original.as_mut() { + if self.is_original_source + || check_original_content( + original.source_index, + original.original_line, + original.original_column, + chunk.slice_to(chunk_pos as usize).as_str(), + ) + { + original.original_column += chunk_pos; + } } pos += chunk_pos; - let chunk_utf16_pos = utf16_len(&chunk[..chunk_pos as usize]); + let chunk_utf16_pos = chunk.slice_to(chunk_pos as usize).utf16_len(); let line = mapping.generated_line as i64 + generated_line_offset; if generated_column_offset_line == line { generated_column_offset -= chunk_utf16_pos as i64; @@ -751,8 +768,8 @@ impl Chunks for ReplaceSourceChunks<'_> { // Emit chunk until replacement let offset = next_replacement_pos - pos; let chunk_slice = - &chunk[chunk_pos as usize..(chunk_pos + offset) as usize]; - let chunk_slice_utf16_offset = utf16_len(chunk_slice) as u32; + chunk.slice(chunk_pos as usize, (chunk_pos + offset) as usize); + let chunk_slice_utf16_offset = chunk_slice.utf16_len() as u32; on_chunk( Some(chunk_slice), Mapping { @@ -763,32 +780,34 @@ impl Chunks for ReplaceSourceChunks<'_> { } else { 0 }) as u32, - original: mapping.original.as_ref().map(|original| { - OriginalLocation { + original: if self.is_original_source { + mapping.original + } else { + mapping.original.as_ref().map(|original| OriginalLocation { source_index: original.source_index, original_line: original.original_line, original_column: original.original_column, name_index: original.name_index.and_then(|name_index| { name_index_mapping.borrow().get(&name_index).copied() }), - } - }), + }) + }, }, ); mapping.generated_column += chunk_slice_utf16_offset; chunk_pos += offset; pos = next_replacement_pos; - if let Some(original) = - mapping.original.as_mut().filter(|original| { - check_original_content( + if let Some(original) = mapping.original.as_mut() { + if self.is_original_source + || check_original_content( original.source_index, original.original_line, original.original_column, - chunk_slice, + chunk_slice.as_str(), ) - }) - { - original.original_column += chunk_slice_utf16_offset; + { + original.original_column += chunk_slice_utf16_offset; + } } } // Insert replacement content split into chunks by lines @@ -800,24 +819,26 @@ impl Chunks for ReplaceSourceChunks<'_> { .original .as_ref() .and_then(|original| original.name_index); - if let Some(name) = - repl.name.as_ref().filter(|_| mapping.original.is_some()) - { - let mut name_mapping = name_mapping.borrow_mut(); - let mut global_index = name_mapping.get(name.as_ref()).copied(); - if global_index.is_none() { - let len = name_mapping.len() as u32; - name_mapping.insert(Cow::Borrowed(name), len); - on_name.borrow_mut()(len, Cow::Borrowed(name)); - global_index = Some(len); + if mapping.original.is_some() { + if let Some(name) = repl.name.as_ref() { + let mut name_mapping = name_mapping.borrow_mut(); + let mut global_index = name_mapping.get(name.as_ref()).copied(); + if global_index.is_none() { + let len = name_mapping.len() as u32; + name_mapping.insert(Cow::Borrowed(name), len); + on_name.borrow_mut()(len, Cow::Borrowed(name)); + global_index = Some(len); + } + replacement_name_index = global_index; } - replacement_name_index = global_index; } - let mut lines = split_into_lines(repl.content.as_ref()).peekable(); - while let Some(content_line) = lines.next() { - let is_last_line = lines.peek().is_none(); + let content = repl.content.as_ref(); + let content_is_ascii = content.is_ascii(); + for_each_line(content, |content_line, ends_with_newline| { + let content_chunk = + StreamChunk::with_ascii(content_line, content_is_ascii); on_chunk( - Some(content_line), + Some(content_chunk), Mapping { generated_line: line as u32, generated_column: ((mapping.generated_column as i64) @@ -826,24 +847,31 @@ impl Chunks for ReplaceSourceChunks<'_> { } else { 0 }) as u32, - original: mapping.original.as_ref().map(|original| { - OriginalLocation { + original: if mapping + .original + .and_then(|original| original.name_index) + == replacement_name_index + { + mapping.original + } else { + mapping.original.map(|original| OriginalLocation { source_index: original.source_index, original_line: original.original_line, original_column: original.original_column, name_index: replacement_name_index, - } - }), + }) + }, }, ); // Only the first chunk has name assigned replacement_name_index = None; - if is_last_line && !content_line.ends_with('\n') { + if !ends_with_newline { + let content_utf16_len = content_chunk.utf16_len() as i64; if generated_column_offset_line == line { - generated_column_offset += utf16_len(content_line) as i64; + generated_column_offset += content_utf16_len; } else { - generated_column_offset = utf16_len(content_line) as i64; + generated_column_offset = content_utf16_len; generated_column_offset_line = line; } } else { @@ -852,7 +880,7 @@ impl Chunks for ReplaceSourceChunks<'_> { generated_column_offset = -(mapping.generated_column as i64); generated_column_offset_line = line; } - } + }); // Remove replaced content by settings this variable replacement_end = if let Some(replacement_end) = replacement_end { @@ -887,11 +915,11 @@ impl Chunks for ReplaceSourceChunks<'_> { } } else if generated_column_offset_line == line { let remaining_chunk_utf16_len = - utf16_len(&chunk[chunk_pos as usize..]) as i64; + chunk.slice_from(chunk_pos as usize).utf16_len() as i64; generated_column_offset -= remaining_chunk_utf16_len; } else { generated_column_offset = - -(utf16_len(&chunk[chunk_pos as usize..]) as i64); + -(chunk.slice_from(chunk_pos as usize).utf16_len() as i64); generated_column_offset_line = line; } pos = end_pos; @@ -900,23 +928,27 @@ impl Chunks for ReplaceSourceChunks<'_> { // Partially skip over chunk let line = mapping.generated_line as i64 + generated_line_offset; - if let Some(original) = - mapping.original.as_mut().filter(|original| { - check_original_content( + if let Some(original) = mapping.original.as_mut() { + if self.is_original_source + || check_original_content( original.source_index, original.original_line, original.original_column, - &chunk - [chunk_pos as usize..(chunk_pos + offset as u32) as usize], + chunk + .slice( + chunk_pos as usize, + (chunk_pos + offset as u32) as usize, + ) + .as_str(), ) - }) - { - original.original_column += offset as u32; + { + original.original_column += offset as u32; + } } - let utf16_offset = utf16_len( - &chunk[chunk_pos as usize..(chunk_pos + offset as u32) as usize], - ) as i64; + let utf16_offset = chunk + .slice(chunk_pos as usize, (chunk_pos + offset as u32) as usize) + .utf16_len() as i64; chunk_pos += offset as u32; pos += offset as u32; @@ -935,7 +967,7 @@ impl Chunks for ReplaceSourceChunks<'_> { let chunk_slice = if chunk_pos == 0 { chunk } else { - &chunk[chunk_pos as usize..chunk.len()] + chunk.slice_from(chunk_pos as usize) }; let line = mapping.generated_line as i64 + generated_line_offset; on_chunk( @@ -948,26 +980,30 @@ impl Chunks for ReplaceSourceChunks<'_> { } else { 0 }) as u32, - original: mapping.original.as_ref().map(|original| { - OriginalLocation { + original: if self.is_original_source { + mapping.original + } else { + mapping.original.as_ref().map(|original| OriginalLocation { source_index: original.source_index, original_line: original.original_line, original_column: original.original_column, name_index: original.name_index.and_then(|name_index| { name_index_mapping.borrow().get(&name_index).copied() }), - } - }), + }) + }, }, ); } pos = end_pos; }, &mut |source_index, source, source_content| { - let mut source_content_lines = source_content_lines.borrow_mut(); - let lines = source_content - .map(|source_content| SourceContent::Raw(source_content.clone())); - source_content_lines.insert(source_index, lines); + if !self.is_original_source { + let mut source_content_lines = source_content_lines.borrow_mut(); + let lines = source_content + .map(|source_content| SourceContent::Raw(source_content.clone())); + source_content_lines.insert(source_index, lines); + } on_source(source_index, source, source_content); }, &mut |name_index, name| { @@ -988,11 +1024,14 @@ impl Chunks for ReplaceSourceChunks<'_> { // Handle remaining replacements one by one let mut line = result.generated_line as i64 + generated_line_offset; while i < repls.len() { - let content = &repls[i].content; + let content = repls[i].content.as_ref(); + let content_is_ascii = content.is_ascii(); - for content_line in split_into_lines(content) { + for_each_line(content, |content_line, ends_with_newline| { + let content_chunk = + StreamChunk::with_ascii(content_line, content_is_ascii); on_chunk( - Some(content_line), + Some(content_chunk), Mapping { generated_line: line as u32, generated_column: ((result.generated_column as i64) @@ -1006,8 +1045,8 @@ impl Chunks for ReplaceSourceChunks<'_> { ); // Handle line and column offset updates - if !content_line.ends_with('\n') { - let content_utf16_len = utf16_len(content_line) as i64; + if !ends_with_newline { + let content_utf16_len = content_chunk.utf16_len() as i64; // Last line of current replacement doesn't end with newline if generated_column_offset_line == line { generated_column_offset += content_utf16_len; @@ -1021,7 +1060,7 @@ impl Chunks for ReplaceSourceChunks<'_> { generated_column_offset = -(result.generated_column as i64); generated_column_offset_line = line; } - } + }); i += 1; } @@ -1769,7 +1808,7 @@ return
{data.foo}
&object_pool, &MapOptions::default(), &mut |chunk, mapping| { - chunks.push((chunk.unwrap(), mapping)); + chunks.push((chunk.unwrap().as_str(), mapping)); }, &mut |_source_index, _source, _source_content| {}, &mut |_name_index, _name| {}, diff --git a/src/source.rs b/src/source.rs index 7d416c5a..15e10a5a 100644 --- a/src/source.rs +++ b/src/source.rs @@ -639,7 +639,7 @@ impl TryFrom for SourceMap { } /// Represent a [Mapping] information of source map. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Mapping { /// Generated line. pub generated_line: u32, @@ -650,7 +650,7 @@ pub struct Mapping { } /// Represent original position information of a [Mapping]. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct OriginalLocation { /// Source index. pub source_index: u32,