rust-lang · fereidani · Dec 5, 2025 · nnethercote · Dec 7, 2025 · fereidani
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
@@ -21,6 +21,7 @@ pub struct Cursor<'a> {
 pub(crate) const EOF_CHAR: char = '\0';
 
 impl<'a> Cursor<'a> {
+    #[inline]
     pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
         Cursor {
             len_remaining: input.len(),
@@ -31,6 +32,7 @@ impl<'a> Cursor<'a> {
         }
     }
 
+    #[inline]
     pub fn as_str(&self) -> &'a str {
         self.chars.as_str()
     }
@@ -53,12 +55,14 @@ impl<'a> Cursor<'a> {
     /// If requested position doesn't exist, `EOF_CHAR` is returned.
     /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
     /// it should be checked with `is_eof` method.
+    #[inline]
     pub fn first(&self) -> char {
         // `.next()` optimizes better than `.nth(0)`
         self.chars.clone().next().unwrap_or(EOF_CHAR)
     }
 
     /// Peeks the second symbol from the input stream without consuming it.
+    #[inline]
     pub(crate) fn second(&self) -> char {
         // `.next()` optimizes better than `.nth(1)`
         let mut iter = self.chars.clone();
@@ -67,6 +71,7 @@ impl<'a> Cursor<'a> {
     }
 
     /// Peeks the third symbol from the input stream without consuming it.
+    #[inline]
     pub fn third(&self) -> char {
         // `.next()` optimizes better than `.nth(2)`
         let mut iter = self.chars.clone();
@@ -76,21 +81,25 @@ impl<'a> Cursor<'a> {
     }
 
     /// Checks if there is nothing more to consume.
+    #[inline]
     pub(crate) fn is_eof(&self) -> bool {
         self.chars.as_str().is_empty()
     }
 
     /// Returns amount of already consumed symbols.
+    #[inline]
     pub(crate) fn pos_within_token(&self) -> u32 {
         (self.len_remaining - self.chars.as_str().len()) as u32
     }
 
     /// Resets the number of bytes consumed to 0.
+    #[inline]
     pub(crate) fn reset_pos_within_token(&mut self) {
         self.len_remaining = self.chars.as_str().len();
     }
 
     /// Moves to the next character.
+    #[inline]
     pub(crate) fn bump(&mut self) -> Option<char> {
         let c = self.chars.next()?;
 
@@ -102,24 +111,76 @@ impl<'a> Cursor<'a> {
         Some(c)
     }
 
+    /// Bumps the cursor if the next character is `expected`; returns whether it did.
+    #[inline]
+    pub(crate) fn bump_if(&mut self, expected: char) -> bool {
+        let mut chars = self.chars.clone();
+        let matched = chars.next() == Some(expected);
+        if matched {
+            self.chars = chars;
+        }
+        matched
+    }
+
+    /// Bumps the cursor if the next character is one of `expected1` or `expected2`.
+    #[inline]
+    pub(crate) fn bump_if2(&mut self, expected1: char, expected2: char) -> bool {
+        let mut lookahead = self.chars.clone();
+        match lookahead.next() {
+            Some(c) if c == expected1 || c == expected2 => {
+                self.chars = lookahead;
+                true
+            }
+            _ => false,
+        }
+    }
+
     /// Moves to a substring by a number of bytes.
+    #[inline]
     pub(crate) fn bump_bytes(&mut self, n: usize) {
-        self.chars = self.as_str()[n..].chars();
+        self.chars = self.as_str().get(n..).unwrap_or("").chars();
     }
 
     /// Eats symbols while predicate returns true or until the end of file is reached.
+    #[inline]
     pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
         // It was tried making optimized version of this for eg. line comments, but
         // LLVM can inline all of this and compile it down to fast iteration over bytes.
         while predicate(self.first()) && !self.is_eof() {
             self.bump();
         }
     }
+
+    /// Eats characters until `byte` is found; the byte itself is not consumed.
+    /// Returns true if the byte was found, false if end of file was reached.
+    // NOTE(review): presumably `byte` is always ASCII, making a match a char boundary.
+    #[inline]
+    pub(crate) fn eat_until(&mut self, byte: u8) -> bool {
+        let Some(index) = memchr::memchr(byte, self.as_str().as_bytes()) else {
+            // Not found: consume everything that remains.
+            self.chars = "".chars();
+            return false;
+        };
+        // Stop right in front of the match so the caller can inspect it.
+        self.bump_bytes(index);
+        true
+    }
 
-    pub(crate) fn eat_until(&mut self, byte: u8) {
-        self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
-            Some(index) => self.as_str()[index..].chars(),
-            None => "".chars(),
+    /// Eats characters up to and including the first `byte1` or `byte2` found.
+    /// Returns the found byte, or `None` if end of file was reached instead.
+    #[inline]
+    pub(crate) fn eat_past2(&mut self, byte1: u8, byte2: u8) -> Option<u8> {
+        let haystack = self.as_str().as_bytes();
+        match memchr::memchr2(byte1, byte2, haystack) {
+            Some(at) => {
+                // Resume just past the matched byte.
+                self.bump_bytes(at + 1);
+                Some(haystack[at])
+            }
+            None => {
+                self.chars = "".chars();
+                None
+            }
         }
     }
 }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
@@ -563,11 +563,30 @@ impl Cursor<'_> {
         self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
         let invalid_infostring = self.first() != '\n';
 
-        let mut found = false;
-        let nl_fence_pattern = format!("\n{:-<1$}", "", length_opening as usize);
-        if let Some(closing) = self.as_str().find(&nl_fence_pattern) {
+        /// Returns the byte offset just past the first run of `dash_count` dashes
+        /// that immediately follows a newline in `s`, or `None` if there is none.
+        #[inline]
+        fn find_closing_fence(s: &str, dash_count: usize) -> Option<usize> {
+            let bytes = s.as_bytes();
+            let mut line_start = 0;
+            while let Some(newline) = memchr::memchr(b'\n', &bytes[line_start..]) {
+                // Candidate fence: the bytes right after this newline.
+                line_start += newline + 1;
+                let fence_end = line_start + dash_count;
+                // `get` yields `None` when fewer than `dash_count` bytes remain.
+                let is_fence = bytes
+                    .get(line_start..fence_end)
+                    .is_some_and(|fence| fence.iter().all(|&b| b == b'-'));
+                if is_fence {
+                    return Some(fence_end);
+                }
+            }
+            None
+        }
+
+        if let Some(closing) = find_closing_fence(self.as_str(), length_opening as usize) {
             // candidate found
-            self.bump_bytes(closing + nl_fence_pattern.len());
+            self.bump_bytes(closing);
             // in case like
             // ---cargo
             // --- blahblah
@@ -576,10 +595,7 @@ impl Cursor<'_> {
             // ----
             // combine those stuff into this frontmatter token such that it gets detected later.
             self.eat_until(b'\n');
-            found = true;
-        }
-
-        if !found {
+        } else {
             // recovery strategy: a closing statement might have preceding whitespace/newline
             // but not have enough dashes to properly close. In this case, we eat until there,
             // and report a mismatch in the parser.
@@ -656,23 +672,25 @@ impl Cursor<'_> {
         };
 
         let mut depth = 1usize;
-        while let Some(c) = self.bump() {
+        while let Some(c) = self.eat_past2(b'/', b'*') {
             match c {
-                '/' if self.first() == '*' => {
-                    self.bump();
-                    depth += 1;
+                b'/' => {
+                    if self.bump_if('*') {
+                        depth += 1;
+                    }
                 }
-                '*' if self.first() == '/' => {
-                    self.bump();
-                    depth -= 1;
-                    if depth == 0 {
-                        // This block comment is closed, so for a construction like "/* */ */"
-                        // there will be a successfully parsed block comment "/* */"
-                        // and " */" will be processed separately.
-                        break;
+                b'*' => {
+                    if self.bump_if('/') {
+                        depth -= 1;
+                        if depth == 0 {
+                            // This block comment is closed, so for a construction like "/* */ */"
+                            // there will be a successfully parsed block comment "/* */"
+                            // and " */" will be processed separately.
+                            break;
+                        }
                     }
                 }
-                _ => (),
+                _ => unreachable!(),
             }
         }
 
@@ -935,19 +953,21 @@ impl Cursor<'_> {
     /// if string is terminated.
     fn double_quoted_string(&mut self) -> bool {
         debug_assert!(self.prev() == '"');
-        while let Some(c) = self.bump() {
+        while let Some(c) = self.eat_past2(b'"', b'\\') {
             match c {
-                '"' => {
+                b'"' => {
                     return true;
                 }
-                '\\' if self.first() == '\\' || self.first() == '"' => {
-                    // Bump again to skip escaped character.
-                    self.bump();
+                b'\\' => {
+                    let first = self.first();
+                    if first == '\\' || first == '"' {
+                        // Bump to skip escaped character.
+                        self.bump();
+                    }
                 }
-                _ => (),
+                _ => unreachable!(),
             }
         }
-        // End of file reached.
         false
     }
 
@@ -963,9 +983,8 @@ impl Cursor<'_> {
         debug_assert!(self.prev() != '#');
 
         let mut n_start_hashes: u32 = 0;
-        while self.first() == '#' {
+        while self.bump_if('#') {
             n_start_hashes += 1;
-            self.bump();
         }
 
         if self.first() != '"' {
@@ -1025,9 +1044,8 @@ impl Cursor<'_> {
 
         // Count opening '#' symbols.
         let mut eaten = 0;
-        while self.first() == '#' {
+        while self.bump_if('#') {
             eaten += 1;
-            self.bump();
         }
         let n_start_hashes = eaten;
 
@@ -1043,9 +1061,7 @@ impl Cursor<'_> {
         // Skip the string contents and on each '#' character met, check if this is
         // a raw string termination.
         loop {
-            self.eat_until(b'"');
-
-            if self.is_eof() {
+            if !self.eat_until(b'"') {
                 return Err(RawStrError::NoTerminator {
                     expected: n_start_hashes,
                     found: max_hashes,
@@ -1117,9 +1133,7 @@ impl Cursor<'_> {
     /// and returns false otherwise.
     fn eat_float_exponent(&mut self) -> bool {
         debug_assert!(self.prev() == 'e' || self.prev() == 'E');
-        if self.first() == '-' || self.first() == '+' {
-            self.bump();
-        }
+        self.bump_if2('-', '+');
         self.eat_decimal_digits()
     }