rust-lang
diff --git a/‎library/core/src/char/methods.rs‎
Lines changed: 101 additions & 4 deletions b/‎library/core/src/char/methods.rs‎
Lines changed: 101 additions & 4 deletions
diff --git a/‎library/core/src/char/mod.rs‎
Lines changed: 48 additions & 15 deletions b/‎library/core/src/char/mod.rs‎
Lines changed: 48 additions & 15 deletions
diff --git a/‎library/core/src/unicode/unicode_data.rs‎
Lines changed: 67 additions & 12 deletions b/‎library/core/src/unicode/unicode_data.rs‎
Lines changed: 67 additions & 12 deletions
@@ -1196,14 +1196,111 @@ impl char {
     /// // convert into themselves.
     /// assert_eq!('山'.to_lowercase().to_string(), "山");
     /// ```
-    #[must_use = "this returns the lowercase character as a new iterator, \
+    #[must_use = "this returns the lowercased character as a new iterator, \
                   without modifying the original"]
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn to_lowercase(self) -> ToLowercase {
         ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
     }
 
+    /// Returns an iterator that yields the titlecase mapping of this `char` as one or more
+    /// `char`s.
+    ///
+    /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
+    ///
+    /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
+    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
+    ///
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+    ///
+    /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
+    /// the `char`(s) given by [`SpecialCasing.txt`].
+    ///
+    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
+    ///
+    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
+    /// is independent of context and language.
+    ///
+    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
+    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    ///
+    /// # Examples
+    ///
+    /// As an iterator:
+    ///
+    /// ```
+    /// #![feature(titlecase)]
+    /// for c in 'ß'.to_titlecase() {
+    ///     print!("{c}");
+    /// }
+    /// println!();
+    /// ```
+    ///
+    /// Using `println!` directly:
+    ///
+    /// ```
+    /// #![feature(titlecase)]
+    /// println!("{}", 'ß'.to_titlecase());
+    /// ```
+    ///
+    /// Both are equivalent to:
+    ///
+    /// ```
+    /// println!("Ss");
+    /// ```
+    ///
+    /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
+    ///
+    /// ```
+    /// #![feature(titlecase)]
+    /// assert_eq!('c'.to_titlecase().to_string(), "C");
+    ///
+    /// // Sometimes the result is more than one character:
+    /// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
+    ///
+    /// // Characters that do not have separate cased forms
+    /// // convert into themselves.
+    /// assert_eq!('山'.to_titlecase().to_string(), "山");
+    /// ```
+    ///
+    /// # Note on locale
+    ///
+    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
+    ///
+    /// * 'Dotless': I / ı, sometimes written ï
+    /// * 'Dotted': İ / i
+    ///
+    /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
+    ///
+    /// ```
+    /// #![feature(titlecase)]
+    /// let title_i = 'i'.to_titlecase().to_string();
+    /// ```
+    ///
+    /// The value of `title_i` here relies on the language of the text: if we're
+    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
+    /// be `"İ"`. `to_titlecase()` does not take this into account, and so:
+    ///
+    /// ```
+    /// #![feature(titlecase)]
+    /// let title_i = 'i'.to_titlecase().to_string();
+    ///
+    /// assert_eq!(title_i, "I");
+    /// ```
+    ///
+    /// holds across languages.
+    #[must_use = "this returns the titlecased character as a new iterator, \
+                  without modifying the original"]
+    #[unstable(feature = "titlecase", issue = "none")]
+    #[inline]
+    pub fn to_titlecase(self) -> ToTitlecase {
+        ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
+    }
+
     /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
     /// `char`s.
     ///
@@ -1267,7 +1364,7 @@ impl char {
     ///
     /// # Note on locale
     ///
-    /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
+    /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
     ///
     /// * 'Dotless': I / ı, sometimes written ï
     /// * 'Dotted': İ / i
@@ -1279,7 +1376,7 @@ impl char {
     /// ```
     ///
     /// The value of `upper_i` here relies on the language of the text: if we're
-    /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
+    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
     /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
     ///
     /// ```
@@ -1289,7 +1386,7 @@ impl char {
     /// ```
     ///
     /// holds across languages.
-    #[must_use = "this returns the uppercase character as a new iterator, \
+    #[must_use = "this returns the uppercased character as a new iterator, \
                   without modifying the original"]
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
 
@@ -363,13 +363,21 @@ impl fmt::Display for EscapeDebug {
 }
 
 macro_rules! casemappingiter_impls {
-    ($(#[$attr:meta])* $ITER_NAME:ident) => {
+    (
+        #[$stab:meta]
+        #[$dendstab:meta]
+        #[$fusedstab:meta]
+        #[$exactstab:meta]
+        #[$displaystab:meta]
+        $(#[$attr:meta])*
+        $ITER_NAME:ident
+    ) => {
         $(#[$attr])*
-        #[stable(feature = "rust1", since = "1.0.0")]
+        #[$stab]
         #[derive(Debug, Clone)]
         pub struct $ITER_NAME(CaseMappingIter);
 
-        #[stable(feature = "rust1", since = "1.0.0")]
+        #[$stab]
         impl Iterator for $ITER_NAME {
             type Item = char;
             fn next(&mut self) -> Option<char> {
@@ -405,7 +413,7 @@ macro_rules! casemappingiter_impls {
             }
         }
 
-        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
+        #[$dendstab]
         impl DoubleEndedIterator for $ITER_NAME {
             fn next_back(&mut self) -> Option<char> {
                 self.0.next_back()
@@ -423,10 +431,10 @@ macro_rules! casemappingiter_impls {
             }
         }
 
-        #[stable(feature = "fused", since = "1.26.0")]
+        #[$fusedstab]
         impl FusedIterator for $ITER_NAME {}
 
-        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
+        #[$exactstab]
         impl ExactSizeIterator for $ITER_NAME {
             fn len(&self) -> usize {
                 self.0.len()
@@ -453,7 +461,7 @@ macro_rules! casemappingiter_impls {
         #[unstable(feature = "std_internals", issue = "none")]
         unsafe impl TrustedRandomAccess for $ITER_NAME {}
 
-        #[stable(feature = "char_struct_display", since = "1.16.0")]
+        #[$displaystab]
         impl fmt::Display for $ITER_NAME {
             #[inline]
             fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -464,23 +472,48 @@ macro_rules! casemappingiter_impls {
 }
 
 casemappingiter_impls! {
-    /// Returns an iterator that yields the lowercase equivalent of a `char`.
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
+    #[stable(feature = "fused", since = "1.26.0")]
+    #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
+    #[stable(feature = "char_struct_display", since = "1.16.0")]
+    /// Returns an iterator that yields the uppercase equivalent of a `char`.
     ///
-    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
+    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
     /// its documentation for more.
     ///
-    /// [`to_lowercase`]: char::to_lowercase
-    ToLowercase
+    /// [`to_uppercase`]: char::to_uppercase
+    ToUppercase
 }
 
 casemappingiter_impls! {
-    /// Returns an iterator that yields the uppercase equivalent of a `char`.
+    #[unstable(feature = "titlecase", issue = "none")]
+    #[unstable(feature = "titlecase", issue = "none")]
+    #[unstable(feature = "titlecase", issue = "none")]
+    #[unstable(feature = "titlecase", issue = "none")]
+    #[unstable(feature = "titlecase", issue = "none")]
+    /// Returns an iterator that yields the titlecase equivalent of a `char`.
     ///
-    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
+    /// This `struct` is created by the [`to_titlecase`] method on [`char`]. See
     /// its documentation for more.
     ///
-    /// [`to_uppercase`]: char::to_uppercase
-    ToUppercase
+    /// [`to_titlecase`]: char::to_titlecase
+    ToTitlecase
+}
+
+casemappingiter_impls! {
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
+    #[stable(feature = "fused", since = "1.26.0")]
+    #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
+    #[stable(feature = "char_struct_display", since = "1.16.0")]
+    /// Returns an iterator that yields the lowercase equivalent of a `char`.
+    ///
+    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
+    /// its documentation for more.
+    ///
+    /// [`to_lowercase`]: char::to_lowercase
+    ToLowercase
 }
 
 #[derive(Debug, Clone)]
 
@@ -9,7 +9,8 @@
 // White_Space     :   256 bytes,     19 codepoints in   8 ranges (U+000085 - U+003001) using cascading
 // to_lower        :  1112 bytes,   1462 codepoints in 185 ranges (U+0000C0 - U+01E921) using 2-level LUT
 // to_upper        :  1998 bytes,   1554 codepoints in 299 ranges (U+0000B5 - U+01E943) using 2-level LUT
-// Total           :  9657 bytes
+// to_title        :   340 bytes,    135 codepoints in  49 ranges (U+0000DF - U+00FB17) using 2-level LUT
+// Total           :  9997 bytes
 
 #[inline(always)]
 const fn bitset_search<
@@ -823,14 +824,10 @@ pub mod conversions {
         unsafe { char::from_u32_unchecked(((plane as u32) << 16) | (low as u32)) }
     }
 
-    fn lookup(input: char, ascii: char, l1_lut: &L1Lut) -> [char; 3] {
-        if input.is_ascii() {
-            return [ascii, '\0', '\0'];
-        }
-
+    fn lookup(input: char, l1_lut: &L1Lut) -> Option<[char; 3]> {
         let (input_high, input_low) = deconstruct(input);
         let Some(l2_lut) = l1_lut.l2_luts.get(input_high as usize) else {
-            return [input, '\0', '\0'];
+            return None;
         };
 
         let idx = l2_lut.singles.binary_search_by(|(range, _)| {
@@ -844,6 +841,7 @@ pub mod conversions {
                 Ordering::Equal
             }
         });
+
         if let Ok(idx) = idx {
             // SAFETY: binary search guarantees that the index is in bounds.
             let &(range, output_delta) = unsafe { l2_lut.singles.get_unchecked(idx) };
@@ -852,7 +850,7 @@ pub mod conversions {
                 let output_low = input_low.wrapping_add_signed(output_delta);
                 // SAFETY: Table data are guaranteed to be valid Unicode.
                 let output = unsafe { reconstruct(input_high, output_low) };
-                return [output, '\0', '\0'];
+                return Some([output, '\0', '\0']);
             }
         };
 
@@ -861,18 +859,34 @@ pub mod conversions {
             let &(_, output_lows) = unsafe { l2_lut.multis.get_unchecked(idx) };
             // SAFETY: Table data are guaranteed to be valid Unicode.
             let output = output_lows.map(|output_low| unsafe { reconstruct(input_high, output_low) });
-            return output;
+            return Some(output);
         };
 
-        [input, '\0', '\0']
+        None
     }
 
     pub fn to_lower(c: char) -> [char; 3] {
-        lookup(c, c.to_ascii_lowercase(), &LOWERCASE_LUT)
+        if c.is_ascii() {
+            return [c.to_ascii_lowercase(), '\0', '\0'];
+        }
+
+        lookup(c, &LOWERCASE_LUT).unwrap_or([c, '\0', '\0'])
     }
 
     pub fn to_upper(c: char) -> [char; 3] {
-        lookup(c, c.to_ascii_uppercase(), &UPPERCASE_LUT)
+        if c.is_ascii() {
+            return [c.to_ascii_uppercase(), '\0', '\0'];
+        }
+
+        lookup(c, &UPPERCASE_LUT).unwrap_or([c, '\0', '\0'])
+    }
+
+    pub fn to_title(c: char) -> [char; 3] {
+        if c.is_ascii() {
+            return [c.to_ascii_uppercase(), '\0', '\0'];
+        }
+
+        lookup(c, &TITLECASE_LUT).or_else(|| lookup(c, &UPPERCASE_LUT)).unwrap_or([c, '\0', '\0'])
     }
 
     static LOWERCASE_LUT: L1Lut = L1Lut {
@@ -1150,4 +1164,45 @@ pub mod conversions {
             },
         ],
     };
+
+    static TITLECASE_LUT: L1Lut = L1Lut {
+        l2_luts: [
+            L2Lut {
+                singles: &[ // 26 entries, 156 bytes
+                    (Range::singleton(0x01c4), 1), (Range::singleton(0x01c5), 0),
+                    (Range::singleton(0x01c6), -1), (Range::singleton(0x01c7), 1),
+                    (Range::singleton(0x01c8), 0), (Range::singleton(0x01c9), -1),
+                    (Range::singleton(0x01ca), 1), (Range::singleton(0x01cb), 0),
+                    (Range::singleton(0x01cc), -1), (Range::singleton(0x01f1), 1),
+                    (Range::singleton(0x01f2), 0), (Range::singleton(0x01f3), -1),
+                    (Range::step_by_1(0x10d0..=0x10fa), 0), (Range::step_by_1(0x10fd..=0x10ff), 0),
+                    (Range::step_by_1(0x1f80..=0x1f87), 8), (Range::step_by_1(0x1f88..=0x1f8f), 0),
+                    (Range::step_by_1(0x1f90..=0x1f97), 8), (Range::step_by_1(0x1f98..=0x1f9f), 0),
+                    (Range::step_by_1(0x1fa0..=0x1fa7), 8), (Range::step_by_1(0x1fa8..=0x1faf), 0),
+                    (Range::singleton(0x1fb3), 9), (Range::singleton(0x1fbc), 0), (Range::singleton(0x1fc3), 9),
+                    (Range::singleton(0x1fcc), 0), (Range::singleton(0x1ff3), 9), (Range::singleton(0x1ffc), 0),
+                ],
+                multis: &[ // 23 entries, 184 bytes
+                    (0x00df, [0x0053, 0x0073, 0x0000]), (0x0587, [0x0535, 0x0582, 0x0000]),
+                    (0x1fb2, [0x1fba, 0x0345, 0x0000]), (0x1fb4, [0x0386, 0x0345, 0x0000]),
+                    (0x1fb7, [0x0391, 0x0342, 0x0345]), (0x1fc2, [0x1fca, 0x0345, 0x0000]),
+                    (0x1fc4, [0x0389, 0x0345, 0x0000]), (0x1fc7, [0x0397, 0x0342, 0x0345]),
+                    (0x1ff2, [0x1ffa, 0x0345, 0x0000]), (0x1ff4, [0x038f, 0x0345, 0x0000]),
+                    (0x1ff7, [0x03a9, 0x0342, 0x0345]), (0xfb00, [0x0046, 0x0066, 0x0000]),
+                    (0xfb01, [0x0046, 0x0069, 0x0000]), (0xfb02, [0x0046, 0x006c, 0x0000]),
+                    (0xfb03, [0x0046, 0x0066, 0x0069]), (0xfb04, [0x0046, 0x0066, 0x006c]),
+                    (0xfb05, [0x0053, 0x0074, 0x0000]), (0xfb06, [0x0053, 0x0074, 0x0000]),
+                    (0xfb13, [0x0544, 0x0576, 0x0000]), (0xfb14, [0x0544, 0x0565, 0x0000]),
+                    (0xfb15, [0x0544, 0x056b, 0x0000]), (0xfb16, [0x054e, 0x0576, 0x0000]),
+                    (0xfb17, [0x0544, 0x056d, 0x0000]),
+                ],
+            },
+            L2Lut {
+                singles: &[ // 0 entries, 0 bytes
+                ],
+                multis: &[ // 0 entries, 0 bytes
+                ],
+            },
+        ],
+    };
 }