@@ -1639,13 +1639,204 @@ impl<'i> Parser<'i> {
16391639
16401640 let content = self . source ;
16411641
1642- // Parser is whitespace agnostic - consume entire remaining content
1643- // The outer take_*() methods have already isolated the numeric content
1644- let numeric = validate_numeric ( content) . ok_or ( ParsingError :: InvalidNumeric ( self . offset ) ) ?;
1642+ if is_numeric_integral ( content) {
1643+ self . read_numeric_integral ( )
1644+ } else if is_numeric_quantity ( content) {
1645+ self . read_numeric_quantity ( )
1646+ } else {
1647+ Err ( ParsingError :: InvalidNumeric ( self . offset ) )
1648+ }
1649+ }
16451650
1646- self . advance ( content. len ( ) ) ;
1651+ /// Parse a simple integral number
1652+ fn read_numeric_integral ( & mut self ) -> Result < Numeric < ' i > , ParsingError < ' i > > {
1653+ let content = self . source ;
16471654
1648- Ok ( numeric)
1655+ if let Ok ( amount) = content
1656+ . trim_ascii ( )
1657+ . parse :: < i64 > ( )
1658+ {
1659+ self . advance ( content. len ( ) ) ;
1660+ Ok ( Numeric :: Integral ( amount) )
1661+ } else {
1662+ Err ( ParsingError :: InvalidNumeric ( self . offset ) )
1663+ }
1664+ }
1665+
1666+ /// Parse a scientific quantity with units
1667+ fn read_numeric_quantity ( & mut self ) -> Result < Numeric < ' i > , ParsingError < ' i > > {
1668+ self . trim_whitespace ( ) ;
1669+
1670+ // Parse mantissa (required)
1671+ let mantissa = self . read_decimal_part ( ) ?;
1672+ self . skip_whitespace ( ) ;
1673+
1674+ // Parse optional uncertainty
1675+ let uncertainty = if self
1676+ . source
1677+ . starts_with ( '±' )
1678+ || self
1679+ . source
1680+ . starts_with ( "+/-" )
1681+ {
1682+ if self
1683+ . source
1684+ . starts_with ( "+/-" )
1685+ {
1686+ self . advance ( 3 ) ; // Skip +/- (3 bytes)
1687+ } else {
1688+ self . advance ( 2 ) ; // Skip ± (2 bytes in UTF-8)
1689+ }
1690+ self . skip_whitespace ( ) ;
1691+ Some ( self . read_decimal_part ( ) ?)
1692+ } else {
1693+ None
1694+ } ;
1695+ self . skip_whitespace ( ) ;
1696+
1697+ // Parse optional magnitude
1698+ let magnitude = if self
1699+ . source
1700+ . starts_with ( '×' )
1701+ || self
1702+ . source
1703+ . starts_with ( 'x' )
1704+ || self
1705+ . source
1706+ . starts_with ( '*' )
1707+ {
1708+ if self
1709+ . source
1710+ . starts_with ( '×' )
1711+ {
1712+ self . advance ( 2 ) ; // Skip × (2 bytes in UTF-8)
1713+ } else {
1714+ self . advance ( 1 ) ; // Skip x or * (1 byte each)
1715+ }
1716+ self . skip_whitespace ( ) ;
1717+ if !self
1718+ . source
1719+ . starts_with ( "10" )
1720+ {
1721+ return Err ( ParsingError :: InvalidNumeric ( self . offset ) ) ;
1722+ }
1723+ self . advance ( 2 ) ; // Skip "10"
1724+
1725+ if self
1726+ . source
1727+ . starts_with ( '^' )
1728+ {
1729+ self . advance ( 1 ) ; // Skip ^
1730+ Some ( self . read_exponent_ascii ( ) ?)
1731+ } else if let Some ( exp) = self . read_exponent_superscript ( ) {
1732+ Some ( exp)
1733+ } else {
1734+ return Err ( ParsingError :: InvalidNumeric ( self . offset ) ) ;
1735+ }
1736+ } else {
1737+ None
1738+ } ;
1739+ self . skip_whitespace ( ) ;
1740+
1741+ // Parse unit symbol (required)
1742+ let symbol = self . read_units_symbol ( ) ?;
1743+
1744+ // Verify we've consumed all the input - if there are remaining characters,
1745+ // it means there was invalid content after the unit symbol
1746+ if !self
1747+ . source
1748+ . trim_ascii ( )
1749+ . is_empty ( )
1750+ {
1751+ return Err ( ParsingError :: InvalidNumeric ( self . offset ) ) ;
1752+ }
1753+
1754+ let quantity = Quantity {
1755+ mantissa,
1756+ uncertainty,
1757+ magnitude,
1758+ symbol,
1759+ } ;
1760+
1761+ Ok ( Numeric :: Scientific ( quantity) )
1762+ }
1763+
1764+ fn skip_whitespace ( & mut self ) {
1765+ while self
1766+ . source
1767+ . starts_with ( ' ' )
1768+ || self
1769+ . source
1770+ . starts_with ( '\t' )
1771+ {
1772+ self . advance ( 1 ) ;
1773+ }
1774+ }
1775+
1776+ fn read_decimal_part ( & mut self ) -> Result < crate :: language:: Decimal , ParsingError < ' i > > {
1777+ use crate :: regex:: * ;
1778+ let re = regex ! ( r"^-?[0-9]+(\.[0-9]+)?" ) ;
1779+
1780+ if let Some ( mat) = re. find ( self . source ) {
1781+ let decimal_str = mat. as_str ( ) ;
1782+ if let Some ( decimal) = crate :: language:: parse_decimal ( decimal_str) {
1783+ self . advance ( decimal_str. len ( ) ) ;
1784+ Ok ( decimal)
1785+ } else {
1786+ Err ( ParsingError :: InvalidNumeric ( self . offset ) )
1787+ }
1788+ } else {
1789+ Err ( ParsingError :: InvalidNumeric ( self . offset ) )
1790+ }
1791+ }
1792+
1793+ fn read_exponent_ascii ( & mut self ) -> Result < i8 , ParsingError < ' i > > {
1794+ use crate :: regex:: * ;
1795+ let re = regex ! ( r"^-?[0-9]+" ) ;
1796+
1797+ if let Some ( mat) = re. find ( self . source ) {
1798+ let exp_str = mat. as_str ( ) ;
1799+ if let Ok ( exp) = exp_str. parse :: < i8 > ( ) {
1800+ self . advance ( exp_str. len ( ) ) ;
1801+ Ok ( exp)
1802+ } else {
1803+ Err ( ParsingError :: InvalidNumeric ( self . offset ) )
1804+ }
1805+ } else {
1806+ Err ( ParsingError :: InvalidNumeric ( self . offset ) )
1807+ }
1808+ }
1809+
1810+ fn read_exponent_superscript ( & mut self ) -> Option < i8 > {
1811+ use crate :: regex:: * ;
1812+ let re = regex ! ( r"^[⁰¹²³⁴⁵⁶⁷⁸⁹⁻]+" ) ;
1813+
1814+ if let Some ( mat) = re. find ( self . source ) {
1815+ let super_str = mat. as_str ( ) ;
1816+ let converted = crate :: language:: convert_superscript ( super_str) ;
1817+ if let Ok ( exp) = converted. parse :: < i8 > ( ) {
1818+ self . advance ( super_str. len ( ) ) ;
1819+ Some ( exp)
1820+ } else {
1821+ None
1822+ }
1823+ } else {
1824+ None
1825+ }
1826+ }
1827+
1828+ fn read_units_symbol ( & mut self ) -> Result < & ' i str , ParsingError < ' i > > {
1829+ use crate :: regex:: * ;
1830+ let re = regex ! ( r"^[a-zA-Z°/μ]+" ) ;
1831+
1832+ if let Some ( mat) = re. find ( self . source ) {
1833+ let symbol = mat. as_str ( ) ;
1834+ self . advance ( symbol. len ( ) ) ;
1835+ Ok ( symbol)
1836+ } else {
1837+ // Point to the invalid character
1838+ Err ( ParsingError :: InvalidNumeric ( self . offset ) )
1839+ }
16491840 }
16501841
16511842 /// Parse a target like <procedure_name> or <https://example.com/proc>
@@ -2500,10 +2691,19 @@ fn malformed_response_pattern(content: &str) -> bool {
25002691}
25012692
25022693fn is_numeric ( content : & str ) -> bool {
2694+ is_numeric_integral ( content) || is_numeric_quantity ( content)
2695+ }
2696+
2697+ fn is_numeric_integral ( content : & str ) -> bool {
25032698 let integral = regex ! ( r"^\s*-?[0-9]+(\.[0-9]+)?\s*$" ) ;
2504- let scientific = regex ! ( r"^\s*-?[0-9]+(\.[0-9]+)?(\s*[a-zA-Z°/μ]|\s*±|\s*\+/-|\s*×|\s*x\s*10|\s*\*\s*10|\*\s*10)" ) ;
2699+ integral. is_match ( content)
2700+ }
25052701
2506- integral. is_match ( content) || scientific. is_match ( content)
2702+ fn is_numeric_quantity ( content : & str ) -> bool {
2703+ let scientific = regex ! (
2704+ r"^\s*-?[0-9]+(\.[0-9]+)?(\s*[a-zA-Z°/μ]|\s*±|\s*\+/-|\s*×|\s*x\s*10|\s*\*\s*10|\*\s*10)"
2705+ ) ;
2706+ scientific. is_match ( content)
25072707}
25082708
25092709fn is_string_literal ( content : & str ) -> bool {
0 commit comments