88import functools
99import logging
1010import os
11+ import re
1112import types
1213import unicodedata
1314
@@ -1648,7 +1649,8 @@ class _MathStyle(enum.Enum):
16481649 \cdot \bigtriangledown \bigcirc
16491650 \cap \triangleleft \dagger
16501651 \cup \triangleright \ddagger
1651- \uplus \lhd \amalg''' .split ())
1652+ \uplus \lhd \amalg
1653+ \dotplus \dotminus''' .split ())
16521654
16531655 _relation_symbols = set (r'''
16541656 = < > :
@@ -1661,7 +1663,7 @@ class _MathStyle(enum.Enum):
16611663 \sqsubset \sqsupset \neq \smile
16621664 \sqsubseteq \sqsupseteq \doteq \frown
16631665 \in \ni \propto \vdash
1664- \dashv \dots \dotplus \ doteqdot''' .split ())
1666+ \dashv \dots \doteqdot''' .split ())
16651667
16661668 _arrow_symbols = set (r'''
16671669 \leftarrow \longleftarrow \uparrow
@@ -1717,24 +1719,36 @@ def set_names_and_parse_actions():
17171719
17181720 # Root definitions.
17191721
1722+ # In TeX parlance, a csname is a control sequence name (a "\foo").
def csnames(group, names):
    r"""
    Build a ``Regex`` element matching a backslash followed by one of *names*.

    The matched name (without the leading backslash) is captured in the
    regex named group *group*.

    Names whose last character is alphabetic are only matched when they are
    not immediately followed by an ASCII letter, so that a short name never
    matches as a prefix of a longer control sequence.  Names ending in a
    non-alphabetic character are matched verbatim, with no such restriction.

    Parameters
    ----------
    group : str
        Name of the regex capture group holding the matched name.
    names : iterable of str
        Control sequence names, without the leading backslash.  Every name
        is passed through `re.escape`.

    Raises
    ------
    IndexError
        If any name is the empty string (its last character is inspected).
    """
    ends_with_alpha = []
    ends_with_nonalpha = []
    for name in names:
        if name[-1].isalpha():
            ends_with_alpha.append(name)
        else:
            ends_with_nonalpha.append(name)

    branches = []
    if ends_with_alpha:
        # Only emit the lookahead-guarded branch when it has alternatives;
        # an empty "(?:)" would otherwise match a bare backslash.
        branches.append("(?:{})(?![A-Za-z])".format(
            "|".join(map(re.escape, ends_with_alpha))))
    # No padding around the escaped names: any extra whitespace here would
    # become a literal part of the pattern.
    branches.extend(map(re.escape, ends_with_nonalpha))

    # "(?!)" never matches, so an empty *names* yields an element that
    # matches nothing rather than one that matches a lone backslash.
    alternatives = "|".join(branches) if branches else "(?!)"
    return Regex(r"\\(?P<{}>{})".format(group, alternatives))
1736+
17201737 p .float_literal = Regex (r"[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)" )
17211738 p .space = oneOf (self ._space_widths )("space" )
17221739
17231740 p .style_literal = oneOf (
17241741 [str (e .value ) for e in self ._MathStyle ])("style_literal" )
17251742
1726- p .single_symbol = Regex (
1727- r"([a-zA-Z0-9 +\-*/<>=:,.;!\?&'@()\[\]|%s])|(\\[%%${}\[\]_|])" %
1728- "\U00000080 -\U0001ffff " # unicode range
1729- )("sym" )
1730- p .accentprefixed = "\\ " + oneOf (self ._accentprefixed )("sym" )
1731- p .symbol_name = (
1732- oneOf ([rf"\{ sym } " for sym in tex2uni ])("sym" )
1733- + Regex ("(?=[^A-Za-z]|$)" ).leaveWhitespace ())
1734- p .symbol = (p .single_symbol | p .symbol_name ).leaveWhitespace ()
1743+ p .symbol = Regex (
1744+ r"[a-zA-Z0-9 +\-*/<>=:,.;!\?&'@()\[\]|\U00000080-\U0001ffff]"
1745+ r"|\\[%${}\[\]_|]"
1746+ + r"|\\(?:{})(?![A-Za-z])" .format (
1747+ "|" .join (map (re .escape , tex2uni )))
1748+ )("sym" ).leaveWhitespace ()
17351749 p .unknown_symbol = Regex (r"\\[A-Za-z]*" )("name" )
17361750
1737- p .font = " \\ " + oneOf ( self ._fontnames )( "font" )
1751+ p .font = csnames ( "font" , self ._fontnames )
17381752 p .start_group = (
17391753 Optional (r"\math" + oneOf (self ._fontnames )("font" )) + "{" )
17401754 p .end_group = Literal ("}" )
@@ -1771,11 +1785,10 @@ def set_names_and_parse_actions():
17711785 p .customspace <<= cmd (r"\hspace" , "{" + p .float_literal ("space" ) + "}" )
17721786
17731787 p .accent <<= (
1774- "\\ "
1775- + oneOf ([* self ._accent_map , * self ._wide_accents ])("accent" )
1788+ csnames ("accent" , [* self ._accent_map , * self ._wide_accents ])
17761789 - p .placeable ("sym" ))
17771790
1778- p .function <<= " \\ " + oneOf ( self ._function_names )( "name" )
1791+ p .function <<= csnames ( "name" , self ._function_names )
17791792 p .operatorname <<= cmd (
17801793 r"\operatorname" ,
17811794 "{" + ZeroOrMore (p .simple | p .unknown_symbol )("name" ) + "}" )
@@ -1816,10 +1829,8 @@ def set_names_and_parse_actions():
18161829 p .optional_group ("annotation" ) + p .optional_group ("body" ))
18171830
18181831 p .placeable <<= (
1819- p .accentprefixed # Must be before accent so named symbols that are
1820- # prefixed with an accent name work
1821- | p .accent # Must be before symbol as all accents are symbols
1822- | p .symbol # Must be third to catch all named symbols and single
1832+ p .accent # Must be before symbol as all accents are symbols
1833+ | p .symbol # Must be second to catch all named symbols and single
18231834 # chars not in a group
18241835 | p .function
18251836 | p .operatorname
@@ -2019,8 +2030,6 @@ def symbol(self, s, loc, toks):
20192030 return [Hlist ([char , self ._make_space (0.2 )], do_kern = True )]
20202031 return [char ]
20212032
2022- accentprefixed = symbol
2023-
def unknown_symbol(self, s, loc, toks):
    """
    Parse action for a control sequence that matched no known symbol.

    Always raises `ParseFatalException` at *loc* in *s*, naming the
    offending token taken from ``toks['name']``.
    """
    name = toks['name']
    # The message ends right after the name, with no trailing padding.
    raise ParseFatalException(s, loc, f"Unknown symbol: {name}")
20262035
@@ -2049,12 +2058,6 @@ def unknown_symbol(self, s, loc, toks):
20492058
20502059 _wide_accents = set (r"widehat widetilde widebar" .split ())
20512060
2052- # make a lambda and call it to get the namespace right
2053- _accentprefixed = (lambda am : [
2054- p for p in tex2uni
2055- if any (p .startswith (a ) and a != p for a in am )
2056- ])(set (_accent_map ))
2057-
20582061 def accent (self , s , loc , toks ):
20592062 state = self .get_state ()
20602063 thickness = state .get_current_underline_thickness ()
0 commit comments