Skip to content

Commit a76d957

Browse files
gh-139489: Add xml.is_valid_name() (GH-139768)
It allows checking whether a string can be used as an element or attribute name in XML.
1 parent 02d02f4 commit a76d957

6 files changed

Lines changed: 79 additions & 1 deletion

File tree

Doc/library/xml.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,21 @@ The XML handling submodules are:
4141
* :mod:`xml.sax`: SAX2 base classes and convenience functions
4242
* :mod:`xml.parsers.expat`: the Expat parser binding
4343

44+
This module also defines utility functions.
45+
46+
.. function:: is_valid_name(name)
47+
48+
Return ``True`` if the string is a valid element or attribute name,
49+
``False`` otherwise.
50+
51+
Almost all characters are permitted in names, except control characters and
52+
those which either are or reasonably could be used as delimiters.
53+
Characters like ":", "-", ".", "_", and "·" are permitted, but "<", "/",
54+
"!", "?", and "=" are forbidden.
55+
The name cannot start with a digit or a character like "-", ".", or "·".
56+
57+
.. versionadded:: next
58+
4459

4560
.. _xml-security:
4661
.. _xml-vulnerabilities:

Doc/whatsnew/3.15.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,6 +1653,14 @@ wave
16531653
(Contributed by Lionel Koenig and Michiel W. Beijen in :gh:`60729`.)
16541654

16551655

1656+
xml
1657+
---
1658+
1659+
* Add the :func:`xml.is_valid_name` function, which allows checking
1660+
whether a string can be used as an element or attribute name in XML.
1661+
(Contributed by Serhiy Storchaka in :gh:`139489`.)
1662+
1663+
16561664
xml.parsers.expat
16571665
-----------------
16581666

Lib/test/test_xml.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import xml
2+
import unittest
3+
4+
5+
class TestUtils(unittest.TestCase):
6+
7+
def test_is_valid_name(self):
8+
is_valid_name = xml.is_valid_name
9+
self.assertFalse(is_valid_name(''))
10+
self.assertTrue(is_valid_name('name'))
11+
self.assertTrue(is_valid_name('NAME'))
12+
self.assertTrue(is_valid_name('name0:-._·'))
13+
self.assertTrue(is_valid_name('_'))
14+
self.assertTrue(is_valid_name(':'))
15+
self.assertTrue(is_valid_name('Ñàḿĕ'))
16+
self.assertTrue(is_valid_name('\U000EFFFF'))
17+
self.assertFalse(is_valid_name('0'))
18+
self.assertFalse(is_valid_name('-'))
19+
self.assertFalse(is_valid_name('.'))
20+
self.assertFalse(is_valid_name('·'))
21+
self.assertFalse(is_valid_name('na me'))
22+
for c in '<>/!?=\x00\x01\x7f\ud800\udfff\ufffe\uffff\U000F0000':
23+
self.assertFalse(is_valid_name('name' + c))
24+
25+
26+
# Run the tests in this file when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

Lib/xml/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,6 @@
1616
1717
"""
1818

19+
from .utils import *
1920

20-
__all__ = ["dom", "parsers", "sax", "etree"]
21+
__all__ = ["dom", "parsers", "sax", "etree", "is_valid_name"]

Lib/xml/utils.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
lazy import re as _re
2+
3+
4+
def is_valid_name(name):
5+
"""Test whether a string is a valid element or attribute name."""
6+
# https://www.w3.org/TR/xml/#NT-Name
7+
return _re.fullmatch(
8+
# NameStartChar
9+
'['
10+
':A-Z_a-z'
11+
'\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF'
12+
'\u200C\u200D'
13+
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
14+
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
15+
']'
16+
# NameChar
17+
'['
18+
r'\-.0-9:A-Z_a-z'
19+
'\xB7'
20+
'\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF'
21+
'\u200C\u200D\u203F\u2040'
22+
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
23+
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
24+
']*+',
25+
name) is not None
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add the :func:`xml.is_valid_name` function, which allows checking
2+
whether a string can be used as an element or attribute name in XML.

0 commit comments

Comments
 (0)