Skip to content

Commit e9fd487

Browse files
authored
Merge pull request #161 from derek73/drop-python-2-cleanup
Complete Python 2 removal and Python 3-only tooling
2 parents 41e1df4 + 6a1889b commit e9fd487

13 files changed

Lines changed: 47 additions & 122 deletions

File tree

.github/workflows/python-package.yml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ jobs:
1717
strategy:
1818
fail-fast: false
1919
matrix:
20-
python-version: ["3.10", "3.11", "3.12", "3.13"]
20+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
2121

2222
steps:
2323
- uses: actions/checkout@v4
@@ -28,12 +28,13 @@ jobs:
2828
- name: Install dependencies
2929
run: |
3030
python -m pip install --upgrade pip setuptools
31+
python -m pip install build
3132
python -m pip install twine
3233
python -m pip install sphinx
3334
if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi
3435
- name: Run Tests
3536
run: |
3637
python tests.py
37-
python setup.py sdist
38+
python -m build --sdist
3839
twine check dist/*
3940
sphinx-build -b html docs dist/docs

nameparser/config/__init__.py

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
# -*- coding: utf-8 -*-
21
"""
32
The :py:mod:`nameparser.config` module manages the configuration of the
43
nameparser.
@@ -11,7 +10,7 @@
1110
1211
>>> from nameparser.config import CONSTANTS
1312
>>> CONSTANTS.titles.remove('hon').add('chemistry','dean') # doctest: +ELLIPSIS
14-
SetManager(set([u'msgt', ..., u'adjutant']))
13+
SetManager({'msgt', ..., 'adjutant'})
1514
1615
You can also adjust the configuration of individual instances by passing
1716
``None`` as the second argument upon instantiation.
@@ -21,22 +20,16 @@
2120
>>> from nameparser import HumanName
2221
>>> hn = HumanName("Dean Robert Johns", None)
2322
>>> hn.C.titles.add('dean') # doctest: +ELLIPSIS
24-
SetManager(set([u'msgt', ..., u'adjutant']))
23+
SetManager({'msgt', ..., 'adjutant'})
2524
>>> hn.parse_full_name() # need to run this again after config changes
2625
2726
**Potential Gotcha**: If you do not pass ``None`` as the second argument,
2827
``hn.C`` will be a reference to the module config, possibly yielding
2928
unexpected results. See `Customizing the Parser <customize.html>`_.
3029
"""
31-
from __future__ import unicode_literals
3230
import sys
33-
try:
34-
# Python 3.3+
35-
from collections.abc import Set
36-
except ImportError:
37-
from collections import Set
31+
from collections.abc import Set
3832

39-
from nameparser.util import binary_type
4033
from nameparser.util import lc
4134
from nameparser.config.prefixes import PREFIXES
4235
from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS
@@ -80,9 +73,6 @@ def __contains__(self, value):
8073
def __len__(self):
8174
return len(self.elements)
8275

83-
def next(self):
84-
return self.__next__()
85-
8676
def __next__(self):
8777
if self.count >= len(self.elements):
8878
self.count = 0
@@ -102,7 +92,7 @@ def add_with_encoding(self, s, encoding=None):
10292
if sys.stdin:
10393
stdin_encoding = sys.stdin.encoding
10494
encoding = encoding or stdin_encoding or DEFAULT_ENCODING
105-
if type(s) == binary_type:
95+
if isinstance(s, bytes):
10696
s = s.decode(encoding)
10797
self.elements.add(lc(s))
10898

nameparser/config/capitalization.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import unicode_literals
3-
41
CAPITALIZATION_EXCEPTIONS = (
52
('ii', 'II'),
63
('iii', 'III'),

nameparser/config/conjunctions.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import unicode_literals
3-
41
CONJUNCTIONS = set([
52
'&',
63
'and',

nameparser/config/prefixes.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import unicode_literals
3-
41
#: Name pieces that appear before a last name. Prefixes join to the piece
52
#: that follows them to make one new piece. They can be chained together, e.g.
63
#: "von der" and "de la". Because they only appear in middle or last names,

nameparser/config/regexes.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import unicode_literals
31
import re
42

53
# emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python

nameparser/config/suffixes.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import unicode_literals
3-
41
SUFFIX_NOT_ACRONYMS = set([
52
'dr',
63
'esq',

nameparser/config/titles.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import unicode_literals
3-
41
FIRST_NAME_TITLES = set([
52
'aunt',
63
'auntie',

nameparser/parser.py

Lines changed: 10 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,7 @@
1-
# -*- coding: utf-8 -*-
2-
from __future__ import unicode_literals
3-
4-
import sys
51
import re
62
from operator import itemgetter
73
from itertools import groupby
84

9-
from nameparser.util import u
10-
from nameparser.util import text_types, binary_type
115
from nameparser.util import lc
126
from nameparser.util import log
137
from nameparser.config import CONSTANTS
@@ -123,10 +117,10 @@ def __eq__(self, other):
123117
HumanName instances are equal to other objects whose
124118
lower case string representation is the same.
125119
"""
126-
return (u(self)).lower() == (u(other)).lower()
120+
return str(self).lower() == str(other).lower()
127121

128122
def __ne__(self, other):
129-
return not (u(self)).lower() == (u(other)).lower()
123+
return not str(self).lower() == str(other).lower()
130124

131125
def __getitem__(self, key):
132126
if isinstance(key, slice):
@@ -140,9 +134,6 @@ def __setitem__(self, key, value):
140134
else:
141135
raise KeyError("Not a valid HumanName attribute", key)
142136

143-
def next(self):
144-
return self.__next__()
145-
146137
def __next__(self):
147138
if self._count >= len(self._members):
148139
self._count = 0
@@ -152,7 +143,7 @@ def __next__(self):
152143
self._count = c + 1
153144
return getattr(self, self._members[c]) or next(self)
154145

155-
def __unicode__(self):
146+
def __str__(self):
156147
if self.string_format:
157148
# string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
158149
_s = self.string_format.format(**self.as_dict())
@@ -164,11 +155,6 @@ def __unicode__(self):
164155
def __hash__(self):
165156
return hash(str(self))
166157

167-
def __str__(self):
168-
if sys.version_info[0] >= 3:
169-
return self.__unicode__()
170-
return self.__unicode__().encode(self.encoding)
171-
172158
def __repr__(self):
173159
if self.unparsable:
174160
_string = "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__, }
@@ -182,9 +168,7 @@ def __repr__(self):
182168
'suffix': self.suffix or '',
183169
'nickname': self.nickname or '',
184170
}
185-
if sys.version_info[0] >= 3:
186-
return _string
187-
return _string.encode(self.encoding)
171+
return _string
188172

189173
def as_dict(self, include_empty=True):
190174
"""
@@ -361,7 +345,7 @@ def surnames(self):
361345
def _set_list(self, attr, value):
362346
if isinstance(value, list):
363347
val = value
364-
elif isinstance(value, text_types):
348+
elif isinstance(value, (str, bytes)):
365349
val = [value]
366350
elif value is None:
367351
val = []
@@ -481,7 +465,7 @@ def full_name(self):
481465
def full_name(self, value):
482466
self.original = value
483467
self._full_name = value
484-
if isinstance(value, binary_type):
468+
if isinstance(value, bytes):
485469
self._full_name = value.decode(self.encoding)
486470
self.parse_full_name()
487471

@@ -657,7 +641,7 @@ def parse_full_name(self):
657641

658642
self.suffix_list += parts[1:]
659643
pieces = self.parse_pieces(parts[0].split(' '))
660-
log.debug("pieces: %s", u(pieces))
644+
log.debug("pieces: %s", str(pieces))
661645
for i, piece in enumerate(pieces):
662646
try:
663647
nxt = pieces[i + 1]
@@ -686,7 +670,7 @@ def parse_full_name(self):
686670
# last [suffix], title first middles[,] suffix [,suffix]
687671
# parts[0], parts[1], parts[2:...]
688672

689-
log.debug("post-comma pieces: %s", u(post_comma_pieces))
673+
log.debug("post-comma pieces: %s", str(post_comma_pieces))
690674

691675
# lastname part may have suffixes in it
692676
lastname_pieces = self.parse_pieces(parts[0].split(' '), 1)
@@ -747,7 +731,7 @@ def parse_pieces(self, parts, additional_parts_count=0):
747731

748732
output = []
749733
for part in parts:
750-
if not isinstance(part, text_types):
734+
if not isinstance(part, (str, bytes)):
751735
raise TypeError("Name parts must be strings. "
752736
"Got {0}".format(type(part)))
753737
output += [x.strip(' ,') for x in part.split(' ')]
@@ -981,7 +965,7 @@ def capitalize(self, force=None):
981965
'Shirley MacLaine'
982966
983967
"""
984-
name = u(self)
968+
name = str(self)
985969
force = self.C.force_mixed_case_capitalization \
986970
if force is None else force
987971

nameparser/util.py

Lines changed: 1 addition & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -2,36 +2,10 @@
22

33
# http://code.google.com/p/python-nameparser/issues/detail?id=10
44
log = logging.getLogger('HumanName')
5-
try:
6-
log.addHandler(logging.NullHandler())
7-
except AttributeError:
8-
class NullHandler(logging.Handler):
9-
def emit(self, record):
10-
pass
11-
log.addHandler(NullHandler())
5+
log.addHandler(logging.NullHandler())
126
log.setLevel(logging.ERROR)
137

148

15-
import sys
16-
if sys.version_info[0] < 3:
17-
18-
text_type = unicode
19-
binary_type = str
20-
21-
def u(x, encoding=None):
22-
if encoding:
23-
return unicode(x, encoding)
24-
else:
25-
return unicode(x)
26-
27-
else:
28-
text_type = str
29-
binary_type = bytes
30-
31-
def u(x, encoding=None):
32-
return text_type(x)
33-
34-
text_types = (text_type, binary_type)
359
def lc(value):
3610
"""Lower case and remove any periods to normalize for comparison."""
3711
if not value:

0 commit comments

Comments
 (0)