Merge pull request #161 from derek73/drop-python-2-cleanup

derek73 · web-flow · commit e9fd4873fa4a · 2026-06-07T14:31:56.000-07:00
Complete Python 2 removal and Python 3-only tooling
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -17,7 +17,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
 
     steps:
     - uses: actions/checkout@v4
@@ -28,12 +28,13 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip setuptools
+        python -m pip install build
         python -m pip install twine
         python -m pip install sphinx
         if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi
     - name: Run Tests
       run: |
         python tests.py
-        python setup.py sdist
+        python -m build --sdist
         twine check dist/*
         sphinx-build -b html docs dist/docs
diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 The :py:mod:`nameparser.config` module manages the configuration of the
 nameparser. 
@@ -11,7 +10,7 @@
 
     >>> from nameparser.config import CONSTANTS
     >>> CONSTANTS.titles.remove('hon').add('chemistry','dean') # doctest: +ELLIPSIS
-    SetManager(set([u'msgt', ..., u'adjutant']))
+    SetManager({'msgt', ..., 'adjutant'})
 
 You can also adjust the configuration of individual instances by passing
 ``None`` as the second argument upon instantiation.
@@ -21,22 +20,16 @@
     >>> from nameparser import HumanName
     >>> hn = HumanName("Dean Robert Johns", None)
     >>> hn.C.titles.add('dean') # doctest: +ELLIPSIS
-    SetManager(set([u'msgt', ..., u'adjutant']))
+    SetManager({'msgt', ..., 'adjutant'})
     >>> hn.parse_full_name() # need to run this again after config changes
 
 **Potential Gotcha**: If you do not pass ``None`` as the second argument,
 ``hn.C`` will be a reference to the module config, possibly yielding 
 unexpected results. See `Customizing the Parser <customize.html>`_.
 """
-from __future__ import unicode_literals
 import sys
-try:
-    # Python 3.3+
-    from collections.abc import Set
-except ImportError:
-    from collections import Set
+from collections.abc import Set
 
-from nameparser.util import binary_type
 from nameparser.util import lc
 from nameparser.config.prefixes import PREFIXES
 from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS
@@ -80,9 +73,6 @@ def __contains__(self, value):
     def __len__(self):
         return len(self.elements)
 
-    def next(self):
-        return self.__next__()
-
     def __next__(self):
         if self.count >= len(self.elements):
             self.count = 0
@@ -102,7 +92,7 @@ def add_with_encoding(self, s, encoding=None):
         if sys.stdin:
             stdin_encoding = sys.stdin.encoding
         encoding = encoding or stdin_encoding or DEFAULT_ENCODING
-        if type(s) == binary_type:
+        if isinstance(s, bytes):
             s = s.decode(encoding)
         self.elements.add(lc(s))
 
diff --git a/nameparser/config/capitalization.py b/nameparser/config/capitalization.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 CAPITALIZATION_EXCEPTIONS = (
     ('ii', 'II'),
     ('iii', 'III'),
diff --git a/nameparser/config/conjunctions.py b/nameparser/config/conjunctions.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 CONJUNCTIONS = set([
     '&',
     'and',
diff --git a/nameparser/config/prefixes.py b/nameparser/config/prefixes.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 #: Name pieces that appear before a last name. Prefixes join to the piece
 #: that follows them to make one new piece. They can be chained together, e.g
 #: "von der" and "de la". Because they only appear in middle or last names,
diff --git a/nameparser/config/regexes.py b/nameparser/config/regexes.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import re
 
 # emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python
diff --git a/nameparser/config/suffixes.py b/nameparser/config/suffixes.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 SUFFIX_NOT_ACRONYMS = set([
     'dr',
     'esq',
diff --git a/nameparser/config/titles.py b/nameparser/config/titles.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
 FIRST_NAME_TITLES = set([
     'aunt',
     'auntie',
diff --git a/nameparser/parser.py b/nameparser/parser.py
@@ -1,13 +1,7 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-import sys
 import re
 from operator import itemgetter
 from itertools import groupby
 
-from nameparser.util import u
-from nameparser.util import text_types, binary_type
 from nameparser.util import lc
 from nameparser.util import log
 from nameparser.config import CONSTANTS
@@ -123,10 +117,10 @@ def __eq__(self, other):
         HumanName instances are equal to other objects whose
         lower case unicode representation is the same.
         """
-        return (u(self)).lower() == (u(other)).lower()
+        return str(self).lower() == str(other).lower()
 
     def __ne__(self, other):
-        return not (u(self)).lower() == (u(other)).lower()
+        return not str(self).lower() == str(other).lower()
 
     def __getitem__(self, key):
         if isinstance(key, slice):
@@ -140,9 +134,6 @@ def __setitem__(self, key, value):
         else:
             raise KeyError("Not a valid HumanName attribute", key)
 
-    def next(self):
-        return self.__next__()
-
     def __next__(self):
         if self._count >= len(self._members):
             self._count = 0
@@ -152,7 +143,7 @@ def __next__(self):
             self._count = c + 1
             return getattr(self, self._members[c]) or next(self)
 
-    def __unicode__(self):
+    def __str__(self):
         if self.string_format:
             # string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
             _s = self.string_format.format(**self.as_dict())
@@ -164,11 +155,6 @@ def __unicode__(self):
     def __hash__(self):
         return hash(str(self))
 
-    def __str__(self):
-        if sys.version_info[0] >= 3:
-            return self.__unicode__()
-        return self.__unicode__().encode(self.encoding)
-
     def __repr__(self):
         if self.unparsable:
             _string = "<%(class)s : [ Unparsable ] >" % {'class': self.__class__.__name__, }
@@ -182,9 +168,7 @@ def __repr__(self):
                 'suffix': self.suffix or '',
                 'nickname': self.nickname or '',
             }
-        if sys.version_info[0] >= 3:
-            return _string
-        return _string.encode(self.encoding)
+        return _string
 
     def as_dict(self, include_empty=True):
         """
@@ -361,7 +345,7 @@ def surnames(self):
     def _set_list(self, attr, value):
         if isinstance(value, list):
             val = value
-        elif isinstance(value, text_types):
+        elif isinstance(value, (str, bytes)):
             val = [value]
         elif value is None:
             val = []
@@ -481,7 +465,7 @@ def full_name(self):
     def full_name(self, value):
         self.original = value
         self._full_name = value
-        if isinstance(value, binary_type):
+        if isinstance(value, bytes):
             self._full_name = value.decode(self.encoding)
         self.parse_full_name()
 
@@ -657,7 +641,7 @@ def parse_full_name(self):
 
                 self.suffix_list += parts[1:]
                 pieces = self.parse_pieces(parts[0].split(' '))
-                log.debug("pieces: %s", u(pieces))
+                log.debug("pieces: %s", str(pieces))
                 for i, piece in enumerate(pieces):
                     try:
                         nxt = pieces[i + 1]
@@ -686,7 +670,7 @@ def parse_full_name(self):
                 # last [suffix], title first middles[,] suffix [,suffix]
                 #      parts[0],      parts[1],              parts[2:...]
 
-                log.debug("post-comma pieces: %s", u(post_comma_pieces))
+                log.debug("post-comma pieces: %s", str(post_comma_pieces))
 
                 # lastname part may have suffixes in it
                 lastname_pieces = self.parse_pieces(parts[0].split(' '), 1)
@@ -747,7 +731,7 @@ def parse_pieces(self, parts, additional_parts_count=0):
 
         output = []
         for part in parts:
-            if not isinstance(part, text_types):
+            if not isinstance(part, (str, bytes)):
                 raise TypeError("Name parts must be strings. "
                                 "Got {0}".format(type(part)))
             output += [x.strip(' ,') for x in part.split(' ')]
@@ -981,7 +965,7 @@ def capitalize(self, force=None):
             'Shirley MacLaine'
 
         """
-        name = u(self)
+        name = str(self)
         force = self.C.force_mixed_case_capitalization \
             if force is None else force
 
diff --git a/nameparser/util.py b/nameparser/util.py
@@ -2,36 +2,10 @@
 
 # http://code.google.com/p/python-nameparser/issues/detail?id=10
 log = logging.getLogger('HumanName')
-try:
-    log.addHandler(logging.NullHandler())
-except AttributeError:
-    class NullHandler(logging.Handler):
-        def emit(self, record):
-            pass
-    log.addHandler(NullHandler())
+log.addHandler(logging.NullHandler())
 log.setLevel(logging.ERROR)
 
 
-import sys
-if sys.version_info[0] < 3:
-
-    text_type = unicode
-    binary_type = str
-
-    def u(x, encoding=None):
-        if encoding:
-            return unicode(x, encoding)
-        else:
-            return unicode(x)
-
-else:
-    text_type = str
-    binary_type = bytes
-
-    def u(x, encoding=None):
-        return text_type(x)
-
-text_types = (text_type, binary_type)
 def lc(value):
     """Lower case and remove any periods to normalize for comparison."""
     if not value:
diff --git a/setup.cfg b/setup.cfg
diff --git a/setup.py b/setup.py
@@ -1,8 +1,5 @@
 #!/usr/bin/env python
-try:
-  from setuptools import setup
-except ImportError:
-  from distutils.core import setup
+from setuptools import setup
 import nameparser
 import os
 
@@ -33,6 +30,7 @@ def read(fname):
           'Programming Language :: Python :: 3.11',
           'Programming Language :: Python :: 3.12',
           'Programming Language :: Python :: 3.13',
+          'Programming Language :: Python :: 3.14',
           'Development Status :: 5 - Production/Stable',
           'Natural Language :: English',
           "Topic :: Software Development :: Libraries :: Python Modules",
diff --git a/tests.py b/tests.py

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
 import re
 
 # emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python
`4`	`2`
`5`	`3`	`# emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python`