Skip to content

Commit 46107ad

Browse files
authored
gh-92455: Respect case-sensitive mimetype suffixes (GH-148782)
1 parent 9074876 commit 46107ad

4 files changed

Lines changed: 72 additions & 5 deletions

File tree

Doc/library/mimetypes.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ the information :func:`init` sets up.
3939
(e.g. :program:`compress` or :program:`gzip`). The encoding is suitable for use
4040
as a :mailheader:`Content-Encoding` header, **not** as a
4141
:mailheader:`Content-Transfer-Encoding` header. The mappings are table driven.
42-
Encoding suffixes are case sensitive; type suffixes are first tried case
43-
sensitively, then case insensitively.
42+
Encoding suffixes are case-sensitive. Suffix mappings and type suffixes are
43+
first tried case-sensitively, then case-insensitively.
4444

4545
The optional *strict* argument is a flag specifying whether the list of known MIME types
4646
is limited to only the official types `registered with IANA
@@ -131,6 +131,8 @@ behavior of the module.
131131
is already known the extension will be added to the list of known extensions.
132132
Valid extensions are empty or start with a ``'.'``.
133133

134+
Registered lower-case extensions are matched case-insensitively.
135+
134136
When *strict* is ``True`` (the default), the mapping will be added to the
135137
official MIME types, otherwise to the non-standard ones.
136138

@@ -312,6 +314,8 @@ than one MIME-type database; it provides an interface similar to the one of the
312314
extension is already known, the new type will replace the old one. When the type
313315
is already known the extension will be added to the list of known extensions.
314316

317+
Registered lower-case extensions are matched case-insensitively.
318+
315319
When *strict* is ``True`` (the default), the mapping will be added to the
316320
official MIME types, otherwise to the non-standard ones.
317321

Lib/mimetypes.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ def add_type(self, type, ext, strict=True):
8686
is already known the extension will be added
8787
to the list of known extensions.
8888
89+
Registered lower-case extensions are matched
90+
case-insensitively.
91+
8992
If strict is true, information will be added to
9093
the list of standard types, else to the list of non-standard
9194
types.
@@ -172,23 +175,33 @@ def guess_file_type(self, path, *, strict=True):
172175

173176
def _guess_file_type(self, path, strict, splitext):
174177
base, ext = splitext(path)
175-
while (ext_lower := ext.lower()) in self.suffix_map:
176-
base, ext = splitext(base + self.suffix_map[ext_lower])
178+
while True:
179+
if ext in self.suffix_map:
180+
suffix = self.suffix_map[ext]
181+
elif (ext_lower := ext.lower()) in self.suffix_map:
182+
suffix = self.suffix_map[ext_lower]
183+
else:
184+
break
185+
base, ext = splitext(base + suffix)
177186
# encodings_map is case sensitive
178187
if ext in self.encodings_map:
179188
encoding = self.encodings_map[ext]
180189
base, ext = splitext(base)
181190
else:
182191
encoding = None
183-
ext = ext.lower()
192+
ext_lower = ext.lower()
184193
types_map = self.types_map[True]
185194
if ext in types_map:
186195
return types_map[ext], encoding
196+
if ext_lower in types_map:
197+
return types_map[ext_lower], encoding
187198
elif strict:
188199
return None, encoding
189200
types_map = self.types_map[False]
190201
if ext in types_map:
191202
return types_map[ext], encoding
203+
if ext_lower in types_map:
204+
return types_map[ext_lower], encoding
192205
else:
193206
return None, encoding
194207

@@ -386,6 +399,9 @@ def add_type(type, ext, strict=True):
386399
is already known the extension will be added
387400
to the list of known extensions.
388401
402+
Registered lower-case extensions are matched
403+
case-insensitively.
404+
389405
If strict is true, information will be added to
390406
the list of standard types, else to the list of non-standard
391407
types.

Lib/test/test_mimetypes.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,50 @@ def test_case_sensitivity(self):
287287
eq(self.db.guess_file_type("foobar.tar.z"), (None, None))
288288
eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None))
289289

290+
def test_suffix_map_case_sensitive_preferred(self):
291+
self.db.suffix_map[".TEST-SUFFIX"] = ".tar.gz"
292+
self.db.suffix_map[".test-suffix"] = ".tar.xz"
293+
self.assertEqual(
294+
self.db.guess_file_type("example.TEST-SUFFIX"),
295+
("application/x-tar", "gzip"),
296+
)
297+
self.assertEqual(
298+
self.db.guess_file_type("example.test-suffix"),
299+
("application/x-tar", "xz"),
300+
)
301+
302+
def test_added_types_case_sensitive_preferred(self):
303+
self.db.add_type("text/x-test-uppercase-r", ".R")
304+
self.db.add_type("text/x-test-lowercase-r", ".r")
305+
self.assertEqual(
306+
self.db.guess_file_type("example.R"),
307+
("text/x-test-uppercase-r", None),
308+
)
309+
self.assertEqual(
310+
self.db.guess_file_type("example.r"),
311+
("text/x-test-lowercase-r", None),
312+
)
313+
self.db.add_type("text/x-test-uppercase-non-strict",
314+
".NON-STRICT-EXT", strict=False)
315+
self.db.add_type("text/x-test-lowercase-non-strict",
316+
".non-strict-ext", strict=False)
317+
self.assertEqual(
318+
self.db.guess_file_type("example.NON-STRICT-EXT"),
319+
(None, None),
320+
)
321+
self.assertEqual(
322+
self.db.guess_file_type("example.non-strict-ext"),
323+
(None, None),
324+
)
325+
self.assertEqual(
326+
self.db.guess_file_type("example.NON-STRICT-EXT", strict=False),
327+
("text/x-test-uppercase-non-strict", None),
328+
)
329+
self.assertEqual(
330+
self.db.guess_file_type("example.non-strict-ext", strict=False),
331+
("text/x-test-lowercase-non-strict", None),
332+
)
333+
290334
def test_default_data(self):
291335
eq = self.assertEqual
292336
eq(self.db.guess_file_type("foo.html"), ("text/html", None))
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :mod:`mimetypes` to prefer case-sensitive matches for suffix mappings and
2+
MIME type suffixes before falling back to case-insensitive matches.
3+
Contributed by Xiao Yuan.

0 commit comments

Comments
 (0)