Skip to content

Commit 5fac25f

Browse files
authored
Version 2.1.1 (#142)
1 parent 22a17be commit 5fac25f

4 files changed

Lines changed: 36 additions & 9 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Changelog
22
=========
33

4+
Version 2.1.1
5+
-------------
6+
7+
- Fixing #141: deep scan no longer overrides valid binary format matches (e.g. JPEG) for files without extensions (thanks to marph91)
8+
49
Version 2.1.0
510
-------------
611

puremagic/main.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
)
3636

3737
__author__ = "Chris Griffith"
38-
__version__ = "2.1.0"
38+
__version__ = "2.1.1"
3939
__all__ = [
4040
"magic_file",
4141
"magic_string",
@@ -212,13 +212,12 @@ def perform_magic(header: bytes, footer: bytes, mime: bool, ext=None, filename=N
212212
raise PureValueError("Input was empty")
213213
infos = identify_all(header, footer, ext)
214214
if filename and os.path.isfile(filename) and os.getenv("PUREMAGIC_DEEPSCAN") != "0":
215-
results = run_deep_scan(infos, filename, header, footer, raise_on_none=True)
216-
if results:
217-
if results[0].extension == "":
218-
raise PureError("Could not identify file")
215+
results = run_deep_scan(infos, filename, header, footer, raise_on_none=not infos)
216+
if results and results[0].extension != "":
219217
if mime:
220218
return results[0].mime_type
221219
return results[0].extension or ""
220+
# Deep scan returned empty extension or no results — fall through to original matches
222221
if not infos:
223222
raise PureError("Could not identify file")
224223
info = infos[0]
@@ -556,13 +555,18 @@ def run_deep_scan(
556555

557556
# No specific scanner matched — try the catch-all text scanner
558557
# Only override when existing matches are very low confidence (e.g. 2-byte BOM signatures)
559-
if matches[0].confidence < 0.5:
558+
# Only let the catch-all text scanner override when existing matches are
559+
# generic text types (e.g. BOM-only signatures). If the magic database
560+
# already identified a specific non-text file type, trust it over a generic text guess.
561+
best_mime = matches[0].mime_type or ""
562+
is_generic = best_mime.startswith("text/") or best_mime == "application/octet-stream" or not best_mime
563+
if matches[0].confidence < 0.5 and is_generic:
560564
try:
561565
result = catch_all_deep_scan(filename, head, foot)
562566
except Exception:
563567
pass
564568
else:
565-
if result and result.confidence > matches[0].confidence:
569+
if result and result.extension and result.confidence > matches[0].confidence:
566570
return [
567571
PureMagicWithConfidence(
568572
confidence=result.confidence,

test/test_common_extensions.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,6 @@ def test_riff_wav_mime():
269269
assert mime == "audio/wav"
270270

271271

272-
273272
def test_cfbf_doc():
274273
"""CFBF scanner correctly identifies Word .doc"""
275274
ext = puremagic.from_file(os.path.join(OFFICE_DIR, "test.doc"))

test/test_scanners.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import puremagic
2-
from test.common import OFFICE_DIR, SYSTEM_DIR, AUDIO_DIR
2+
from test.common import IMAGE_DIR, OFFICE_DIR, SYSTEM_DIR, AUDIO_DIR
33
from puremagic.scanners import python_scanner, json_scanner, sndhdr_scanner
44

55
sample_text = b"""Lorem ipsum dolor sit amet, consectetur adipiscing elit,{ending}
@@ -104,6 +104,25 @@ def test_eml_scanner():
104104
assert results[0].confidence == 1.0
105105

106106

107+
def test_jpg_without_extension():
108+
# GH #141: JPEG file without extension should still be identified as image/jpeg
109+
import struct
110+
111+
data = b"\xff\xd8\xff\xe0"
112+
data += struct.pack(">H", 16)
113+
data += b"JFIF\x00\x01\x01\x00"
114+
data += struct.pack(">HH", 1, 1)
115+
data += b"\x00\x00\xff\xd9"
116+
117+
no_ext_file = IMAGE_DIR / "test_jpeg_no_ext"
118+
no_ext_file.write_bytes(data)
119+
try:
120+
result = puremagic.from_file(no_ext_file, mime=True)
121+
assert result == "image/jpeg", f"Expected image/jpeg, got {result}"
122+
finally:
123+
no_ext_file.unlink()
124+
125+
107126
def test_sndhdr_scanner():
108127
# Test the sndhdr scanner with sndr file
109128
sndr_file = AUDIO_DIR / "test.sndr"

0 commit comments

Comments
 (0)