From da0125e9547646927668938bb49c39bb1d4b5353 Mon Sep 17 00:00:00 2001
From: Yash Chhabria <yash2998chhabria@gmail.com>
Date: Mon, 16 Mar 2026 11:08:09 -0700
Subject: [PATCH 1/4] fix: accept executorch flatbuffer binaries

Recognize valid ExecuTorch FlatBuffers programs in .pte files,
prevent file-type validation noise for those binaries, and add
regression coverage for scanner and detection helpers.

Co-Authored-By: Codex <noreply@openai.com>
---
 CHANGELOG.md                              |  4 ++++
 modelaudit/scanners/executorch_scanner.py | 19 ++++++++++++++++++-
 modelaudit/utils/file/detection.py        | 10 +++++++++-
 tests/conftest.py                         |  1 +
 tests/scanners/test_executorch_scanner.py | 11 +++++++++++
 tests/utils/file/test_filetype.py         |  7 +++++++
 6 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44ce0e28..7b296120 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -81,6 +81,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- eliminate false positives for valid ExecuTorch FlatBuffers binaries and file-type validation on public `.pte` models
+
+### Fixed
+
 - **security:** remove `dill.load` / `dill.loads` from the pickle safe-global allowlist so recursive dill deserializers stay flagged as dangerous loader entry points
 - **security:** add exact dangerous helper coverage for validated torch and NumPy refs such as `numpy.f2py.crackfortran.getlincoef`, `torch._dynamo.guards.GuardBuilder.get`, and `torch.utils.collect_env.run`
 - **security:** add exact dangerous-global coverage for `numpy.load`, `site.main`, `_io.FileIO`, `test.support.script_helper.assert_python_ok`, `_osx_support._read_output`, `_aix_support._read_cmd_output`, `_pyrepl.pager.pipe_pager`, `torch.serialization.load`, and `torch._inductor.codecache.compile_file` (9 PickleScan-only loader and execution primitives)
diff --git a/modelaudit/scanners/executorch_scanner.py b/modelaudit/scanners/executorch_scanner.py
index 102369ea..154945e5 100644
--- a/modelaudit/scanners/executorch_scanner.py
+++ b/modelaudit/scanners/executorch_scanner.py
@@ -37,6 +37,11 @@ def _read_header(path: str, length: int = 4) -> bytes:
         except Exception:
             return b""
 
+    @staticmethod
+    def _is_executorch_binary(header: bytes) -> bool:
+        # Real-world .pte files use a FlatBuffers-style file identifier at bytes 4..7.
+        return len(header) >= 8 and header[4:6] == b"ET"
+
     def scan(self, path: str) -> ScanResult:
         path_check_result = self._check_path(path)
         if path_check_result:
@@ -50,7 +55,19 @@ def scan(self, path: str) -> ScanResult:
         file_size = self.get_file_size(path)
         result.metadata["file_size"] = file_size
 
-        header = self._read_header(path)
+        header = self._read_header(path, length=8)
+        if self._is_executorch_binary(header):
+            result.add_check(
+                name="ExecuTorch Binary Format Validation",
+                passed=True,
+                message="Valid ExecuTorch binary program format detected",
+                location=path,
+                details={"path": path, "format": "executorch_binary"},
+            )
+            result.bytes_scanned = file_size
+            result.finish(success=True)
+            return result
+
         if not header.startswith(b"PK"):
             result.add_check(
                 name="ExecuTorch Archive Format Validation",
diff --git a/modelaudit/utils/file/detection.py b/modelaudit/utils/file/detection.py
index f4ffd1f6..40bfb6c5 100644
--- a/modelaudit/utils/file/detection.py
+++ b/modelaudit/utils/file/detection.py
@@ -189,6 +189,11 @@ def _is_lightgbm_signature(prefix: bytes) -> bool:
     return (starts_with_tree or "tree=" in preview) and header_hits >= 3 and tree_hits >= 2 and not xgboost_like
 
 
+def _is_executorch_binary_signature(prefix: bytes) -> bool:
+    """Recognize ExecuTorch FlatBuffers binaries by their file identifier."""
+    return len(prefix) >= 8 and prefix[4:8] == b"ET12"
+
+
 def _is_zlib_header(prefix: bytes) -> bool:
     if len(prefix) < 2:
         return False
@@ -312,6 +317,9 @@ def detect_file_format_from_magic(path: str) -> str:
             magic8 = header[:8]
             magic16 = header[:16]
 
+            if _is_executorch_binary_signature(header):
+                return "executorch"
+
             # Try the new pattern matching approach first
             format_result = detect_format_from_magic_bytes(magic4, magic8, magic16)
             if format_result != "unknown":
@@ -845,7 +853,7 @@ def validate_file_type(path: str) -> bool:
 
         # ExecuTorch files should be zip archives
         if ext_format == "executorch":
-            return header_format == "zip"
+            return header_format == "zip" or _is_executorch_binary_signature(read_magic_bytes(path, 16))
 
         # Keras files can be either ZIP (Keras 3.x) or HDF5 (legacy Keras)
         if ext_format == "keras":
diff --git a/tests/conftest.py b/tests/conftest.py
index 848f456b..599cd7ba 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -102,6 +102,7 @@ def pytest_runtest_setup(item):
             "test_mxnet_scanner.py",  # MXNet scanner tests
             "test_tf_metagraph_scanner.py",  # TensorFlow MetaGraph scanner tests
             "test_torchserve_mar_scanner.py",  # TorchServe .mar scanner tests
+            "test_executorch_scanner.py",  # ExecuTorch scanner tests
             "test_telemetry.py",  # telemetry payload and availability tests
             "test_telemetry_decoupling.py",  # telemetry failure-isolation tests
             "test_debug_command.py",  # debug output telemetry flags
diff --git a/tests/scanners/test_executorch_scanner.py b/tests/scanners/test_executorch_scanner.py
index b026e287..95a1791e 100644
--- a/tests/scanners/test_executorch_scanner.py
+++ b/tests/scanners/test_executorch_scanner.py
@@ -55,3 +55,14 @@ def test_executorch_scanner_invalid_zip(tmp_path):
     result = scanner.scan(str(file_path))
     assert not result.success
     assert any("executorch" in i.message.lower() for i in result.issues)
+
+
+def test_executorch_scanner_accepts_binary_program_header(tmp_path):
+    file_path = tmp_path / "program.pte"
+    file_path.write_bytes(b"\x40\x00\x00\x00ET12eh00\x20\x00\x00\x00\xe8\x8c\x01\x00\x00\x00\x00\x00")
+    scanner = ExecuTorchScanner()
+    result = scanner.scan(str(file_path))
+    assert result.success is True
+    assert result.bytes_scanned == file_path.stat().st_size
+    assert not any("not a valid executorch archive" in issue.message.lower() for issue in result.issues)
+    assert not any("file type validation failed" in issue.message.lower() for issue in result.issues)
diff --git a/tests/utils/file/test_filetype.py b/tests/utils/file/test_filetype.py
index 65bc34e5..276ca05f 100644
--- a/tests/utils/file/test_filetype.py
+++ b/tests/utils/file/test_filetype.py
@@ -523,6 +523,13 @@ def test_validate_file_type(tmp_path):
         mar.writestr("weights.bin", b"weights")
         mar.writestr("handler.py", b"def handle(data, context):\n    return data\n")
     assert validate_file_type(str(mar_path)) is True
+
+    # ExecuTorch binaries use a FlatBuffers identifier at bytes 4..7.
+    executorch_path = tmp_path / "program.pte"
+    executorch_path.write_bytes(b"\x40\x00\x00\x00ET12eh00" + b"\x20\x00\x00\x00" + b"\x00" * 16)
+    assert detect_file_format_from_magic(str(executorch_path)) == "executorch"
+    assert validate_file_type(str(executorch_path)) is True
+
     # Llamafile wrappers validate by extension with scanner-level marker checks.
     llamafile_path = tmp_path / "model.llamafile"
     llamafile_path.write_bytes(b"\x7fELF" + b"\x00" * 32 + b"llamafile")

From 6c5ba7bce055a9a4b6787d2584f3d753b2649a88 Mon Sep 17 00:00:00 2001
From: Yash Chhabria <yash2998chhabria@gmail.com>
Date: Mon, 16 Mar 2026 11:17:44 -0700
Subject: [PATCH 2/4] fix: address executorch review feedback

Align ExecuTorch binary signature checks with the shared detector,
remove the duplicate changelog heading, and add the missing test
return annotation.

Co-Authored-By: Codex <noreply@openai.com>
---
 CHANGELOG.md                              | 3 ---
 modelaudit/scanners/executorch_scanner.py | 4 ++--
 tests/scanners/test_executorch_scanner.py | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b296120..007fab45 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -82,9 +82,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 - eliminate false positives for valid ExecuTorch FlatBuffers binaries and file-type validation on public `.pte` models
-
-### Fixed
-
 - **security:** remove `dill.load` / `dill.loads` from the pickle safe-global allowlist so recursive dill deserializers stay flagged as dangerous loader entry points
 - **security:** add exact dangerous helper coverage for validated torch and NumPy refs such as `numpy.f2py.crackfortran.getlincoef`, `torch._dynamo.guards.GuardBuilder.get`, and `torch.utils.collect_env.run`
 - **security:** add exact dangerous-global coverage for `numpy.load`, `site.main`, `_io.FileIO`, `test.support.script_helper.assert_python_ok`, `_osx_support._read_output`, `_aix_support._read_cmd_output`, `_pyrepl.pager.pipe_pager`, `torch.serialization.load`, and `torch._inductor.codecache.compile_file` (9 PickleScan-only loader and execution primitives)
diff --git a/modelaudit/scanners/executorch_scanner.py b/modelaudit/scanners/executorch_scanner.py
index 154945e5..087e9cfa 100644
--- a/modelaudit/scanners/executorch_scanner.py
+++ b/modelaudit/scanners/executorch_scanner.py
@@ -39,8 +39,8 @@ def _read_header(path: str, length: int = 4) -> bytes:
 
     @staticmethod
     def _is_executorch_binary(header: bytes) -> bool:
-        # Real-world .pte files use a FlatBuffers-style file identifier at bytes 4..7.
-        return len(header) >= 8 and header[4:6] == b"ET"
+        # Real-world .pte files use the FlatBuffers file identifier "ET12" at bytes 4..7.
+        return len(header) >= 8 and header[4:8] == b"ET12"
 
     def scan(self, path: str) -> ScanResult:
         path_check_result = self._check_path(path)
diff --git a/tests/scanners/test_executorch_scanner.py b/tests/scanners/test_executorch_scanner.py
index 95a1791e..19e777a0 100644
--- a/tests/scanners/test_executorch_scanner.py
+++ b/tests/scanners/test_executorch_scanner.py
@@ -57,7 +57,7 @@ def test_executorch_scanner_invalid_zip(tmp_path):
     assert any("executorch" in i.message.lower() for i in result.issues)
 
 
-def test_executorch_scanner_accepts_binary_program_header(tmp_path):
+def test_executorch_scanner_accepts_binary_program_header(tmp_path) -> None:
     file_path = tmp_path / "program.pte"
     file_path.write_bytes(b"\x40\x00\x00\x00ET12eh00\x20\x00\x00\x00\xe8\x8c\x01\x00\x00\x00\x00\x00")
     scanner = ExecuTorchScanner()

From 3cf01089d1b5a741f08fb097a3b2b17aa6adf560 Mon Sep 17 00:00:00 2001
From: Yash Chhabria <yash2998chhabria@gmail.com>
Date: Mon, 16 Mar 2026 12:17:31 -0700
Subject: [PATCH 3/4] fix: annotate executorch regression test

---
 tests/scanners/test_executorch_scanner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/scanners/test_executorch_scanner.py b/tests/scanners/test_executorch_scanner.py
index 19e777a0..0f4566b1 100644
--- a/tests/scanners/test_executorch_scanner.py
+++ b/tests/scanners/test_executorch_scanner.py
@@ -57,7 +57,7 @@ def test_executorch_scanner_invalid_zip(tmp_path):
     assert any("executorch" in i.message.lower() for i in result.issues)
 
 
-def test_executorch_scanner_accepts_binary_program_header(tmp_path) -> None:
+def test_executorch_scanner_accepts_binary_program_header(tmp_path: Path) -> None:
     file_path = tmp_path / "program.pte"
     file_path.write_bytes(b"\x40\x00\x00\x00ET12eh00\x20\x00\x00\x00\xe8\x8c\x01\x00\x00\x00\x00\x00")
     scanner = ExecuTorchScanner()

From 74a8bd6152cb667e6943a854fde7a10e1c5dff2c Mon Sep 17 00:00:00 2001
From: mldangelo <michael.l.dangelo@gmail.com>
Date: Tue, 17 Mar 2026 07:37:31 -0700
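Subject: [PATCH 4/4] fix(executorch): validate versioned flatbuffer binaries

Accept any `ET` + two-digit FlatBuffers file identifier instead of only
`ET12`, and require a minimally valid FlatBuffers layout (in-bounds root
table, vtable, and declared sizes) before treating a `.pte` file as an
ExecuTorch binary. The scanner, magic-byte detection, and file-type
validation now share `_is_valid_executorch_binary`, and the tests cover
versioned identifiers as well as signature-only lookalikes.
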
---
 modelaudit/scanners/executorch_scanner.py |  8 +--
 modelaudit/utils/file/detection.py        | 59 +++++++++++++++++++++--
 tests/scanners/test_executorch_scanner.py | 31 +++++++++++-
 tests/utils/file/test_filetype.py         | 25 +++++++++-
 4 files changed, 109 insertions(+), 14 deletions(-)

diff --git a/modelaudit/scanners/executorch_scanner.py b/modelaudit/scanners/executorch_scanner.py
index 087e9cfa..93729de3 100644
--- a/modelaudit/scanners/executorch_scanner.py
+++ b/modelaudit/scanners/executorch_scanner.py
@@ -7,6 +7,7 @@
 from typing import Any, ClassVar
 
 from ..utils import sanitize_archive_path
+from ..utils.file.detection import _is_valid_executorch_binary
 from .base import BaseScanner, IssueSeverity, ScanResult
 from .pickle_scanner import PickleScanner
 
@@ -37,11 +38,6 @@ def _read_header(path: str, length: int = 4) -> bytes:
         except Exception:
             return b""
 
-    @staticmethod
-    def _is_executorch_binary(header: bytes) -> bool:
-        # Real-world .pte files use the FlatBuffers file identifier "ET12" at bytes 4..7.
-        return len(header) >= 8 and header[4:8] == b"ET12"
-
     def scan(self, path: str) -> ScanResult:
         path_check_result = self._check_path(path)
         if path_check_result:
@@ -56,7 +52,7 @@ def scan(self, path: str) -> ScanResult:
         result.metadata["file_size"] = file_size
 
         header = self._read_header(path, length=8)
-        if self._is_executorch_binary(header):
+        if _is_valid_executorch_binary(path):
             result.add_check(
                 name="ExecuTorch Binary Format Validation",
                 passed=True,
diff --git a/modelaudit/utils/file/detection.py b/modelaudit/utils/file/detection.py
index 40bfb6c5..deb69dd8 100644
--- a/modelaudit/utils/file/detection.py
+++ b/modelaudit/utils/file/detection.py
@@ -190,8 +190,59 @@ def _is_lightgbm_signature(prefix: bytes) -> bool:
 
 
 def _is_executorch_binary_signature(prefix: bytes) -> bool:
-    """Recognize ExecuTorch FlatBuffers binaries by their file identifier."""
-    return len(prefix) >= 8 and prefix[4:8] == b"ET12"
+    """Recognize versioned ExecuTorch FlatBuffers binaries by their file identifier."""
+    return len(prefix) >= 8 and prefix[4:6] == b"ET" and prefix[6:8].isdigit()
+
+
+def _is_valid_executorch_binary(path: str | Path) -> bool:
+    """Validate the minimal FlatBuffers structure for ExecuTorch binaries."""
+    file_path = Path(path)
+    if not file_path.is_file():
+        return False
+
+    try:
+        file_size = file_path.stat().st_size
+        if file_size < 16:
+            return False
+
+        with file_path.open("rb") as f:
+            header = f.read(8)
+            if not _is_executorch_binary_signature(header):
+                return False
+
+            root_table_offset = struct.unpack("<I", header[:4])[0]
+            if root_table_offset < 12 or root_table_offset + 4 > file_size:
+                return False
+
+            f.seek(root_table_offset)
+            table_header = f.read(4)
+            if len(table_header) != 4:
+                return False
+
+            vtable_back_offset = struct.unpack("<i", table_header)[0]
+            if vtable_back_offset <= 0 or vtable_back_offset > root_table_offset:
+                return False
+
+            vtable_offset = root_table_offset - vtable_back_offset
+            if vtable_offset < 8 or vtable_offset + 4 > file_size:
+                return False
+
+            f.seek(vtable_offset)
+            vtable_header = f.read(4)
+            if len(vtable_header) != 4:
+                return False
+
+            vtable_size, object_size = struct.unpack("<HH", vtable_header)
+            if vtable_size < 4 or object_size < 4:
+                return False
+            if vtable_offset + vtable_size > file_size:
+                return False
+            if root_table_offset + object_size > file_size:
+                return False
+    except (OSError, struct.error):
+        return False
+
+    return True
 
 
 def _is_zlib_header(prefix: bytes) -> bool:
@@ -317,7 +368,7 @@ def detect_file_format_from_magic(path: str) -> str:
             magic8 = header[:8]
             magic16 = header[:16]
 
-            if _is_executorch_binary_signature(header):
+            if _is_valid_executorch_binary(file_path):
                 return "executorch"
 
             # Try the new pattern matching approach first
@@ -853,7 +904,7 @@ def validate_file_type(path: str) -> bool:
 
-        # ExecuTorch files should be zip archives
+        # ExecuTorch files are either zip archives or validated FlatBuffers binaries
         if ext_format == "executorch":
-            return header_format == "zip" or _is_executorch_binary_signature(read_magic_bytes(path, 16))
+            return header_format == "zip" or _is_valid_executorch_binary(path)
 
         # Keras files can be either ZIP (Keras 3.x) or HDF5 (legacy Keras)
         if ext_format == "keras":
diff --git a/tests/scanners/test_executorch_scanner.py b/tests/scanners/test_executorch_scanner.py
index 0f4566b1..d33cd8d8 100644
--- a/tests/scanners/test_executorch_scanner.py
+++ b/tests/scanners/test_executorch_scanner.py
@@ -6,6 +6,13 @@
 from modelaudit.scanners.executorch_scanner import ExecuTorchScanner
 
 
+def create_executorch_binary(tmp_path: Path, *, identifier: bytes = b"ET12") -> Path:
+    binary_path = tmp_path / "program.pte"
+    # Minimal valid FlatBuffer with the ExecuTorch file identifier.
+    binary_path.write_bytes(b"\x0c\x00\x00\x00" + identifier + b"\x04\x00\x04\x00\x04\x00\x00\x00")
+    return binary_path
+
+
 def create_executorch_archive(tmp_path: Path, *, malicious: bool = False) -> Path:
     zip_path = tmp_path / "model.ptl"
     with zipfile.ZipFile(zip_path, "w") as z:
@@ -58,11 +65,31 @@ def test_executorch_scanner_invalid_zip(tmp_path):
 
 
 def test_executorch_scanner_accepts_binary_program_header(tmp_path: Path) -> None:
-    file_path = tmp_path / "program.pte"
-    file_path.write_bytes(b"\x40\x00\x00\x00ET12eh00\x20\x00\x00\x00\xe8\x8c\x01\x00\x00\x00\x00\x00")
+    file_path = create_executorch_binary(tmp_path)
     scanner = ExecuTorchScanner()
     result = scanner.scan(str(file_path))
     assert result.success is True
     assert result.bytes_scanned == file_path.stat().st_size
     assert not any("not a valid executorch archive" in issue.message.lower() for issue in result.issues)
     assert not any("file type validation failed" in issue.message.lower() for issue in result.issues)
+
+
+def test_executorch_scanner_accepts_versioned_binary_program_header(tmp_path: Path) -> None:
+    file_path = create_executorch_binary(tmp_path, identifier=b"ET13")
+    scanner = ExecuTorchScanner()
+    result = scanner.scan(str(file_path))
+
+    assert result.success is True
+    assert result.bytes_scanned == file_path.stat().st_size
+    assert not result.issues
+
+
+def test_executorch_scanner_rejects_invalid_binary_signature_match(tmp_path: Path) -> None:
+    file_path = tmp_path / "fake-program.pte"
+    file_path.write_bytes(b"JUNKET12notflatbufferatall")
+
+    scanner = ExecuTorchScanner()
+    result = scanner.scan(str(file_path))
+
+    assert result.success is False
+    assert any(issue.rule_code == "S104" for issue in result.issues)
diff --git a/tests/utils/file/test_filetype.py b/tests/utils/file/test_filetype.py
index 276ca05f..316122fb 100644
--- a/tests/utils/file/test_filetype.py
+++ b/tests/utils/file/test_filetype.py
@@ -218,6 +218,22 @@ def test_detect_torch7_formats_by_signature(tmp_path: Path) -> None:
     assert detect_file_format_from_magic(str(torch7_path)) == "torch7"
     assert validate_file_type(str(torch7_path)) is True
 
+
+def test_detect_executorch_binary_requires_valid_flatbuffer_structure(tmp_path: Path) -> None:
+    executorch_path = tmp_path / "program.pte"
+    executorch_path.write_bytes(b"\x0c\x00\x00\x00ET13\x04\x00\x04\x00\x04\x00\x00\x00")
+
+    assert detect_file_format(str(executorch_path)) == "executorch"
+    assert detect_file_format_from_magic(str(executorch_path)) == "executorch"
+    assert validate_file_type(str(executorch_path)) is True
+
+    fake_executorch_path = tmp_path / "fake-program.pte"
+    fake_executorch_path.write_bytes(b"JUNKET12notflatbufferatall")
+
+    assert detect_file_format(str(fake_executorch_path)) == "executorch"
+    assert detect_file_format_from_magic(str(fake_executorch_path)) == "unknown"
+    assert validate_file_type(str(fake_executorch_path)) is False
+
     fake_torch7 = tmp_path / "fake.t7"
     fake_torch7.write_text("not torch7")
     assert detect_file_format(str(fake_torch7)) == "unknown"
@@ -524,12 +540,17 @@ def test_validate_file_type(tmp_path):
         mar.writestr("handler.py", b"def handle(data, context):\n    return data\n")
     assert validate_file_type(str(mar_path)) is True
 
-    # ExecuTorch binaries use a FlatBuffers identifier at bytes 4..7.
+    # ExecuTorch binaries require a valid FlatBuffers layout in addition to the file identifier.
     executorch_path = tmp_path / "program.pte"
-    executorch_path.write_bytes(b"\x40\x00\x00\x00ET12eh00" + b"\x20\x00\x00\x00" + b"\x00" * 16)
+    executorch_path.write_bytes(b"\x0c\x00\x00\x00ET13\x04\x00\x04\x00\x04\x00\x00\x00")
     assert detect_file_format_from_magic(str(executorch_path)) == "executorch"
     assert validate_file_type(str(executorch_path)) is True
 
+    invalid_executorch_path = tmp_path / "invalid-program.pte"
+    invalid_executorch_path.write_bytes(b"\x0c\x00\x00\x00ETAA\x04\x00\x04\x00\x04\x00\x00\x00")
+    assert detect_file_format_from_magic(str(invalid_executorch_path)) == "unknown"
+    assert validate_file_type(str(invalid_executorch_path)) is False
+
     # Llamafile wrappers validate by extension with scanner-level marker checks.
     llamafile_path = tmp_path / "model.llamafile"
     llamafile_path.write_bytes(b"\x7fELF" + b"\x00" * 32 + b"llamafile")