39 changes: 27 additions & 12 deletions src/dnfile/base.py
@@ -752,6 +752,7 @@ def __init__(
strings_heap: Optional["stream.StringsHeap"],
guid_heap: Optional["stream.GuidHeap"],
blob_heap: Optional["stream.BlobHeap"],
mdtables: "stream.MetaDataTables",
lazy_load=False
):
"""
@@ -793,6 +794,32 @@ def init_row():
)

self.rows: List[RowType]
# initialize table data to an empty byte sequence
self._table_data: bytes = b""

# store heap info
self._strings_heap: Optional["stream.StringsHeap"] = strings_heap
self._guid_heap: Optional["stream.GuidHeap"] = guid_heap
self._blob_heap: Optional["stream.BlobHeap"] = blob_heap
self._strings_offset_size = strings_offset_size
self._guid_offset_size = guid_offset_size
self._blob_offset_size = blob_offset_size
self._tables_rowcounts = tables_rowcounts

# calculate the row size and table size in bytes
fake_row = init_row()
self.row_size = fake_row.row_size
table_size = self.row_size * self.num_rows

# sanity check: if table size is larger than the containing stream
if self.rva + table_size > mdtables.rva + mdtables.sizeof():
# initialize an empty row list
self.rows = []
# indicate error
logger.warning(f"Metadata table {self.name} with row_size {self.row_size} and num_rows {self.num_rows} does not fit in MD stream size {mdtables.sizeof()}")
# do not try to parse rows in this table
return

if lazy_load and num_rows > 0:
self.rows = _LazyList(self._lazy_parse_rows, num_rows)
try:
@@ -812,18 +839,6 @@ def init_row():
# this probably means invalid data.
logger.warning("failed to construct %s row %d", self.name, e)

# store heap info
self._strings_heap: Optional["stream.StringsHeap"] = strings_heap
self._guid_heap: Optional["stream.GuidHeap"] = guid_heap
self._blob_heap: Optional["stream.BlobHeap"] = blob_heap
self._strings_offset_size = strings_offset_size
self._guid_offset_size = guid_offset_size
self._blob_offset_size = blob_offset_size
self._tables_rowcounts = tables_rowcounts

self._table_data: bytes = b""
self.row_size: int = self._get_row_size()

def _get_row_size(self):
if not self.rows:
return 0
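The effect of the new constructor check is easiest to see in isolation. The following is a minimal, self-contained sketch in plain Python with made-up numbers (not dnfile API calls) of the same comparison: a table's rows are only parsed when rva + row_size * num_rows stays within the bounds of the containing #~ stream.

def table_fits(table_rva, row_size, num_rows, stream_rva, stream_size):
    # same bounds check as the new code in ClrMetaDataTable.__init__: the whole
    # table must end before the end of the metadata tables stream
    table_size = row_size * num_rows
    return table_rva + table_size <= stream_rva + stream_size

# hypothetical values: a claimed 0x10000-row table with 10-byte rows cannot fit
# in a 0x2000-byte stream, so the rows list stays empty and a warning is logged
assert not table_fits(0x2100, 10, 0x10000, 0x2000, 0x2000)
assert table_fits(0x2100, 10, 8, 0x2000, 0x2000)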
2 changes: 2 additions & 0 deletions src/dnfile/mdtable.py
@@ -2155,6 +2155,7 @@ def createTable(
strings_heap: Optional["stream.StringsHeap"],
guid_heap: Optional["stream.GuidHeap"],
blob_heap: Optional["stream.BlobHeap"],
mdtables: "stream.MetaDataTables",
lazy_load=False
) -> ClrMetaDataTable:
if number not in cls._table_number_map:
@@ -2169,6 +2170,7 @@
strings_heap,
guid_heap,
blob_heap,
mdtables,
lazy_load,
)
return table
38 changes: 34 additions & 4 deletions src/dnfile/stream.py
@@ -550,7 +550,11 @@ def parse(self, streams: List[base.ClrStream], lazy_load=False):
# if table bit is set
if header_struct.MaskValid & 2 ** i != 0:
# read the row count
table_rowcounts.append(self.get_dword_at_rva(cur_rva))
row_count = self.get_dword_at_rva(cur_rva)
# sanity check
if row_count > self.sizeof():
logger.warning(f"invalid table {i} row_count {row_count} larger than stream size {self.sizeof()}")
table_rowcounts.append(row_count)
# increment to next dword
cur_rva += 4
else:
@@ -576,6 +580,7 @@ def parse(self, streams: List[base.ClrStream], lazy_load=False):
strings_heap,
guid_heap,
blob_heap,
self,
lazy_load,
)
except errors.dnFormatError as e:
@@ -618,22 +623,47 @@ def full_loader():
# Setup lazy loading for all tables
for table in self.tables_list:
if table.row_size > 0 and table.num_rows > 0:
table.rva = cur_rva
table.file_offset = self.get_file_offset(table.rva)
# calculate the table size
table_size = table.row_size * table.num_rows
# sanity check: if table size is more than data in stream
if cur_rva + table_size > self.rva + self.sizeof():
# the table is too large
err_msg = f"Metadata table {table.name} with row_size {table.row_size} and num_rows {table.num_rows} is larger than stream size {self.sizeof()}"
deferred_exceptions.append(
errors.dnFormatError(err_msg)
)
logger.warning(err_msg)
# stop processing tables
break
table_data = self.get_data_at_rva(
cur_rva, table.row_size * table.num_rows
)
table.setup_lazy_load(cur_rva, table_data, full_loader)
table.file_offset = self.get_file_offset(cur_rva)
cur_rva += table.row_size * table.num_rows
else:
#### parse each table
# here, cur_rva points to start of table rows
for table in self.tables_list:
if table.row_size > 0 and table.num_rows > 0:
table.rva = cur_rva
table.file_offset = self.get_file_offset(cur_rva)
# calculate the table size
table_size = table.row_size * table.num_rows
# sanity check: if table size is more than data in stream
if cur_rva + table_size > self.rva + self.sizeof():
# the table is too large
err_msg = f"Metadata table {table.name} with row_size {table.row_size} and num_rows {table.num_rows} is larger than stream size {self.sizeof()}"
deferred_exceptions.append(
errors.dnFormatError(err_msg)
)
logger.warning(err_msg)
# stop processing tables
break
table_data = self.get_data_at_rva(
cur_rva, table.row_size * table.num_rows
)
table.rva = cur_rva
table.file_offset = self.get_file_offset(cur_rva)
# parse structures (populates .struct for each row)
table.parse_rows(cur_rva, table_data)
# move to next set of rows
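The same guard is applied during the stream-level layout walk, in both the lazy and eager paths. Here is a standalone sketch (hypothetical numbers, not dnfile calls) of why the loop breaks rather than continuing: tables are packed back to back, so once one table overruns the stream, the computed RVA of every later table would be meaningless.

stream_rva, stream_size = 0x2000, 0x400
cur_rva = 0x2018  # start of table rows, just past the header and row counts
# (name, row_size, num_rows); the TypeDef entry claims far more rows than can fit
tables = [("Module", 10, 1), ("TypeRef", 6, 3), ("TypeDef", 14, 0x8000)]
for name, row_size, num_rows in tables:
    table_size = row_size * num_rows
    if cur_rva + table_size > stream_rva + stream_size:
        # 14 * 0x8000 = 0x70000 bytes, far past the 0x400-byte stream
        print(name, "does not fit; stop processing tables")
        break
    cur_rva += table_size  # the next table starts immediately after this one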
2 changes: 2 additions & 0 deletions tests/test_parse.py
@@ -88,6 +88,8 @@ def test_tables():
dn = dnfile.dnPE(path)
assert dn.net is not None

assert len(dn.net.mdtables.tables_list) == 9

for table in ["Module", "TypeRef", "TypeDef", "MethodDef", "Param", "MemberRef", "CustomAttribute", "Assembly", "AssemblyRef"]:
assert hasattr(dn.net.mdtables, table)
