diff --git a/src/dnfile/base.py b/src/dnfile/base.py
index e0bfbf2..f67e276 100644
--- a/src/dnfile/base.py
+++ b/src/dnfile/base.py
@@ -752,6 +752,7 @@ def __init__(
         strings_heap: Optional["stream.StringsHeap"],
         guid_heap: Optional["stream.GuidHeap"],
         blob_heap: Optional["stream.BlobHeap"],
+        mdtables: "stream.MetaDataTables",
         lazy_load=False
     ):
         """
@@ -793,6 +794,32 @@ def init_row():
         )
         self.rows: List[RowType]

+        # initialize table data to an empty byte sequence
+        self._table_data: bytes = b""
+
+        # store heap info
+        self._strings_heap: Optional["stream.StringsHeap"] = strings_heap
+        self._guid_heap: Optional["stream.GuidHeap"] = guid_heap
+        self._blob_heap: Optional["stream.BlobHeap"] = blob_heap
+        self._strings_offset_size = strings_offset_size
+        self._guid_offset_size = guid_offset_size
+        self._blob_offset_size = blob_offset_size
+        self._tables_rowcounts = tables_rowcounts
+
+        # calculate the row size and table size in bytes
+        fake_row = init_row()
+        self.row_size = fake_row.row_size
+        table_size = self.row_size * self.num_rows
+
+        # sanity check: if table size is larger than the containing stream
+        if self.rva + table_size > mdtables.rva + mdtables.sizeof():
+            # initialize an empty row list
+            self.rows = []
+            # indicate error
+            logger.warning(f"Metadata table {self.name} with row_size {self.row_size} and num_rows {self.num_rows} does not fit in MD stream size {mdtables.sizeof()}")
+            # do not try to parse rows in this table
+            return
+
         if lazy_load and num_rows > 0:
             self.rows = _LazyList(self._lazy_parse_rows, num_rows)
             try:
@@ -812,18 +839,6 @@ def init_row():
                     # this probably means invalid data.
                     logger.warning("failed to construct %s row %d", self.name, e)

-        # store heap info
-        self._strings_heap: Optional["stream.StringsHeap"] = strings_heap
-        self._guid_heap: Optional["stream.GuidHeap"] = guid_heap
-        self._blob_heap: Optional["stream.BlobHeap"] = blob_heap
-        self._strings_offset_size = strings_offset_size
-        self._guid_offset_size = guid_offset_size
-        self._blob_offset_size = blob_offset_size
-        self._tables_rowcounts = tables_rowcounts
-
-        self._table_data: bytes = b""
-        self.row_size: int = self._get_row_size()
-
     def _get_row_size(self):
         if not self.rows:
             return 0
diff --git a/src/dnfile/mdtable.py b/src/dnfile/mdtable.py
index 07e35f1..8efd333 100644
--- a/src/dnfile/mdtable.py
+++ b/src/dnfile/mdtable.py
@@ -2155,6 +2155,7 @@ def createTable(
         strings_heap: Optional["stream.StringsHeap"],
         guid_heap: Optional["stream.GuidHeap"],
         blob_heap: Optional["stream.BlobHeap"],
+        mdtables: "stream.MetaDataTables",
         lazy_load=False
     ) -> ClrMetaDataTable:
         if number not in cls._table_number_map:
@@ -2169,6 +2170,7 @@ def createTable(
             strings_heap,
             guid_heap,
             blob_heap,
+            mdtables,
             lazy_load,
         )
         return table
diff --git a/src/dnfile/stream.py b/src/dnfile/stream.py
index fbc8c1e..f7b35a5 100644
--- a/src/dnfile/stream.py
+++ b/src/dnfile/stream.py
@@ -550,7 +550,11 @@ def parse(self, streams: List[base.ClrStream], lazy_load=False):
             # if table bit is set
             if header_struct.MaskValid & 2 ** i != 0:
                 # read the row count
-                table_rowcounts.append(self.get_dword_at_rva(cur_rva))
+                row_count = self.get_dword_at_rva(cur_rva)
+                # sanity check
+                if row_count > self.sizeof():
+                    logger.warning(f"invalid table {i} row_count {row_count} larger than stream size {self.sizeof()}")
+                table_rowcounts.append(row_count)
                 # increment to next dword
                 cur_rva += 4
             else:
@@ -576,6 +580,7 @@ def parse(self, streams: List[base.ClrStream], lazy_load=False):
                     strings_heap,
                     guid_heap,
                     blob_heap,
+                    self,
                     lazy_load,
                 )
             except errors.dnFormatError as e:
@@ -618,22 +623,47 @@ def full_loader():
             # Setup lazy loading for all tables
             for table in self.tables_list:
                 if table.row_size > 0 and table.num_rows > 0:
+                    table.rva = cur_rva
+                    table.file_offset = self.get_file_offset(table.rva)
+                    # calculate the table size
+                    table_size = table.row_size * table.num_rows
+                    # sanity check: if table size is more than data in stream
+                    if cur_rva + table_size > self.rva + self.sizeof():
+                        # the table is too large
+                        err_msg = f"Metadata table {table.name} with row_size {table.row_size} and num_rows {table.num_rows} is larger than stream size {self.sizeof()}"
+                        deferred_exceptions.append(
+                            errors.dnFormatError(err_msg)
+                        )
+                        logger.warning(err_msg)
+                        # stop processing tables
+                        break
                     table_data = self.get_data_at_rva(
                         cur_rva, table.row_size * table.num_rows
                     )
                     table.setup_lazy_load(cur_rva, table_data, full_loader)
-                    table.file_offset = self.get_file_offset(cur_rva)
                     cur_rva += table.row_size * table.num_rows
         else:
             #### parse each table
             # here, cur_rva points to start of table rows
             for table in self.tables_list:
                 if table.row_size > 0 and table.num_rows > 0:
+                    table.rva = cur_rva
+                    table.file_offset = self.get_file_offset(cur_rva)
+                    # calculate the table size
+                    table_size = table.row_size * table.num_rows
+                    # sanity check: if table size is more than data in stream
+                    if cur_rva + table_size > self.rva + self.sizeof():
+                        # the table is too large
+                        err_msg = f"Metadata table {table.name} with row_size {table.row_size} and num_rows {table.num_rows} is larger than stream size {self.sizeof()}"
+                        deferred_exceptions.append(
+                            errors.dnFormatError(err_msg)
+                        )
+                        logger.warning(err_msg)
+                        # stop processing tables
+                        break
                     table_data = self.get_data_at_rva(
                         cur_rva, table.row_size * table.num_rows
                     )
-                    table.rva = cur_rva
-                    table.file_offset = self.get_file_offset(cur_rva)
                     # parse structures (populates .struct for each row)
                     table.parse_rows(cur_rva, table_data)
                     # move to next set of rows
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 323b052..b506440 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -88,6 +88,8 @@ def test_tables():
     dn = dnfile.dnPE(path)
     assert dn.net is not None

+    assert len(dn.net.mdtables.tables_list) == 9
+
     for table in ["Module", "TypeRef", "TypeDef", "MethodDef", "Param", "MemberRef", "CustomAttribute", "Assembly", "AssemblyRef"]:
         assert hasattr(dn.net.mdtables, table)