diff --git a/emsymbolizer.py b/emsymbolizer.py index 36c4c33a4cb3f..40b29b2a958d9 100755 --- a/emsymbolizer.py +++ b/emsymbolizer.py @@ -110,6 +110,7 @@ def __init__(self, source=None, line=0, column=0, func=None): def __init__(self): self.version = None self.sources = [] + self.funcs = [] self.mappings = {} self.offsets = [] @@ -121,6 +122,7 @@ def parse(self, filename): self.version = source_map_json['version'] self.sources = source_map_json['sources'] + self.funcs = source_map_json['names'] chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=' vlq_map = {c: i for i, c in enumerate(chars)} @@ -148,6 +150,7 @@ def decodeVLQ(string): src = 0 line = 1 col = 1 + func = 0 for segment in source_map_json['mappings'].split(','): data = decodeVLQ(segment) info = [] @@ -162,7 +165,9 @@ def decodeVLQ(string): if len(data) >= 4: col += data[3] info.append(col) - # TODO: see if we need the name, which is the next field (data[4]) + if len(data) == 5: + func += data[4] + info.append(func) self.mappings[offset] = WasmSourceMap.Location(*info) self.offsets.append(offset) @@ -189,6 +194,7 @@ def lookup(self, offset): self.sources[info.source] if info.source is not None else None, info.line, info.column, + self.funcs[info.func] if info.func is not None else None, ) diff --git a/test/test_other.py b/test/test_other.py index deb68ad3fe1d6..52ffb368b2255 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -10938,10 +10938,11 @@ def check_dwarf_loc_info(address, funcs, locs): for loc in locs: self.assertIn(loc, out) - def check_source_map_loc_info(address, loc): + def check_source_map_loc_info(address, func, loc): out = self.run_process( [emsymbolizer, '-s', 'sourcemap', 'test_dwarf.wasm', address], stdout=PIPE).stdout + self.assertIn(func, out) self.assertIn(loc, out) # We test two locations within test_dwarf.c: @@ -10968,13 +10969,17 @@ def check_source_map_loc_info(address, loc): # 1. 
Test DWARF + source map together # For DWARF, we check for the full inlined info for both function names and - # source locations. Source maps provide neither function names nor inlined - # info. So we only check for the source location of the outermost function. + # source locations. Source maps do not provide inlined info. So we only + # check for the info of the outermost function. check_dwarf_loc_info(out_to_js_call_addr, out_to_js_call_func, out_to_js_call_loc) - check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_loc[0]) + check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_func[0], + out_to_js_call_loc[0]) check_dwarf_loc_info(unreachable_addr, unreachable_func, unreachable_loc) - check_source_map_loc_info(unreachable_addr, unreachable_loc[0]) + # Source map shows the original (inlined) source location with the name of + # the function it was inlined into + check_source_map_loc_info(unreachable_addr, unreachable_func[1], + unreachable_loc[0]) # 2. Test source map only # The addresses, function names, and source locations are the same across @@ -10982,8 +10987,10 @@ def check_source_map_loc_info(address, loc): # don't need to recompute them self.run_process([EMCC, test_file('core/test_dwarf.c'), '-gsource-map', '-O1', '-o', 'test_dwarf.js']) - check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_loc[0]) - check_source_map_loc_info(unreachable_addr, unreachable_loc[0]) + check_source_map_loc_info(out_to_js_call_addr, out_to_js_call_func[0], + out_to_js_call_loc[0]) + check_source_map_loc_info(unreachable_addr, unreachable_func[1], + unreachable_loc[0]) # 3. 
Test DWARF only self.run_process([EMCC, test_file('core/test_dwarf.c'), diff --git a/tools/wasm-sourcemap.py b/tools/wasm-sourcemap.py index e9f39bd591110..df03d3e2488ef 100755 --- a/tools/wasm-sourcemap.py +++ b/tools/wasm-sourcemap.py @@ -11,6 +11,7 @@ """ import argparse +import bisect import json import logging from math import floor, log @@ -27,6 +28,7 @@ from tools import utils from tools.system_libs import DETERMINISTIC_PREFIX from tools.shared import path_from_root +from tools import webassembly EMSCRIPTEN_PREFIX = utils.normalize_path(path_from_root()) @@ -300,19 +302,57 @@ def read_dwarf_entries(wasm, options): return sorted(entries, key=lambda entry: entry['address']) -def build_sourcemap(entries, code_section_offset, options): +def read_func_ranges(wasm_input): + with webassembly.Module(wasm_input) as module: + if not module.has_name_section(): + return [] + funcs = module.get_functions() + func_names = module.get_function_names()[module.num_imported_funcs():] + assert len(funcs) == len(func_names) + + # Replace '__original_main' with 'main' + try: + original_main_index = func_names.index('__original_main') + func_names[original_main_index] = 'main' + except ValueError: + pass + + func_ranges = [(n, (f.offset, f.offset + f.size)) for n, f in zip(func_names, funcs)] + return func_ranges + + +def build_sourcemap(entries, func_ranges, code_section_offset, options): base_path = options.basepath collect_sources = options.sources prefixes = SourceMapPrefixes(options.prefix, options.load_prefix, base_path) + func_low_pcs = [item[1][0] for item in func_ranges] + sources = [] sources_content = [] + names = [item[0] for item in func_ranges] mappings = [] sources_map = {} last_address = 0 last_source_id = 0 last_line = 1 last_column = 1 + last_func_id = 0 + + # Get the function ID that the given address falls into + def get_function_id(address): + if not func_ranges: + return None + index = bisect.bisect_right(func_low_pcs, address) + if index == 0: # The address 
is lower than the first function's start + return None + candidate_index = index - 1 + name, (low_pc, high_pc) = func_ranges[candidate_index] + # Check the address within the candidate's [low_pc, high_pc) range. If not, + # it is in a gap between functions. + if low_pc <= address < high_pc: + return candidate_index + return None for entry in entries: line = entry['line'] @@ -343,21 +383,27 @@ def build_sourcemap(entries, code_section_offset, options): sources_content.append(None) else: source_id = sources_map[source_name] + func_id = get_function_id(address) address_delta = address - last_address source_id_delta = source_id - last_source_id line_delta = line - last_line column_delta = column - last_column - mappings.append(encode_vlq(address_delta) + encode_vlq(source_id_delta) + encode_vlq(line_delta) + encode_vlq(column_delta)) last_address = address last_source_id = source_id last_line = line last_column = column + mapping = encode_vlq(address_delta) + encode_vlq(source_id_delta) + encode_vlq(line_delta) + encode_vlq(column_delta) + if func_id is not None: + func_id_delta = func_id - last_func_id + last_func_id = func_id + mapping += encode_vlq(func_id_delta) + mappings.append(mapping) return {'version': 3, 'sources': sources, 'sourcesContent': sources_content, - 'names': [], + 'names': names, 'mappings': ','.join(mappings)} @@ -369,11 +415,12 @@ def main(): wasm = infile.read() entries = read_dwarf_entries(wasm_input, options) + func_ranges = read_func_ranges(wasm_input) code_section_offset = get_code_section_offset(wasm) logger.debug('Saving to %s' % options.output) - map = build_sourcemap(entries, code_section_offset, options) + map = build_sourcemap(entries, func_ranges, code_section_offset, options) with open(options.output, 'w', encoding='utf-8') as outfile: json.dump(map, outfile, separators=(',', ':'), ensure_ascii=False) diff --git a/tools/webassembly.py b/tools/webassembly.py index 0a9fdf61a975b..6c864eb8b2303 100644 --- a/tools/webassembly.py +++ 
b/tools/webassembly.py @@ -522,6 +522,34 @@ def get_function_types(self): def has_name_section(self): return self.get_custom_section('name') is not None + @memoize + def get_function_names(self): + num_funcs = self.num_imported_funcs() + len(self.get_functions()) + names = [None] * num_funcs + + name_section = self.get_custom_section('name') + if not name_section: + return names + + self.seek(name_section.offset) + self.read_string() # section name + section_end = name_section.offset + name_section.size + + while self.tell() < section_end: + subsection_id = self.read_byte() + subsection_size = self.read_uleb() + if subsection_id == 1: # function names + count = self.read_uleb() + for _ in range(count): + func_idx = self.read_uleb() + func_name = self.read_string() + assert func_idx < len(names) + names[func_idx] = func_name + else: + self.skip(subsection_size) + + return names + @once def _calc_indexes(self): self.imports_by_kind = {}