11use crate :: prelude:: * ;
2- use object:: { Object , ObjectSymbol , ObjectSymbolTable } ;
2+ use object:: { Object , ObjectSegment , ObjectSymbol , ObjectSymbolTable } ;
33use std:: {
44 collections:: HashMap ,
5+ fmt:: Debug ,
56 io:: Write ,
67 path:: { Path , PathBuf } ,
78} ;
89
/// A single named symbol read from a module's symbol tables.
#[derive(Hash, PartialEq, Eq, Clone)]
struct Symbol {
    /// Address of the symbol. `ModuleSymbols::new` initialises it from the
    /// object file and then adds the computed load bias to it.
    addr: u64,
    /// Size of the symbol as reported by the object file.
    size: u64,
    /// Symbol name as stored in the symbol table.
    name: String,
}

/// Hand-written `Debug` so that the address and size are printed in hexadecimal.
impl Debug for Symbol {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { addr, size, name } = self;
        // NOTE(review): the label reads `offset` although the field is named
        // `addr`; it is left untouched because the snapshot tests in this file
        // record this output. Confirm before renaming.
        f.write_fmt(format_args!(
            "Symbol {{ offset: {:x}, size: {:x}, name: {} }}",
            addr, size, name
        ))
    }
}
26+
/// The symbols collected from one module (an object file on disk).
#[derive(Debug, Clone)]
pub struct ModuleSymbols {
    // Filled by `ModuleSymbols::new` from the regular and dynamic symbol
    // tables; entries with a zero address or zero size are dropped, and the
    // load bias is added to every remaining address.
    symbols: Vec<Symbol>,
}
2131
2232impl ModuleSymbols {
23- pub fn new < P : AsRef < Path > > ( path : P ) -> anyhow:: Result < Self > {
33+ pub fn new < P : AsRef < Path > > (
34+ path : P ,
35+ runtime_start_addr : u64 ,
36+ runtime_offset : u64 ,
37+ ) -> anyhow:: Result < Self > {
2438 let content = std:: fs:: read ( path. as_ref ( ) ) ?;
2539 let object = object:: File :: parse ( & * content) ?;
2640
@@ -29,7 +43,7 @@ impl ModuleSymbols {
2943 if let Some ( symbol_table) = object. symbol_table ( ) {
3044 symbols. extend ( symbol_table. symbols ( ) . filter_map ( |symbol| {
3145 Some ( Symbol {
32- offset : symbol. address ( ) ,
46+ addr : symbol. address ( ) ,
3347 size : symbol. size ( ) ,
3448 name : symbol. name ( ) . ok ( ) ?. to_string ( ) ,
3549 } )
@@ -39,52 +53,110 @@ impl ModuleSymbols {
3953 if let Some ( symbol_table) = object. dynamic_symbol_table ( ) {
4054 symbols. extend ( symbol_table. symbols ( ) . filter_map ( |symbol| {
4155 Some ( Symbol {
42- offset : symbol. address ( ) ,
56+ addr : symbol. address ( ) ,
4357 size : symbol. size ( ) ,
4458 name : symbol. name ( ) . ok ( ) ?. to_string ( ) ,
4559 } )
4660 } ) ) ;
4761 }
4862
49- symbols. retain ( |symbol| symbol. offset > 0 && symbol. size > 0 ) ;
63+ symbols. retain ( |symbol| symbol. addr > 0 && symbol. size > 0 ) ;
5064 if symbols. is_empty ( ) {
5165 return Err ( anyhow:: anyhow!( "No symbols found" ) ) ;
5266 }
5367
54- // The base_addr from the mapping is where the module is actually loaded in memory (See ProcessSymbols::add_mapping),
55- // but the symbol addresses from the ELF file assume the module is loaded at its preferred virtual address. We need to:
56- // 1. Find the module's preferred base address from the ELF file or symbols
57- // 2. Calculate the offset: actual_base - preferred_base
58- // 3. Apply this offset to the symbol addresses
59-
60- // Find the preferred base address from the minimum symbol address
61- let preferred_base = symbols. iter ( ) . map ( |s| s. offset ) . min ( ) . unwrap_or ( 0 ) & !0xfff ; // Align to page boundary
62-
63- // Convert absolute addresses to relative offsets
68+ let load_bias = Self :: compute_load_bias ( runtime_start_addr, runtime_offset, & object) ?;
6469 for symbol in & mut symbols {
65- symbol. offset = symbol. offset . saturating_sub ( preferred_base ) ;
70+ symbol. addr = symbol. addr . wrapping_add ( load_bias ) ;
6671 }
6772
68- Ok ( Self {
69- path : path. as_ref ( ) . to_path_buf ( ) ,
70- symbols,
71- } )
73+ Ok ( Self { symbols } )
7274 }
7375
74- fn append_to_file < P : AsRef < Path > > ( & self , path : P , base_addr : u64 ) -> anyhow:: Result < ( ) > {
76+ fn compute_load_bias (
77+ runtime_start_addr : u64 ,
78+ runtime_offset : u64 ,
79+ object : & object:: File ,
80+ ) -> anyhow:: Result < u64 > {
81+ // The addresses of symbols read from an ELF file on disk are not their final runtime addresses.
82+ // This is due to Address Space Layout Randomization (ASLR) and the way the OS loader maps
83+ // file segments into virtual memory.
84+ //
85+ // Step 1: Find the corresponding ELF segment.
86+ // We must find the `PT_LOAD` segment that corresponds to the executable memory region we found
87+ // in /proc/<pid>/maps. We do this by comparing the `runtime_offset` against the offset in the file.
88+ //
89+ // For example, if we have the following `/proc/<pid>/maps` output:
90+ // ```
91+ // 00400000-00402000 r--p 00000000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
92+ // 00402000-0050f000 r-xp 00002000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin <-- we find this
93+ // 0050f000-0064b000 r--p 0010f000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
94+ // 0064b000-0064c000 r--p 0024a000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
95+ // 0064c000-0065e000 rw-p 0024b000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
96+ // 0065e000-00684000 rw-p 00000000 00:00 0
97+ // ```
98+ //
99+ // We'll match the PT_LOAD segment with the same offset (0x2000):
100+ // ```
101+ // $ readelf -l testdata/perf_map/go_fib.bin
102+ // Elf file type is EXEC (Executable file)
103+ // Entry point 0x402490
104+ // There are 15 program headers, starting at offset 64
105+ //
106+ // Program Headers:
107+ // Type Offset VirtAddr PhysAddr
108+ // PHDR 0x0000000000000040 0x0000000000400040 0x0000000000400040
109+ // 0x0000000000000348 0x0000000000000348 R 0x8
110+ // INTERP 0x0000000000000430 0x0000000000400430 0x0000000000400430
111+ // 0x0000000000000053 0x0000000000000053 R 0x1
112+ // LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000
113+ // 0x0000000000001640 0x0000000000001640 R 0x1000
114+ // LOAD 0x0000000000002000 0x0000000000402000 0x0000000000402000 <-- we'll match this
115+ // 0x000000000010ceb1 0x000000000010ceb1 R E 0x1000
116+ // ```
117+ let load_segment = object
118+ . segments ( )
119+ . find ( |segment| {
120+ // When the kernel loads an ELF file, it maps entire pages (usually 4KB aligned),
121+ // not just the exact segment boundaries. Here's what happens:
122+ //
123+ // **ELF File Structure**:
124+ // - LOAD segment 1: file offset 0x0 - 0x4d26a (data/code)
125+ // - LOAD segment 2: file offset 0x4d26c - 0x13c4b6 (executable code)
126+ //
127+ // **Kernel Memory Mapping**: The kernel rounds down to page boundaries when mapping:
128+ // - Maps pages starting at offset 0x0 (covers segment 1)
129+ // - Maps pages starting at offset 0x4d000 (page-aligned, covers segment 2)
130+ //
131+ // (the example values are based on the `test_rust_divan_symbols` test)
132+ let ( file_offset, file_size) = segment. file_range ( ) ;
133+ runtime_offset >= file_offset && runtime_offset < file_offset + file_size
134+ } )
135+ . context ( "Failed to find a matching PT_LOAD segment" ) ?;
136+
137+ // Step 2: Calculate the "load bias".
138+ // The bias is the difference between where the segment *actually* is in memory versus where the
139+ // ELF file *preferred* it to be.
140+ //
141+ // load_bias = runtime_start_addr - segment_preferred_vaddr
142+ //
143+ // - `runtime_start_addr`: The actual base address of this segment in memory (from `/proc/maps`).
144+ // - `load_segment.address()`: The preferred virtual address (`p_vaddr`) from the ELF file itself.
145+ //
146+ // This single calculation correctly handles both PIE/shared-objects and non-PIE executables:
147+ // - For PIE/.so files: `0x7f... (random) - 0x... (small) = <large_bias>`
148+ // - For non-PIE files: `0x402000 (fixed) - 0x402000 (fixed) = 0`
149+ Ok ( runtime_start_addr. wrapping_sub ( load_segment. address ( ) ) )
150+ }
151+
152+ fn append_to_file < P : AsRef < Path > > ( & self , path : P ) -> anyhow:: Result < ( ) > {
75153 let mut file = std:: fs:: OpenOptions :: new ( )
76154 . create ( true )
77155 . append ( true )
78156 . open ( path) ?;
79157
80158 for symbol in & self . symbols {
81- writeln ! (
82- file,
83- "{:x} {:x} {}" ,
84- base_addr + symbol. offset,
85- symbol. size,
86- symbol. name
87- ) ?;
159+ writeln ! ( file, "{:x} {:x} {}" , symbol. addr, symbol. size, symbol. name) ?;
88160 }
89161
90162 Ok ( ( ) )
@@ -113,23 +185,21 @@ impl ProcessSymbols {
113185 module_path : P ,
114186 start_addr : u64 ,
115187 end_addr : u64 ,
188+ file_offset : u64 ,
116189 ) {
117190 if self . pid != pid {
118191 warn ! ( "pid mismatch: {} != {}" , self . pid, pid) ;
119192 return ;
120193 }
121194
195+ debug ! ( "Loading module symbols at {start_addr:x}-{end_addr:x} (offset: {file_offset:x})" ) ;
122196 let path = module_path. as_ref ( ) . to_path_buf ( ) ;
123- match ModuleSymbols :: new ( module_path) {
197+ match ModuleSymbols :: new ( module_path, start_addr , file_offset ) {
124198 Ok ( symbol) => {
125199 self . modules . entry ( path. clone ( ) ) . or_insert ( symbol) ;
126200 }
127201 Err ( error) => {
128- debug ! (
129- "Failed to load symbols for module {}: {}" ,
130- path. display( ) ,
131- error
132- ) ;
202+ debug ! ( "Failed to load symbols for module {path:?}: {error}" ) ;
133203 }
134204 }
135205
@@ -155,17 +225,48 @@ impl ProcessSymbols {
155225
156226 let symbols_path = folder. as_ref ( ) . join ( format ! ( "perf-{}.map" , self . pid) ) ;
157227 for module in self . modules . values ( ) {
158- let Some ( ( base_addr, _) ) = self
159- . module_mappings
160- . get ( & module. path )
161- . and_then ( |bounds| bounds. iter ( ) . min_by_key ( |( start, _) | start) )
162- else {
163- warn ! ( "No bounds found for module: {}" , module. path. display( ) ) ;
164- continue ;
165- } ;
166- module. append_to_file ( & symbols_path, * base_addr) ?;
228+ module. append_to_file ( & symbols_path) ?;
167229 }
168230
169231 Ok ( ( ) )
170232 }
171233}
234+
#[cfg(test)]
mod tests {
    use super::*;

    /// Go binary, using a mapping that starts at 0x402000 with file offset 0x2000.
    #[test]
    fn test_golang_symbols() {
        const MODULE_PATH: &str = "testdata/perf_map/go_fib.bin";
        let module_symbols = ModuleSymbols::new(MODULE_PATH, 0x0040_2000, 0x2000).unwrap();
        insta::assert_debug_snapshot!(module_symbols.symbols);
    }

    /// C++ binary, using a mapping that starts at 0x400000 with file offset 0.
    #[test]
    fn test_cpp_symbols() {
        const MODULE_PATH: &str = "testdata/perf_map/cpp_my_benchmark.bin";
        let module_symbols = ModuleSymbols::new(MODULE_PATH, 0x0040_0000, 0x0).unwrap();
        insta::assert_debug_snapshot!(module_symbols.symbols);
    }

    /// Rust (divan) binary, using the executable mapping whose file offset is
    /// page-aligned (0x4d000).
    #[test]
    fn test_rust_divan_symbols() {
        const MODULE_PATH: &str = "testdata/perf_map/divan_sleep_benches.bin";

        // Segments in the file:
        //   Segment: Segment { address: 0, size: 4d26a }
        //   Segment: Segment { address: 4e26c, size: ef24a }
        //   Segment: Segment { address: 13e4b8, size: ab48 }
        //   Segment: Segment { address: 1499b0, size: 11a5 }
        //
        // Segments in memory:
        //   0x0000555555554000 0x00005555555a2000 0x4e000  0x0      r--p
        //   0x00005555555a2000 0x0000555555692000 0xf0000  0x4d000  r-xp  <--
        //   0x0000555555692000 0x000055555569d000 0xb000   0x13c000 r--p
        //   0x000055555569d000 0x000055555569f000 0x2000   0x146000 rw-p
        let module_symbols = ModuleSymbols::new(MODULE_PATH, 0x5555_555a_2000, 0x4d000).unwrap();
        insta::assert_debug_snapshot!(module_symbols.symbols);
    }
}
0 commit comments