From 58e0f1572947e6f690cc01b8e83c7368d5beb76e Mon Sep 17 00:00:00 2001 From: Prathamesh Kulkarni Date: Tue, 25 Nov 2025 12:20:58 +0000 Subject: [PATCH] Add perf timestamps to top level functions. This patch is a counter part to GCC's patch for enabling time profile based reordering based on perf timestamps with AutoFDO: https://gcc.gnu.org/pipermail/gcc-patches/2025-October/696758.html The patch adds a new timestamp field to Symbol, and emits it to gcov for version >= 3. Currently, the patch only records timestamps from PMU mode (PerfDataSampleReader), I will post a follow up patch shortly for recording timestamps with SPE. Results: On an internal large workload with GCC's AutoFDO and time profile reordering (enabled by this patch), we see an improvement of ~32%. --- profile.cc | 11 +++++++++++ profile.h | 3 ++- profile_reader.cc | 6 ++++++ profile_writer.cc | 4 ++++ sample_reader.cc | 3 +++ sample_reader.h | 6 ++++++ symbol_map.cc | 9 +++++++-- symbol_map.h | 7 ++++++- 8 files changed, 45 insertions(+), 4 deletions(-) diff --git a/profile.cc b/profile.cc index ac0ef84..c6f71cf 100644 --- a/profile.cc +++ b/profile.cc @@ -60,6 +60,15 @@ void Profile::AggregatePerFunctionProfile(bool check_lbr_entry) { } } + const AddressTimestampMap *address_timestamp_map = &sample_reader_->address_timestamp_map (); + for (const auto &[addr, timestamp] : *address_timestamp_map) { + uint64_t vaddr = symbol_map_->get_static_vaddr(addr); + ProfileMaps *maps = GetProfileMaps(vaddr); + if (maps != nullptr && maps->timestamp == 0) { + maps->timestamp = timestamp; + } + } + #if defined(HAVE_LLVM) std::unique_ptr Disassembler; if (check_lbr_entry) { @@ -149,6 +158,8 @@ void Profile::ProcessPerFunctionProfile(absl::string_view func_name, maps.end_addr); // LOG(INFO) << "Built instruction map for func: " << func_name; + symbol_map_->AddSymbolTimestamp (func_name, maps.timestamp); + AddressCountMap map; const AddressCountMap *map_ptr; if (absl::GetFlag(FLAGS_use_lbr)) { diff --git a/profile.h b/profile.h index e590fa8..0d2850a 100644 --- a/profile.h +++ b/profile.h @@ -52,10 +52,11 @@ class Profile { // Internal data structure that aggregates profile for each symbol. struct ProfileMaps { ProfileMaps(uint64_t start, uint64_t end) - : start_addr(start), end_addr(end) {} + : start_addr(start), end_addr(end), timestamp (0) {} uint64_t GetAggregatedCount() const; uint64_t start_addr; uint64_t end_addr; + uint64_t timestamp; AddressCountMap address_count_map; RangeCountMap range_count_map; BranchCountMap branch_count_map; diff --git a/profile_reader.cc b/profile_reader.cc index 8097a50..20b73ae 100644 --- a/profile_reader.cc +++ b/profile_reader.cc @@ -33,8 +33,13 @@ void AutoFDOProfileReader::ReadFunctionProfile() { void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, bool update) { uint64_t head_count; + uint64_t timestamp = 0; + if (stack.size() == 0) { head_count = gcov_read_counter(); + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + timestamp = gcov_read_counter(); + } } else { head_count = 0; } @@ -43,6 +48,7 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, uint32_t num_callsites = gcov_read_unsigned(); if (stack.size() == 0) { symbol_map_->AddSymbol(name); + symbol_map_->AddSymbolTimestamp(name, timestamp); if (!force_update_ && symbol_map_->GetSymbolByName(name)->total_count > 0) { update = false; } diff --git a/profile_writer.cc b/profile_writer.cc index 875ed06..0da198e 100644 --- a/profile_writer.cc +++ b/profile_writer.cc @@ -124,6 +124,9 @@ class SourceProfileWriter: public SymbolTraverser { virtual void VisitTopSymbol(const std::string &name, const Symbol *node) { gcov_write_counter(node->head_count); + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + gcov_write_counter(node->timestamp); + } gcov_write_unsigned(GetStringIndex(Symbol::Name(name.c_str()))); } @@ -326,6 +329,7 @@ class ProfileDumper : public SymbolTraverser { node->Dump(0); absl::PrintF("node->head_count: %u\n", node->head_count); printf("GetStringIndex(%s): %u\n", name.c_str(), GetStringIndex(name)); + absl::PrintF("node->timestamp: %lu\n", (unsigned long) node->timestamp); printf("\n"); } diff --git a/sample_reader.cc b/sample_reader.cc index f37be53..ac5d6ab 100644 --- a/sample_reader.cc +++ b/sample_reader.cc @@ -296,6 +296,9 @@ bool PerfDataSampleReader::Append(const std::string &profile_file) { } if (MatchBinary(event.dso_and_offset)) { address_count_map_[event.dso_and_offset.offset()]++; + uint64_t address = event.dso_and_offset.offset(); + uint64_t timestamp = event.event_ptr->timestamp(); + address_timestamp_map_.insert({address, timestamp}); } int start_index = 0; while (start_index < event.branch_stack.size() && diff --git a/sample_reader.h b/sample_reader.h index 09f057b..3426f08 100644 --- a/sample_reader.h +++ b/sample_reader.h @@ -31,6 +31,7 @@ typedef std::pair Range; typedef std::map RangeCountMap; typedef std::pair Branch; typedef std::map BranchCountMap; +typedef std::map AddressTimestampMap; // Reads in the profile data, and represent it in address_count_map_. class SampleReader { @@ -52,6 +53,10 @@ class SampleReader { return branch_count_map_; } + const AddressTimestampMap &address_timestamp_map() const { + return address_timestamp_map_; + } + std::set GetSampledAddresses() const; // Returns the sample count for a given instruction. @@ -77,6 +82,7 @@ class SampleReader { AddressCountMap address_count_map_; RangeCountMap range_count_map_; BranchCountMap branch_count_map_; + AddressTimestampMap address_timestamp_map_; bool is_kernel_ = false; }; diff --git a/symbol_map.cc b/symbol_map.cc index 2483835..61f968a 100644 --- a/symbol_map.cc +++ b/symbol_map.cc @@ -513,6 +513,11 @@ void SymbolMap::AddSymbolEntryCount(absl::string_view symbol_name, symbol->total_count += total_count; } +void SymbolMap::AddSymbolTimestamp(absl::string_view symbol_name, uint64_t timestamp) { + Symbol *symbol = map_.find(symbol_name)->second; + symbol->timestamp = timestamp; +} + Symbol *SymbolMap::TraverseInlineStack(absl::string_view symbol_name, const SourceStack &src, uint64_t count, DataSource data_source) { @@ -655,8 +660,8 @@ void Symbol::DumpBody(int indent, bool for_analysis) const { void Symbol::Dump(int indent) const { const std::string printed_name = getPrintName(info.func_name); if (indent == 0) { - absl::PrintF("%s total:%u head:%u\n", printed_name, total_count, - head_count); + absl::PrintF("%s total:%u head:%u timestamp: %lu\n", printed_name, total_count, + head_count, (unsigned long) timestamp); } else { absl::PrintF("%s total:%u\n", printed_name, total_count); } diff --git a/symbol_map.h b/symbol_map.h index 9e47211..8b0aa9e 100644 --- a/symbol_map.h +++ b/symbol_map.h @@ -153,7 +153,8 @@ class Symbol { total_count_incl(0), head_count(0), callsites(0), - pos_counts() { + pos_counts(), + timestamp(0) { } // This constructor is used to create aliased symbol. @@ -242,6 +243,8 @@ class Symbol { CallsiteMap callsites; // Map from source location to count and instruction number. PositionCountMap pos_counts; + // timestamp of first executed instruction + uint64_t timestamp; }; // Vector of unique pointers to symbols. @@ -420,6 +423,8 @@ class SymbolMap { void AddSymbolEntryCount(absl::string_view symbol, uint64_t head_count, uint64_t total_count = 0); + void AddSymbolTimestamp(absl::string_view symbol, uint64_t timestamp); + // DataSource represents what kind of data is used to generate afdo profile. // PERFDATA: convert perf.data to afdo profile. // AFDOPROTO: convert afdo proto generated by GWP autofdo pipeline to