Skip to content

Commit 206e4af

Browse files
authored
feature: add support for custom perf events in flamegraph generation (#614)
* feat: add support for custom perf events in flamegraph generation
Signed-off-by: Harper, Jason M <jason.m.harper@intel.com>
* update Superuser requirement to false for telemetry scripts (#615)
* update Superuser requirement to false for telemetry scripts
Signed-off-by: Harper, Jason M <jason.m.harper@intel.com>
* turbostat and processwatch require sudo
Signed-off-by: Harper, Jason M <jason.m.harper@intel.com>
---------
Signed-off-by: Harper, Jason M <jason.m.harper@intel.com>
* feat: add support for custom perf events in flamegraph generation
Signed-off-by: Harper, Jason M <jason.m.harper@intel.com>
---------
Signed-off-by: Harper, Jason M <jason.m.harper@intel.com>
1 parent 3c7830e commit 206e4af

6 files changed

Lines changed: 103 additions & 45 deletions

File tree

cmd/flamegraph/flamegraph.go

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ var examples = []string{
2727
fmt.Sprintf(" Flamegraph from local host: $ %s %s", app.Name, cmdName),
2828
fmt.Sprintf(" Flamegraph from remote target: $ %s %s --target 192.168.1.1 --user fred --key fred_key", app.Name, cmdName),
2929
fmt.Sprintf(" Flamegraph from multiple targets: $ %s %s --targets targets.yaml", app.Name, cmdName),
30+
fmt.Sprintf(" Flamegraph for cache misses: $ %s %s --perf-event cache-misses", app.Name, cmdName),
3031
}
3132

3233
var Cmd = &cobra.Command{
@@ -48,6 +49,7 @@ var (
4849
flagPids []int
4950
flagNoSystemSummary bool
5051
flagMaxDepth int
52+
flagPerfEvent string
5153
)
5254

5355
const (
@@ -56,6 +58,7 @@ const (
5658
flagPidsName = "pids"
5759
flagNoSystemSummaryName = "no-summary"
5860
flagMaxDepthName = "max-depth"
61+
flagPerfEventName = "perf-event"
5962
)
6063

6164
func init() {
@@ -66,6 +69,7 @@ func init() {
6669
Cmd.Flags().IntSliceVar(&flagPids, flagPidsName, nil, "")
6770
Cmd.Flags().BoolVar(&flagNoSystemSummary, flagNoSystemSummaryName, false, "")
6871
Cmd.Flags().IntVar(&flagMaxDepth, flagMaxDepthName, 0, "")
72+
Cmd.Flags().StringVar(&flagPerfEvent, flagPerfEventName, "cycles:P", "")
6973

7074
workflow.AddTargetFlags(Cmd)
7175

@@ -113,13 +117,17 @@ func getFlagGroups() []app.FlagGroup {
113117
Help: "comma separated list of PIDs. If not specified, all PIDs will be collected",
114118
},
115119
{
116-
Name: app.FlagFormatName,
117-
Help: fmt.Sprintf("choose output format(s) from: %s", strings.Join(append([]string{report.FormatAll}, report.FormatHtml, report.FormatTxt, report.FormatJson), ", ")),
120+
Name: flagPerfEventName,
121+
Help: "perf event to use for native sampling (e.g., cpu-cycles, instructions, cache-misses, branches, context-switches, mem-loads, mem-stores, etc.)",
118122
},
119123
{
120124
Name: flagMaxDepthName,
121125
Help: "maximum render depth of call stack in flamegraph (0 = no limit)",
122126
},
127+
{
128+
Name: app.FlagFormatName,
129+
Help: fmt.Sprintf("choose output format(s) from: %s", strings.Join(append([]string{report.FormatAll}, report.FormatHtml, report.FormatTxt, report.FormatJson), ", ")),
130+
},
123131
{
124132
Name: flagNoSystemSummaryName,
125133
Help: "do not include system summary table in report",
@@ -183,7 +191,7 @@ func runCmd(cmd *cobra.Command, args []string) error {
183191
if !flagNoSystemSummary {
184192
tables = append(tables, app.TableDefinitions[app.SystemSummaryTableName])
185193
}
186-
tables = append(tables, tableDefinitions[CallStackFrequencyTableName])
194+
tables = append(tables, tableDefinitions[FlameGraphTableName])
187195
reportingCommand := workflow.ReportingCommand{
188196
Cmd: cmd,
189197
ReportNamePost: "flame",
@@ -192,11 +200,12 @@ func runCmd(cmd *cobra.Command, args []string) error {
192200
"Duration": strconv.Itoa(flagDuration),
193201
"PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","),
194202
"MaxDepth": strconv.Itoa(flagMaxDepth),
203+
"PerfEvent": flagPerfEvent,
195204
},
196205
Tables: tables,
197206
}
198207

199-
report.RegisterHTMLRenderer(CallStackFrequencyTableName, callStackFrequencyTableHTMLRenderer)
208+
report.RegisterHTMLRenderer(FlameGraphTableName, callStackFrequencyTableHTMLRenderer)
200209

201210
return reportingCommand.Run()
202211
}

cmd/flamegraph/flamegraph_renderers.go

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,7 @@ func renderFlameGraph(header string, tableValues table.TableValues, field string
185185
fg := texttemplate.Must(texttemplate.New("flameGraphTemplate").Parse(flameGraphTemplate))
186186
buf := new(bytes.Buffer)
187187
err = fg.Execute(buf, flameGraphTemplateStruct{
188-
ID: fmt.Sprintf("%d%s", util.RandUint(10000), header),
188+
ID: fmt.Sprintf("%d%s", util.RandUint(10000), strings.Split(header, " ")[0]),
189189
Data: jsonStacks,
190190
Header: header,
191191
})
@@ -223,7 +223,14 @@ func callStackFrequencyTableHTMLRenderer(tableValues table.TableValues, targetNa
223223
}
224224
</style>
225225
`
226-
out += renderFlameGraph("Native", tableValues, "Native Stacks")
227-
out += renderFlameGraph("Java", tableValues, "Java Stacks")
226+
// get the perf event from the table values
227+
perfEventFieldIndex, err := table.GetFieldIndex("Perf Event", tableValues)
228+
if err != nil {
229+
slog.Error("didn't find expected field (Perf Event) in table", slog.String("error", err.Error()))
230+
return out
231+
}
232+
perfEvent := tableValues.Fields[perfEventFieldIndex].Values[0]
233+
out += renderFlameGraph(fmt.Sprintf("Native (%s)", perfEvent), tableValues, "Native Stacks")
234+
out += renderFlameGraph("Java (async-profiler)", tableValues, "Java Stacks")
228235
return out
229236
}

cmd/flamegraph/flamegraph_tables.go

Lines changed: 37 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -18,35 +18,36 @@ import (
1818

1919
// flamegraph table names
2020
const (
21-
CallStackFrequencyTableName = "Call Stack Frequency"
21+
FlameGraphTableName = "Flamegraph"
2222
)
2323

2424
// flamegraph tables
2525
var tableDefinitions = map[string]table.TableDefinition{
26-
CallStackFrequencyTableName: {
27-
Name: CallStackFrequencyTableName,
28-
MenuLabel: CallStackFrequencyTableName,
26+
FlameGraphTableName: {
27+
Name: FlameGraphTableName,
28+
MenuLabel: FlameGraphTableName,
2929
ScriptNames: []string{
30-
script.CollapsedCallStacksScriptName,
30+
script.FlameGraphScriptName,
3131
},
32-
FieldsFunc: callStackFrequencyTableValues},
32+
FieldsFunc: flameGraphTableValues},
3333
}
3434

35-
func callStackFrequencyTableValues(outputs map[string]script.ScriptOutput) []table.Field {
35+
func flameGraphTableValues(outputs map[string]script.ScriptOutput) []table.Field {
3636
fields := []table.Field{
3737
{Name: "Native Stacks", Values: []string{nativeFoldedFromOutput(outputs)}},
3838
{Name: "Java Stacks", Values: []string{javaFoldedFromOutput(outputs)}},
3939
{Name: "Maximum Render Depth", Values: []string{maxRenderDepthFromOutput(outputs)}},
40+
{Name: "Perf Event", Values: []string{perfEventFromOutput(outputs)}},
4041
}
4142
return fields
4243
}
4344

4445
func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
45-
if outputs[script.CollapsedCallStacksScriptName].Stdout == "" {
46+
if outputs[script.FlameGraphScriptName].Stdout == "" {
4647
slog.Warn("collapsed call stack output is empty")
4748
return ""
4849
}
49-
sections := extract.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout)
50+
sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout)
5051
if len(sections) == 0 {
5152
slog.Warn("no sections in collapsed call stack output")
5253
return ""
@@ -84,11 +85,11 @@ func javaFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
8485
}
8586

8687
func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
87-
if outputs[script.CollapsedCallStacksScriptName].Stdout == "" {
88+
if outputs[script.FlameGraphScriptName].Stdout == "" {
8889
slog.Warn("collapsed call stack output is empty")
8990
return ""
9091
}
91-
sections := extract.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout)
92+
sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout)
9293
if len(sections) == 0 {
9394
slog.Warn("no sections in collapsed call stack output")
9495
return ""
@@ -103,6 +104,11 @@ func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
103104
}
104105
}
105106
if dwarfFolded == "" && fpFolded == "" {
107+
slog.Warn("no native folded stacks found")
108+
// "event syntax error: 'foo'" indicates that the perf event specified is invalid/unsupported
109+
if strings.Contains(outputs[script.FlameGraphScriptName].Stderr, "event syntax error") {
110+
slog.Error("unsupported perf event specified", slog.String("error", outputs[script.FlameGraphScriptName].Stderr))
111+
}
106112
return ""
107113
}
108114
folded, err := mergeSystemFolded(fpFolded, dwarfFolded)
@@ -113,11 +119,11 @@ func nativeFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
113119
}
114120

115121
func maxRenderDepthFromOutput(outputs map[string]script.ScriptOutput) string {
116-
if outputs[script.CollapsedCallStacksScriptName].Stdout == "" {
122+
if outputs[script.FlameGraphScriptName].Stdout == "" {
117123
slog.Warn("collapsed call stack output is empty")
118124
return ""
119125
}
120-
sections := extract.GetSectionsFromOutput(outputs[script.CollapsedCallStacksScriptName].Stdout)
126+
sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout)
121127
if len(sections) == 0 {
122128
slog.Warn("no sections in collapsed call stack output")
123129
return ""
@@ -130,6 +136,24 @@ func maxRenderDepthFromOutput(outputs map[string]script.ScriptOutput) string {
130136
return ""
131137
}
132138

139+
// perfEventFromOutput returns the perf event name found in the flamegraph
// script's output, or "" when it is not available.
//
// It reads the Stdout stored under script.FlameGraphScriptName, splits it into
// sections with extract.GetSectionsFromOutput, and returns the
// whitespace-trimmed content of the section whose header is "perf_event".
// A warning is logged and "" is returned when Stdout is empty or produces no
// sections; "" is returned without a warning when there is no "perf_event"
// section.
func perfEventFromOutput(outputs map[string]script.ScriptOutput) string {
140+
if outputs[script.FlameGraphScriptName].Stdout == "" {
141+
slog.Warn("collapsed call stack output is empty")
142+
return ""
143+
}
144+
sections := extract.GetSectionsFromOutput(outputs[script.FlameGraphScriptName].Stdout)
145+
if len(sections) == 0 {
146+
slog.Warn("no sections in collapsed call stack output")
147+
return ""
148+
}
149+
for header, content := range sections {
150+
if header == "perf_event" {
151+
return strings.TrimSpace(content)
152+
}
153+
}
154+
return ""
155+
}
156+
133157
// ProcessStacks ...
134158
// [processName][callStack]=count
135159
type ProcessStacks map[string]Stacks

internal/script/scripts.go

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ const (
120120
GaudiTelemetryScriptName = "gaudi telemetry"
121121
PDUTelemetryScriptName = "pdu telemetry"
122122
// flamegraph scripts
123-
CollapsedCallStacksScriptName = "collapsed call stacks"
123+
FlameGraphScriptName = "flamegraph"
124124
// lock scripts
125125
ProfileKernelLockScriptName = "profile kernel lock"
126126
)
@@ -1445,13 +1445,14 @@ done
14451445
Superuser: false,
14461446
},
14471447
// flamegraph scripts
1448-
CollapsedCallStacksScriptName: {
1449-
Name: CollapsedCallStacksScriptName,
1448+
FlameGraphScriptName: {
1449+
Name: FlameGraphScriptName,
14501450
ScriptTemplate: `# Combined (perf record and async profiler) call stack collection
14511451
pids={{.PIDs}}
14521452
duration={{.Duration}}
14531453
frequency={{.Frequency}}
14541454
maxdepth={{.MaxDepth}}
1455+
perf_event={{.PerfEvent}}
14551456
14561457
ap_interval=0
14571458
if [ "$frequency" -ne 0 ]; then
@@ -1497,6 +1498,9 @@ print_results() {
14971498
echo "########## maximum depth ##########"
14981499
echo "$maxdepth"
14991500
1501+
echo "########## perf_event ##########"
1502+
echo "$perf_event"
1503+
15001504
if [ -f perf_dwarf_folded ]; then
15011505
echo "########## perf_dwarf ##########"
15021506
cat perf_dwarf_folded
@@ -1559,9 +1563,9 @@ fi
15591563
15601564
# Start profiling with perf in frame pointer mode
15611565
if [ -n "$pids" ]; then
1562-
perf record -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 &
1566+
perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 &
15631567
else
1564-
perf record -F "$frequency" -a -g -o perf_fp_data -m 129 &
1568+
perf record -e "$perf_event" -F "$frequency" -a -g -o perf_fp_data -m 129 &
15651569
fi
15661570
perf_fp_pid=$!
15671571
if ! kill -0 $perf_fp_pid 2>/dev/null; then
@@ -1572,9 +1576,9 @@ fi
15721576
15731577
# Start profiling with perf in dwarf mode
15741578
if [ -n "$pids" ]; then
1575-
perf record -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
1579+
perf record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
15761580
else
1577-
perf record -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
1581+
perf record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
15781582
fi
15791583
perf_dwarf_pid=$!
15801584
if ! kill -0 $perf_dwarf_pid 2>/dev/null; then

tools/stackcollapse-perf/stackcollapse-perf.go

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ func main() {
114114
input = os.Stdin
115115
}
116116

117-
err = ProcessStacks(input, os.Stdout, config)
117+
err = ProcessStacks(input, os.Stdout, os.Stderr, config)
118118
if err != nil {
119119
fmt.Fprintf(os.Stderr, "Error processing stacks: %s\n", err)
120120
os.Exit(1)
@@ -133,12 +133,14 @@ var (
133133

134134
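// ProcessStacks processes stack traces from the input reader, writes the collapsed stacks to the output writer, and writes diagnostics (per-record errors, skipped-event notices, read errors) to the errorOutput writer.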
135135
// It uses the provided configuration to control the processing behavior.
136-
func ProcessStacks(input io.Reader, output io.Writer, config Config) error {
136+
func ProcessStacks(input io.Reader, output io.Writer, errorOutput io.Writer, config Config) error {
137137
var stack []string
138138
var processName string
139139
var period int
140140
aggregator := NewStackAggregator()
141141
scanner := bufio.NewScanner(input)
142+
eventFilter := config.EventFilter // if not set, it will be set to the first event encountered
143+
skipStackLines := false // whether to skip stack lines based on event filtering
142144

143145
// main loop, read lines from stdin
144146
for scanner.Scan() {
@@ -165,25 +167,35 @@ func ProcessStacks(input io.Reader, output io.Writer, config Config) error {
165167
}
166168
// check for event record
167169
if eventLineRegex.MatchString(line) {
170+
skipStackLines = false
168171
var err error
169-
processName, period, err = handleEventRecord(line, config)
172+
var event string
173+
processName, period, event, err = handleEventRecord(line, config)
170174
if err != nil {
171-
fmt.Fprintf(output, "Error: %s\n", err)
175+
fmt.Fprintf(errorOutput, "Error: %s\n", err)
176+
skipStackLines = true
177+
continue
178+
}
179+
if eventFilter == "" {
180+
eventFilter = event // default to first event
181+
} else if event != eventFilter {
182+
fmt.Fprintf(errorOutput, "Skipping event %s, filtering for %s\n", event, eventFilter)
183+
skipStackLines = true // need to skip stack lines for this event
172184
}
173185
continue
174186
}
175187
// check for stack line
176-
if stackLineRegex.MatchString(line) {
188+
if stackLineRegex.MatchString(line) && !skipStackLines {
177189
err := handleStackLine(line, &stack, processName, config)
178190
if err != nil {
179-
fmt.Fprintf(output, "Error: %s\n", err)
191+
fmt.Fprintf(errorOutput, "Error: %s\n", err)
180192
}
181193
continue
182194
}
183195
}
184196
// Check for errors during scanning
185197
if err := scanner.Err(); err != nil {
186-
fmt.Fprintf(os.Stderr, "Error reading input: %s\n", err)
198+
fmt.Fprintf(errorOutput, "Error reading input: %s\n", err)
187199
return err
188200
}
189201
// Output results
@@ -199,7 +211,7 @@ func ProcessStacks(input io.Reader, output io.Writer, config Config) error {
199211
}
200212

201213
// handleEventRecord parses an event record line and returns the process name, period, and event name, based on the configuration.
202-
func handleEventRecord(line string, config Config) (processName string, period int, err error) {
214+
func handleEventRecord(line string, config Config) (processName string, period int, event string, err error) {
203215
matches := eventLineRegex.FindStringSubmatch(line)
204216
if matches == nil {
205217
return
@@ -224,14 +236,7 @@ func handleEventRecord(line string, config Config) (processName string, period i
224236
}
225237
period = eventPeriodInt
226238
}
227-
event := eventMatches[2]
228-
229-
if config.EventFilter == "" {
230-
config.EventFilter = event
231-
} else if event != config.EventFilter {
232-
err = fmt.Errorf("event type mismatch: %s != %s", event, config.EventFilter)
233-
return
234-
}
239+
event = eventMatches[2]
235240
}
236241

237242
if config.IncludeTid {

0 commit comments

Comments
 (0)