Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ add_subdirectory(contrib/librseq-cmake)
add_subdirectory(contrib/libbacktrace-cmake)
add_subdirectory(contrib/googletest-cmake)
add_subdirectory(contrib/benchmark-cmake)
add_subdirectory(contrib/systemtap-sdt)

option(BUILD_POCO "Build Poco library (required by http-perf)" OFF)
if(BUILD_POCO)
Expand Down
231 changes: 144 additions & 87 deletions bb
Original file line number Diff line number Diff line change
Expand Up @@ -391,26 +391,30 @@ def _render_flamegraph(folded_file: str, out_svg: str, title: str) -> None:
)


def _run_flamegraph(preset: str, name: str, client_args: list[str]) -> None:
def _run_profiler(
preset: str,
name: str,
client_args: list[str],
profiler_args: list[str],
out_suffix: str,
) -> str:
cmd_build(preset, ["profiler"])

profiler_bin = os.path.join(ROOT, f"build/{preset}/bin/profiler")
folded_stacks = os.path.join(ROOT, f"build/{preset}/{name}.flamegraph.folded")
out_svg = os.path.join(ROOT, f"build/{preset}/{name}.flamegraph.svg")
out_file = os.path.join(ROOT, f"build/{preset}/{name}.{out_suffix}")
verbose_flag = ["--verbose"] if log.isEnabledFor(logging.DEBUG) else []

log.info("profiling %s -> %s", name, out_svg)
log.info("profiling %s -> %s", name, out_file)

client = start_process(*client_args, stdout=subprocess.DEVNULL)

try:
with open(folded_stacks, "w") as f:
with open(out_file, "w") as f:
profiler = start_process(
profiler_bin,
"--pid",
str(client.pid),
"--off-cpu",
"--kernel-stacks",
*profiler_args,
*verbose_flag,
stdout=f,
)
Expand All @@ -427,12 +431,35 @@ def _run_flamegraph(preset: str, name: str, client_args: list[str]) -> None:
client.kill()
raise

return out_file


def _run_flamegraph(preset: str, name: str, client_args: list[str]) -> None:
    """Profile the client command and render the result as a flamegraph SVG.

    The profiler is run with on-CPU, off-CPU and kernel-stack sampling; its
    output is written to ``build/<preset>/<name>.flamegraph.folded`` and then
    rendered to ``build/<preset>/<name>.flamegraph.svg``.

    Args:
        preset: Build preset; selects the ``build/<preset>`` output directory.
        name: Tag used in the output file names and in the flamegraph title.
        client_args: Command line of the client process to profile.
    """
    profiler_flags = ["--on-cpu", "--off-cpu", "--kernel-stacks"]
    folded_path = _run_profiler(
        preset, name, client_args, profiler_flags, "flamegraph.folded"
    )

    svg_path = os.path.join(ROOT, f"build/{preset}/{name}.flamegraph.svg")
    chart_title = f"{name} on-CPU + off-CPU"
    _render_flamegraph(folded_path, svg_path, chart_title)

    # Tell the user where both artifacts ended up.
    log.info("folded stacks: %s", folded_path)
    log.info("flamegraph: %s", svg_path)


def _run_latency(preset: str, name: str, client_args: list[str]) -> None:
    """Profile the client command with ``--usdt`` and print the latency report.

    The profiler output is written to ``build/<preset>/<name>.latency.txt``
    and its contents are also copied to stdout.

    Args:
        preset: Build preset; selects the ``build/<preset>`` output directory.
        name: Tag used in the report file name.
        client_args: Command line of the client process to profile.
    """
    report_path = _run_profiler(
        preset, name, client_args, ["--usdt"], "latency.txt"
    )

    # The report stays on disk; echo it so the user sees the table right away.
    with open(report_path) as report:
        contents = report.read()
    sys.stdout.write(contents)

    log.info("latency report: %s", report_path)


def _print_counters(data: dict[str, Any]) -> None:
counters = data.get("counters", {})
if not counters:
Expand Down Expand Up @@ -504,6 +531,7 @@ class NetPerfParams:
connections: list[int] = field(default_factory=lambda: [1000])
delay: str = "0"
flamegraph: bool = False
latency: bool = False
print_counters: bool = False
timeout: int = 180

Expand Down Expand Up @@ -553,31 +581,31 @@ def _cmd_net_perf_impl(preset: str, params: NetPerfParams, binary: str) -> None:
)

try:
if params.flamegraph:
_run_flamegraph(
preset,
binary,
[
"taskset",
"-c",
client_cpus,
net_perf,
"client",
"--host",
params.host,
"--port",
str(params.port),
"--connections",
str(params.connections[0]),
"--msg-size",
str(params.msg_size),
"--duration",
str(params.duration),
"--warmup",
str(params.warmup),
*verbose_flag,
],
)
if params.flamegraph or params.latency:
client_cmd = [
"taskset",
"-c",
client_cpus,
net_perf,
"client",
"--host",
params.host,
"--port",
str(params.port),
"--connections",
str(params.connections[0]),
"--msg-size",
str(params.msg_size),
"--duration",
str(params.duration),
"--warmup",
str(params.warmup),
*verbose_flag,
]
if params.flamegraph:
_run_flamegraph(preset, binary, client_cmd)
else:
_run_latency(preset, binary, client_cmd)
else:
print(_perf_row(_NP_HEADERS, _NP_WIDTH))
print(_perf_sep(_NP_WIDTH))
Expand Down Expand Up @@ -645,6 +673,7 @@ class FilePerfParams:
iodepth: list[int] = field(default_factory=lambda: [16])
rw: list[str] = field(default_factory=lambda: ["randread"])
flamegraph: bool = False
latency: bool = False
print_counters: bool = False
timeout: int = 180

Expand Down Expand Up @@ -683,32 +712,32 @@ def cmd_file_perf(preset: str, params: FilePerfParams) -> None:
verbose_flag = ["--verbose"] if log.isEnabledFor(logging.DEBUG) else []

try:
if params.flamegraph:
if params.flamegraph or params.latency:
jobs, depth, mode = configs[0]
_run_flamegraph(
preset,
"file-perf",
[
file_perf,
"--numjobs",
str(jobs),
"--iodepth",
str(depth),
"--bs",
params.bs,
"--rw",
mode,
"--size",
params.size,
"--runtime",
str(params.duration),
"--warmup",
str(params.warmup),
"--filename",
params.file,
*verbose_flag,
],
)
client_cmd = [
file_perf,
"--numjobs",
str(jobs),
"--iodepth",
str(depth),
"--bs",
params.bs,
"--rw",
mode,
"--size",
params.size,
"--runtime",
str(params.duration),
"--warmup",
str(params.warmup),
"--filename",
params.file,
*verbose_flag,
]
if params.flamegraph:
_run_flamegraph(preset, "file-perf", client_cmd)
else:
_run_latency(preset, "file-perf", client_cmd)
else:
print(_perf_row(_FP_HEADERS, _FP_WIDTHS))
print(_perf_sep(_FP_WIDTHS))
Expand Down Expand Up @@ -960,6 +989,7 @@ class HttpPerfParams:
delay: str = "0"
threads: bool = False
flamegraph: bool = False
latency: bool = False
print_counters: bool = False
timeout: int = 180

Expand Down Expand Up @@ -1046,30 +1076,31 @@ def cmd_http_perf(preset: str, params: HttpPerfParams) -> None:
verbose_flag = ["--verbose"] if log.isEnabledFor(logging.DEBUG) else []

try:
if params.flamegraph:
_run_flamegraph(
preset,
"http-perf-" + mode,
[
"taskset",
"-c",
client_cpus,
http_perf,
"client",
"--host",
params.host,
"--port",
str(params.port),
"--connections",
str(params.connections[0]),
"--duration",
str(params.duration),
"--warmup",
str(params.warmup),
*threads_flag,
*verbose_flag,
],
)
if params.flamegraph or params.latency:
client_cmd = [
"taskset",
"-c",
client_cpus,
http_perf,
"client",
"--host",
params.host,
"--port",
str(params.port),
"--connections",
str(params.connections[0]),
"--duration",
str(params.duration),
"--warmup",
str(params.warmup),
*threads_flag,
*verbose_flag,
]
tag = "http-perf-" + mode
if params.flamegraph:
_run_flamegraph(preset, tag, client_cmd)
else:
_run_latency(preset, tag, client_cmd)
else:
print(_perf_row(_HP_HEADERS, _HP_WIDTHS))
print(_perf_sep(_HP_WIDTHS))
Expand Down Expand Up @@ -1159,6 +1190,7 @@ class S3PerfParams:
rw: list[str] = field(default_factory=lambda: ["read"])
threads: bool = False
flamegraph: bool = False
latency: bool = False
data_dir: str = "/dev/shm/minio-data"
print_counters: bool = False
timeout: int = 180
Expand Down Expand Up @@ -1303,13 +1335,14 @@ def cmd_s3_perf(preset: str, params: S3PerfParams) -> None:

executor = "threads" if params.threads else "fibers"

if params.flamegraph:
if params.flamegraph or params.latency:
jobs, depth, mode = configs[0]
_run_flamegraph(
preset,
f"s3-perf-{mode}-{executor}",
["taskset", "-c", client_cpus] + make_cmd(jobs, depth, mode),
)
client_cmd = ["taskset", "-c", client_cpus] + make_cmd(jobs, depth, mode)
tag = f"s3-perf-{mode}-{executor}"
if params.flamegraph:
_run_flamegraph(preset, tag, client_cmd)
else:
_run_latency(preset, tag, client_cmd)
else:
print(_perf_row(_S3P_HEADERS, _S3P_WIDTHS))
print(_perf_sep(_S3P_WIDTHS))
Expand Down Expand Up @@ -1552,6 +1585,12 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="profile process and generate flamegraph SVG",
)
file_perf_parser.add_argument(
"--latency",
dest="file_latency",
action="store_true",
help="profile process with USDT probes and print fiber latency breakdown",
)
file_perf_parser.add_argument(
"--print-counters",
dest="file_print_counters",
Expand Down Expand Up @@ -1681,6 +1720,12 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="profile client and generate flamegraph SVG",
)
parser.add_argument(
"--latency",
dest="net_latency",
action="store_true",
help="profile client with USDT probes and print fiber latency breakdown",
)
parser.add_argument(
"--timeout",
dest="net_timeout",
Expand Down Expand Up @@ -1797,6 +1842,12 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="profile client and generate flamegraph SVG",
)
http_perf_parser.add_argument(
"--latency",
dest="http_latency",
action="store_true",
help="profile client with USDT probes and print fiber latency breakdown",
)
http_perf_parser.add_argument(
"--print-counters",
dest="http_print_counters",
Expand Down Expand Up @@ -1880,6 +1931,12 @@ def _build_parser() -> argparse.ArgumentParser:
action="store_true",
help="profile first config and generate flamegraph SVG",
)
s3_perf_parser.add_argument(
"--latency",
dest="s3_latency",
action="store_true",
help="profile first config with USDT probes and print fiber latency breakdown",
)
s3_perf_parser.add_argument(
"--data-dir",
dest="s3_data_dir",
Expand Down
12 changes: 12 additions & 0 deletions contrib/systemtap-sdt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Vendored copy of systemtap's <sys/sdt.h> + <sys/sdt-config.h>, the two
# header files needed to emit USDT (Statically Defined Tracepoint) probes.
# Both files are dedicated to the public domain (CC0) by upstream specifically
# so user programs incorporating them are not GPL-tainted; no further attribution
# or LICENSE artifact is required.
#
# We vendor instead of relying on the systemtap-sdt-dev distro package so that
# the silk build has zero extra apt dependencies for USDT.

# Header-only target: an INTERFACE library builds no artifacts and only
# propagates usage requirements (here, the include path) to its consumers.
add_library(systemtap-sdt INTERFACE)
# Expose this directory as an include root so the vendored headers under sys/
# resolve as <sys/sdt.h> and <sys/sdt-config.h>.
target_include_directories(systemtap-sdt INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
# Namespaced alias for consumers to reference in target_link_libraries().
add_library(SystemTap::Sdt ALIAS systemtap-sdt)
6 changes: 6 additions & 0 deletions contrib/systemtap-sdt/sys/sdt-config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure.

This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to
indicate whether the assembler supports "?" in .pushsection directives. */

#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1
Loading
Loading