From c6572fa1ea9cb55341adff492b8ff4495ec5dee4 Mon Sep 17 00:00:00 2001 From: thesues Date: Thu, 12 Jun 2025 20:34:00 +0000 Subject: [PATCH] add new rdma batch APIs --- Dockerfile.build | 4 +- README.md | 17 +- infinistore/__init__.py | 2 + infinistore/example/client_async_single2.py | 69 + infinistore/lib.py | 198 +- infinistore/test_infinistore.py | 223 +- src/allocate_response.fbs | 11 - src/backward.hpp | 4465 +++++++++++++++++++ src/ibv_helper.cpp | 2 +- src/infinistore.cpp | 275 +- src/libinfinistore.cpp | 271 +- src/libinfinistore.h | 57 +- src/mempool.cpp | 10 +- src/meson.build | 17 +- src/meta_request.fbs | 5 +- src/protocol.h | 2 +- src/pybind.cpp | 41 +- src/utils.cpp | 55 +- src/utils.h | 11 + 19 files changed, 5433 insertions(+), 302 deletions(-) create mode 100644 infinistore/example/client_async_single2.py delete mode 100644 src/allocate_response.fbs create mode 100644 src/backward.hpp diff --git a/Dockerfile.build b/Dockerfile.build index 990dcee..80c56c4 100644 --- a/Dockerfile.build +++ b/Dockerfile.build @@ -1,6 +1,6 @@ FROM quay.io/pypa/manylinux_2_28_x86_64 -RUN yum -y install rdma-core-devel libuv-devel +RUN yum -y install rdma-core-devel libuv-devel elfutils-devel RUN dnf clean all RUN dnf makecache @@ -42,7 +42,7 @@ ENV PATH=/usr/local/flatbuffers/bin:$PATH RUN rm -rf /tmp/flatbuffers # Install boost -RUN dnf install -y boost boost-devel +RUN dnf install -y boost boost-devel elfutils-devel # The above get the build environment ready! WORKDIR /app diff --git a/README.md b/README.md index 3a0849f..8c808e1 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,12 @@ Integration with SGLang and other inference engines are in progress. ## Install from PIP +RDMA library + +``` +apt install ibverbs-utils libibverbs-dev +``` + Most users just need to deploy and run InfiniStore, and they don't need to understand how InfiniStore works internally. 
For these users, PIP is the recommended way to install: ``` @@ -42,21 +48,12 @@ apt install libuv1-dev apt install libflatbuffers-dev apt install libspdlog-dev libfmt-dev apt install ibverbs-utils libibverbs-dev -apt install libboost-dev libboost-stacktrace-dev +apt install libboost-dev libdw-dev pip install --no-build-isolation -e . pip install pre-commit pre-commit install ``` -## Verify Your Installation - -After installation, either from PIP or from source code, run the following command to verify your installation is successful: - -``` -infinistore --manage-port 8088 -curl http://127.0.0.1:8088/selftest -``` - # Run InfiniStore ## Run As a Standalone Service diff --git a/infinistore/__init__.py b/infinistore/__init__.py index 46bcd6c..01e7bcd 100644 --- a/infinistore/__init__.py +++ b/infinistore/__init__.py @@ -13,6 +13,7 @@ InfiniStoreException, InfiniStoreKeyNotFound, evict_cache, + Desc, ) __all__ = [ @@ -30,4 +31,5 @@ "InfiniStoreException", "InfiniStoreKeyNotFound", "evict_cache", + "Desc", ] diff --git a/infinistore/example/client_async_single2.py b/infinistore/example/client_async_single2.py new file mode 100644 index 0000000..0df882c --- /dev/null +++ b/infinistore/example/client_async_single2.py @@ -0,0 +1,69 @@ +import infinistore +import uuid +import asyncio +import ctypes +from infinistore import Desc + + +def generate_uuid(): + return str(uuid.uuid4()) + + +config = infinistore.ClientConfig( + host_addr="127.0.0.1", + service_port=12345, + log_level="info", + connection_type=infinistore.TYPE_RDMA, + ib_port=1, + link_type=infinistore.LINK_ETHERNET, + dev_name="mlx5_0", +) + + +def get_ptr(mv: memoryview): + return ctypes.addressof(ctypes.c_char.from_buffer(mv)) + + +async def main(): + rdma_conn = infinistore.InfinityConnection(config) + + # FIXME: This is a blocking call, should be async + await rdma_conn.connect_async() + + key = generate_uuid() + + size = 128 * 1024 + src = bytearray(size) + dst = memoryview(bytearray(size)) + + def 
register_mr(): + rdma_conn.register_mr(get_ptr(src), len(src)) + rdma_conn.register_mr(get_ptr(dst), len(dst)) + + await asyncio.to_thread(register_mr) + + # set src + for i in range(size): + src[i] = i % 256 + + is_exist = await asyncio.to_thread(rdma_conn.check_exist, key) + assert not is_exist + + await rdma_conn.rdma_write_cache_async2( + [Desc(key, get_ptr(src), size), Desc(key + "_1", get_ptr(src), size)] + ) + ret = await rdma_conn.rdma_read_cache_async2( + [ + (key, get_ptr(dst), size), + (key + "_not_exist", get_ptr(dst), size), + (key + "_not_exist1", get_ptr(dst), size), + ] + ) + print(f"Read result: {ret}") + assert ret == [0, -1, -1] + + assert src == dst + rdma_conn.close() + + +asyncio.run(main()) diff --git a/infinistore/lib.py b/infinistore/lib.py index f5e5bda..c9a656a 100644 --- a/infinistore/lib.py +++ b/infinistore/lib.py @@ -8,6 +8,8 @@ from functools import singledispatchmethod from typing import Optional, Union, List, Tuple import socket +from dataclasses import dataclass + os.environ["OPENBLAS_NUM_THREADS"] = "1" os.environ["MKL_NUM_THREADS"] = "1" @@ -24,7 +26,22 @@ LINK_IB = "IB" -# Define exceptions which can be caught by the client such as KeyNotFound +@dataclass +class Desc: + key: str + addr: int + size: int + + def __iter__(self): + return iter((self.key, self.addr, self.size)) + + def __post_init__(self): + if self.key == "": + raise Exception("key is empty") + if self.addr == 0: + raise Exception("addr is 0") + if self.size <= 0: + raise Exception("size <= 0") class InfiniStoreException(Exception): @@ -422,6 +439,61 @@ def tcp_write_cache(self, key: str, ptr: int, size: int, **kwargs): if ret < 0: raise Exception(f"Failed to write to infinistore, ret = {ret}") + async def rdma_write_cache_async2(self, blocks: List[Desc]): + """ + Asynchronously writes a list of blocks to the RDMA cache. + + This method sends the provided blocks to the infinistore using RDMA in an asynchronous manner. 
+ It acquires a semaphore to limit concurrent operations, and uses a callback to signal completion. + If the RDMA connection is not established, an exception is raised. + + Args: + blocks (List[Desc]): A list of tuples, where each tuple contains (key, address, size) describing a block to write. + + Raises: + Exception: If the RDMA connection is not established. + Exception: If the write operation fails (either synchronously or asynchronously). + + Returns: + int: The return code from the RDMA write operation (typically 200 for success). + + """ + if not self.rdma_connected: + raise Exception("this function is only valid for connected rdma") + + if len(blocks) > 8 * 32: + raise Exception( + f"too many blocks, max 256 blocks per write, got {len(blocks)}" + ) + + await self.semaphore.acquire() + loop = asyncio.get_running_loop() + future = loop.create_future() + + keys, addrs, sizes = zip(*blocks) + + def _callback(code): + if code != 200: + loop.call_soon_threadsafe( + future.set_exception, + Exception(f"Failed to write to infinistore, ret = {code}"), + ) + else: + loop.call_soon_threadsafe(future.set_result, code) + + ret = self.conn.w_rdma_async2( + keys, + addrs, + sizes, + _callback, + ) + if ret < 0: + raise Exception(f"Failed to write to infinistore, ret = {ret}") + try: + return await future + finally: + self.semaphore.release() + async def rdma_write_cache_async( self, blocks: List[Tuple[str, int]], block_size: int, ptr: int ): @@ -453,6 +525,11 @@ async def rdma_write_cache_async( if not self.rdma_connected: raise Exception("this function is only valid for connected rdma") + if len(blocks) > 8 * 32: + raise Exception( + f"too many blocks, max 256 blocks per write, got {len(blocks)}" + ) + await self.semaphore.acquire() loop = asyncio.get_running_loop() future = loop.create_future() @@ -467,7 +544,6 @@ def _callback(code): ) else: loop.call_soon_threadsafe(future.set_result, code) - self.semaphore.release() ret = self.conn.w_rdma_async( keys, @@ -478,10 
+554,94 @@ def _callback(code): ) if ret < 0: raise Exception(f"Failed to write to infinistore, ret = {ret}") - return await future + try: + return await future + finally: + self.semaphore.release() + + @staticmethod + def is_bit_set(n: int, bit: int) -> bool: + """ + Check if a specific bit is set in an integer. + + Args: + n (int): The integer to check. + bit (int): The bit position to check (0-indexed). + + Returns: + bool: True if the bit is set, False otherwise. + """ + return (n >> bit) & 1 == 1 + + async def rdma_read_cache_async2(self, blocks: List[Desc]): + """ + Asynchronously reads a batch of blocks from the RDMA cache. + This method initiates an asynchronous RDMA read operation for the provided list of blocks. + It ensures that the RDMA connection is established and manages concurrency using a semaphore. + The method returns when the RDMA read operation completes, or raises an exception if the operation fails. + Args: + blocks (List[Desc]): A list of block descriptors, where each descriptor is a tuple containing + (key, address, size) for the block to be read. + Raises: + Exception: If the RDMA connection is not established. + InfiniStoreKeyNotFound: If some keys are not found in the store (error code 404). + Exception: If the RDMA read operation fails with an error code other than 200. + Returns: + int: The result code (typically 200 for success) from the RDMA read operation. + Note: + This function should only be called when the RDMA connection is active. + It uses a semaphore to limit concurrent RDMA operations. 
+ """ + + if not self.rdma_connected: + raise Exception("this function is only valid for connected rdma") + + if len(blocks) > 8 * 32: + raise Exception( + f"too many blocks, max 256 blocks per read, got {len(blocks)}" + ) + + await self.semaphore.acquire() + loop = asyncio.get_running_loop() + future = loop.create_future() + keys, addrs, sizes = zip(*blocks) + + def _callback(code, payload): + if code == 404: + loop.call_soon_threadsafe( + future.set_exception, InfiniStoreKeyNotFound("all keys not found") + ) + elif code == 206: + # payload is unsigned int[8] + ret = [0] * len(blocks) + for i, _ in enumerate(blocks): + # iter all bit to convert to array + if self.is_bit_set(payload[i // 8], i % 32): + ret[i] = -1 + loop.call_soon_threadsafe(future.set_result, ret) + elif code == 200: + loop.call_soon_threadsafe(future.set_result, code) + else: + loop.call_soon_threadsafe( + future.set_exception, + Exception(f"Failed to read to infinistore, ret = {code}"), + ) + + ret = self.conn.r_rdma_async2( + keys, + addrs, + sizes, + _callback, + ) + if ret < 0: + raise Exception(f"Failed to read to infinistore, ret = {ret}") + try: + return await future + finally: + self.semaphore.release() async def rdma_read_cache_async( - self, blocks: List[Tuple[str, int]], block_size: int, ptr: int + self, blocks: List[Tuple[str, int, int]], block_size: int, ptr: int ): """ Asynchronously reads data from the RDMA cache. 
@@ -509,25 +669,36 @@ async def rdma_read_cache_async( """ if not self.rdma_connected: raise Exception("this function is only valid for connected rdma") - pass + + if len(blocks) > 8 * 32: + raise Exception( + f"too many blocks, max 256 blocks per read, got {len(blocks)}" + ) await self.semaphore.acquire() loop = asyncio.get_running_loop() future = loop.create_future() - def _callback(code): + def _callback(code, payload): if code == 404: loop.call_soon_threadsafe( - future.set_exception, InfiniStoreKeyNotFound("some keys not found") + future.set_exception, InfiniStoreKeyNotFound("all keys not found") ) - elif code != 200: + elif code == 206: + # payload is unsigned int[8] + ret = [0] * len(blocks) + for i, _ in enumerate(blocks): + # iter all bit to convert to array + if self.is_bit_set(payload[i // 8], i % 32): + ret[i] = -1 + loop.call_soon_threadsafe(future.set_result, ret) + elif code == 200: + loop.call_soon_threadsafe(future.set_result, code) + else: loop.call_soon_threadsafe( future.set_exception, Exception(f"Failed to read to infinistore, ret = {code}"), ) - else: - loop.call_soon_threadsafe(future.set_result, code) - self.semaphore.release() keys, offsets = zip(*blocks) ret = self.conn.r_rdma_async( @@ -539,7 +710,10 @@ def _callback(code): ) if ret < 0: raise Exception(f"Failed to read to infinistore, ret = {ret}") - return await future + try: + return await future + finally: + self.semaphore.release() def check_exist(self, key: str): """ diff --git a/infinistore/test_infinistore.py b/infinistore/test_infinistore.py index 368f735..8cd5ffa 100644 --- a/infinistore/test_infinistore.py +++ b/infinistore/test_infinistore.py @@ -153,7 +153,7 @@ def test_batch_read_write_cache(server, separated_gpu): host_addr="127.0.0.1", service_port=92345, link_type=infinistore.LINK_ETHERNET, - dev_name="RDMA_DEV[0]", + dev_name=f"{RDMA_DEV[0]}", ) config.connection_type = infinistore.TYPE_RDMA @@ -273,7 +273,7 @@ def test_key_check(server): host_addr="127.0.0.1", 
service_port=92345, link_type=infinistore.LINK_ETHERNET, - dev_name="RDMA_DEV[0]", + dev_name=f"{RDMA_DEV[0]}", connection_type=infinistore.TYPE_RDMA, ) conn = infinistore.InfinityConnection(config) @@ -293,7 +293,7 @@ def test_get_match_last_index(server): host_addr="127.0.0.1", service_port=92345, link_type=infinistore.LINK_ETHERNET, - dev_name="RDMA_DEV[0]", + dev_name=f"{RDMA_DEV[0]}", connection_type=infinistore.TYPE_RDMA, ) conn = infinistore.InfinityConnection(config) @@ -311,6 +311,46 @@ def test_get_match_last_index(server): conn.close() +def test_partial_read_write(server): + config = infinistore.ClientConfig( + host_addr="127.0.0.1", + service_port=92345, + link_type=infinistore.LINK_ETHERNET, + dev_name=f"{RDMA_DEV[0]}", + connection_type=infinistore.TYPE_RDMA, + ) + conn = infinistore.InfinityConnection(config) + conn.connect() + + src = torch.randn(4096, device="cuda", dtype=torch.float32) + torch.cuda.synchronize(src.device) + + conn.register_mr(src.data_ptr(), src.numel() * src.element_size()) + + element_size = torch._utils._element_size(torch.float32) + + async def write_and_read(): + # Write only the first 1024 elements + await conn.rdma_write_cache_async2( + [infinistore.Desc("key1", src.data_ptr(), 1024 * element_size)] + ) + ret = await conn.rdma_read_cache_async2( + [ + infinistore.Desc("key1", src.data_ptr(), 1024 * element_size), + infinistore.Desc( + "key_unknown", + src.data_ptr() + 1024 * element_size, + 1024 * element_size, + ), + ] + ) + assert ret == [0, -1] + + asyncio.run(write_and_read()) + + conn.close() + + def test_key_not_found(server): config = infinistore.ClientConfig( host_addr="127.0.0.1", @@ -569,3 +609,180 @@ def test_overwrite_tcp(server): assert len(dst) == len(src) finally: conn.close() + + +@pytest.mark.parametrize("dtype", [torch.float16, torch.float32]) +def test_basic_read_write_cache_async2(server, dtype): + config = infinistore.ClientConfig( + host_addr="127.0.0.1", + service_port=92345, + 
link_type=infinistore.LINK_ETHERNET, + dev_name=f"{RDMA_DEV[0]}", + ) + + config.connection_type = infinistore.TYPE_RDMA + + conn = infinistore.InfinityConnection(config) + conn.connect() + + # key is random string + key = generate_random_string(10) + src = [i for i in range(4096)] + + src_tensor = torch.tensor(src, device="cuda:0", dtype=dtype) + + torch.cuda.synchronize(src_tensor.device) + + conn.register_mr( + src_tensor.data_ptr(), src_tensor.numel() * src_tensor.element_size() + ) + element_size = torch._utils._element_size(dtype) + + async def run_write(): + desc = infinistore.Desc(key, src_tensor.data_ptr(), len(src) * element_size) + await conn.rdma_write_cache_async2([desc]) + + asyncio.run(run_write()) + conn.close() + + conn = infinistore.InfinityConnection(config) + conn.connect() + + dst = torch.zeros(4096, device="cuda:0", dtype=dtype) + + conn.register_mr(dst.data_ptr(), dst.numel() * dst.element_size()) + + async def run_read(): + desc = infinistore.Desc(key, dst.data_ptr(), len(dst) * element_size) + await conn.rdma_read_cache_async2([desc]) + + asyncio.run(run_read()) + assert torch.equal(src_tensor, dst) + conn.close() + + +@pytest.mark.parametrize("separated_gpu", [False, True]) +def test_batch_read_write_cache_async2(server, separated_gpu): + config = infinistore.ClientConfig( + host_addr="127.0.0.1", + service_port=92345, + link_type=infinistore.LINK_ETHERNET, + dev_name=f"{RDMA_DEV[0]}", + ) + + config.connection_type = infinistore.TYPE_RDMA + + # test if we have multiple GPUs + if separated_gpu: + if get_gpu_count() >= 2: + src_device = "cuda:0" + dst_device = "cuda:1" + else: + # skip if we don't have enough GPUs + return + else: + src_device = "cuda:0" + dst_device = "cuda:0" + + conn = infinistore.InfinityConnection(config) + conn.connect() + + num_of_blocks = 10 + block_size = 4096 + src = [i for i in range(num_of_blocks * block_size)] + + src_tensor = torch.tensor(src, device=src_device, dtype=torch.float32) + 
torch.cuda.synchronize(src_tensor.device) + + async def run(): + # write/read 3 times + for i in range(3): + keys = [generate_random_string(num_of_blocks) for i in range(10)] + conn.register_mr( + src_tensor.data_ptr(), src_tensor.numel() * src_tensor.element_size() + ) + + # Create write descriptors for async2 API + write_descs = [] + for j in range(num_of_blocks): + desc = infinistore.Desc( + keys[j], src_tensor.data_ptr() + j * block_size * 4, block_size * 4 + ) + write_descs.append(desc) + + await conn.rdma_write_cache_async2(write_descs) + + dst = torch.zeros( + num_of_blocks * block_size, device=dst_device, dtype=torch.float32 + ) + conn.register_mr(dst.data_ptr(), dst.numel() * dst.element_size()) + + # Create read descriptors for async2 API + read_descs = [] + for j in range(num_of_blocks): + desc = infinistore.Desc( + keys[j], dst.data_ptr() + j * block_size * 4, block_size * 4 + ) + read_descs.append(desc) + + await conn.rdma_read_cache_async2(read_descs) + assert torch.equal(src_tensor.cpu(), dst.cpu()) + + asyncio.run(run()) + conn.close() + + +def test_async2_api_with_mixed_sizes(server): + config = infinistore.ClientConfig( + host_addr="127.0.0.1", + service_port=92345, + link_type=infinistore.LINK_ETHERNET, + dev_name=f"{RDMA_DEV[0]}", + connection_type=infinistore.TYPE_RDMA, + ) + conn = infinistore.InfinityConnection(config) + + # use asyncio + async def run(): + await conn.connect_async() + + # Test with different sized blocks + keys = [generate_random_string(5) for _ in range(3)] + sizes = [1024, 2048, 4096] + + src_tensors = [] + dst_tensors = [] + write_descs = [] + read_descs = [] + + # Create tensors and descriptors for different sizes + for i, size in enumerate(sizes): + src = torch.randn(size, device="cuda", dtype=torch.float32) + dst = torch.zeros(size, device="cuda", dtype=torch.float32) + + src_tensors.append(src) + dst_tensors.append(dst) + + write_descs.append(infinistore.Desc(keys[i], src.data_ptr(), size * 4)) + 
read_descs.append(infinistore.Desc(keys[i], dst.data_ptr(), size * 4)) + + def register_mr(): + for src, dst in zip(src_tensors, dst_tensors): + conn.register_mr(src.data_ptr(), src.numel() * src.element_size()) + conn.register_mr(dst.data_ptr(), dst.numel() * dst.element_size()) + + await asyncio.to_thread(register_mr) + + # Write all blocks + await conn.rdma_write_cache_async2(write_descs) + + # Read all blocks + await conn.rdma_read_cache_async2(read_descs) + + # Verify data integrity + for src, dst in zip(src_tensors, dst_tensors): + assert torch.equal(src.cpu(), dst.cpu()) + + conn.close() + + asyncio.run(run()) diff --git a/src/allocate_response.fbs b/src/allocate_response.fbs deleted file mode 100644 index 2415f9b..0000000 --- a/src/allocate_response.fbs +++ /dev/null @@ -1,11 +0,0 @@ -//RDMA allocate response -struct RemoteBlock { - rkey: uint; - remote_addr: ulong; -} - -table RdmaAllocateResponse { - blocks: [RemoteBlock]; - error_code: uint; -} -root_type RdmaAllocateResponse; diff --git a/src/backward.hpp b/src/backward.hpp new file mode 100644 index 0000000..0035ffc --- /dev/null +++ b/src/backward.hpp @@ -0,0 +1,4465 @@ +/* + * backward.hpp + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef H_6B9572DA_A64B_49E6_B234_051480991C89 +#define H_6B9572DA_A64B_49E6_B234_051480991C89 + +#ifndef __cplusplus +#error "It's not going to compile without a C++ compiler..." +#endif + +#if defined(BACKWARD_CXX11) +#elif defined(BACKWARD_CXX98) +#else +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800) +#define BACKWARD_CXX11 +#define BACKWARD_ATLEAST_CXX11 +#define BACKWARD_ATLEAST_CXX98 +#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +#define BACKWARD_ATLEAST_CXX17 +#endif +#else +#define BACKWARD_CXX98 +#define BACKWARD_ATLEAST_CXX98 +#endif +#endif + +// You can define one of the following (or leave it to the auto-detection): +// +// #define BACKWARD_SYSTEM_LINUX +// - specialization for linux +// +// #define BACKWARD_SYSTEM_DARWIN +// - specialization for Mac OS X 10.5 and later. +// +// #define BACKWARD_SYSTEM_WINDOWS +// - specialization for Windows (Clang 9 and MSVC2017) +// +// #define BACKWARD_SYSTEM_UNKNOWN +// - placebo implementation, does nothing. 
+// +#if defined(BACKWARD_SYSTEM_LINUX) +#elif defined(BACKWARD_SYSTEM_DARWIN) +#elif defined(BACKWARD_SYSTEM_UNKNOWN) +#elif defined(BACKWARD_SYSTEM_WINDOWS) +#else +#if defined(__linux) || defined(__linux__) +#define BACKWARD_SYSTEM_LINUX +#elif defined(__APPLE__) +#define BACKWARD_SYSTEM_DARWIN +#elif defined(_WIN32) +#define BACKWARD_SYSTEM_WINDOWS +#else +#define BACKWARD_SYSTEM_UNKNOWN +#endif +#endif + +#define NOINLINE __attribute__((noinline)) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(BACKWARD_SYSTEM_LINUX) + +// On linux, backtrace can back-trace or "walk" the stack using the following +// libraries: +// +// #define BACKWARD_HAS_UNWIND 1 +// - unwind comes from libgcc, but I saw an equivalent inside clang itself. +// - with unwind, the stacktrace is as accurate as it can possibly be, since +// this is used by the C++ runtime in gcc/clang for stack unwinding on +// exception. +// - normally libgcc is already linked to your program by default. +// +// #define BACKWARD_HAS_LIBUNWIND 1 +// - libunwind provides, in some cases, a more accurate stacktrace as it knows +// to decode signal handler frames and lets us edit the context registers when +// unwinding, allowing stack traces over bad function references. +// +// #define BACKWARD_HAS_BACKTRACE == 1 +// - backtrace seems to be a little bit more portable than libunwind, but on +// linux, it uses unwind anyway, but abstract away a tiny information that is +// sadly really important in order to get perfectly accurate stack traces. +// - backtrace is part of the (e)glib library. +// +// The default is: +// #define BACKWARD_HAS_UNWIND == 1 +// +// Note that only one of the define should be set to 1 at a time. 
+// +#if BACKWARD_HAS_UNWIND == 1 +#elif BACKWARD_HAS_LIBUNWIND == 1 +#elif BACKWARD_HAS_BACKTRACE == 1 +#else +#undef BACKWARD_HAS_UNWIND +#define BACKWARD_HAS_UNWIND 1 +#undef BACKWARD_HAS_LIBUNWIND +#define BACKWARD_HAS_LIBUNWIND 0 +#undef BACKWARD_HAS_BACKTRACE +#define BACKWARD_HAS_BACKTRACE 0 +#endif + +// On linux, backward can extract detailed information about a stack trace +// using one of the following libraries: +// +// #define BACKWARD_HAS_DW 1 +// - libdw gives you the most juicy details out of your stack traces: +// - object filename +// - function name +// - source filename +// - line and column numbers +// - source code snippet (assuming the file is accessible) +// - variable names (if not optimized out) +// - variable values (not supported by backward-cpp) +// - You need to link with the lib "dw": +// - apt-get install libdw-dev +// - g++/clang++ -ldw ... +// +// #define BACKWARD_HAS_BFD 1 +// - With libbfd, you get a fair amount of details: +// - object filename +// - function name +// - source filename +// - line numbers +// - source code snippet (assuming the file is accessible) +// - You need to link with the lib "bfd": +// - apt-get install binutils-dev +// - g++/clang++ -lbfd ... +// +// #define BACKWARD_HAS_DWARF 1 +// - libdwarf gives you the most juicy details out of your stack traces: +// - object filename +// - function name +// - source filename +// - line and column numbers +// - source code snippet (assuming the file is accessible) +// - variable names (if not optimized out) +// - variable values (not supported by backward-cpp) +// - You need to link with the lib "dwarf": +// - apt-get install libdwarf-dev +// - g++/clang++ -ldwarf ... +// +// #define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +// - backtrace provides minimal details for a stack trace: +// - object filename +// - function name +// - backtrace is part of the (e)glib library. 
+// +// The default is: +// #define BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +// +// Note that only one of the define should be set to 1 at a time. +// +#if BACKWARD_HAS_DW == 1 +#elif BACKWARD_HAS_BFD == 1 +#elif BACKWARD_HAS_DWARF == 1 +#elif BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +#else +#undef BACKWARD_HAS_DW +#define BACKWARD_HAS_DW 0 +#undef BACKWARD_HAS_BFD +#define BACKWARD_HAS_BFD 0 +#undef BACKWARD_HAS_DWARF +#define BACKWARD_HAS_DWARF 0 +#undef BACKWARD_HAS_BACKTRACE_SYMBOL +#define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +#endif + +#include +#include +#ifdef __ANDROID__ +// Old Android API levels define _Unwind_Ptr in both link.h and +// unwind.h Rename the one in link.h as we are not going to be using +// it +#define _Unwind_Ptr _Unwind_Ptr_Custom +#include +#undef _Unwind_Ptr +#else +#include +#endif +#if defined(__ppc__) || defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) +// Linux kernel header required for the struct pt_regs definition +// to access the NIP (Next Instruction Pointer) register value +#include +#endif +#include +#include +#include +#include +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#include +#undef _GNU_SOURCE +#else +#include +#endif + +#if BACKWARD_HAS_BFD == 1 +// NOTE: defining PACKAGE{,_VERSION} is required before including +// bfd.h on some platforms, see also: +// https://sourceware.org/bugzilla/show_bug.cgi?id=14243 +#ifndef PACKAGE +#define PACKAGE +#endif +#ifndef PACKAGE_VERSION +#define PACKAGE_VERSION +#endif +#include +#endif + +#if BACKWARD_HAS_DW == 1 +#include +#include +#include +#endif + +#if BACKWARD_HAS_DWARF == 1 +#include +#include +#include + +#include +#include +#endif + +#if (BACKWARD_HAS_BACKTRACE == 1) || (BACKWARD_HAS_BACKTRACE_SYMBOL == 1) +// then we shall rely on backtrace +#include +#endif + +#endif // defined(BACKWARD_SYSTEM_LINUX) + +#if defined(BACKWARD_SYSTEM_DARWIN) +// On Darwin, backtrace can back-trace or "walk" the stack using the following +// libraries: +// +// #define BACKWARD_HAS_UNWIND 
1 +// - unwind comes from libgcc, but I saw an equivalent inside clang itself. +// - with unwind, the stacktrace is as accurate as it can possibly be, since +// this is used by the C++ runtime in gcc/clang for stack unwinding on +// exception. +// - normally libgcc is already linked to your program by default. +// +// #define BACKWARD_HAS_LIBUNWIND 1 +// - libunwind comes from clang, which implements an API compatible version. +// - libunwind provides, in some cases, a more accurate stacktrace as it knows +// to decode signal handler frames and lets us edit the context registers when +// unwinding, allowing stack traces over bad function references. +// +// #define BACKWARD_HAS_BACKTRACE == 1 +// - backtrace is available by default, though it does not produce as much +// information as another library might. +// +// The default is: +// #define BACKWARD_HAS_UNWIND == 1 +// +// Note that only one of the define should be set to 1 at a time. +// +#if BACKWARD_HAS_UNWIND == 1 +#elif BACKWARD_HAS_BACKTRACE == 1 +#elif BACKWARD_HAS_LIBUNWIND == 1 +#else +#undef BACKWARD_HAS_UNWIND +#define BACKWARD_HAS_UNWIND 1 +#undef BACKWARD_HAS_BACKTRACE +#define BACKWARD_HAS_BACKTRACE 0 +#undef BACKWARD_HAS_LIBUNWIND +#define BACKWARD_HAS_LIBUNWIND 0 +#endif + +// On Darwin, backward can extract detailed information about a stack trace +// using one of the following libraries: +// +// #define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +// - backtrace provides minimal details for a stack trace: +// - object filename +// - function name +// +// The default is: +// #define BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +// +#if BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +#else +#undef BACKWARD_HAS_BACKTRACE_SYMBOL +#define BACKWARD_HAS_BACKTRACE_SYMBOL 1 +#endif + +#include +#include +#include +#include +#include +#include + +#if (BACKWARD_HAS_BACKTRACE == 1) || (BACKWARD_HAS_BACKTRACE_SYMBOL == 1) +#include +#endif +#endif // defined(BACKWARD_SYSTEM_DARWIN) + +#if defined(BACKWARD_SYSTEM_WINDOWS) + +#include + 
+#include +#include +#include + +#ifdef _WIN64 +typedef SSIZE_T ssize_t; +#else +typedef int ssize_t; +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#include +#include +#include + +#ifndef __clang__ +#undef NOINLINE +#define NOINLINE __declspec(noinline) +#endif + +#ifdef _MSC_VER +#pragma comment(lib, "psapi.lib") +#pragma comment(lib, "dbghelp.lib") +#endif + +// Comment / packing is from stackoverflow: +// https://stackoverflow.com/questions/6205981/windows-c-stack-trace-from-a-running-app/28276227#28276227 +// Some versions of imagehlp.dll lack the proper packing directives themselves +// so we need to do it. +#pragma pack(push, before_imagehlp, 8) +#include +#pragma pack(pop, before_imagehlp) + +// TODO maybe these should be undefined somewhere else? +#undef BACKWARD_HAS_UNWIND +#undef BACKWARD_HAS_BACKTRACE +#if BACKWARD_HAS_PDB_SYMBOL == 1 +#else +#undef BACKWARD_HAS_PDB_SYMBOL +#define BACKWARD_HAS_PDB_SYMBOL 1 +#endif + +#endif + +#if BACKWARD_HAS_UNWIND == 1 + +#include +// while gcc's unwind.h defines something like that: +// extern _Unwind_Ptr _Unwind_GetIP (struct _Unwind_Context *); +// extern _Unwind_Ptr _Unwind_GetIPInfo (struct _Unwind_Context *, int *); +// +// clang's unwind.h defines something like this: +// uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context); +// +// Even if the _Unwind_GetIPInfo can be linked to, it is not declared, worse we +// cannot just redeclare it because clang's unwind.h doesn't define _Unwind_Ptr +// anyway. +// +// Luckily we can play on the fact that the guard macros have a different name: +#ifdef __CLANG_UNWIND_H +// In fact, this function still comes from libgcc (on my different linux boxes, +// clang links against libgcc). 
+#include +extern "C" uintptr_t _Unwind_GetIPInfo(_Unwind_Context *, int *); +#endif + +#endif // BACKWARD_HAS_UNWIND == 1 + +#if BACKWARD_HAS_LIBUNWIND == 1 +#define UNW_LOCAL_ONLY +#include +#endif // BACKWARD_HAS_LIBUNWIND == 1 + +#ifdef BACKWARD_ATLEAST_CXX11 +#include +#include // for std::swap +namespace backward { +namespace details { +template +struct hashtable { + typedef std::unordered_map type; +}; +using std::move; +} // namespace details +} // namespace backward +#else // NOT BACKWARD_ATLEAST_CXX11 +#define nullptr NULL +#define override +#include +namespace backward { +namespace details { +template +struct hashtable { + typedef std::map type; +}; +template +const T &move(const T &v) { + return v; +} +template +T &move(T &v) { + return v; +} +} // namespace details +} // namespace backward +#endif // BACKWARD_ATLEAST_CXX11 + +namespace backward { +namespace details { +#if defined(BACKWARD_SYSTEM_WINDOWS) +const char kBackwardPathDelimiter[] = ";"; +#else +const char kBackwardPathDelimiter[] = ":"; +#endif +} // namespace details +} // namespace backward + +namespace backward { + +namespace system_tag { +struct linux_tag; // seems that I cannot call that "linux" because the name +// is already defined... so I am adding _tag everywhere. +struct darwin_tag; +struct windows_tag; +struct unknown_tag; + +#if defined(BACKWARD_SYSTEM_LINUX) +typedef linux_tag current_tag; +#elif defined(BACKWARD_SYSTEM_DARWIN) +typedef darwin_tag current_tag; +#elif defined(BACKWARD_SYSTEM_WINDOWS) +typedef windows_tag current_tag; +#elif defined(BACKWARD_SYSTEM_UNKNOWN) +typedef unknown_tag current_tag; +#else +#error "May I please get my system defines?" 
+#endif +} // namespace system_tag + +namespace trace_resolver_tag { +#if defined(BACKWARD_SYSTEM_LINUX) +struct libdw; +struct libbfd; +struct libdwarf; +struct backtrace_symbol; + +#if BACKWARD_HAS_DW == 1 +typedef libdw current; +#elif BACKWARD_HAS_BFD == 1 +typedef libbfd current; +#elif BACKWARD_HAS_DWARF == 1 +typedef libdwarf current; +#elif BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +typedef backtrace_symbol current; +#else +#error "You shall not pass, until you know what you want." +#endif +#elif defined(BACKWARD_SYSTEM_DARWIN) +struct backtrace_symbol; + +#if BACKWARD_HAS_BACKTRACE_SYMBOL == 1 +typedef backtrace_symbol current; +#else +#error "You shall not pass, until you know what you want." +#endif +#elif defined(BACKWARD_SYSTEM_WINDOWS) +struct pdb_symbol; +#if BACKWARD_HAS_PDB_SYMBOL == 1 +typedef pdb_symbol current; +#else +#error "You shall not pass, until you know what you want." +#endif +#endif +} // namespace trace_resolver_tag + +namespace details { + +template +struct rm_ptr { + typedef T type; +}; + +template +struct rm_ptr { + typedef T type; +}; + +template +struct rm_ptr { + typedef const T type; +}; + +template +struct deleter { + template + void operator()(U &ptr) const { + (*F)(ptr); + } +}; + +template +struct default_delete { + void operator()(T &ptr) const { delete ptr; } +}; + +template > +class handle { + struct dummy; + T _val; + bool _empty; + +#ifdef BACKWARD_ATLEAST_CXX11 + handle(const handle &) = delete; + handle &operator=(const handle &) = delete; +#endif + + public: + ~handle() { + if (!_empty) { + Deleter()(_val); + } + } + + explicit handle() : _val(), _empty(true) {} + explicit handle(T val) : _val(val), _empty(false) { + if (!_val) + _empty = true; + } + +#ifdef BACKWARD_ATLEAST_CXX11 + handle(handle &&from) : _empty(true) { swap(from); } + handle &operator=(handle &&from) { + swap(from); + return *this; + } +#else + explicit handle(const handle &from) : _empty(true) { + // some sort of poor man's move semantic. 
+ swap(const_cast(from)); + } + handle &operator=(const handle &from) { + // some sort of poor man's move semantic. + swap(const_cast(from)); + return *this; + } +#endif + + void reset(T new_val) { + handle tmp(new_val); + swap(tmp); + } + + void update(T new_val) { + _val = new_val; + _empty = !static_cast(new_val); + } + + operator const dummy *() const { + if (_empty) { + return nullptr; + } + return reinterpret_cast(_val); + } + T get() { return _val; } + T release() { + _empty = true; + return _val; + } + void swap(handle &b) { + using std::swap; + swap(b._val, _val); // can throw, we are safe here. + swap(b._empty, _empty); // should not throw: if you cannot swap two + // bools without throwing... It's a lost cause anyway! + } + + T &operator->() { return _val; } + const T &operator->() const { return _val; } + + typedef typename rm_ptr::type &ref_t; + typedef const typename rm_ptr::type &const_ref_t; + ref_t operator*() { return *_val; } + const_ref_t operator*() const { return *_val; } + ref_t operator[](size_t idx) { return _val[idx]; } + + // Watch out, we've got a badass over here + T *operator&() { + _empty = false; + return &_val; + } +}; + +// Default demangler implementation (do nothing). 
+template +struct demangler_impl { + static std::string demangle(const char *funcname) { return funcname; } +}; + +#if defined(BACKWARD_SYSTEM_LINUX) || defined(BACKWARD_SYSTEM_DARWIN) + +template <> +struct demangler_impl { + demangler_impl() : _demangle_buffer_length(0) {} + + std::string demangle(const char *funcname) { + using namespace details; + char *result = abi::__cxa_demangle(funcname, _demangle_buffer.get(), + &_demangle_buffer_length, nullptr); + if (result) { + _demangle_buffer.update(result); + return result; + } + return funcname; + } + + private: + details::handle _demangle_buffer; + size_t _demangle_buffer_length; +}; + +#endif // BACKWARD_SYSTEM_LINUX || BACKWARD_SYSTEM_DARWIN + +struct demangler : public demangler_impl {}; + +// Split a string on the platform's PATH delimiter. Example: if delimiter +// is ":" then: +// "" --> [] +// ":" --> ["",""] +// "::" --> ["","",""] +// "/a/b/c" --> ["/a/b/c"] +// "/a/b/c:/d/e/f" --> ["/a/b/c","/d/e/f"] +// etc. +inline std::vector split_source_prefixes(const std::string &s) { + std::vector out; + size_t last = 0; + size_t next = 0; + size_t delimiter_size = sizeof(kBackwardPathDelimiter) - 1; + while ((next = s.find(kBackwardPathDelimiter, last)) != std::string::npos) { + out.push_back(s.substr(last, next - last)); + last = next + delimiter_size; + } + if (last <= s.length()) { + out.push_back(s.substr(last)); + } + return out; +} + +} // namespace details + +/*************** A TRACE ***************/ + +struct Trace { + void *addr; + size_t idx; + + Trace() : addr(nullptr), idx(0) {} + + explicit Trace(void *_addr, size_t _idx) : addr(_addr), idx(_idx) {} +}; + +struct ResolvedTrace : public Trace { + struct SourceLoc { + std::string function; + std::string filename; + unsigned line; + unsigned col; + + SourceLoc() : line(0), col(0) {} + + bool operator==(const SourceLoc &b) const { + return function == b.function && filename == b.filename && line == b.line && + col == b.col; + } + + bool operator!=(const 
SourceLoc &b) const { return !(*this == b); } + }; + + // In which binary object this trace is located. + std::string object_filename; + + // The function in the object that contain the trace. This is not the same + // as source.function which can be an function inlined in object_function. + std::string object_function; + + // The source location of this trace. It is possible for filename to be + // empty and for line/col to be invalid (value 0) if this information + // couldn't be deduced, for example if there is no debug information in the + // binary object. + SourceLoc source; + + // An optionals list of "inliners". All the successive sources location + // from where the source location of the trace (the attribute right above) + // is inlined. It is especially useful when you compiled with optimization. + typedef std::vector source_locs_t; + source_locs_t inliners; + + ResolvedTrace() : Trace() {} + ResolvedTrace(const Trace &mini_trace) : Trace(mini_trace) {} +}; + +/*************** STACK TRACE ***************/ + +// default implementation. +template +class StackTraceImpl { + public: + size_t size() const { return 0; } + Trace operator[](size_t) const { return Trace(); } + size_t load_here(size_t = 0) { return 0; } + size_t load_from(void *, size_t = 0, void * = nullptr, void * = nullptr) { return 0; } + size_t thread_id() const { return 0; } + void skip_n_firsts(size_t) {} + void *const *begin() const { return nullptr; } +}; + +class StackTraceImplBase { + public: + StackTraceImplBase() : _thread_id(0), _skip(0), _context(nullptr), _error_addr(nullptr) {} + + size_t thread_id() const { return _thread_id; } + + void skip_n_firsts(size_t n) { _skip = n; } + + protected: + void load_thread_info() { +#ifdef BACKWARD_SYSTEM_LINUX +#ifndef __ANDROID__ + _thread_id = static_cast(syscall(SYS_gettid)); +#else + _thread_id = static_cast(gettid()); +#endif + if (_thread_id == static_cast(getpid())) { + // If the thread is the main one, let's hide that. 
+ // I like to keep little secret sometimes. + _thread_id = 0; + } +#elif defined(BACKWARD_SYSTEM_DARWIN) + _thread_id = reinterpret_cast(pthread_self()); + if (pthread_main_np() == 1) { + // If the thread is the main one, let's hide that. + _thread_id = 0; + } +#endif + } + + void set_context(void *context) { _context = context; } + void *context() const { return _context; } + + void set_error_addr(void *error_addr) { _error_addr = error_addr; } + void *error_addr() const { return _error_addr; } + + size_t skip_n_firsts() const { return _skip; } + + private: + size_t _thread_id; + size_t _skip; + void *_context; + void *_error_addr; +}; + +class StackTraceImplHolder : public StackTraceImplBase { + public: + size_t size() const { + return (_stacktrace.size() >= skip_n_firsts()) ? _stacktrace.size() - skip_n_firsts() : 0; + } + Trace operator[](size_t idx) const { + if (idx >= size()) { + return Trace(); + } + return Trace(_stacktrace[idx + skip_n_firsts()], idx); + } + void *const *begin() const { + if (size()) { + return &_stacktrace[skip_n_firsts()]; + } + return nullptr; + } + + protected: + std::vector _stacktrace; +}; + +#if BACKWARD_HAS_UNWIND == 1 + +namespace details { + +template +class Unwinder { + public: + size_t operator()(F &f, size_t depth) { + _f = &f; + _index = -1; + _depth = depth; + _Unwind_Backtrace(&this->backtrace_trampoline, this); + if (_index == -1) { + // _Unwind_Backtrace has failed to obtain any backtraces + return 0; + } + else { + return static_cast(_index); + } + } + + private: + F *_f; + ssize_t _index; + size_t _depth; + + static _Unwind_Reason_Code backtrace_trampoline(_Unwind_Context *ctx, void *self) { + return (static_cast(self))->backtrace(ctx); + } + + _Unwind_Reason_Code backtrace(_Unwind_Context *ctx) { + if (_index >= 0 && static_cast(_index) >= _depth) + return _URC_END_OF_STACK; + + int ip_before_instruction = 0; + uintptr_t ip = _Unwind_GetIPInfo(ctx, &ip_before_instruction); + + if (!ip_before_instruction) { + // 
calculating 0-1 for unsigned, looks like a possible bug to sanitizers, + // so let's do it explicitly: + if (ip == 0) { + ip = std::numeric_limits::max(); // set it to 0xffff... (as + // from casting 0-1) + } + else { + ip -= 1; // else just normally decrement it (no overflow/underflow will + // happen) + } + } + + if (_index >= 0) { // ignore first frame. + (*_f)(static_cast(_index), reinterpret_cast(ip)); + } + _index += 1; + return _URC_NO_REASON; + } +}; + +template +size_t unwind(F f, size_t depth) { + Unwinder unwinder; + return unwinder(f, depth); +} + +} // namespace details + +template <> +class StackTraceImpl : public StackTraceImplHolder { + public: + NOINLINE + size_t load_here(size_t depth = 32, void *context = nullptr, void *error_addr = nullptr) { + load_thread_info(); + set_context(context); + set_error_addr(error_addr); + if (depth == 0) { + return 0; + } + _stacktrace.resize(depth); + size_t trace_cnt = details::unwind(callback(*this), depth); + _stacktrace.resize(trace_cnt); + skip_n_firsts(0); + return size(); + } + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < _stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } + + private: + struct callback { + StackTraceImpl &self; + callback(StackTraceImpl &_self) : self(_self) {} + + void operator()(size_t idx, void *addr) { self._stacktrace[idx] = addr; } + }; +}; + +#elif BACKWARD_HAS_LIBUNWIND == 1 + +template <> +class StackTraceImpl : public StackTraceImplHolder { + public: + __attribute__((noinline)) size_t load_here(size_t depth = 32, void *_context = nullptr, + void *_error_addr = nullptr) { + set_context(_context); + set_error_addr(_error_addr); + load_thread_info(); + if (depth == 0) { + return 0; + } + _stacktrace.resize(depth 
+ 1); + + int result = 0; + + unw_context_t ctx; + size_t index = 0; + + // Add the tail call. If the Instruction Pointer is the crash address it + // means we got a bad function pointer dereference, so we "unwind" the + // bad pointer manually by using the return address pointed to by the + // Stack Pointer as the Instruction Pointer and letting libunwind do + // the rest + + if (context()) { + ucontext_t *uctx = reinterpret_cast(context()); +#ifdef REG_RIP // x86_64 + if (uctx->uc_mcontext.gregs[REG_RIP] == reinterpret_cast(error_addr())) { + uctx->uc_mcontext.gregs[REG_RIP] = + *reinterpret_cast(uctx->uc_mcontext.gregs[REG_RSP]); + } + _stacktrace[index] = reinterpret_cast(uctx->uc_mcontext.gregs[REG_RIP]); + ++index; + ctx = *reinterpret_cast(uctx); +#elif defined(REG_EIP) // x86_32 + if (uctx->uc_mcontext.gregs[REG_EIP] == reinterpret_cast(error_addr())) { + uctx->uc_mcontext.gregs[REG_EIP] = + *reinterpret_cast(uctx->uc_mcontext.gregs[REG_ESP]); + } + _stacktrace[index] = reinterpret_cast(uctx->uc_mcontext.gregs[REG_EIP]); + ++index; + ctx = *reinterpret_cast(uctx); +#elif defined(__arm__) // clang libunwind/arm + // libunwind uses its own context type for ARM unwinding. 
+ // Copy the registers from the signal handler's context so we can + // unwind + unw_getcontext(&ctx); + ctx.regs[UNW_ARM_R0] = uctx->uc_mcontext.arm_r0; + ctx.regs[UNW_ARM_R1] = uctx->uc_mcontext.arm_r1; + ctx.regs[UNW_ARM_R2] = uctx->uc_mcontext.arm_r2; + ctx.regs[UNW_ARM_R3] = uctx->uc_mcontext.arm_r3; + ctx.regs[UNW_ARM_R4] = uctx->uc_mcontext.arm_r4; + ctx.regs[UNW_ARM_R5] = uctx->uc_mcontext.arm_r5; + ctx.regs[UNW_ARM_R6] = uctx->uc_mcontext.arm_r6; + ctx.regs[UNW_ARM_R7] = uctx->uc_mcontext.arm_r7; + ctx.regs[UNW_ARM_R8] = uctx->uc_mcontext.arm_r8; + ctx.regs[UNW_ARM_R9] = uctx->uc_mcontext.arm_r9; + ctx.regs[UNW_ARM_R10] = uctx->uc_mcontext.arm_r10; + ctx.regs[UNW_ARM_R11] = uctx->uc_mcontext.arm_fp; + ctx.regs[UNW_ARM_R12] = uctx->uc_mcontext.arm_ip; + ctx.regs[UNW_ARM_R13] = uctx->uc_mcontext.arm_sp; + ctx.regs[UNW_ARM_R14] = uctx->uc_mcontext.arm_lr; + ctx.regs[UNW_ARM_R15] = uctx->uc_mcontext.arm_pc; + + // If we have crashed in the PC use the LR instead, as this was + // a bad function dereference + if (reinterpret_cast(error_addr()) == uctx->uc_mcontext.arm_pc) { + ctx.regs[UNW_ARM_R15] = uctx->uc_mcontext.arm_lr - sizeof(unsigned long); + } + _stacktrace[index] = reinterpret_cast(ctx.regs[UNW_ARM_R15]); + ++index; +#elif defined(__aarch64__) // gcc libunwind/arm64 + unw_getcontext(&ctx); + // If the IP is the same as the crash address we have a bad function + // dereference The caller's address is pointed to by the link pointer, so + // we dereference that value and set it to be the next frame's IP. 
+ if (uctx->uc_mcontext.pc == reinterpret_cast<__uint64_t>(error_addr())) { + uctx->uc_mcontext.pc = uctx->uc_mcontext.regs[UNW_TDEP_IP]; + } + + // 29 general purpose registers + for (int i = UNW_AARCH64_X0; i <= UNW_AARCH64_X28; i++) { + ctx.uc_mcontext.regs[i] = uctx->uc_mcontext.regs[i]; + } + ctx.uc_mcontext.sp = uctx->uc_mcontext.sp; + ctx.uc_mcontext.pc = uctx->uc_mcontext.pc; + ctx.uc_mcontext.fault_address = uctx->uc_mcontext.fault_address; + _stacktrace[index] = reinterpret_cast(ctx.uc_mcontext.pc); + ++index; +#elif defined(__APPLE__) && defined(__x86_64__) + unw_getcontext(&ctx); + // OS X's implementation of libunwind uses its own context object + // so we need to convert the passed context to libunwind's format + // (information about the data layout taken from unw_getcontext.s + // in Apple's libunwind source + ctx.data[0] = uctx->uc_mcontext->__ss.__rax; + ctx.data[1] = uctx->uc_mcontext->__ss.__rbx; + ctx.data[2] = uctx->uc_mcontext->__ss.__rcx; + ctx.data[3] = uctx->uc_mcontext->__ss.__rdx; + ctx.data[4] = uctx->uc_mcontext->__ss.__rdi; + ctx.data[5] = uctx->uc_mcontext->__ss.__rsi; + ctx.data[6] = uctx->uc_mcontext->__ss.__rbp; + ctx.data[7] = uctx->uc_mcontext->__ss.__rsp; + ctx.data[8] = uctx->uc_mcontext->__ss.__r8; + ctx.data[9] = uctx->uc_mcontext->__ss.__r9; + ctx.data[10] = uctx->uc_mcontext->__ss.__r10; + ctx.data[11] = uctx->uc_mcontext->__ss.__r11; + ctx.data[12] = uctx->uc_mcontext->__ss.__r12; + ctx.data[13] = uctx->uc_mcontext->__ss.__r13; + ctx.data[14] = uctx->uc_mcontext->__ss.__r14; + ctx.data[15] = uctx->uc_mcontext->__ss.__r15; + ctx.data[16] = uctx->uc_mcontext->__ss.__rip; + + // If the IP is the same as the crash address we have a bad function + // dereference The caller's address is pointed to by %rsp, so we + // dereference that value and set it to be the next frame's IP. 
+ if (uctx->uc_mcontext->__ss.__rip == reinterpret_cast<__uint64_t>(error_addr())) { + ctx.data[16] = *reinterpret_cast<__uint64_t *>(uctx->uc_mcontext->__ss.__rsp); + } + _stacktrace[index] = reinterpret_cast(ctx.data[16]); + ++index; +#elif defined(__APPLE__) + unw_getcontext(&ctx) + // TODO: Convert the ucontext_t to libunwind's unw_context_t like + // we do in 64 bits + if (ctx.uc_mcontext->__ss.__eip == reinterpret_cast(error_addr())) { + ctx.uc_mcontext->__ss.__eip = ctx.uc_mcontext->__ss.__esp; + } + _stacktrace[index] = reinterpret_cast(ctx.uc_mcontext->__ss.__eip); + ++index; +#endif + } + + unw_cursor_t cursor; + if (context()) { +#if defined(UNW_INIT_SIGNAL_FRAME) + result = unw_init_local2(&cursor, &ctx, UNW_INIT_SIGNAL_FRAME); +#else + result = unw_init_local(&cursor, &ctx); +#endif + } + else { + unw_getcontext(&ctx); + ; + result = unw_init_local(&cursor, &ctx); + } + + if (result != 0) + return 1; + + unw_word_t ip = 0; + + while (index <= depth && unw_step(&cursor) > 0) { + result = unw_get_reg(&cursor, UNW_REG_IP, &ip); + if (result == 0) { + _stacktrace[index] = reinterpret_cast(--ip); + ++index; + } + } + --index; + + _stacktrace.resize(index + 1); + skip_n_firsts(0); + return size(); + } + + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < _stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + _stacktrace[i] = (void *)((uintptr_t)_stacktrace[i]); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } +}; + +#elif defined(BACKWARD_HAS_BACKTRACE) + +template <> +class StackTraceImpl : public StackTraceImplHolder { + public: + NOINLINE + size_t load_here(size_t depth = 32, void *context = nullptr, void *error_addr = nullptr) { + set_context(context); + set_error_addr(error_addr); + load_thread_info(); + if (depth == 0) { + return 0; + 
} + _stacktrace.resize(depth + 1); + size_t trace_cnt = backtrace(&_stacktrace[0], _stacktrace.size()); + _stacktrace.resize(trace_cnt); + skip_n_firsts(1); + return size(); + } + + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < _stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + _stacktrace[i] = (void *)((uintptr_t)_stacktrace[i] + 1); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } +}; + +#elif defined(BACKWARD_SYSTEM_WINDOWS) + +template <> +class StackTraceImpl : public StackTraceImplHolder { + public: + // We have to load the machine type from the image info + // So we first initialize the resolver, and it tells us this info + void set_machine_type(DWORD machine_type) { machine_type_ = machine_type; } + void set_context(CONTEXT *ctx) { ctx_ = ctx; } + void set_thread_handle(HANDLE handle) { thd_ = handle; } + + NOINLINE + size_t load_here(size_t depth = 32, void *context = nullptr, void *error_addr = nullptr) { + set_context(static_cast(context)); + set_error_addr(error_addr); + CONTEXT localCtx; // used when no context is provided + + if (depth == 0) { + return 0; + } + + if (!ctx_) { + ctx_ = &localCtx; + RtlCaptureContext(ctx_); + } + + if (!thd_) { + thd_ = GetCurrentThread(); + } + + HANDLE process = GetCurrentProcess(); + + STACKFRAME64 s; + memset(&s, 0, sizeof(STACKFRAME64)); + + // TODO: 32 bit context capture + s.AddrStack.Mode = AddrModeFlat; + s.AddrFrame.Mode = AddrModeFlat; + s.AddrPC.Mode = AddrModeFlat; +#if defined(_M_X64) + s.AddrPC.Offset = ctx_->Rip; + s.AddrStack.Offset = ctx_->Rsp; + s.AddrFrame.Offset = ctx_->Rbp; +#elif defined(_M_ARM64) + s.AddrPC.Offset = ctx_->Pc; + s.AddrStack.Offset = ctx_->Sp; + s.AddrFrame.Offset = ctx_->Fp; +#elif defined(_M_ARM) + s.AddrPC.Offset = ctx_->Pc; + s.AddrStack.Offset = 
ctx_->Sp; + s.AddrFrame.Offset = ctx_->R11; +#else + s.AddrPC.Offset = ctx_->Eip; + s.AddrStack.Offset = ctx_->Esp; + s.AddrFrame.Offset = ctx_->Ebp; +#endif + + if (!machine_type_) { +#if defined(_M_X64) + machine_type_ = IMAGE_FILE_MACHINE_AMD64; +#elif defined(_M_ARM64) + machine_type_ = IMAGE_FILE_MACHINE_ARM64; +#elif defined(_M_ARM) + machine_type_ = IMAGE_FILE_MACHINE_ARMNT; +#else + machine_type_ = IMAGE_FILE_MACHINE_I386; +#endif + } + + for (;;) { + // NOTE: this only works if PDBs are already loaded! + SetLastError(0); + if (!StackWalk64(machine_type_, process, thd_, &s, ctx_, NULL, SymFunctionTableAccess64, + SymGetModuleBase64, NULL)) + break; + + if (s.AddrReturn.Offset == 0) + break; + + _stacktrace.push_back(reinterpret_cast(s.AddrPC.Offset)); + + if (size() >= depth) + break; + } + + return size(); + } + + size_t load_from(void *addr, size_t depth = 32, void *context = nullptr, + void *error_addr = nullptr) { + load_here(depth + 8, context, error_addr); + + for (size_t i = 0; i < _stacktrace.size(); ++i) { + if (_stacktrace[i] == addr) { + skip_n_firsts(i); + break; + } + } + + _stacktrace.resize(std::min(_stacktrace.size(), skip_n_firsts() + depth)); + return size(); + } + + private: + DWORD machine_type_ = 0; + HANDLE thd_ = 0; + CONTEXT *ctx_ = nullptr; +}; + +#endif + +class StackTrace : public StackTraceImpl {}; + +/*************** TRACE RESOLVER ***************/ + +class TraceResolverImplBase { + public: + virtual ~TraceResolverImplBase() {} + + virtual void load_addresses(void *const *addresses, int address_count) { + (void)addresses; + (void)address_count; + } + + template + void load_stacktrace(ST &st) { + load_addresses(st.begin(), static_cast(st.size())); + } + + virtual ResolvedTrace resolve(ResolvedTrace t) { return t; } + + protected: + std::string demangle(const char *funcname) { return _demangler.demangle(funcname); } + + private: + details::demangler _demangler; +}; + +template +class TraceResolverImpl; + +#ifdef 
BACKWARD_SYSTEM_UNKNOWN + +template <> +class TraceResolverImpl : public TraceResolverImplBase {}; + +#endif + +#ifdef BACKWARD_SYSTEM_LINUX + +class TraceResolverLinuxBase : public TraceResolverImplBase { + public: + TraceResolverLinuxBase() : argv0_(get_argv0()), exec_path_(read_symlink("/proc/self/exe")) {} + std::string resolve_exec_path(Dl_info &symbol_info) const { + // mutates symbol_info.dli_fname to be filename to open and returns filename + // to display + if (symbol_info.dli_fname == argv0_) { + // dladdr returns argv[0] in dli_fname for symbols contained in + // the main executable, which is not a valid path if the + // executable was found by a search of the PATH environment + // variable; In that case, we actually open /proc/self/exe, which + // is always the actual executable (even if it was deleted/replaced!) + // but display the path that /proc/self/exe links to. + // However, this right away reduces probability of successful symbol + // resolution, because libbfd may try to find *.debug files in the + // same dir, in case symbols are stripped. As a result, it may try + // to find a file /proc/self/.debug, which obviously does + // not exist. /proc/self/exe is a last resort. First load attempt + // should go for the original executable file path. 
+ symbol_info.dli_fname = "/proc/self/exe"; + return exec_path_; + } + else { + return symbol_info.dli_fname; + } + } + + private: + std::string argv0_; + std::string exec_path_; + + static std::string get_argv0() { + std::string argv0; + std::ifstream ifs("/proc/self/cmdline"); + std::getline(ifs, argv0, '\0'); + return argv0; + } + + static std::string read_symlink(std::string const &symlink_path) { + std::string path; + path.resize(100); + + while (true) { + ssize_t len = ::readlink(symlink_path.c_str(), &*path.begin(), path.size()); + if (len < 0) { + return ""; + } + if (static_cast(len) == path.size()) { + path.resize(path.size() * 2); + } + else { + path.resize(static_cast(len)); + break; + } + } + + return path; + } +}; + +template +class TraceResolverLinuxImpl; + +#if BACKWARD_HAS_BACKTRACE_SYMBOL == 1 + +template <> +class TraceResolverLinuxImpl : public TraceResolverLinuxBase { + public: + void load_addresses(void *const *addresses, int address_count) override { + if (address_count == 0) { + return; + } + _symbols.reset(backtrace_symbols(addresses, address_count)); + } + + ResolvedTrace resolve(ResolvedTrace trace) override { + char *filename = _symbols[trace.idx]; + char *funcname = filename; + while (*funcname && *funcname != '(') { + funcname += 1; + } + trace.object_filename.assign(filename, + funcname); // ok even if funcname is the ending + // \0 (then we assign entire string) + + if (*funcname) { // if it's not end of string (e.g. from last frame ip==0) + funcname += 1; + char *funcname_end = funcname; + while (*funcname_end && *funcname_end != ')' && *funcname_end != '+') { + funcname_end += 1; + } + *funcname_end = '\0'; + trace.object_function = this->demangle(funcname); + trace.source.function = trace.object_function; // we cannot do better. 
+ } + return trace; + } + + private: + details::handle _symbols; +}; + +#endif // BACKWARD_HAS_BACKTRACE_SYMBOL == 1 + +#if BACKWARD_HAS_BFD == 1 + +template <> +class TraceResolverLinuxImpl : public TraceResolverLinuxBase { + public: + TraceResolverLinuxImpl() : _bfd_loaded(false) {} + + ResolvedTrace resolve(ResolvedTrace trace) override { + Dl_info symbol_info; + + // trace.addr is a virtual address in memory pointing to some code. + // Let's try to find from which loaded object it comes from. + // The loaded object can be yourself btw. + if (!dladdr(trace.addr, &symbol_info)) { + return trace; // dat broken trace... + } + + // Now we get in symbol_info: + // .dli_fname: + // pathname of the shared object that contains the address. + // .dli_fbase: + // where the object is loaded in memory. + // .dli_sname: + // the name of the nearest symbol to trace.addr, we expect a + // function name. + // .dli_saddr: + // the exact address corresponding to .dli_sname. + + if (symbol_info.dli_sname) { + trace.object_function = demangle(symbol_info.dli_sname); + } + + if (!symbol_info.dli_fname) { + return trace; + } + + trace.object_filename = resolve_exec_path(symbol_info); + bfd_fileobject *fobj; + // Before rushing to resolution need to ensure the executable + // file still can be used. For that compare inode numbers of + // what is stored by the executable's file path, and in the + // dli_fname, which not necessarily equals to the executable. + // It can be a shared library, or /proc/self/exe, and in the + // latter case has drawbacks. See the exec path resolution for + // details. In short - the dli object should be used only as + // the last resort. + // If inode numbers are equal, it is known dli_fname and the + // executable file are the same. This is guaranteed by Linux, + // because if the executable file is changed/deleted, it will + // be done in a new inode. The old file will be preserved in + // /proc/self/exe, and may even have inode 0. 
The latter can + // happen if the inode was actually reused, and the file was + // kept only in the main memory. + // + struct stat obj_stat; + struct stat dli_stat; + if (stat(trace.object_filename.c_str(), &obj_stat) == 0 && + stat(symbol_info.dli_fname, &dli_stat) == 0 && obj_stat.st_ino == dli_stat.st_ino) { + // The executable file, and the shared object containing the + // address are the same file. Safe to use the original path. + // this is preferable. Libbfd will search for stripped debug + // symbols in the same directory. + fobj = load_object_with_bfd(trace.object_filename); + } + else { + // The original object file was *deleted*! The only hope is + // that the debug symbols are either inside the shared + // object file, or are in the same directory, and this is + // not /proc/self/exe. + fobj = nullptr; + } + if (fobj == nullptr || !fobj->handle) { + fobj = load_object_with_bfd(symbol_info.dli_fname); + if (!fobj->handle) { + return trace; + } + } + + find_sym_result *details_selected; // to be filled. + + // trace.addr is the next instruction to be executed after returning + // from the nested stack frame. In C++ this usually relate to the next + // statement right after the function call that leaded to a new stack + // frame. This is not usually what you want to see when printing out a + // stacktrace... + find_sym_result details_call_site = + find_symbol_details(fobj, trace.addr, symbol_info.dli_fbase); + details_selected = &details_call_site; + +#if BACKWARD_HAS_UNWIND == 0 + // ...this is why we also try to resolve the symbol that is right + // before the return address. If we are lucky enough, we will get the + // line of the function that was called. But if the code is optimized, + // we might get something absolutely not related since the compiler + // can reschedule the return address with inline functions and + // tail-call optimization (among other things that I don't even know + // or cannot even dream about with my tiny limited brain). 
+ find_sym_result details_adjusted_call_site = + find_symbol_details(fobj, (void *)(uintptr_t(trace.addr) - 1), symbol_info.dli_fbase); + + // In debug mode, we should always get the right thing(TM). + if (details_call_site.found && details_adjusted_call_site.found) { + // Ok, we assume that details_adjusted_call_site is a better estimation. + details_selected = &details_adjusted_call_site; + trace.addr = (void *)(uintptr_t(trace.addr) - 1); + } + + if (details_selected == &details_call_site && details_call_site.found) { + // we have to re-resolve the symbol in order to reset some + // internal state in BFD... so we can call backtrace_inliners + // thereafter... + details_call_site = find_symbol_details(fobj, trace.addr, symbol_info.dli_fbase); + } +#endif // BACKWARD_HAS_UNWIND + + if (details_selected->found) { + if (details_selected->filename) { + trace.source.filename = details_selected->filename; + } + trace.source.line = details_selected->line; + + if (details_selected->funcname) { + // this time we get the name of the function where the code is + // located, instead of the function were the address is + // located. In short, if the code was inlined, we get the + // function corresponding to the code. Else we already got in + // trace.function. + trace.source.function = demangle(details_selected->funcname); + + if (!symbol_info.dli_sname) { + // for the case dladdr failed to find the symbol name of + // the function, we might as well try to put something + // here. + trace.object_function = trace.source.function; + } + } + + // Maybe the source of the trace got inlined inside the function + // (trace.source.function). Let's see if we can get all the inlined + // calls along the way up to the initial call site. + trace.inliners = backtrace_inliners(fobj, *details_selected); + +#if 0 + if (trace.inliners.size() == 0) { + // Maybe the trace was not inlined... or maybe it was and we + // are lacking the debug information. 
Let's try to make the + // world better and see if we can get the line number of the + // function (trace.source.function) now. + // + // We will get the location of where the function start (to be + // exact: the first instruction that really start the + // function), not where the name of the function is defined. + // This can be quite far away from the name of the function + // btw. + // + // If the source of the function is the same as the source of + // the trace, we cannot say if the trace was really inlined or + // not. However, if the filename of the source is different + // between the function and the trace... we can declare it as + // an inliner. This is not 100% accurate, but better than + // nothing. + + if (symbol_info.dli_saddr) { + find_sym_result details = find_symbol_details(fobj, + symbol_info.dli_saddr, + symbol_info.dli_fbase); + + if (details.found) { + ResolvedTrace::SourceLoc diy_inliner; + diy_inliner.line = details.line; + if (details.filename) { + diy_inliner.filename = details.filename; + } + if (details.funcname) { + diy_inliner.function = demangle(details.funcname); + } else { + diy_inliner.function = trace.source.function; + } + if (diy_inliner != trace.source) { + trace.inliners.push_back(diy_inliner); + } + } + } + } +#endif + } + + return trace; + } + + private: + bool _bfd_loaded; + + typedef details::handle > bfd_handle_t; + + typedef details::handle bfd_symtab_t; + + struct bfd_fileobject { + bfd_handle_t handle; + bfd_vma base_addr; + bfd_symtab_t symtab; + bfd_symtab_t dynamic_symtab; + }; + + typedef details::hashtable::type fobj_bfd_map_t; + fobj_bfd_map_t _fobj_bfd_map; + + bfd_fileobject *load_object_with_bfd(const std::string &filename_object) { + using namespace details; + + if (!_bfd_loaded) { + using namespace details; + bfd_init(); + _bfd_loaded = true; + } + + fobj_bfd_map_t::iterator it = _fobj_bfd_map.find(filename_object); + if (it != _fobj_bfd_map.end()) { + return &it->second; + } + + // this new object is empty 
for now. + bfd_fileobject *r = &_fobj_bfd_map[filename_object]; + + // we do the work temporary in this one; + bfd_handle_t bfd_handle; + + int fd = open(filename_object.c_str(), O_RDONLY); + bfd_handle.reset(bfd_fdopenr(filename_object.c_str(), "default", fd)); + if (!bfd_handle) { + close(fd); + return r; + } + + if (!bfd_check_format(bfd_handle.get(), bfd_object)) { + return r; // not an object? You lose. + } + + if ((bfd_get_file_flags(bfd_handle.get()) & HAS_SYMS) == 0) { + return r; // that's what happen when you forget to compile in debug. + } + + ssize_t symtab_storage_size = bfd_get_symtab_upper_bound(bfd_handle.get()); + + ssize_t dyn_symtab_storage_size = bfd_get_dynamic_symtab_upper_bound(bfd_handle.get()); + + if (symtab_storage_size <= 0 && dyn_symtab_storage_size <= 0) { + return r; // weird, is the file is corrupted? + } + + bfd_symtab_t symtab, dynamic_symtab; + ssize_t symcount = 0, dyn_symcount = 0; + + if (symtab_storage_size > 0) { + symtab.reset( + static_cast(malloc(static_cast(symtab_storage_size)))); + symcount = bfd_canonicalize_symtab(bfd_handle.get(), symtab.get()); + } + + if (dyn_symtab_storage_size > 0) { + dynamic_symtab.reset( + static_cast(malloc(static_cast(dyn_symtab_storage_size)))); + dyn_symcount = bfd_canonicalize_dynamic_symtab(bfd_handle.get(), dynamic_symtab.get()); + } + + if (symcount <= 0 && dyn_symcount <= 0) { + return r; // damned, that's a stripped file that you got there! 
+ } + + r->handle = move(bfd_handle); + r->symtab = move(symtab); + r->dynamic_symtab = move(dynamic_symtab); + return r; + } + + struct find_sym_result { + bool found; + const char *filename; + const char *funcname; + unsigned int line; + }; + + struct find_sym_context { + TraceResolverLinuxImpl *self; + bfd_fileobject *fobj; + void *addr; + void *base_addr; + find_sym_result result; + }; + + find_sym_result find_symbol_details(bfd_fileobject *fobj, void *addr, void *base_addr) { + find_sym_context context; + context.self = this; + context.fobj = fobj; + context.addr = addr; + context.base_addr = base_addr; + context.result.found = false; + bfd_map_over_sections(fobj->handle.get(), &find_in_section_trampoline, + static_cast(&context)); + return context.result; + } + + static void find_in_section_trampoline(bfd *, asection *section, void *data) { + find_sym_context *context = static_cast(data); + context->self->find_in_section(reinterpret_cast(context->addr), + reinterpret_cast(context->base_addr), context->fobj, + section, context->result); + } + + void find_in_section(bfd_vma addr, bfd_vma base_addr, bfd_fileobject *fobj, asection *section, + find_sym_result &result) { + if (result.found) + return; + +#ifdef bfd_get_section_flags + if ((bfd_get_section_flags(fobj->handle.get(), section) & SEC_ALLOC) == 0) +#else + if ((bfd_section_flags(section) & SEC_ALLOC) == 0) +#endif + return; // a debug section is never loaded automatically. + +#ifdef bfd_get_section_vma + bfd_vma sec_addr = bfd_get_section_vma(fobj->handle.get(), section); +#else + bfd_vma sec_addr = bfd_section_vma(section); +#endif +#ifdef bfd_get_section_size + bfd_size_type size = bfd_get_section_size(section); +#else + bfd_size_type size = bfd_section_size(section); +#endif + + // are we in the boundaries of the section? + if (addr < sec_addr || addr >= sec_addr + size) { + addr -= base_addr; // oops, a relocated object, lets try again... 
+ if (addr < sec_addr || addr >= sec_addr + size) { + return; + } + } + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + if (!result.found && fobj->symtab) { + result.found = bfd_find_nearest_line(fobj->handle.get(), section, fobj->symtab.get(), + addr - sec_addr, &result.filename, + &result.funcname, &result.line); + } + + if (!result.found && fobj->dynamic_symtab) { + result.found = bfd_find_nearest_line(fobj->handle.get(), section, + fobj->dynamic_symtab.get(), addr - sec_addr, + &result.filename, &result.funcname, &result.line); + } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + } + + ResolvedTrace::source_locs_t backtrace_inliners(bfd_fileobject *fobj, + find_sym_result previous_result) { + // This function can be called ONLY after a SUCCESSFUL call to + // find_symbol_details. The state is global to the bfd_handle. + ResolvedTrace::source_locs_t results; + while (previous_result.found) { + find_sym_result result; + result.found = bfd_find_inliner_info(fobj->handle.get(), &result.filename, + &result.funcname, &result.line); + + if (result.found) /* and not ( + cstrings_eq(previous_result.filename, + result.filename) and + cstrings_eq(previous_result.funcname, result.funcname) + and result.line == previous_result.line + )) */ + { + ResolvedTrace::SourceLoc src_loc; + src_loc.line = result.line; + if (result.filename) { + src_loc.filename = result.filename; + } + if (result.funcname) { + src_loc.function = demangle(result.funcname); + } + results.push_back(src_loc); + } + previous_result = result; + } + return results; + } + + bool cstrings_eq(const char *a, const char *b) { + if (!a || !b) { + return false; + } + return strcmp(a, b) == 0; + } +}; +#endif // BACKWARD_HAS_BFD == 1 + +#if BACKWARD_HAS_DW == 1 + +template <> +class TraceResolverLinuxImpl : public TraceResolverLinuxBase { + public: + TraceResolverLinuxImpl() : _dwfl_handle_initialized(false) {} + + 
ResolvedTrace resolve(ResolvedTrace trace) override { + using namespace details; + + Dwarf_Addr trace_addr = reinterpret_cast(trace.addr); + + if (!_dwfl_handle_initialized) { + // initialize dwfl... + _dwfl_cb.reset(new Dwfl_Callbacks); + _dwfl_cb->find_elf = &dwfl_linux_proc_find_elf; + _dwfl_cb->find_debuginfo = &dwfl_standard_find_debuginfo; + _dwfl_cb->debuginfo_path = 0; + + _dwfl_handle.reset(dwfl_begin(_dwfl_cb.get())); + _dwfl_handle_initialized = true; + + if (!_dwfl_handle) { + return trace; + } + + // ...from the current process. + dwfl_report_begin(_dwfl_handle.get()); + int r = dwfl_linux_proc_report(_dwfl_handle.get(), getpid()); + dwfl_report_end(_dwfl_handle.get(), NULL, NULL); + if (r < 0) { + return trace; + } + } + + if (!_dwfl_handle) { + return trace; + } + + // find the module (binary object) that contains the trace's address. + // This is not using any debug information, but the addresses ranges of + // all the currently loaded binary object. + Dwfl_Module *mod = dwfl_addrmodule(_dwfl_handle.get(), trace_addr); + if (mod) { + // now that we found it, lets get the name of it, this will be the + // full path to the running binary or one of the loaded library. + const char *module_name = dwfl_module_info(mod, 0, 0, 0, 0, 0, 0, 0); + if (module_name) { + trace.object_filename = module_name; + } + // We also look after the name of the symbol, equal or before this + // address. This is found by walking the symtab. We should get the + // symbol corresponding to the function (mangled) containing the + // address. If the code corresponding to the address was inlined, + // this is the name of the out-most inliner function. + const char *sym_name = dwfl_module_addrname(mod, trace_addr); + if (sym_name) { + trace.object_function = demangle(sym_name); + } + } + + // now let's get serious, and find out the source location (file and + // line number) of the address. 
+ + // This function will look in .debug_aranges for the address and map it + // to the location of the compilation unit DIE in .debug_info and + // return it. + Dwarf_Addr mod_bias = 0; + Dwarf_Die *cudie = dwfl_module_addrdie(mod, trace_addr, &mod_bias); + +#if 1 + if (!cudie) { + // Sadly clang does not generate the section .debug_aranges, thus + // dwfl_module_addrdie will fail early. Clang doesn't either set + // the lowpc/highpc/range info for every compilation unit. + // + // So in order to save the world: + // for every compilation unit, we will iterate over every single + // DIEs. Normally functions should have a lowpc/highpc/range, which + // we will use to infer the compilation unit. + + // note that this is probably badly inefficient. + while ((cudie = dwfl_module_nextcu(mod, cudie, &mod_bias))) { + Dwarf_Die die_mem; + Dwarf_Die *fundie = find_fundie_by_pc(cudie, trace_addr - mod_bias, &die_mem); + if (fundie) { + break; + } + } + } +#endif + +//#define BACKWARD_I_DO_NOT_RECOMMEND_TO_ENABLE_THIS_HORRIBLE_PIECE_OF_CODE +#ifdef BACKWARD_I_DO_NOT_RECOMMEND_TO_ENABLE_THIS_HORRIBLE_PIECE_OF_CODE + if (!cudie) { + // If it's still not enough, lets dive deeper in the shit, and try + // to save the world again: for every compilation unit, we will + // load the corresponding .debug_line section, and see if we can + // find our address in it. + + Dwarf_Addr cfi_bias; + Dwarf_CFI *cfi_cache = dwfl_module_eh_cfi(mod, &cfi_bias); + + Dwarf_Addr bias; + while ((cudie = dwfl_module_nextcu(mod, cudie, &bias))) { + if (dwarf_getsrc_die(cudie, trace_addr - bias)) { + // ...but if we get a match, it might be a false positive + // because our (address - bias) might as well be valid in a + // different compilation unit. So we throw our last card on + // the table and lookup for the address into the .eh_frame + // section. 
+ + handle frame; + dwarf_cfi_addrframe(cfi_cache, trace_addr - cfi_bias, &frame); + if (frame) { + break; + } + } + } + } +#endif + + if (!cudie) { + return trace; // this time we lost the game :/ + } + + // Now that we have a compilation unit DIE, this function will be able + // to load the corresponding section in .debug_line (if not already + // loaded) and hopefully find the source location mapped to our + // address. + Dwarf_Line *srcloc = dwarf_getsrc_die(cudie, trace_addr - mod_bias); + + if (srcloc) { + const char *srcfile = dwarf_linesrc(srcloc, 0, 0); + if (srcfile) { + trace.source.filename = srcfile; + } + int line = 0, col = 0; + dwarf_lineno(srcloc, &line); + dwarf_linecol(srcloc, &col); + trace.source.line = static_cast(line); + trace.source.col = static_cast(col); + } + + deep_first_search_by_pc(cudie, trace_addr - mod_bias, inliners_search_cb(trace)); + if (trace.source.function.size() == 0) { + // fallback. + trace.source.function = trace.object_function; + } + + return trace; + } + + private: + typedef details::handle > dwfl_handle_t; + details::handle > _dwfl_cb; + dwfl_handle_t _dwfl_handle; + bool _dwfl_handle_initialized; + + // defined here because in C++98, template function cannot take locally + // defined types... grrr. 
+ struct inliners_search_cb { + void operator()(Dwarf_Die *die) { + switch (dwarf_tag(die)) { + const char *name; + case DW_TAG_subprogram: + if ((name = dwarf_diename(die))) { + trace.source.function = name; + } + break; + + case DW_TAG_inlined_subroutine: + ResolvedTrace::SourceLoc sloc; + Dwarf_Attribute attr_mem; + + if ((name = dwarf_diename(die))) { + sloc.function = name; + } + if ((name = die_call_file(die))) { + sloc.filename = name; + } + + Dwarf_Word line = 0, col = 0; + dwarf_formudata(dwarf_attr(die, DW_AT_call_line, &attr_mem), &line); + dwarf_formudata(dwarf_attr(die, DW_AT_call_column, &attr_mem), &col); + sloc.line = static_cast(line); + sloc.col = static_cast(col); + + trace.inliners.push_back(sloc); + break; + }; + } + ResolvedTrace &trace; + inliners_search_cb(ResolvedTrace &t) : trace(t) {} + }; + + static bool die_has_pc(Dwarf_Die *die, Dwarf_Addr pc) { + Dwarf_Addr low, high; + + // continuous range + if (dwarf_hasattr(die, DW_AT_low_pc) && dwarf_hasattr(die, DW_AT_high_pc)) { + if (dwarf_lowpc(die, &low) != 0) { + return false; + } + if (dwarf_highpc(die, &high) != 0) { + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr(die, DW_AT_high_pc, &attr_mem); + Dwarf_Word value; + if (dwarf_formudata(attr, &value) != 0) { + return false; + } + high = low + value; + } + return pc >= low && pc < high; + } + + // non-continuous range. 
+ Dwarf_Addr base; + ptrdiff_t offset = 0; + while ((offset = dwarf_ranges(die, offset, &base, &low, &high)) > 0) { + if (pc >= low && pc < high) { + return true; + } + } + return false; + } + + static Dwarf_Die *find_fundie_by_pc(Dwarf_Die *parent_die, Dwarf_Addr pc, Dwarf_Die *result) { + if (dwarf_child(parent_die, result) != 0) { + return 0; + } + + Dwarf_Die *die = result; + do { + switch (dwarf_tag(die)) { + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: + if (die_has_pc(die, pc)) { + return result; + } + }; + bool declaration = false; + Dwarf_Attribute attr_mem; + dwarf_formflag(dwarf_attr(die, DW_AT_declaration, &attr_mem), &declaration); + if (!declaration) { + // let's be curious and look deeper in the tree, + // function are not necessarily at the first level, but + // might be nested inside a namespace, structure etc. + Dwarf_Die die_mem; + Dwarf_Die *indie = find_fundie_by_pc(die, pc, &die_mem); + if (indie) { + *result = die_mem; + return result; + } + } + } while (dwarf_siblingof(die, result) == 0); + return 0; + } + + template + static bool deep_first_search_by_pc(Dwarf_Die *parent_die, Dwarf_Addr pc, CB cb) { + Dwarf_Die die_mem; + if (dwarf_child(parent_die, &die_mem) != 0) { + return false; + } + + bool branch_has_pc = false; + Dwarf_Die *die = &die_mem; + do { + bool declaration = false; + Dwarf_Attribute attr_mem; + dwarf_formflag(dwarf_attr(die, DW_AT_declaration, &attr_mem), &declaration); + if (!declaration) { + // let's be curious and look deeper in the tree, function are + // not necessarily at the first level, but might be nested + // inside a namespace, structure, a function, an inlined + // function etc. 
+ branch_has_pc = deep_first_search_by_pc(die, pc, cb); + } + if (!branch_has_pc) { + branch_has_pc = die_has_pc(die, pc); + } + if (branch_has_pc) { + cb(die); + } + } while (dwarf_siblingof(die, &die_mem) == 0); + return branch_has_pc; + } + + static const char *die_call_file(Dwarf_Die *die) { + Dwarf_Attribute attr_mem; + Dwarf_Word file_idx = 0; + + dwarf_formudata(dwarf_attr(die, DW_AT_call_file, &attr_mem), &file_idx); + + if (file_idx == 0) { + return 0; + } + + Dwarf_Die die_mem; + Dwarf_Die *cudie = dwarf_diecu(die, &die_mem, 0, 0); + if (!cudie) { + return 0; + } + + Dwarf_Files *files = 0; + size_t nfiles; + dwarf_getsrcfiles(cudie, &files, &nfiles); + if (!files) { + return 0; + } + + return dwarf_filesrc(files, file_idx, 0, 0); + } +}; +#endif // BACKWARD_HAS_DW == 1 + +#if BACKWARD_HAS_DWARF == 1 + +template <> +class TraceResolverLinuxImpl : public TraceResolverLinuxBase { + public: + TraceResolverLinuxImpl() : _dwarf_loaded(false) {} + + ResolvedTrace resolve(ResolvedTrace trace) override { + // trace.addr is a virtual address in memory pointing to some code. + // Let's try to find from which loaded object it comes from. + // The loaded object can be yourself btw. + + Dl_info symbol_info; + int dladdr_result = 0; +#if defined(__GLIBC__) + link_map *link_map; + // We request the link map so we can get information about offsets + dladdr_result = dladdr1(trace.addr, &symbol_info, reinterpret_cast(&link_map), + RTLD_DL_LINKMAP); +#else + // Android doesn't have dladdr1. Don't use the linker map. + dladdr_result = dladdr(trace.addr, &symbol_info); +#endif + if (!dladdr_result) { + return trace; // dat broken trace... + } + + // Now we get in symbol_info: + // .dli_fname: + // pathname of the shared object that contains the address. + // .dli_fbase: + // where the object is loaded in memory. + // .dli_sname: + // the name of the nearest symbol to trace.addr, we expect a + // function name. 
+ // .dli_saddr: + // the exact address corresponding to .dli_sname. + // + // And in link_map: + // .l_addr: + // difference between the address in the ELF file and the address + // in memory + // l_name: + // absolute pathname where the object was found + + if (symbol_info.dli_sname) { + trace.object_function = demangle(symbol_info.dli_sname); + } + + if (!symbol_info.dli_fname) { + return trace; + } + + trace.object_filename = resolve_exec_path(symbol_info); + dwarf_fileobject &fobj = load_object_with_dwarf(symbol_info.dli_fname); + if (!fobj.dwarf_handle) { + return trace; // sad, we couldn't load the object :( + } + +#if defined(__GLIBC__) + // Convert the address to a module relative one by looking at + // the module's loading address in the link map + Dwarf_Addr address = + reinterpret_cast(trace.addr) - reinterpret_cast(link_map->l_addr); +#else + Dwarf_Addr address = reinterpret_cast(trace.addr); +#endif + + if (trace.object_function.empty()) { + symbol_cache_t::iterator it = fobj.symbol_cache.lower_bound(address); + + if (it != fobj.symbol_cache.end()) { + if (it->first != address) { + if (it != fobj.symbol_cache.begin()) { + --it; + } + } + trace.object_function = demangle(it->second.c_str()); + } + } + + // Get the Compilation Unit DIE for the address + Dwarf_Die die = find_die(fobj, address); + + if (!die) { + return trace; // this time we lost the game :/ + } + + // libdwarf doesn't give us direct access to its objects, it always + // allocates a copy for the caller. We keep that copy alive in a cache + // and we deallocate it later when it's no longer required. 
+ die_cache_entry &die_object = get_die_cache(fobj, die); + if (die_object.isEmpty()) + return trace; // We have no line section for this DIE + + die_linemap_t::iterator it = die_object.line_section.lower_bound(address); + + if (it != die_object.line_section.end()) { + if (it->first != address) { + if (it == die_object.line_section.begin()) { + // If we are on the first item of the line section + // but the address does not match it means that + // the address is below the range of the DIE. Give up. + return trace; + } + else { + --it; + } + } + } + else { + return trace; // We didn't find the address. + } + + // Get the Dwarf_Line that the address points to and call libdwarf + // to get source file, line and column info. + Dwarf_Line line = die_object.line_buffer[it->second]; + Dwarf_Error error = DW_DLE_NE; + + char *filename; + if (dwarf_linesrc(line, &filename, &error) == DW_DLV_OK) { + trace.source.filename = std::string(filename); + dwarf_dealloc(fobj.dwarf_handle.get(), filename, DW_DLA_STRING); + } + + Dwarf_Unsigned number = 0; + if (dwarf_lineno(line, &number, &error) == DW_DLV_OK) { + trace.source.line = number; + } + else { + trace.source.line = 0; + } + + if (dwarf_lineoff_b(line, &number, &error) == DW_DLV_OK) { + trace.source.col = number; + } + else { + trace.source.col = 0; + } + + std::vector namespace_stack; + deep_first_search_by_pc(fobj, die, address, namespace_stack, + inliners_search_cb(trace, fobj, die)); + + dwarf_dealloc(fobj.dwarf_handle.get(), die, DW_DLA_DIE); + + return trace; + } + + public: + static int close_dwarf(Dwarf_Debug dwarf) { return dwarf_finish(dwarf, NULL); } + + private: + bool _dwarf_loaded; + + typedef details::handle > dwarf_file_t; + + typedef details::handle > dwarf_elf_t; + + typedef details::handle > + dwarf_handle_t; + + typedef std::map die_linemap_t; + + typedef std::map die_specmap_t; + + struct die_cache_entry { + die_specmap_t spec_section; + die_linemap_t line_section; + Dwarf_Line *line_buffer; + 
Dwarf_Signed line_count; + Dwarf_Line_Context line_context; + + inline bool isEmpty() { + return line_buffer == NULL || line_count == 0 || line_context == NULL || + line_section.empty(); + } + + die_cache_entry() : line_buffer(0), line_count(0), line_context(0) {} + + ~die_cache_entry() { + if (line_context) { + dwarf_srclines_dealloc_b(line_context); + } + } + }; + + typedef std::map die_cache_t; + + typedef std::map symbol_cache_t; + + struct dwarf_fileobject { + dwarf_file_t file_handle; + dwarf_elf_t elf_handle; + dwarf_handle_t dwarf_handle; + symbol_cache_t symbol_cache; + + // Die cache + die_cache_t die_cache; + die_cache_entry *current_cu; + }; + + typedef details::hashtable::type fobj_dwarf_map_t; + fobj_dwarf_map_t _fobj_dwarf_map; + + static bool cstrings_eq(const char *a, const char *b) { + if (!a || !b) { + return false; + } + return strcmp(a, b) == 0; + } + + dwarf_fileobject &load_object_with_dwarf(const std::string &filename_object) { + if (!_dwarf_loaded) { + // Set the ELF library operating version + // If that fails there's nothing we can do + _dwarf_loaded = elf_version(EV_CURRENT) != EV_NONE; + } + + fobj_dwarf_map_t::iterator it = _fobj_dwarf_map.find(filename_object); + if (it != _fobj_dwarf_map.end()) { + return it->second; + } + + // this new object is empty for now + dwarf_fileobject &r = _fobj_dwarf_map[filename_object]; + + dwarf_file_t file_handle; + file_handle.reset(open(filename_object.c_str(), O_RDONLY)); + if (file_handle.get() < 0) { + return r; + } + + // Try to get an ELF handle. 
We need to read the ELF sections + // because we want to see if there is a .gnu_debuglink section + // that points to a split debug file + dwarf_elf_t elf_handle; + elf_handle.reset(elf_begin(file_handle.get(), ELF_C_READ, NULL)); + if (!elf_handle) { + return r; + } + + const char *e_ident = elf_getident(elf_handle.get(), 0); + if (!e_ident) { + return r; + } + + // Get the number of sections + // We use the new APIs as elf_getshnum is deprecated + size_t shdrnum = 0; + if (elf_getshdrnum(elf_handle.get(), &shdrnum) == -1) { + return r; + } + + // Get the index to the string section + size_t shdrstrndx = 0; + if (elf_getshdrstrndx(elf_handle.get(), &shdrstrndx) == -1) { + return r; + } + + std::string debuglink; + // Iterate through the ELF sections to try to get a gnu_debuglink + // note and also to cache the symbol table. + // We go the preprocessor way to avoid having to create templated + // classes or using gelf (which might throw a compiler error if 64 bit + // is not supported +#define ELF_GET_DATA(ARCH) \ + Elf_Scn *elf_section = 0; \ + Elf_Data *elf_data = 0; \ + Elf##ARCH##_Shdr *section_header = 0; \ + Elf_Scn *symbol_section = 0; \ + size_t symbol_count = 0; \ + size_t symbol_strings = 0; \ + Elf##ARCH##_Sym *symbol = 0; \ + const char *section_name = 0; \ + \ + while ((elf_section = elf_nextscn(elf_handle.get(), elf_section)) != NULL) { \ + section_header = elf##ARCH##_getshdr(elf_section); \ + if (section_header == NULL) { \ + return r; \ + } \ + \ + if ((section_name = elf_strptr(elf_handle.get(), shdrstrndx, section_header->sh_name)) == \ + NULL) { \ + return r; \ + } \ + \ + if (cstrings_eq(section_name, ".gnu_debuglink")) { \ + elf_data = elf_getdata(elf_section, NULL); \ + if (elf_data && elf_data->d_size > 0) { \ + debuglink = std::string(reinterpret_cast(elf_data->d_buf)); \ + } \ + } \ + \ + switch (section_header->sh_type) { \ + case SHT_SYMTAB: \ + symbol_section = elf_section; \ + symbol_count = section_header->sh_size / 
section_header->sh_entsize; \ + symbol_strings = section_header->sh_link; \ + break; \ + \ + /* We use .dynsyms as a last resort, we prefer .symtab */ \ + case SHT_DYNSYM: \ + if (!symbol_section) { \ + symbol_section = elf_section; \ + symbol_count = section_header->sh_size / section_header->sh_entsize; \ + symbol_strings = section_header->sh_link; \ + } \ + break; \ + } \ + } \ + \ + if (symbol_section && symbol_count && symbol_strings) { \ + elf_data = elf_getdata(symbol_section, NULL); \ + symbol = reinterpret_cast(elf_data->d_buf); \ + for (size_t i = 0; i < symbol_count; ++i) { \ + int type = ELF##ARCH##_ST_TYPE(symbol->st_info); \ + if (type == STT_FUNC && symbol->st_value > 0) { \ + r.symbol_cache[symbol->st_value] = \ + std::string(elf_strptr(elf_handle.get(), symbol_strings, symbol->st_name)); \ + } \ + ++symbol; \ + } \ + } + + if (e_ident[EI_CLASS] == ELFCLASS32) { + ELF_GET_DATA(32) + } + else if (e_ident[EI_CLASS] == ELFCLASS64) { + // libelf might have been built without 64 bit support +#if __LIBELF64 + ELF_GET_DATA(64) +#endif + } + + if (!debuglink.empty()) { + // We have a debuglink section! Open an elf instance on that + // file instead. If we can't open the file, then return + // the elf handle we had already opened. 
+ dwarf_file_t debuglink_file; + debuglink_file.reset(open(debuglink.c_str(), O_RDONLY)); + if (debuglink_file.get() > 0) { + dwarf_elf_t debuglink_elf; + debuglink_elf.reset(elf_begin(debuglink_file.get(), ELF_C_READ, NULL)); + + // If we have a valid elf handle, return the new elf handle + // and file handle and discard the original ones + if (debuglink_elf) { + elf_handle = move(debuglink_elf); + file_handle = move(debuglink_file); + } + } + } + + // Ok, we have a valid ELF handle, let's try to get debug symbols + Dwarf_Debug dwarf_debug; + Dwarf_Error error = DW_DLE_NE; + dwarf_handle_t dwarf_handle; + + int dwarf_result = + dwarf_elf_init(elf_handle.get(), DW_DLC_READ, NULL, NULL, &dwarf_debug, &error); + + // We don't do any special handling for DW_DLV_NO_ENTRY specially. + // If we get an error, or the file doesn't have debug information + // we just return. + if (dwarf_result != DW_DLV_OK) { + return r; + } + + dwarf_handle.reset(dwarf_debug); + + r.file_handle = move(file_handle); + r.elf_handle = move(elf_handle); + r.dwarf_handle = move(dwarf_handle); + + return r; + } + + die_cache_entry &get_die_cache(dwarf_fileobject &fobj, Dwarf_Die die) { + Dwarf_Error error = DW_DLE_NE; + + // Get the die offset, we use it as the cache key + Dwarf_Off die_offset; + if (dwarf_dieoffset(die, &die_offset, &error) != DW_DLV_OK) { + die_offset = 0; + } + + die_cache_t::iterator it = fobj.die_cache.find(die_offset); + + if (it != fobj.die_cache.end()) { + fobj.current_cu = &it->second; + return it->second; + } + + die_cache_entry &de = fobj.die_cache[die_offset]; + fobj.current_cu = &de; + + Dwarf_Addr line_addr; + Dwarf_Small table_count; + + // The addresses in the line section are not fully sorted (they might + // be sorted by block of code belonging to the same file), which makes + // it necessary to do so before searching is possible. + // + // As libdwarf allocates a copy of everything, let's get the contents + // of the line section and keep it around. 
We also create a map of + // program counter to line table indices so we can search by address + // and get the line buffer index. + // + // To make things more difficult, the same address can span more than + // one line, so we need to keep the index pointing to the first line + // by using insert instead of the map's [ operator. + + // Get the line context for the DIE + if (dwarf_srclines_b(die, 0, &table_count, &de.line_context, &error) == DW_DLV_OK) { + // Get the source lines for this line context, to be deallocated + // later + if (dwarf_srclines_from_linecontext(de.line_context, &de.line_buffer, &de.line_count, + &error) == DW_DLV_OK) { + // Add all the addresses to our map + for (int i = 0; i < de.line_count; i++) { + if (dwarf_lineaddr(de.line_buffer[i], &line_addr, &error) != DW_DLV_OK) { + line_addr = 0; + } + de.line_section.insert(std::pair(line_addr, i)); + } + } + } + + // For each CU, cache the function DIEs that contain the + // DW_AT_specification attribute. When building with -g3 the function + // DIEs are separated in declaration and specification, with the + // declaration containing only the name and parameters and the + // specification the low/high pc and other compiler attributes. + // + // We cache those specifications so we don't skip over the declarations, + // because they have no pc, and we can do namespace resolution for + // DWARF function names. 
+ Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Die current_die = 0; + if (dwarf_child(die, ¤t_die, &error) == DW_DLV_OK) { + for (;;) { + Dwarf_Die sibling_die = 0; + + Dwarf_Half tag_value; + dwarf_tag(current_die, &tag_value, &error); + + if (tag_value == DW_TAG_subprogram || tag_value == DW_TAG_inlined_subroutine) { + Dwarf_Bool has_attr = 0; + if (dwarf_hasattr(current_die, DW_AT_specification, &has_attr, &error) == + DW_DLV_OK) { + if (has_attr) { + Dwarf_Attribute attr_mem; + if (dwarf_attr(current_die, DW_AT_specification, &attr_mem, &error) == + DW_DLV_OK) { + Dwarf_Off spec_offset = 0; + if (dwarf_formref(attr_mem, &spec_offset, &error) == DW_DLV_OK) { + Dwarf_Off spec_die_offset; + if (dwarf_dieoffset(current_die, &spec_die_offset, &error) == + DW_DLV_OK) { + de.spec_section[spec_offset] = spec_die_offset; + } + } + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + } + } + + int result = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (result == DW_DLV_ERROR) { + break; + } + else if (result == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + current_die = sibling_die; + } + } + return de; + } + + static Dwarf_Die get_referenced_die(Dwarf_Debug dwarf, Dwarf_Die die, Dwarf_Half attr, + bool global) { + Dwarf_Error error = DW_DLE_NE; + Dwarf_Attribute attr_mem; + + Dwarf_Die found_die = NULL; + if (dwarf_attr(die, attr, &attr_mem, &error) == DW_DLV_OK) { + Dwarf_Off offset; + int result = 0; + if (global) { + result = dwarf_global_formref(attr_mem, &offset, &error); + } + else { + result = dwarf_formref(attr_mem, &offset, &error); + } + + if (result == DW_DLV_OK) { + if (dwarf_offdie(dwarf, offset, &found_die, &error) != DW_DLV_OK) { + found_die = NULL; + } + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + return found_die; + } + + static std::string get_referenced_die_name(Dwarf_Debug dwarf, Dwarf_Die die, Dwarf_Half attr, + bool 
global) { + Dwarf_Error error = DW_DLE_NE; + std::string value; + + Dwarf_Die found_die = get_referenced_die(dwarf, die, attr, global); + + if (found_die) { + char *name; + if (dwarf_diename(found_die, &name, &error) == DW_DLV_OK) { + if (name) { + value = std::string(name); + } + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } + dwarf_dealloc(dwarf, found_die, DW_DLA_DIE); + } + + return value; + } + + // Returns a spec DIE linked to the passed one. The caller should + // deallocate the DIE + static Dwarf_Die get_spec_die(dwarf_fileobject &fobj, Dwarf_Die die) { + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + Dwarf_Off die_offset; + if (fobj.current_cu && dwarf_die_CU_offset(die, &die_offset, &error) == DW_DLV_OK) { + die_specmap_t::iterator it = fobj.current_cu->spec_section.find(die_offset); + + // If we have a DIE that completes the current one, check if + // that one has the pc we are looking for + if (it != fobj.current_cu->spec_section.end()) { + Dwarf_Die spec_die = 0; + if (dwarf_offdie(dwarf, it->second, &spec_die, &error) == DW_DLV_OK) { + return spec_die; + } + } + } + + // Maybe we have an abstract origin DIE with the function information? + return get_referenced_die(fobj.dwarf_handle.get(), die, DW_AT_abstract_origin, true); + } + + static bool die_has_pc(dwarf_fileobject &fobj, Dwarf_Die die, Dwarf_Addr pc) { + Dwarf_Addr low_pc = 0, high_pc = 0; + Dwarf_Half high_pc_form = 0; + Dwarf_Form_Class return_class; + Dwarf_Error error = DW_DLE_NE; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + bool has_lowpc = false; + bool has_highpc = false; + bool has_ranges = false; + + if (dwarf_lowpc(die, &low_pc, &error) == DW_DLV_OK) { + // If we have a low_pc check if there is a high pc. + // If we don't have a high pc this might mean we have a base + // address for the ranges list or just an address. 
+ has_lowpc = true; + + if (dwarf_highpc_b(die, &high_pc, &high_pc_form, &return_class, &error) == DW_DLV_OK) { + // We do have a high pc. In DWARF 4+ this is an offset from the + // low pc, but in earlier versions it's an absolute address. + + has_highpc = true; + // In DWARF 2/3 this would be a DW_FORM_CLASS_ADDRESS + if (return_class == DW_FORM_CLASS_CONSTANT) { + high_pc = low_pc + high_pc; + } + + // We have low and high pc, check if our address + // is in that range + return pc >= low_pc && pc < high_pc; + } + } + else { + // Reset the low_pc, in case dwarf_lowpc failing set it to some + // undefined value. + low_pc = 0; + } + + // Check if DW_AT_ranges is present and search for the PC in the + // returned ranges list. We always add the low_pc, as it not set it will + // be 0, in case we had a DW_AT_low_pc and DW_AT_ranges pair + bool result = false; + + Dwarf_Attribute attr; + if (dwarf_attr(die, DW_AT_ranges, &attr, &error) == DW_DLV_OK) { + Dwarf_Off offset; + if (dwarf_global_formref(attr, &offset, &error) == DW_DLV_OK) { + Dwarf_Ranges *ranges; + Dwarf_Signed ranges_count = 0; + Dwarf_Unsigned byte_count = 0; + + if (dwarf_get_ranges_a(dwarf, offset, die, &ranges, &ranges_count, &byte_count, + &error) == DW_DLV_OK) { + has_ranges = ranges_count != 0; + for (int i = 0; i < ranges_count; i++) { + if (ranges[i].dwr_addr1 != 0 && pc >= ranges[i].dwr_addr1 + low_pc && + pc < ranges[i].dwr_addr2 + low_pc) { + result = true; + break; + } + } + dwarf_ranges_dealloc(dwarf, ranges, ranges_count); + } + } + } + + // Last attempt. We might have a single address set as low_pc. + if (!result && low_pc != 0 && pc == low_pc) { + result = true; + } + + // If we don't have lowpc, highpc and ranges maybe this DIE is a + // declaration that relies on a DW_AT_specification DIE that happens + // later. Use the specification cache we filled when we loaded this CU. 
+ if (!result && (!has_lowpc && !has_highpc && !has_ranges)) { + Dwarf_Die spec_die = get_spec_die(fobj, die); + if (spec_die) { + result = die_has_pc(fobj, spec_die, pc); + dwarf_dealloc(dwarf, spec_die, DW_DLA_DIE); + } + } + + return result; + } + + static void get_type(Dwarf_Debug dwarf, Dwarf_Die die, std::string &type) { + Dwarf_Error error = DW_DLE_NE; + + Dwarf_Die child = 0; + if (dwarf_child(die, &child, &error) == DW_DLV_OK) { + get_type(dwarf, child, type); + } + + if (child) { + type.insert(0, "::"); + dwarf_dealloc(dwarf, child, DW_DLA_DIE); + } + + char *name; + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + type.insert(0, std::string(name)); + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } + else { + type.insert(0, ""); + } + } + + static std::string get_type_by_signature(Dwarf_Debug dwarf, Dwarf_Die die) { + Dwarf_Error error = DW_DLE_NE; + + Dwarf_Sig8 signature; + Dwarf_Bool has_attr = 0; + if (dwarf_hasattr(die, DW_AT_signature, &has_attr, &error) == DW_DLV_OK) { + if (has_attr) { + Dwarf_Attribute attr_mem; + if (dwarf_attr(die, DW_AT_signature, &attr_mem, &error) == DW_DLV_OK) { + if (dwarf_formsig8(attr_mem, &signature, &error) != DW_DLV_OK) { + return std::string(""); + } + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + } + + Dwarf_Unsigned next_cu_header; + Dwarf_Sig8 tu_signature; + std::string result; + bool found = false; + + while (dwarf_next_cu_header_d(dwarf, 0, 0, 0, 0, 0, 0, 0, &tu_signature, 0, &next_cu_header, + 0, &error) == DW_DLV_OK) { + if (strncmp(signature.signature, tu_signature.signature, 8) == 0) { + Dwarf_Die type_cu_die = 0; + if (dwarf_siblingof_b(dwarf, 0, 0, &type_cu_die, &error) == DW_DLV_OK) { + Dwarf_Die child_die = 0; + if (dwarf_child(type_cu_die, &child_die, &error) == DW_DLV_OK) { + get_type(dwarf, child_die, result); + found = !result.empty(); + dwarf_dealloc(dwarf, child_die, DW_DLA_DIE); + } + dwarf_dealloc(dwarf, type_cu_die, DW_DLA_DIE); + } + } + } + + if (found) { + while 
(dwarf_next_cu_header_d(dwarf, 0, 0, 0, 0, 0, 0, 0, 0, 0, &next_cu_header, 0, + &error) == DW_DLV_OK) { + // Reset the cu header state. Unfortunately, libdwarf's + // next_cu_header API keeps its own iterator per Dwarf_Debug + // that can't be reset. We need to keep fetching elements until + // the end. + } + } + else { + // If we couldn't resolve the type just print out the signature + std::ostringstream string_stream; + string_stream << "<0x" << std::hex << std::setfill('0'); + for (int i = 0; i < 8; ++i) { + string_stream << std::setw(2) << std::hex + << (int)(unsigned char)(signature.signature[i]); + } + string_stream << ">"; + result = string_stream.str(); + } + return result; + } + + struct type_context_t { + bool is_const; + bool is_typedef; + bool has_type; + bool has_name; + std::string text; + + type_context_t() : is_const(false), is_typedef(false), has_type(false), has_name(false) {} + }; + + // Types are resolved from right to left: we get the variable name first + // and then all specifiers (like const or pointer) in a chain of DW_AT_type + // DIEs. Call this function recursively until we get a complete type + // string. 
+ static void set_parameter_string(dwarf_fileobject &fobj, Dwarf_Die die, + type_context_t &context) { + char *name; + Dwarf_Error error = DW_DLE_NE; + + // typedefs contain also the base type, so we skip it and only + // print the typedef name + if (!context.is_typedef) { + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + if (!context.text.empty()) { + context.text.insert(0, " "); + } + context.text.insert(0, std::string(name)); + dwarf_dealloc(fobj.dwarf_handle.get(), name, DW_DLA_STRING); + } + } + else { + context.is_typedef = false; + context.has_type = true; + if (context.is_const) { + context.text.insert(0, "const "); + context.is_const = false; + } + } + + bool next_type_is_const = false; + bool is_keyword = true; + + Dwarf_Half tag = 0; + Dwarf_Bool has_attr = 0; + if (dwarf_tag(die, &tag, &error) == DW_DLV_OK) { + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + context.has_type = true; + if (dwarf_hasattr(die, DW_AT_signature, &has_attr, &error) == DW_DLV_OK) { + // If we have a signature it means the type is defined + // in .debug_types, so we need to load the DIE pointed + // at by the signature and resolve it + if (has_attr) { + std::string type = get_type_by_signature(fobj.dwarf_handle.get(), die); + if (context.is_const) + type.insert(0, "const "); + + if (!context.text.empty()) + context.text.insert(0, " "); + context.text.insert(0, type); + } + + // Treat enums like typedefs, and skip printing its + // base type + context.is_typedef = (tag == DW_TAG_enumeration_type); + } + break; + case DW_TAG_const_type: + next_type_is_const = true; + break; + case DW_TAG_pointer_type: + context.text.insert(0, "*"); + break; + case DW_TAG_reference_type: + context.text.insert(0, "&"); + break; + case DW_TAG_restrict_type: + context.text.insert(0, "restrict "); + break; + case DW_TAG_rvalue_reference_type: + context.text.insert(0, "&&"); + break; + case DW_TAG_volatile_type: 
+ context.text.insert(0, "volatile "); + break; + case DW_TAG_typedef: + // Propagate the const-ness to the next type + // as typedefs are linked to its base type + next_type_is_const = context.is_const; + context.is_typedef = true; + context.has_type = true; + break; + case DW_TAG_base_type: + context.has_type = true; + break; + case DW_TAG_formal_parameter: + context.has_name = true; + break; + default: + is_keyword = false; + break; + } + } + + if (!is_keyword && context.is_const) { + context.text.insert(0, "const "); + } + + context.is_const = next_type_is_const; + + Dwarf_Die ref = get_referenced_die(fobj.dwarf_handle.get(), die, DW_AT_type, true); + if (ref) { + set_parameter_string(fobj, ref, context); + dwarf_dealloc(fobj.dwarf_handle.get(), ref, DW_DLA_DIE); + } + + if (!context.has_type && context.has_name) { + context.text.insert(0, "void "); + context.has_type = true; + } + } + + // Resolve the function return type and parameters + static void set_function_parameters(std::string &function_name, std::vector &ns, + dwarf_fileobject &fobj, Dwarf_Die die) { + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + Dwarf_Die current_die = 0; + std::string parameters; + bool has_spec = true; + // Check if we have a spec DIE. If we do we use it as it contains + // more information, like parameter names. + Dwarf_Die spec_die = get_spec_die(fobj, die); + if (!spec_die) { + has_spec = false; + spec_die = die; + } + + std::vector::const_iterator it = ns.begin(); + std::string ns_name; + for (it = ns.begin(); it < ns.end(); ++it) { + ns_name.append(*it).append("::"); + } + + if (!ns_name.empty()) { + function_name.insert(0, ns_name); + } + + // See if we have a function return type. 
It can be either on the + // current die or in its spec one (usually true for inlined functions) + std::string return_type = get_referenced_die_name(dwarf, die, DW_AT_type, true); + if (return_type.empty()) { + return_type = get_referenced_die_name(dwarf, spec_die, DW_AT_type, true); + } + if (!return_type.empty()) { + return_type.append(" "); + function_name.insert(0, return_type); + } + + if (dwarf_child(spec_die, ¤t_die, &error) == DW_DLV_OK) { + for (;;) { + Dwarf_Die sibling_die = 0; + + Dwarf_Half tag_value; + dwarf_tag(current_die, &tag_value, &error); + + if (tag_value == DW_TAG_formal_parameter) { + // Ignore artificial (ie, compiler generated) parameters + bool is_artificial = false; + Dwarf_Attribute attr_mem; + if (dwarf_attr(current_die, DW_AT_artificial, &attr_mem, &error) == DW_DLV_OK) { + Dwarf_Bool flag = 0; + if (dwarf_formflag(attr_mem, &flag, &error) == DW_DLV_OK) { + is_artificial = flag != 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (!is_artificial) { + type_context_t context; + set_parameter_string(fobj, current_die, context); + + if (parameters.empty()) { + parameters.append("("); + } + else { + parameters.append(", "); + } + parameters.append(context.text); + } + } + + int result = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (result == DW_DLV_ERROR) { + break; + } + else if (result == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + current_die = sibling_die; + } + } + if (parameters.empty()) + parameters = "("; + parameters.append(")"); + + // If we got a spec DIE we need to deallocate it + if (has_spec) + dwarf_dealloc(dwarf, spec_die, DW_DLA_DIE); + + function_name.append(parameters); + } + + // defined here because in C++98, template function cannot take locally + // defined types... grrr. 
+ struct inliners_search_cb { + void operator()(Dwarf_Die die, std::vector &ns) { + Dwarf_Error error = DW_DLE_NE; + Dwarf_Half tag_value; + Dwarf_Attribute attr_mem; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + + dwarf_tag(die, &tag_value, &error); + + switch (tag_value) { + char *name; + case DW_TAG_subprogram: + if (!trace.source.function.empty()) + break; + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + trace.source.function = std::string(name); + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } + else { + // We don't have a function name in this DIE. + // Check if there is a referenced non-defining + // declaration. + trace.source.function = + get_referenced_die_name(dwarf, die, DW_AT_abstract_origin, true); + if (trace.source.function.empty()) { + trace.source.function = + get_referenced_die_name(dwarf, die, DW_AT_specification, true); + } + } + + // Append the function parameters, if available + set_function_parameters(trace.source.function, ns, fobj, die); + + // If the object function name is empty, it's possible that + // there is no dynamic symbol table (maybe the executable + // was stripped or not built with -rdynamic). See if we have + // a DWARF linkage name to use instead. We try both + // linkage_name and MIPS_linkage_name because the MIPS tag + // was the unofficial one until it was adopted in DWARF4. 
+ // Old gcc versions generate MIPS_linkage_name + if (trace.object_function.empty()) { + details::demangler demangler; + + if (dwarf_attr(die, DW_AT_linkage_name, &attr_mem, &error) != DW_DLV_OK) { + if (dwarf_attr(die, DW_AT_MIPS_linkage_name, &attr_mem, &error) != + DW_DLV_OK) { + break; + } + } + + char *linkage; + if (dwarf_formstring(attr_mem, &linkage, &error) == DW_DLV_OK) { + trace.object_function = demangler.demangle(linkage); + dwarf_dealloc(dwarf, linkage, DW_DLA_STRING); + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + break; + + case DW_TAG_inlined_subroutine: + ResolvedTrace::SourceLoc sloc; + + if (dwarf_diename(die, &name, &error) == DW_DLV_OK) { + sloc.function = std::string(name); + dwarf_dealloc(dwarf, name, DW_DLA_STRING); + } + else { + // We don't have a name for this inlined DIE, it could + // be that there is an abstract origin instead. + // Get the DW_AT_abstract_origin value, which is a + // reference to the source DIE and try to get its name + sloc.function = + get_referenced_die_name(dwarf, die, DW_AT_abstract_origin, true); + } + + set_function_parameters(sloc.function, ns, fobj, die); + + std::string file = die_call_file(dwarf, die, cu_die); + if (!file.empty()) + sloc.filename = file; + + Dwarf_Unsigned number = 0; + if (dwarf_attr(die, DW_AT_call_line, &attr_mem, &error) == DW_DLV_OK) { + if (dwarf_formudata(attr_mem, &number, &error) == DW_DLV_OK) { + sloc.line = number; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (dwarf_attr(die, DW_AT_call_column, &attr_mem, &error) == DW_DLV_OK) { + if (dwarf_formudata(attr_mem, &number, &error) == DW_DLV_OK) { + sloc.col = number; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + trace.inliners.push_back(sloc); + break; + }; + } + ResolvedTrace &trace; + dwarf_fileobject &fobj; + Dwarf_Die cu_die; + inliners_search_cb(ResolvedTrace &t, dwarf_fileobject &f, Dwarf_Die c) + : trace(t), fobj(f), cu_die(c) {} + }; + + static Dwarf_Die 
find_fundie_by_pc(dwarf_fileobject &fobj, Dwarf_Die parent_die, Dwarf_Addr pc, + Dwarf_Die result) { + Dwarf_Die current_die = 0; + Dwarf_Error error = DW_DLE_NE; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + + if (dwarf_child(parent_die, ¤t_die, &error) != DW_DLV_OK) { + return NULL; + } + + for (;;) { + Dwarf_Die sibling_die = 0; + Dwarf_Half tag_value; + dwarf_tag(current_die, &tag_value, &error); + + switch (tag_value) { + case DW_TAG_subprogram: + case DW_TAG_inlined_subroutine: + if (die_has_pc(fobj, current_die, pc)) { + return current_die; + } + }; + bool declaration = false; + Dwarf_Attribute attr_mem; + if (dwarf_attr(current_die, DW_AT_declaration, &attr_mem, &error) == DW_DLV_OK) { + Dwarf_Bool flag = 0; + if (dwarf_formflag(attr_mem, &flag, &error) == DW_DLV_OK) { + declaration = flag != 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (!declaration) { + // let's be curious and look deeper in the tree, functions are + // not necessarily at the first level, but might be nested + // inside a namespace, structure, a function, an inlined + // function etc. 
+ Dwarf_Die die_mem = 0; + Dwarf_Die indie = find_fundie_by_pc(fobj, current_die, pc, die_mem); + if (indie) { + result = die_mem; + return result; + } + } + + int res = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (res == DW_DLV_ERROR) { + return NULL; + } + else if (res == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != parent_die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + current_die = sibling_die; + } + return NULL; + } + + template + static bool deep_first_search_by_pc(dwarf_fileobject &fobj, Dwarf_Die parent_die, Dwarf_Addr pc, + std::vector &ns, CB cb) { + Dwarf_Die current_die = 0; + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + + if (dwarf_child(parent_die, ¤t_die, &error) != DW_DLV_OK) { + return false; + } + + bool branch_has_pc = false; + bool has_namespace = false; + for (;;) { + Dwarf_Die sibling_die = 0; + + Dwarf_Half tag; + if (dwarf_tag(current_die, &tag, &error) == DW_DLV_OK) { + if (tag == DW_TAG_namespace || tag == DW_TAG_class_type) { + char *ns_name = NULL; + if (dwarf_diename(current_die, &ns_name, &error) == DW_DLV_OK) { + if (ns_name) { + ns.push_back(std::string(ns_name)); + } + else { + ns.push_back(""); + } + dwarf_dealloc(dwarf, ns_name, DW_DLA_STRING); + } + else { + ns.push_back(""); + } + has_namespace = true; + } + } + + bool declaration = false; + Dwarf_Attribute attr_mem; + if (tag != DW_TAG_class_type && + dwarf_attr(current_die, DW_AT_declaration, &attr_mem, &error) == DW_DLV_OK) { + Dwarf_Bool flag = 0; + if (dwarf_formflag(attr_mem, &flag, &error) == DW_DLV_OK) { + declaration = flag != 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + } + + if (!declaration) { + // let's be curious and look deeper in the tree, function are + // not necessarily at the first level, but might be nested + // inside a namespace, structure, a function, an inlined + // function etc. 
+ branch_has_pc = deep_first_search_by_pc(fobj, current_die, pc, ns, cb); + } + + if (!branch_has_pc) { + branch_has_pc = die_has_pc(fobj, current_die, pc); + } + + if (branch_has_pc) { + cb(current_die, ns); + } + + int result = dwarf_siblingof(dwarf, current_die, &sibling_die, &error); + if (result == DW_DLV_ERROR) { + return false; + } + else if (result == DW_DLV_NO_ENTRY) { + break; + } + + if (current_die != parent_die) { + dwarf_dealloc(dwarf, current_die, DW_DLA_DIE); + current_die = 0; + } + + if (has_namespace) { + has_namespace = false; + ns.pop_back(); + } + current_die = sibling_die; + } + + if (has_namespace) { + ns.pop_back(); + } + return branch_has_pc; + } + + static std::string die_call_file(Dwarf_Debug dwarf, Dwarf_Die die, Dwarf_Die cu_die) { + Dwarf_Attribute attr_mem; + Dwarf_Error error = DW_DLE_NE; + Dwarf_Unsigned file_index; + + std::string file; + + if (dwarf_attr(die, DW_AT_call_file, &attr_mem, &error) == DW_DLV_OK) { + if (dwarf_formudata(attr_mem, &file_index, &error) != DW_DLV_OK) { + file_index = 0; + } + dwarf_dealloc(dwarf, attr_mem, DW_DLA_ATTR); + + if (file_index == 0) { + return file; + } + + char **srcfiles = 0; + Dwarf_Signed file_count = 0; + if (dwarf_srcfiles(cu_die, &srcfiles, &file_count, &error) == DW_DLV_OK) { + if (file_count > 0 && file_index <= static_cast(file_count)) { + file = std::string(srcfiles[file_index - 1]); + } + + // Deallocate all strings! + for (int i = 0; i < file_count; ++i) { + dwarf_dealloc(dwarf, srcfiles[i], DW_DLA_STRING); + } + dwarf_dealloc(dwarf, srcfiles, DW_DLA_LIST); + } + } + return file; + } + + Dwarf_Die find_die(dwarf_fileobject &fobj, Dwarf_Addr addr) { + // Let's get to work! 
First see if we have a debug_aranges section so + // we can speed up the search + + Dwarf_Debug dwarf = fobj.dwarf_handle.get(); + Dwarf_Error error = DW_DLE_NE; + Dwarf_Arrange *aranges; + Dwarf_Signed arrange_count; + + Dwarf_Die returnDie; + bool found = false; + if (dwarf_get_aranges(dwarf, &aranges, &arrange_count, &error) != DW_DLV_OK) { + aranges = NULL; + } + + if (aranges) { + // We have aranges. Get the one where our address is. + Dwarf_Arrange arrange; + if (dwarf_get_arrange(aranges, arrange_count, addr, &arrange, &error) == DW_DLV_OK) { + // We found our address. Get the compilation-unit DIE offset + // represented by the given address range. + Dwarf_Off cu_die_offset; + if (dwarf_get_cu_die_offset(arrange, &cu_die_offset, &error) == DW_DLV_OK) { + // Get the DIE at the offset returned by the aranges search. + // We set is_info to 1 to specify that the offset is from + // the .debug_info section (and not .debug_types) + int dwarf_result = dwarf_offdie_b(dwarf, cu_die_offset, 1, &returnDie, &error); + + found = dwarf_result == DW_DLV_OK; + } + dwarf_dealloc(dwarf, arrange, DW_DLA_ARRANGE); + } + } + + if (found) + return returnDie; // The caller is responsible for freeing the die + + // The search for aranges failed. Try to find our address by scanning + // all compilation units. + Dwarf_Unsigned next_cu_header; + Dwarf_Half tag = 0; + returnDie = 0; + + while (!found && dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, &next_cu_header, + 0, &error) == DW_DLV_OK) { + if (returnDie) + dwarf_dealloc(dwarf, returnDie, DW_DLA_DIE); + + if (dwarf_siblingof(dwarf, 0, &returnDie, &error) == DW_DLV_OK) { + if ((dwarf_tag(returnDie, &tag, &error) == DW_DLV_OK) && + tag == DW_TAG_compile_unit) { + if (die_has_pc(fobj, returnDie, addr)) { + found = true; + } + } + } + } + + if (found) { + while (dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, &next_cu_header, 0, + &error) == DW_DLV_OK) { + // Reset the cu header state. 
Libdwarf's next_cu_header API + // keeps its own iterator per Dwarf_Debug that can't be reset. + // We need to keep fetching elements until the end. + } + } + + if (found) + return returnDie; + + // We couldn't find any compilation units with ranges or a high/low pc. + // Try again by looking at all DIEs in all compilation units. + Dwarf_Die cudie; + while (dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, &next_cu_header, 0, + &error) == DW_DLV_OK) { + if (dwarf_siblingof(dwarf, 0, &cudie, &error) == DW_DLV_OK) { + Dwarf_Die die_mem = 0; + Dwarf_Die resultDie = find_fundie_by_pc(fobj, cudie, addr, die_mem); + + if (resultDie) { + found = true; + break; + } + } + } + + if (found) { + while (dwarf_next_cu_header_d(dwarf, 1, 0, 0, 0, 0, 0, 0, 0, 0, &next_cu_header, 0, + &error) == DW_DLV_OK) { + // Reset the cu header state. Libdwarf's next_cu_header API + // keeps its own iterator per Dwarf_Debug that can't be reset. + // We need to keep fetching elements until the end. + } + } + + if (found) + return cudie; + + // We failed. 
+ return NULL; + } +}; +#endif // BACKWARD_HAS_DWARF == 1 + +template <> +class TraceResolverImpl + : public TraceResolverLinuxImpl {}; + +#endif // BACKWARD_SYSTEM_LINUX + +#ifdef BACKWARD_SYSTEM_DARWIN + +template +class TraceResolverDarwinImpl; + +template <> +class TraceResolverDarwinImpl : public TraceResolverImplBase { + public: + void load_addresses(void *const *addresses, int address_count) override { + if (address_count == 0) { + return; + } + _symbols.reset(backtrace_symbols(addresses, address_count)); + } + + ResolvedTrace resolve(ResolvedTrace trace) override { + // parse: + // + + char *filename = _symbols[trace.idx]; + + // skip " " + while (*filename && *filename != ' ') filename++; + while (*filename == ' ') filename++; + + // find start of from end ( may contain a space) + char *p = filename + strlen(filename) - 1; + // skip to start of " + " + while (p > filename && *p != ' ') p--; + while (p > filename && *p == ' ') p--; + while (p > filename && *p != ' ') p--; + while (p > filename && *p == ' ') p--; + char *funcname_end = p + 1; + + // skip to start of "" + while (p > filename && *p != ' ') p--; + char *funcname = p + 1; + + // skip to start of " " + while (p > filename && *p == ' ') p--; + while (p > filename && *p != ' ') p--; + while (p > filename && *p == ' ') p--; + + // skip "", handling the case where it contains a + char *filename_end = p + 1; + if (p == filename) { + // something went wrong, give up + filename_end = filename + strlen(filename); + funcname = filename_end; + } + trace.object_filename.assign(filename, + filename_end); // ok even if filename_end is the ending \0 + // (then we assign entire string) + + if (*funcname) { // if it's not end of string + *funcname_end = '\0'; + + trace.object_function = this->demangle(funcname); + trace.object_function += " "; + trace.object_function += (funcname_end + 1); + trace.source.function = trace.object_function; // we cannot do better. 
+ } + return trace; + } + + private: + details::handle _symbols; +}; + +template <> +class TraceResolverImpl + : public TraceResolverDarwinImpl {}; + +#endif // BACKWARD_SYSTEM_DARWIN + +#ifdef BACKWARD_SYSTEM_WINDOWS + +// Load all symbol info +// Based on: +// https://stackoverflow.com/questions/6205981/windows-c-stack-trace-from-a-running-app/28276227#28276227 + +struct module_data { + std::string image_name; + std::string module_name; + void *base_address; + DWORD load_size; +}; + +class get_mod_info { + HANDLE process; + static const int buffer_length = 4096; + + public: + get_mod_info(HANDLE h) : process(h) {} + + module_data operator()(HMODULE module) { + module_data ret; + char temp[buffer_length]; + MODULEINFO mi; + + GetModuleInformation(process, module, &mi, sizeof(mi)); + ret.base_address = mi.lpBaseOfDll; + ret.load_size = mi.SizeOfImage; + + GetModuleFileNameExA(process, module, temp, sizeof(temp)); + ret.image_name = temp; + GetModuleBaseNameA(process, module, temp, sizeof(temp)); + ret.module_name = temp; + std::vector img(ret.image_name.begin(), ret.image_name.end()); + std::vector mod(ret.module_name.begin(), ret.module_name.end()); + SymLoadModule64(process, 0, &img[0], &mod[0], (DWORD64)ret.base_address, ret.load_size); + return ret; + } +}; + +template <> +class TraceResolverImpl : public TraceResolverImplBase { + public: + TraceResolverImpl() { + HANDLE process = GetCurrentProcess(); + + std::vector modules; + DWORD cbNeeded; + std::vector module_handles(1); + SymInitialize(process, NULL, false); + DWORD symOptions = SymGetOptions(); + symOptions |= SYMOPT_LOAD_LINES | SYMOPT_UNDNAME; + SymSetOptions(symOptions); + EnumProcessModules(process, &module_handles[0], + static_cast(module_handles.size() * sizeof(HMODULE)), &cbNeeded); + module_handles.resize(cbNeeded / sizeof(HMODULE)); + EnumProcessModules(process, &module_handles[0], + static_cast(module_handles.size() * sizeof(HMODULE)), &cbNeeded); + std::transform(module_handles.begin(), 
module_handles.end(), std::back_inserter(modules), + get_mod_info(process)); + void *base = modules[0].base_address; + IMAGE_NT_HEADERS *h = ImageNtHeader(base); + image_type = h->FileHeader.Machine; + } + + static const int max_sym_len = 255; + struct symbol_t { + SYMBOL_INFO sym; + char buffer[max_sym_len]; + } sym; + + DWORD64 displacement; + + ResolvedTrace resolve(ResolvedTrace t) override { + HANDLE process = GetCurrentProcess(); + + char name[256]; + + memset(&sym, 0, sizeof(sym)); + sym.sym.SizeOfStruct = sizeof(SYMBOL_INFO); + sym.sym.MaxNameLen = max_sym_len; + + if (!SymFromAddr(process, (ULONG64)t.addr, &displacement, &sym.sym)) { + // TODO: error handling everywhere + char *lpMsgBuf; + DWORD dw = GetLastError(); + + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, dw, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (char *)&lpMsgBuf, 0, NULL)) { + std::fprintf(stderr, "%s\n", lpMsgBuf); + LocalFree(lpMsgBuf); + } + + // abort(); + } + UnDecorateSymbolName(sym.sym.Name, (PSTR)name, 256, UNDNAME_COMPLETE); + + DWORD offset = 0; + IMAGEHLP_LINE line; + if (SymGetLineFromAddr(process, (ULONG64)t.addr, &offset, &line)) { + t.object_filename = line.FileName; + t.source.filename = line.FileName; + t.source.line = line.LineNumber; + t.source.col = offset; + } + + t.source.function = name; + t.object_filename = ""; + t.object_function = name; + + return t; + } + + DWORD machine_type() const { return image_type; } + + private: + DWORD image_type; +}; + +#endif + +class TraceResolver : public TraceResolverImpl {}; + +/*************** CODE SNIPPET ***************/ + +class SourceFile { + public: + typedef std::vector > lines_t; + + SourceFile() {} + SourceFile(const std::string &path) { + // 1. If BACKWARD_CXX_SOURCE_PREFIXES is set then assume it contains + // a colon-separated list of path prefixes. Try prepending each + // to the given path until a valid file is found. 
+ const std::vector &prefixes = get_paths_from_env_variable(); + for (size_t i = 0; i < prefixes.size(); ++i) { + // Double slashes (//) should not be a problem. + std::string new_path = prefixes[i] + '/' + path; + _file.reset(new std::ifstream(new_path.c_str())); + if (is_open()) + break; + } + // 2. If no valid file found then fallback to opening the path as-is. + if (!_file || !is_open()) { + _file.reset(new std::ifstream(path.c_str())); + } + } + bool is_open() const { return _file->is_open(); } + + lines_t &get_lines(unsigned line_start, unsigned line_count, lines_t &lines) { + using namespace std; + // This function make uses of the dumbest algo ever: + // 1) seek(0) + // 2) read lines one by one and discard until line_start + // 3) read line one by one until line_start + line_count + // + // If you are getting snippets many time from the same file, it is + // somewhat a waste of CPU, feel free to benchmark and propose a + // better solution ;) + + _file->clear(); + _file->seekg(0); + string line; + unsigned line_idx; + + for (line_idx = 1; line_idx < line_start; ++line_idx) { + std::getline(*_file, line); + if (!*_file) { + return lines; + } + } + + // think of it like a lambda in C++98 ;) + // but look, I will reuse it two times! + // What a good boy am I. 
+ struct isspace { + bool operator()(char c) { return std::isspace(c); } + }; + + bool started = false; + for (; line_idx < line_start + line_count; ++line_idx) { + getline(*_file, line); + if (!*_file) { + return lines; + } + if (!started) { + if (std::find_if(line.begin(), line.end(), not_isspace()) == line.end()) + continue; + started = true; + } + lines.push_back(make_pair(line_idx, line)); + } + + lines.erase(std::find_if(lines.rbegin(), lines.rend(), not_isempty()).base(), lines.end()); + return lines; + } + + lines_t get_lines(unsigned line_start, unsigned line_count) { + lines_t lines; + return get_lines(line_start, line_count, lines); + } + + // there is no find_if_not in C++98, lets do something crappy to + // workaround. + struct not_isspace { + bool operator()(char c) { return !std::isspace(c); } + }; + // and define this one here because C++98 is not happy with local defined + // struct passed to template functions, fuuuu. + struct not_isempty { + bool operator()(const lines_t::value_type &p) { + return !(std::find_if(p.second.begin(), p.second.end(), not_isspace()) == + p.second.end()); + } + }; + + void swap(SourceFile &b) { _file.swap(b._file); } + +#ifdef BACKWARD_ATLEAST_CXX11 + SourceFile(SourceFile &&from) : _file(nullptr) { swap(from); } + SourceFile &operator=(SourceFile &&from) { + swap(from); + return *this; + } +#else + explicit SourceFile(const SourceFile &from) { + // some sort of poor man's move semantic. + swap(const_cast(from)); + } + SourceFile &operator=(const SourceFile &from) { + // some sort of poor man's move semantic. 
+ swap(const_cast(from)); + return *this; + } +#endif + + // Allow adding to paths gotten from BACKWARD_CXX_SOURCE_PREFIXES after loading the + // library; this can be useful when the library is loaded when the locations are unknown + // Warning: Because this edits the static paths variable, it is *not* intrinsiclly thread safe + static void add_paths_to_env_variable_impl(const std::string &to_add) { + get_mutable_paths_from_env_variable().push_back(to_add); + } + + private: + details::handle > _file; + + static std::vector get_paths_from_env_variable_impl() { + std::vector paths; + const char *prefixes_str = std::getenv("BACKWARD_CXX_SOURCE_PREFIXES"); + if (prefixes_str && prefixes_str[0]) { + paths = details::split_source_prefixes(prefixes_str); + } + return paths; + } + + static std::vector &get_mutable_paths_from_env_variable() { + static volatile std::vector paths = get_paths_from_env_variable_impl(); + return const_cast &>(paths); + } + + static const std::vector &get_paths_from_env_variable() { + return get_mutable_paths_from_env_variable(); + } + +#ifdef BACKWARD_ATLEAST_CXX11 + SourceFile(const SourceFile &) = delete; + SourceFile &operator=(const SourceFile &) = delete; +#endif +}; + +class SnippetFactory { + public: + typedef SourceFile::lines_t lines_t; + + lines_t get_snippet(const std::string &filename, unsigned line_start, unsigned context_size) { + SourceFile &src_file = get_src_file(filename); + unsigned start = line_start - context_size / 2; + return src_file.get_lines(start, context_size); + } + + lines_t get_combined_snippet(const std::string &filename_a, unsigned line_a, + const std::string &filename_b, unsigned line_b, + unsigned context_size) { + SourceFile &src_file_a = get_src_file(filename_a); + SourceFile &src_file_b = get_src_file(filename_b); + + lines_t lines = src_file_a.get_lines(line_a - context_size / 4, context_size / 2); + src_file_b.get_lines(line_b - context_size / 4, context_size / 2, lines); + return lines; + } + + lines_t 
get_coalesced_snippet(const std::string &filename, unsigned line_a, unsigned line_b, + unsigned context_size) { + SourceFile &src_file = get_src_file(filename); + + using std::max; + using std::min; + unsigned a = min(line_a, line_b); + unsigned b = max(line_a, line_b); + + if ((b - a) < (context_size / 3)) { + return src_file.get_lines((a + b - context_size + 1) / 2, context_size); + } + + lines_t lines = src_file.get_lines(a - context_size / 4, context_size / 2); + src_file.get_lines(b - context_size / 4, context_size / 2, lines); + return lines; + } + + private: + typedef details::hashtable::type src_files_t; + src_files_t _src_files; + + SourceFile &get_src_file(const std::string &filename) { + src_files_t::iterator it = _src_files.find(filename); + if (it != _src_files.end()) { + return it->second; + } + SourceFile &new_src_file = _src_files[filename]; + new_src_file = SourceFile(filename); + return new_src_file; + } +}; + +/*************** PRINTER ***************/ + +namespace ColorMode { +enum type { automatic, never, always }; +} + +class cfile_streambuf : public std::streambuf { + public: + cfile_streambuf(FILE *_sink) : sink(_sink) {} + int_type underflow() override { return traits_type::eof(); } + int_type overflow(int_type ch) override { + if (traits_type::not_eof(ch) && fputc(ch, sink) != EOF) { + return ch; + } + return traits_type::eof(); + } + + std::streamsize xsputn(const char_type *s, std::streamsize count) override { + return static_cast(fwrite(s, sizeof *s, static_cast(count), sink)); + } + +#ifdef BACKWARD_ATLEAST_CXX11 + public: + cfile_streambuf(const cfile_streambuf &) = delete; + cfile_streambuf &operator=(const cfile_streambuf &) = delete; +#else + private: + cfile_streambuf(const cfile_streambuf &); + cfile_streambuf &operator=(const cfile_streambuf &); +#endif + + private: + FILE *sink; + std::vector buffer; +}; + +#ifdef BACKWARD_SYSTEM_LINUX + +namespace Color { +enum type { yellow = 33, purple = 35, reset = 39 }; +} // namespace 
Color + +class Colorize { + public: + Colorize(std::ostream &os) : _os(os), _reset(false), _enabled(false) {} + + void activate(ColorMode::type mode) { _enabled = mode == ColorMode::always; } + + void activate(ColorMode::type mode, FILE *fp) { activate(mode, fileno(fp)); } + + void set_color(Color::type ccode) { + if (!_enabled) + return; + + // I assume that the terminal can handle basic colors. Seriously I + // don't want to deal with all the termcap shit. + _os << "\033[" << static_cast(ccode) << "m"; + _reset = (ccode != Color::reset); + } + + ~Colorize() { + if (_reset) { + set_color(Color::reset); + } + } + + private: + void activate(ColorMode::type mode, int fd) { + activate(mode == ColorMode::automatic && isatty(fd) ? ColorMode::always : mode); + } + + std::ostream &_os; + bool _reset; + bool _enabled; +}; + +#else // ndef BACKWARD_SYSTEM_LINUX + +namespace Color { +enum type { yellow = 0, purple = 0, reset = 0 }; +} // namespace Color + +class Colorize { + public: + Colorize(std::ostream &) {} + void activate(ColorMode::type) {} + void activate(ColorMode::type, FILE *) {} + void set_color(Color::type) {} +}; + +#endif // BACKWARD_SYSTEM_LINUX + +class Printer { + public: + bool snippet; + ColorMode::type color_mode; + bool address; + bool object; + int inliner_context_size; + int trace_context_size; + bool reverse; + + Printer() + : snippet(true), + color_mode(ColorMode::automatic), + address(false), + object(false), + inliner_context_size(5), + trace_context_size(7), + reverse(true) {} + + template + FILE *print(ST &st, FILE *fp = stderr) { + cfile_streambuf obuf(fp); + std::ostream os(&obuf); + Colorize colorize(os); + colorize.activate(color_mode, fp); + print_stacktrace(st, os, colorize); + return fp; + } + + template + std::ostream &print(ST &st, std::ostream &os) { + Colorize colorize(os); + colorize.activate(color_mode); + print_stacktrace(st, os, colorize); + return os; + } + + template + FILE *print(IT begin, IT end, FILE *fp = stderr, size_t 
thread_id = 0) { + cfile_streambuf obuf(fp); + std::ostream os(&obuf); + Colorize colorize(os); + colorize.activate(color_mode, fp); + print_stacktrace(begin, end, os, thread_id, colorize); + return fp; + } + + template + std::ostream &print(IT begin, IT end, std::ostream &os, size_t thread_id = 0) { + Colorize colorize(os); + colorize.activate(color_mode); + print_stacktrace(begin, end, os, thread_id, colorize); + return os; + } + + TraceResolver const &resolver() const { return _resolver; } + + private: + TraceResolver _resolver; + SnippetFactory _snippets; + + template + void print_stacktrace(ST &st, std::ostream &os, Colorize &colorize) { + print_header(os, st.thread_id()); + _resolver.load_stacktrace(st); + if (reverse) { + for (size_t trace_idx = st.size(); trace_idx > 0; --trace_idx) { + print_trace(os, _resolver.resolve(st[trace_idx - 1]), colorize); + } + } + else { + for (size_t trace_idx = 0; trace_idx < st.size(); ++trace_idx) { + print_trace(os, _resolver.resolve(st[trace_idx]), colorize); + } + } + } + + template + void print_stacktrace(IT begin, IT end, std::ostream &os, size_t thread_id, + Colorize &colorize) { + print_header(os, thread_id); + for (; begin != end; ++begin) { + print_trace(os, *begin, colorize); + } + } + + void print_header(std::ostream &os, size_t thread_id) { + os << "Stack trace (most recent call last)"; + if (thread_id) { + os << " in thread " << thread_id; + } + os << ":\n"; + } + + void print_trace(std::ostream &os, const ResolvedTrace &trace, Colorize &colorize) { + os << "#" << std::left << std::setw(2) << trace.idx << std::right; + bool already_indented = true; + + if (!trace.source.filename.size() || object) { + os << " Object \"" << trace.object_filename << "\", at " << trace.addr << ", in " + << trace.object_function << "\n"; + already_indented = false; + } + + for (size_t inliner_idx = trace.inliners.size(); inliner_idx > 0; --inliner_idx) { + if (!already_indented) { + os << " "; + } + const ResolvedTrace::SourceLoc 
&inliner_loc = trace.inliners[inliner_idx - 1]; + print_source_loc(os, " | ", inliner_loc); + if (snippet) { + print_snippet(os, " | ", inliner_loc, colorize, Color::purple, + inliner_context_size); + } + already_indented = false; + } + + if (trace.source.filename.size()) { + if (!already_indented) { + os << " "; + } + print_source_loc(os, " ", trace.source, trace.addr); + if (snippet) { + print_snippet(os, " ", trace.source, colorize, Color::yellow, + trace_context_size); + } + } + } + + void print_snippet(std::ostream &os, const char *indent, + const ResolvedTrace::SourceLoc &source_loc, Colorize &colorize, + Color::type color_code, int context_size) { + using namespace std; + typedef SnippetFactory::lines_t lines_t; + + lines_t lines = _snippets.get_snippet(source_loc.filename, source_loc.line, + static_cast(context_size)); + + for (lines_t::const_iterator it = lines.begin(); it != lines.end(); ++it) { + if (it->first == source_loc.line) { + colorize.set_color(color_code); + os << indent << ">"; + } + else { + os << indent << " "; + } + os << std::setw(4) << it->first << ": " << it->second << "\n"; + if (it->first == source_loc.line) { + colorize.set_color(Color::reset); + } + } + } + + void print_source_loc(std::ostream &os, const char *indent, + const ResolvedTrace::SourceLoc &source_loc, void *addr = nullptr) { + os << indent << "Source \"" << source_loc.filename << "\", line " << source_loc.line + << ", in " << source_loc.function; + + if (address && addr != nullptr) { + os << " [" << addr << "]"; + } + os << "\n"; + } +}; + +/*************** SIGNALS HANDLING ***************/ + +#if defined(BACKWARD_SYSTEM_LINUX) || defined(BACKWARD_SYSTEM_DARWIN) + +class SignalHandling { + public: + static std::vector make_default_signals() { + const int posix_signals[] = { + // Signals for which the default action is "Core". 
+ SIGABRT, // Abort signal from abort(3) + SIGBUS, // Bus error (bad memory access) + SIGFPE, // Floating point exception + SIGILL, // Illegal Instruction + SIGIOT, // IOT trap. A synonym for SIGABRT + SIGQUIT, // Quit from keyboard + SIGSEGV, // Invalid memory reference + SIGSYS, // Bad argument to routine (SVr4) + SIGTRAP, // Trace/breakpoint trap + SIGXCPU, // CPU time limit exceeded (4.2BSD) + SIGXFSZ, // File size limit exceeded (4.2BSD) +#if defined(BACKWARD_SYSTEM_DARWIN) + SIGEMT, // emulation instruction executed +#endif + }; + return std::vector(posix_signals, + posix_signals + sizeof posix_signals / sizeof posix_signals[0]); + } + + SignalHandling(const std::vector &posix_signals = make_default_signals()) + : _loaded(false) { + bool success = true; + + const size_t stack_size = 1024 * 1024 * 8; + _stack_content.reset(static_cast(malloc(stack_size))); + if (_stack_content) { + stack_t ss; + ss.ss_sp = _stack_content.get(); + ss.ss_size = stack_size; + ss.ss_flags = 0; + if (sigaltstack(&ss, nullptr) < 0) { + success = false; + } + } + else { + success = false; + } + + for (size_t i = 0; i < posix_signals.size(); ++i) { + struct sigaction action; + memset(&action, 0, sizeof action); + action.sa_flags = static_cast(SA_SIGINFO | SA_ONSTACK | SA_NODEFER | SA_RESETHAND); + sigfillset(&action.sa_mask); + sigdelset(&action.sa_mask, posix_signals[i]); +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdisabled-macro-expansion" +#endif + action.sa_sigaction = &sig_handler; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + + int r = sigaction(posix_signals[i], &action, nullptr); + if (r < 0) + success = false; + } + + _loaded = success; + } + + bool loaded() const { return _loaded; } + + static void handleSignal(int, siginfo_t *info, void *_ctx) { + ucontext_t *uctx = static_cast(_ctx); + + StackTrace st; + void *error_addr = nullptr; +#ifdef REG_RIP // x86_64 + error_addr = 
reinterpret_cast(uctx->uc_mcontext.gregs[REG_RIP]); +#elif defined(REG_EIP) // x86_32 + error_addr = reinterpret_cast(uctx->uc_mcontext.gregs[REG_EIP]); +#elif defined(__arm__) + error_addr = reinterpret_cast(uctx->uc_mcontext.arm_pc); +#elif defined(__aarch64__) +#if defined(__APPLE__) + error_addr = reinterpret_cast(uctx->uc_mcontext->__ss.__pc); +#else + error_addr = reinterpret_cast(uctx->uc_mcontext.pc); +#endif +#elif defined(__mips__) + error_addr = reinterpret_cast( + reinterpret_cast(&uctx->uc_mcontext)->sc_pc); +#elif defined(__APPLE__) && defined(__POWERPC__) + error_addr = reinterpret_cast(uctx->uc_mcontext->__ss.__srr0); +#elif defined(__ppc__) || defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) + error_addr = reinterpret_cast(uctx->uc_mcontext.regs->nip); +#elif defined(__riscv) + error_addr = reinterpret_cast(uctx->uc_mcontext.__gregs[REG_PC]); +#elif defined(__s390x__) + error_addr = reinterpret_cast(uctx->uc_mcontext.psw.addr); +#elif defined(__APPLE__) && defined(__x86_64__) + error_addr = reinterpret_cast(uctx->uc_mcontext->__ss.__rip); +#elif defined(__APPLE__) + error_addr = reinterpret_cast(uctx->uc_mcontext->__ss.__eip); +#elif defined(__loongarch__) + error_addr = reinterpret_cast(uctx->uc_mcontext.__pc); +#else +#warning ":/ sorry, ain't know no nothing none not of your architecture!" 
+#endif + if (error_addr) { + st.load_from(error_addr, 32, reinterpret_cast(uctx), info->si_addr); + } + else { + st.load_here(32, reinterpret_cast(uctx), info->si_addr); + } + + Printer printer; + printer.address = true; + printer.print(st, stderr); + +#if (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 700) || \ + (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200809L) + psiginfo(info, nullptr); +#else + (void)info; +#endif + } + + private: + details::handle _stack_content; + bool _loaded; + +#ifdef __GNUC__ + __attribute__((noreturn)) +#endif + static void + sig_handler(int signo, siginfo_t *info, void *_ctx) { + handleSignal(signo, info, _ctx); + + // try to forward the signal. + raise(info->si_signo); + + // terminate the process immediately. + puts("watf? exit"); + _exit(EXIT_FAILURE); + } +}; + +#endif // BACKWARD_SYSTEM_LINUX || BACKWARD_SYSTEM_DARWIN + +#ifdef BACKWARD_SYSTEM_WINDOWS + +class SignalHandling { + public: + SignalHandling(const std::vector & = std::vector()) + : reporter_thread_([]() { + /* We handle crashes in a utility thread: + backward structures and some Windows functions called here + need stack space, which we do not have when we encounter a + stack overflow. + To support reporting stack traces during a stack overflow, + we create a utility thread at startup, which waits until a + crash happens or the program exits normally. 
*/ + + { + std::unique_lock lk(mtx()); + cv().wait(lk, [] { return crashed() != crash_status::running; }); + } + if (crashed() == crash_status::crashed) { + handle_stacktrace(skip_recs()); + } + { + std::unique_lock lk(mtx()); + crashed() = crash_status::ending; + } + cv().notify_one(); + }) { + SetUnhandledExceptionFilter(crash_handler); + + signal(SIGABRT, signal_handler); + _set_abort_behavior(0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT); + + std::set_terminate(&terminator); +#ifndef BACKWARD_ATLEAST_CXX17 + std::set_unexpected(&terminator); +#endif + _set_purecall_handler(&terminator); + _set_invalid_parameter_handler(&invalid_parameter_handler); + } + bool loaded() const { return true; } + + ~SignalHandling() { + { + std::unique_lock lk(mtx()); + crashed() = crash_status::normal_exit; + } + + cv().notify_one(); + + reporter_thread_.join(); + } + + private: + static CONTEXT *ctx() { + static CONTEXT data; + return &data; + } + + enum class crash_status { running, crashed, normal_exit, ending }; + + static crash_status &crashed() { + static crash_status data; + return data; + } + + static std::mutex &mtx() { + static std::mutex data; + return data; + } + + static std::condition_variable &cv() { + static std::condition_variable data; + return data; + } + + static HANDLE &thread_handle() { + static HANDLE handle; + return handle; + } + + std::thread reporter_thread_; + + // TODO: how not to hardcode these? 
+ static const constexpr int signal_skip_recs = +#ifdef __clang__ + // With clang, RtlCaptureContext also captures the stack frame of the + // current function Below that, there are 3 internal Windows functions + 4 +#else + // With MSVC cl, RtlCaptureContext misses the stack frame of the current + // function The first entries during StackWalk are the 3 internal Windows + // functions + 3 +#endif + ; + + static int &skip_recs() { + static int data; + return data; + } + + static inline void terminator() { + crash_handler(signal_skip_recs); + abort(); + } + + static inline void signal_handler(int) { + crash_handler(signal_skip_recs); + abort(); + } + + static inline void __cdecl invalid_parameter_handler(const wchar_t *, const wchar_t *, + const wchar_t *, unsigned int, uintptr_t) { + crash_handler(signal_skip_recs); + abort(); + } + + NOINLINE static LONG WINAPI crash_handler(EXCEPTION_POINTERS *info) { + // The exception info supplies a trace from exactly where the issue was, + // no need to skip records + crash_handler(0, info->ContextRecord); + return EXCEPTION_CONTINUE_SEARCH; + } + + NOINLINE static void crash_handler(int skip, CONTEXT *ct = nullptr) { + if (ct == nullptr) { + RtlCaptureContext(ctx()); + } + else { + memcpy(ctx(), ct, sizeof(CONTEXT)); + } + DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), GetCurrentProcess(), + &thread_handle(), 0, FALSE, DUPLICATE_SAME_ACCESS); + + skip_recs() = skip; + + { + std::unique_lock lk(mtx()); + crashed() = crash_status::crashed; + } + + cv().notify_one(); + + { + std::unique_lock lk(mtx()); + cv().wait(lk, [] { return crashed() != crash_status::crashed; }); + } + } + + static void handle_stacktrace(int skip_frames = 0) { + // printer creates the TraceResolver, which can supply us a machine type + // for stack walking. Without this, StackTrace can only guess using some + // macros. 
+ // StackTrace also requires that the PDBs are already loaded, which is done + // in the constructor of TraceResolver + Printer printer; + + StackTrace st; + st.set_machine_type(printer.resolver().machine_type()); + st.set_thread_handle(thread_handle()); + st.load_here(32 + skip_frames, ctx()); + st.skip_n_firsts(skip_frames); + + printer.address = true; + printer.print(st, std::cerr); + } +}; + +#endif // BACKWARD_SYSTEM_WINDOWS + +#ifdef BACKWARD_SYSTEM_UNKNOWN + +class SignalHandling { + public: + SignalHandling(const std::vector & = std::vector()) {} + bool init() { return false; } + bool loaded() { return false; } +}; + +#endif // BACKWARD_SYSTEM_UNKNOWN + +} // namespace backward + +#endif /* H_GUARD */ diff --git a/src/ibv_helper.cpp b/src/ibv_helper.cpp index 82498c7..6c9f1e7 100644 --- a/src/ibv_helper.cpp +++ b/src/ibv_helper.cpp @@ -21,7 +21,7 @@ int ibv_read_sysfs_file(const char *dir, const char *file, char *buf, size_t siz if (len > 0) { if (buf[len - 1] == '\n') buf[--len] = '\0'; - else if (len < size) + else if ((size_t)len < size) buf[len] = '\0'; else /* We would have to truncate the contents to NULL diff --git a/src/infinistore.cpp b/src/infinistore.cpp index d5cf5a3..edea975 100644 --- a/src/infinistore.cpp +++ b/src/infinistore.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include "ibv_helper.h" #include "protocol.h" #include "rdma.h" +#include "utils.h" server_config_t global_config; @@ -40,6 +42,15 @@ bool extend_in_flight = false; std::list> lru_queue; std::unordered_map> kv_map; +struct InflightOP { + boost::intrusive_ptr ptr; + uint64_t remote_addr; + uint32_t remote_rkey; + InflightOP(boost::intrusive_ptr ptr, uint64_t remote_addr, uint32_t remote_rkey) + : ptr(std::move(ptr)), remote_addr(remote_addr), remote_rkey(remote_rkey) {} + InflightOP(const InflightOP &) = default; +}; + typedef enum { READ_HEADER, READ_BODY, @@ -66,8 +77,8 @@ struct Client { struct ibv_mr *recv_mr_[MAX_RECV_WR] 
= {}; // RDMA send buffer - char *send_buffer_ = NULL; - struct ibv_mr *send_mr_ = NULL; + boost::circular_buffer send_buffers_{MAX_RECV_WR}; + int outstanding_rdma_ops_ = 0; std::deque> outstanding_rdma_ops_queue_; @@ -84,6 +95,21 @@ struct Client { uv_poll_t poll_handle_; + struct RdmaCompletionInfo { + int ret_code; + unsigned int payload_[8] = {}; // 8 unsigned int for payload; stays all-zero when the constructor gets no payload + std::unique_ptr> inflight_rdma_ops; + RdmaCompletionInfo(int ret_code, const unsigned int *payload, + std::unique_ptr> inflight_rdma_ops) + : ret_code(ret_code), inflight_rdma_ops(std::move(inflight_rdma_ops)) { + if (payload) { + memcpy(payload_, payload, sizeof(unsigned int) * 8); + } + else { + } + } + }; + Client() = default; Client(const Client &) = delete; ~Client(); @@ -91,7 +117,7 @@ struct Client { void cq_poll_handle(uv_poll_t *handle, int status, int events); int read_rdma_cache(const RemoteMetaRequest *req); int write_rdma_cache(const RemoteMetaRequest *req); - void post_ack(int return_code); + void post_ack(int return_code, unsigned int payload[8]); int allocate_rdma(const RemoteMetaRequest *req); // send response to client through TCP void send_resp(int return_code, void *buf, size_t size); @@ -103,9 +129,9 @@ struct Client { int delete_keys(const DeleteKeysRequest *request); int rdma_exchange(); int prepare_recv_rdma_request(int buf_idx); - void perform_batch_rdma(const RemoteMetaRequest *remote_meta_req, - std::vector> *inflight_rdma_ops, - enum ibv_wr_opcode opcode); + void perform_batch_rdma(std::unique_ptr> inflight_rdma_ops, + enum ibv_wr_opcode opcode, const int ret_code, + const unsigned int *ret_payload); }; typedef struct Client client_t; @@ -122,14 +148,8 @@ Client::~Client() { handle_ = NULL; } - if (send_mr_) { - ibv_dereg_mr(send_mr_); - send_mr_ = NULL; - } - - if (send_buffer_) { - free(send_buffer_); - send_buffer_ = NULL; + for (auto send_buffer : send_buffers_) { + delete send_buffer; } for (int i = 0; i < MAX_RECV_WR; i++) { @@ -296,16 +316,32 @@ int 
Client::tcp_payload_request(const TCPPayloadRequest *req) { return 0; } -void Client::post_ack(int return_code) { +void Client::post_ack(int return_code, unsigned int payload[8]) { // send an error code back - struct ibv_send_wr wr = {0}; + struct ibv_send_wr wr {}; struct ibv_send_wr *bad_wr = NULL; - wr.wr_id = 0; wr.opcode = IBV_WR_SEND_WITH_IMM; wr.imm_data = return_code; - wr.send_flags = 0; - wr.sg_list = NULL; - wr.num_sge = 0; + wr.wr_id = 0; + struct ibv_sge sge {}; + if (payload != nullptr) { + assert(!send_buffers_.empty()); + auto *buffer = send_buffers_.front(); + send_buffers_.pop_front(); + memcpy(buffer->buffer_, payload, sizeof(unsigned int) * 8); + + sge.addr = (uintptr_t)buffer->buffer_; + sge.length = sizeof(unsigned int) * 8; // 8 unsigned int + sge.lkey = buffer->mr_->lkey; + wr.sg_list = &sge; + wr.num_sge = 1; + wr.wr_id = (uintptr_t)buffer; // use buffer as wr_id to track it + } + else { + wr.sg_list = NULL; + wr.num_sge = 0; + } + wr.send_flags = IBV_SEND_SIGNALED; wr.next = NULL; int ret = ibv_post_send(rdma_ctx_.qp, &wr, &bad_wr); if (ret) { @@ -332,7 +368,7 @@ void Client::cq_poll_handle(uv_poll_t *handle, int status, int events) { ERROR("Failed to request CQ notification"); return; } - struct ibv_wc wc = {0}; + struct ibv_wc wc = {}; while (ibv_poll_cq(cq, 1, &wc) > 0) { if (wc.status == IBV_WC_SUCCESS) { if (wc.opcode == IBV_WC_RECV) { // recv RDMA read/write request @@ -344,14 +380,14 @@ void Client::cq_poll_handle(uv_poll_t *handle, int status, int events) { case OP_RDMA_WRITE: { int ret = write_rdma_cache(request); if (ret != 0) { - post_ack(ret); + post_ack(ret, nullptr); } break; } case OP_RDMA_READ: { int ret = read_rdma_cache(request); if (ret != 0) { - post_ack(ret); + post_ack(ret, nullptr); } break; } @@ -366,17 +402,11 @@ void Client::cq_poll_handle(uv_poll_t *handle, int status, int events) { return; } } - else if (wc.opcode == IBV_WC_SEND) { // allocate: response sent - DEBUG("allocate response sent"); - } - else if 
(wc.opcode == - IBV_WC_RECV_RDMA_WITH_IMM) { // write cache: we already have all data now. - // client should not use WRITE_WITH_IMM to notify. - // it should use COMMIT message to notify. - WARN("WRITE_WITH_IMM is not supported in server side"); - if (prepare_recv_rdma_request(wc.wr_id) < 0) { - ERROR("Failed to prepare recv rdma request"); - return; + else if (wc.opcode == IBV_WC_SEND) { + if (wc.wr_id != 0) { + assert(!send_buffers_.full()); + auto *buffer = (Buffer *)wc.wr_id; + send_buffers_.push_back(buffer); } } else if (wc.opcode == IBV_WC_RDMA_WRITE || wc.opcode == IBV_WC_RDMA_READ) { @@ -390,7 +420,6 @@ void Client::cq_poll_handle(uv_poll_t *handle, int status, int events) { struct ibv_send_wr *wrs = item.first; struct ibv_sge *sges = item.second; ibv_send_wr *bad_wr = nullptr; - DEBUG("IBV POST SEND, wr_id: {}", wrs[0].wr_id); int ret = ibv_post_send(rdma_ctx_.qp, &wrs[0], &bad_wr); if (ret) { ERROR("Failed to post RDMA write {}", strerror(ret)); @@ -402,26 +431,26 @@ void Client::cq_poll_handle(uv_poll_t *handle, int status, int events) { outstanding_rdma_ops_queue_.pop_front(); } + auto *complete_info = (RdmaCompletionInfo *)wc.wr_id; if (wc.wr_id > 0) { // last WR will inform that all RDMA write is finished,so we can dereference PTR if (wc.opcode == IBV_WC_RDMA_READ) { - auto inflight_rdma_writes = - (std::vector> *)wc.wr_id; - for (auto ptr : *inflight_rdma_writes) { - kv_map[ptr->key] = ptr; - DEBUG("writing key done, {}", ptr->key); - lru_queue.push_back(ptr); - ptr->lru_it = --lru_queue.end(); + for (auto inflight_op : *complete_info->inflight_rdma_ops) { + kv_map[inflight_op.ptr->key] = inflight_op.ptr; + DEBUG("writing key done, {}", inflight_op.ptr->key); + lru_queue.push_back(inflight_op.ptr); + inflight_op.ptr->lru_it = --lru_queue.end(); } - delete inflight_rdma_writes; - post_ack(FINISH); + post_ack(complete_info->ret_code, nullptr); } else if (wc.opcode == IBV_WC_RDMA_WRITE) { - post_ack(FINISH); - auto inflight_rdma_reads = - 
(std::vector> *)wc.wr_id; - delete inflight_rdma_reads; + assert(complete_info->payload_ != nullptr); + post_ack(complete_info->ret_code, complete_info->payload_); } + else { + ERROR("Unexpected wc opcode: {}", (int)wc.opcode); + } + delete complete_info; } } else { @@ -452,8 +481,8 @@ void extend_mempool() { } int Client::prepare_recv_rdma_request(int buf_idx) { - struct ibv_sge sge = {0}; - struct ibv_recv_wr rwr = {0}; + struct ibv_sge sge {}; + struct ibv_recv_wr rwr {}; struct ibv_recv_wr *bad_wr = NULL; sge.addr = (uintptr_t)(recv_buffer_[buf_idx]); sge.length = PROTOCOL_BUFFER_SIZE; @@ -470,9 +499,10 @@ int Client::prepare_recv_rdma_request(int buf_idx) { return 0; } -void Client::perform_batch_rdma(const RemoteMetaRequest *remote_meta_req, - std::vector> *inflight_rdma_ops, - enum ibv_wr_opcode opcode) { +void Client::perform_batch_rdma(std::unique_ptr> inflight_rdma_ops, + enum ibv_wr_opcode opcode, const int ret_code, + const unsigned int *ret_payload // unsigned int[8] +) { assert(opcode == IBV_WR_RDMA_READ || opcode == IBV_WR_RDMA_WRITE); const size_t max_wr = MAX_WR_BATCH; @@ -491,36 +521,35 @@ void Client::perform_batch_rdma(const RemoteMetaRequest *remote_meta_req, sges = new struct ibv_sge[max_wr]; } - int n = remote_meta_req->keys()->size(); + int n = inflight_rdma_ops->size(); + for (int i = 0; i < n; i++) { - sges[num_wr].addr = (uintptr_t)(*inflight_rdma_ops)[i]->ptr; - sges[num_wr].length = (*inflight_rdma_ops)[i]->size; - sges[num_wr].lkey = mm->get_lkey((*inflight_rdma_ops)[i]->pool_idx); + sges[num_wr].addr = (uintptr_t)(*inflight_rdma_ops)[i].ptr->ptr; + sges[num_wr].length = (*inflight_rdma_ops)[i].ptr->size; + sges[num_wr].lkey = mm->get_lkey((*inflight_rdma_ops)[i].ptr->pool_idx); wrs[num_wr].wr_id = 0; wrs[num_wr].opcode = opcode; wrs[num_wr].sg_list = &sges[num_wr]; wrs[num_wr].num_sge = 1; - wrs[num_wr].wr.rdma.remote_addr = remote_meta_req->remote_addrs()->Get(i); - wrs[num_wr].wr.rdma.rkey = remote_meta_req->rkey(); - // 
wrs[num_wr].wr.rdma.rkey = remote_meta_req->rkey(); - wrs[num_wr].next = (num_wr == max_wr - 1 || i == (int)remote_meta_req->keys()->size() - 1) - ? nullptr - : &wrs[num_wr + 1]; + wrs[num_wr].wr.rdma.remote_addr = (*inflight_rdma_ops)[i].remote_addr; + wrs[num_wr].wr.rdma.rkey = (*inflight_rdma_ops)[i].remote_rkey; - wrs[num_wr].send_flags = - (num_wr == max_wr - 1 || i == (int)remote_meta_req->keys()->size() - 1) - ? IBV_SEND_SIGNALED - : 0; + wrs[num_wr].next = (num_wr == max_wr - 1 || i == n - 1) ? nullptr : &wrs[num_wr + 1]; - if (i == remote_meta_req->keys()->size() - 1) { - wrs[num_wr].wr_id = (uintptr_t)inflight_rdma_ops; + wrs[num_wr].send_flags = (num_wr == max_wr - 1 || i == n - 1) ? IBV_SEND_SIGNALED : 0; + + if (i == n - 1) { + // last WR will inform that all RDMA write is finished, so we can dereference PTR + auto *completion_info = + new RdmaCompletionInfo(ret_code, ret_payload, std::move(inflight_rdma_ops)); + wrs[num_wr].wr_id = (uintptr_t)completion_info; } num_wr++; - if (num_wr == max_wr || i == remote_meta_req->keys()->size() - 1) { + if (num_wr == max_wr || i == n - 1) { if (!wr_full) { struct ibv_send_wr *bad_wr = nullptr; int ret = ibv_post_send(rdma_ctx_.qp, &wrs[0], &bad_wr); @@ -563,36 +592,49 @@ int Client::write_rdma_cache(const RemoteMetaRequest *remote_meta_req) { return INVALID_REQ; } - // allocate memory - int block_size = remote_meta_req->block_size(); - int n = remote_meta_req->keys()->size(); + if (remote_meta_req->rkey()->size() != remote_meta_req->remote_addrs()->size()) { + ERROR("keys size and block_size size mismatch"); + return INVALID_REQ; + } + + int n = remote_meta_req->remote_addrs()->size(); // create something. 
- auto *inflight_rdma_writes = new std::vector>; + auto inflight_rdma_writes = std::make_unique>(); inflight_rdma_writes->reserve(n); evict_cache(ON_DEMAND_MIN_THRESHOLD, ON_DEMAND_MAX_THRESHOLD); + bool allocated = false; + int key_idx = 0; - bool allocated = - mm->allocate(block_size, n, [&](void *addr, uint32_t lkey, uint32_t rkey, int pool_idx) { - const auto *key = remote_meta_req->keys()->Get(key_idx); - auto ptr = boost::intrusive_ptr(new PTR(addr, block_size, pool_idx, key->str())); - DEBUG("writing key: {}", key->str()); - inflight_rdma_writes->push_back(ptr); - key_idx++; - }); + for (const auto *key : *remote_meta_req->keys()) { + int block_size = remote_meta_req->block_size()->Get(key_idx); + allocated = mm->allocate( + block_size, 1, [&](void *addr, uint32_t lkey, uint32_t rkey, int pool_idx) { + auto ptr = + boost::intrusive_ptr(new PTR(addr, block_size, pool_idx, key->str())); + DEBUG("writing key: {}", key->str()); + inflight_rdma_writes->push_back( + InflightOP(ptr, remote_meta_req->remote_addrs()->Get(key_idx), + remote_meta_req->rkey()->Get(key_idx))); + }); + if (!allocated) { + ERROR("Failed to allocate memory"); + return OUT_OF_MEMORY; + } + key_idx++; + } if (!allocated) { ERROR("Failed to allocate memory"); - delete inflight_rdma_writes; return OUT_OF_MEMORY; } // perform rdma read to receive data from client // read remote address data to local address - perform_batch_rdma(remote_meta_req, inflight_rdma_writes, IBV_WR_RDMA_READ); + perform_batch_rdma(std::move(inflight_rdma_writes), IBV_WR_RDMA_READ, FINISH, nullptr); return 0; } @@ -605,36 +647,50 @@ int Client::read_rdma_cache(const RemoteMetaRequest *remote_meta_req) { return INVALID_REQ; } - auto *inflight_rdma_reads = new std::vector>; + auto inflight_rdma_reads = std::make_unique>(); - inflight_rdma_reads->reserve(remote_meta_req->keys()->size()); + unsigned int ret_payload[8] = {0}; - for (const auto *key : *remote_meta_req->keys()) { - auto it = kv_map.find(key->str()); + for 
(size_t i = 0; i < remote_meta_req->keys()->size(); i++) { + auto it = kv_map.find(remote_meta_req->keys()->Get(i)->str()); if (it == kv_map.end()) { - WARN("Key not found: {}", key->str()); - return KEY_NOT_FOUND; + WARN("Key not found: {}", remote_meta_req->keys()->Get(i)->str()); + if (i / 8 >= sizeof(ret_payload) / sizeof(ret_payload[0])) { + // the failure bitmap has no slot for this index: fail the whole request + // instead of writing past the end of ret_payload + return KEY_NOT_FOUND; + } + ret_payload[i / 8] |= (1 << (i % 8)); // set the bit for this key not found + continue; } const auto &ptr = it->second; - - if (ptr->size > remote_meta_req->block_size()) { + if (ptr->size > (size_t)(remote_meta_req->block_size()->Get(i))) { WARN("remote region does not enough size: key:{}, actual size: {}, remote size :{}", - key->str(), ptr->size, remote_meta_req->block_size()); - return INVALID_REQ; + remote_meta_req->keys()->Get(i)->str(), ptr->size, + remote_meta_req->block_size()->Get(i)); + if (i / 8 >= sizeof(ret_payload) / sizeof(ret_payload[0])) { + // same bound as for a missing key: no bitmap slot, so reject the request + return INVALID_REQ; + } + ret_payload[i / 8] |= (1 << (i % 8)); // set the bit: remote buffer too small for this key + continue; } - inflight_rdma_reads->push_back(ptr); + inflight_rdma_reads->push_back(InflightOP(ptr, remote_meta_req->remote_addrs()->Get(i), + remote_meta_req->rkey()->Get(i))); } // loop over inflight_rdma_reads to update lru_queue - for (auto ptr : *inflight_rdma_reads) { - lru_queue.erase(ptr->lru_it); - lru_queue.push_back(ptr); - ptr->lru_it = --lru_queue.end(); + for (auto inflight_op : *inflight_rdma_reads) { + lru_queue.erase(inflight_op.ptr->lru_it); + lru_queue.push_back(inflight_op.ptr); + inflight_op.ptr->lru_it = --lru_queue.end(); } - // write to remote address data from local address - perform_batch_rdma(remote_meta_req, inflight_rdma_reads, IBV_WR_RDMA_WRITE); + int ret_code = FINISH; + if (inflight_rdma_reads->empty()) { + // no keys to read, just return + INFO("No keys to read, return KEY_NOT_FOUND"); + return KEY_NOT_FOUND; + } + if (inflight_rdma_reads->size() < remote_meta_req->keys()->size()) { + // some keys not found, set ret_code to PARTIAL_SUCCESS + ret_code = PARTIAL_SUCCESS; + } + + // write to remote address data from local address + perform_batch_rdma(std::move(inflight_rdma_reads), 
IBV_WR_RDMA_WRITE, ret_code, ret_payload); return 0; } @@ -672,8 +728,6 @@ void on_write(uv_write_t *req, int status) { int Client::rdma_exchange() { INFO("do rdma exchange..."); - int ret; - if (rdma_connected_ == true) { ERROR("RDMA already connected"); return SYSTEM_ERROR; @@ -703,15 +757,8 @@ int Client::rdma_exchange() { rdma_connected_ = true; - if (posix_memalign((void **)&send_buffer_, 4096, PROTOCOL_BUFFER_SIZE) != 0) { - ERROR("Failed to allocate send buffer"); - return SYSTEM_ERROR; - } - - send_mr_ = ibv_reg_mr(rdma_dev.pd, send_buffer_, PROTOCOL_BUFFER_SIZE, IBV_ACCESS_LOCAL_WRITE); - if (!send_mr_) { - ERROR("Failed to register MR"); - return SYSTEM_ERROR; + for (int i = 0; i < MAX_RECV_WR; i++) { + send_buffers_.push_back(new Buffer(rdma_dev.pd, 32)); } for (int i = 0; i < MAX_RECV_WR; i++) { @@ -941,8 +988,8 @@ void on_read(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { } case READ_VALUE_THROUGH_TCP: { size_t to_copy = MIN(nread - offset, client->expected_bytes_ - client->bytes_read_); - memcpy(client->current_tcp_task_->ptr + client->bytes_read_, buf->base + offset, - to_copy); + memcpy(static_cast(client->current_tcp_task_->ptr) + client->bytes_read_, + buf->base + offset, to_copy); client->bytes_read_ += to_copy; offset += to_copy; if (client->bytes_read_ == client->expected_bytes_) { diff --git a/src/libinfinistore.cpp b/src/libinfinistore.cpp index 3f5f79d..474021c 100644 --- a/src/libinfinistore.cpp +++ b/src/libinfinistore.cpp @@ -18,28 +18,6 @@ #include "rdma.h" #include "utils.h" -SendBuffer::SendBuffer(struct ibv_pd *pd, size_t size) { - if (posix_memalign(&buffer_, 4096, PROTOCOL_BUFFER_SIZE) != 0) { - assert(false); - } - mr_ = ibv_reg_mr(pd, buffer_, PROTOCOL_BUFFER_SIZE, IBV_ACCESS_LOCAL_WRITE); - assert(mr_ != NULL); -} - -SendBuffer::~SendBuffer() { - DEBUG("destroying send buffer"); - assert(buffer_ != NULL); - assert(mr_ != NULL); - if (mr_) { - ibv_dereg_mr(mr_); - mr_ = nullptr; - } - if (buffer_) { - free(buffer_); - 
buffer_ = nullptr; - } -} - /* because python will always hold GIL when doing ~Connection(), which could lead to deadlock, so we have to explicitly call close() to stop cq_handler. @@ -85,10 +63,16 @@ Connection::~Connection() { // throw std::runtime_error("user should call close() before destroying connection"); } - SendBuffer *buffer; - while (send_buffers_.pop(buffer)) { - if (buffer) - delete buffer; + SendBuffer *send_buffer; + while (send_buffers_.pop(send_buffer)) { + if (send_buffer) + delete send_buffer; + } + + RecvBuffer *recv_buffer; + while (recv_buffers_.pop(recv_buffer)) { + if (recv_buffer) + delete recv_buffer; } for (auto it = local_mr_.begin(); it != local_mr_.end(); it++) { @@ -131,27 +115,31 @@ void Connection::cq_handler() { if (wc[i].opcode == IBV_WC_SEND) { // read cache/allocate msg/commit msg: request sent - DEBUG("read cache/allocated/commit msg request send {}, ", - (uintptr_t)wc[i].wr_id); + DEBUG("read/write request send {}, ", (uintptr_t)wc[i].wr_id); release_send_buffer((SendBuffer *)wc[i].wr_id); } - else if (wc[i].opcode == IBV_WC_RECV) { // allocate msg recved. 
+ else if (wc[i].opcode == IBV_WC_RECV) { rdma_info_base *ptr = reinterpret_cast(wc[i].wr_id); switch (ptr->get_wr_type()) { case WrType::RDMA_READ_ACK: { - DEBUG("read cache done: Received IMM, imm_data: {}", - wc[i].imm_data); auto *info = reinterpret_cast(ptr); - info->callback(wc[i].imm_data); + // Access the buffer directly when passing to callback + info->callback(wc[i].imm_data, reinterpret_cast( + info->recv_buffer->buffer_)); + // Release the associated recv buffer + if (info->recv_buffer) { + release_recv_buffer(info->recv_buffer); + } delete info; break; } case WrType::RDMA_WRITE_ACK: { - DEBUG("RDMA write cache done: Received IMM, imm_data: {}", - wc[i].imm_data); auto *info = reinterpret_cast(ptr); info->callback(wc[i].imm_data); - DEBUG("RDMA_WRITE_ACK callback done"); + // Release the associated recv buffer + if (info->recv_buffer) { + release_recv_buffer(info->recv_buffer); + } delete info; break; } @@ -191,6 +179,20 @@ SendBuffer *Connection::get_send_buffer() { void Connection::release_send_buffer(SendBuffer *buffer) { send_buffers_.push(buffer); } +RecvBuffer *Connection::get_recv_buffer() { + /* + if recv buffer list is empty,we just report error, and return NULL + normal user should not have too many inflight requests, so we just report error + */ + assert(!recv_buffers_.empty()); + + RecvBuffer *buffer; + assert(recv_buffers_.pop(buffer)); + return buffer; +} + +void Connection::release_recv_buffer(RecvBuffer *buffer) { recv_buffers_.push(buffer); } + int Connection::setup_rdma(client_config_t config) { // if (init_rdma_resources(config) < 0) { // ERROR("Failed to initialize RDMA resources"); @@ -235,6 +237,11 @@ int Connection::setup_rdma(client_config_t config) { send_buffers_.push(new SendBuffer(rdma_dev_.pd, PROTOCOL_BUFFER_SIZE)); } + // Initialize receive buffers (32 bytes each) + for (int i = 0; i < MAX_RECV_WR; i++) { + recv_buffers_.push(new RecvBuffer(rdma_dev_.pd, 32)); + } + stop_ = false; cq_future_ = 
std::async(std::launch::async, [this]() { cq_handler(); }); @@ -468,20 +475,33 @@ int Connection::delete_keys(const std::vector &keys) { return count; } -void Connection::post_recv_ack(rdma_info_base *info) { - struct ibv_recv_wr recv_wr = {0}; +int Connection::post_recv_ack(rdma_info_base *info) { + RecvBuffer *recv_buffer = get_recv_buffer(); + + // Associate the recv_buffer with the info structure + info->recv_buffer = recv_buffer; + + struct ibv_recv_wr recv_wr {}; struct ibv_recv_wr *bad_recv_wr = NULL; + struct ibv_sge sge {}; - recv_wr.wr_id = (uintptr_t)info; + sge.addr = (uintptr_t)recv_buffer->buffer_; + sge.length = 32; + sge.lkey = recv_buffer->mr_->lkey; + recv_wr.wr_id = (uintptr_t)info; recv_wr.next = NULL; - recv_wr.sg_list = NULL; - recv_wr.num_sge = 0; + recv_wr.sg_list = &sge; + recv_wr.num_sge = 1; int ret = ibv_post_recv(ctx_.qp, &recv_wr, &bad_recv_wr); if (ret) { ERROR("Failed to post recv wr :{}", strerror(ret)); + // Release the buffer back to the pool on error + release_recv_buffer(recv_buffer); + info->recv_buffer = nullptr; } + return ret; } std::vector *Connection::r_tcp(const std::string &key) { @@ -594,48 +614,44 @@ int Connection::w_tcp(const std::string &key, void *ptr, size_t size) { } int Connection::w_rdma_async(const std::vector &keys, - const std::vector offsets, int block_size, void *base_ptr, + const std::vector &local_address, + const std::vector &block_sizes, std::function callback) { - assert(base_ptr != NULL); - assert(offsets.size() == keys.size()); - - if (!local_mr_.count((uintptr_t)base_ptr)) { - ERROR("Please register memory first {}", (uint64_t)base_ptr); - return -1; - } - - struct ibv_mr *mr = local_mr_[(uintptr_t)base_ptr]; - - // remote_meta_request req = { - // .keys = keys, - // .block_size = block_size, - // .op = OP_RDMA_WRITE, - // .remote_addrs = remote_addrs, - // } + assert(local_address.size() > 0); + assert(local_address.size() == keys.size()); + assert(keys.size() == block_sizes.size()); SendBuffer 
*send_buffer = get_send_buffer(); FixedBufferAllocator allocator(send_buffer->buffer_, PROTOCOL_BUFFER_SIZE); FlatBufferBuilder builder(64 << 10, &allocator); auto keys_offset = builder.CreateVectorOfStrings(keys); - - // address is base_ptr + offset - std::vector remote_addrs; - for (size_t i = 0; i < offsets.size(); i++) { - remote_addrs.push_back((unsigned long)base_ptr + offsets[i]); - } - auto remote_addrs_offset = builder.CreateVector(remote_addrs); - auto req = CreateRemoteMetaRequest(builder, keys_offset, block_size, mr->rkey, + auto remote_addrs_offset = builder.CreateVector(local_address); + auto sizes_offset = builder.CreateVector(block_sizes); + + // build rkey array + std::vector rkeys; + for (size_t i = 0; i < local_address.size(); i++) { + uintptr_t address = local_address[i]; + size_t size = block_sizes[i]; + auto mr = mr_contains((void *)address, size); + rkeys.push_back(mr->rkey); + } + auto rkeys_offset = builder.CreateVector(rkeys); + auto req = CreateRemoteMetaRequest(builder, keys_offset, sizes_offset, rkeys_offset, remote_addrs_offset, OP_RDMA_WRITE); - builder.Finish(req); // post recv msg first auto *info = new rdma_write_info(callback); - post_recv_ack(info); + if (post_recv_ack(info) < 0) { + ERROR("Failed to post recv ack for RDMA write"); + delete info; // Clean up if post_recv_ack fails + return -1; + } // send msg - struct ibv_sge sge = {0}; - struct ibv_send_wr wr = {0}; + struct ibv_sge sge {}; + struct ibv_send_wr wr {}; struct ibv_send_wr *bad_wr = NULL; sge.addr = (uintptr_t)builder.GetBufferPointer(); sge.length = builder.GetSize(); @@ -657,55 +673,42 @@ int Connection::w_rdma_async(const std::vector &keys, } int Connection::r_rdma_async(const std::vector &keys, - const std::vector offsets, int block_size, void *base_ptr, - std::function callback) { - assert(base_ptr != NULL); - - if (!local_mr_.count((uintptr_t)base_ptr)) { - ERROR("Please register memory first"); - return -1; - } - - INFO("r_rdma,, block_size: {}, base_ptr: 
{}", block_size, base_ptr); - struct ibv_mr *mr = local_mr_[(uintptr_t)base_ptr]; - assert(mr != NULL); - - auto *info = new rdma_read_info([callback](unsigned int code) { callback(code); }); - post_recv_ack(info); - - // std::vector keys; - std::vector remote_addrs; - for (auto &offset : offsets) { - remote_addrs.push_back((uintptr_t)(base_ptr + offset)); - } + const std::vector &local_address, + const std::vector &block_sizes, + std::function callback) { + assert(local_address.size() == keys.size()); + assert(block_sizes.size() == keys.size()); - /* - remote_meta_req = { - .keys = keys, - .block_size = block_size, - .rkey = mr->rkey, - .remote_addrs = remote_addrs, - .op = OP_RDMA_READ, - } - */ SendBuffer *send_buffer = get_send_buffer(); FixedBufferAllocator allocator(send_buffer->buffer_, PROTOCOL_BUFFER_SIZE); FlatBufferBuilder builder(64 << 10, &allocator); - auto keys_offset = builder.CreateVectorOfStrings(keys); - auto remote_addrs_offset = builder.CreateVector(remote_addrs); - auto req = CreateRemoteMetaRequest(builder, keys_offset, block_size, mr->rkey, + auto remote_addrs_offset = builder.CreateVector(local_address); + auto sizes_offset = builder.CreateVector(block_sizes); + + // build rkey array + std::vector rkeys; + for (size_t i = 0; i < local_address.size(); i++) { + uintptr_t address = local_address[i]; + size_t size = block_sizes[i]; + auto mr = mr_contains((void *)address, size); + rkeys.push_back(mr->rkey); + } + auto rkeys_offset = builder.CreateVector(rkeys); + auto req = CreateRemoteMetaRequest(builder, keys_offset, sizes_offset, rkeys_offset, remote_addrs_offset, OP_RDMA_READ); - builder.Finish(req); + auto *info = new rdma_read_info(callback); + post_recv_ack(info); + // send RDMA request - struct ibv_sge sge = {0}; + struct ibv_sge sge {}; sge.addr = (uintptr_t)builder.GetBufferPointer(); sge.length = builder.GetSize(); sge.lkey = send_buffer->mr_->lkey; - struct ibv_send_wr wr = {0}; + struct ibv_send_wr wr {}; struct ibv_send_wr *bad_wr 
= NULL; wr.wr_id = (uintptr_t)send_buffer; @@ -725,12 +728,62 @@ int Connection::r_rdma_async(const std::vector &keys, return 0; } +struct ibv_mr *Connection::mr_contains(void *base_ptr, size_t ptr_region_size) { + assert(base_ptr != NULL); + + uintptr_t base_ptr_int = (uintptr_t)base_ptr; + uintptr_t end_ptr_int = base_ptr_int + ptr_region_size; + + auto it = local_mr_.upper_bound({base_ptr_int, 0}); + if (it != local_mr_.begin()) { + --it; + if (it->first.first <= base_ptr_int && it->first.first + it->first.second >= end_ptr_int) { + return it->second; + } + } + return nullptr; +} + +bool Connection::mr_overlap(void *base_ptr, size_t ptr_region_size) { + assert(base_ptr != NULL); + + uintptr_t base_ptr_int = (uintptr_t)base_ptr; + uintptr_t end_ptr_int = base_ptr_int + ptr_region_size; + + auto it = local_mr_.lower_bound({base_ptr_int, 0}); + if (it != local_mr_.end()) { + if (it->first.first < end_ptr_int) { + // corner case: allow exactly the same region + if (base_ptr_int == it->first.first && + base_ptr_int + ptr_region_size == it->first.first + it->first.second) { + WARN("overlap with existing mr, but same region"); + return false; + } + return true; + } + } + + // check the last element + if (it != local_mr_.begin()) { + --it; + size_t prev_end = it->first.first + it->first.second; + if (base_ptr_int < prev_end) { + return true; + } + } + + return false; +} + int Connection::register_mr(void *base_ptr, size_t ptr_region_size) { assert(base_ptr != NULL); - if (local_mr_.count((uintptr_t)base_ptr)) { - WARN("this memory address is already registered!"); - ibv_dereg_mr(local_mr_[(uintptr_t)base_ptr]); + + // detect overlap of _local_mr_ + if (mr_overlap(base_ptr, ptr_region_size)) { + ERROR("overlap with existing mr"); + return -1; } + struct ibv_mr *mr; mr = ibv_reg_mr(rdma_dev_.pd, base_ptr, ptr_region_size, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ); @@ -739,6 +792,6 @@ int Connection::register_mr(void *base_ptr, size_t 
ptr_region_size) { return -1; } INFO("register mr done for base_ptr: {}, size: {}", (uintptr_t)base_ptr, ptr_region_size); - local_mr_[(uintptr_t)base_ptr] = mr; + local_mr_.insert({{(uintptr_t)base_ptr, ptr_region_size}, mr}); return 0; } diff --git a/src/libinfinistore.h b/src/libinfinistore.h index e263a3f..3d88985 100644 --- a/src/libinfinistore.h +++ b/src/libinfinistore.h @@ -18,18 +18,11 @@ #include "log.h" #include "protocol.h" #include "rdma.h" +#include "utils.h" -// RDMA send buffer -// because write_cache will be invoked asynchronously, -// so each request will have a standalone send buffer. -struct SendBuffer { - void *buffer_ = NULL; - struct ibv_mr *mr_ = NULL; - - SendBuffer(struct ibv_pd *pd, size_t size); - SendBuffer(const SendBuffer &) = delete; - ~SendBuffer(); -}; +// Type aliases for clarity +using SendBuffer = Buffer; +using RecvBuffer = Buffer; enum class WrType { BASE, @@ -42,6 +35,8 @@ struct rdma_info_base { WrType wr_type; public: + RecvBuffer *recv_buffer = nullptr; // Associated receive buffer + rdma_info_base(WrType wr_type) : wr_type(wr_type) {} virtual ~rdma_info_base() = default; WrType get_wr_type() const { return wr_type; } @@ -55,8 +50,8 @@ struct rdma_write_info : rdma_info_base { struct rdma_read_info : rdma_info_base { // call back function. 
- std::function callback; - rdma_read_info(std::function callback) + std::function callback; + rdma_read_info(std::function callback) : rdma_info_base(WrType::RDMA_READ_ACK), callback(callback) {} }; @@ -70,7 +65,16 @@ class Connection { rdma_conn_info_t local_info_; rdma_conn_info_t remote_info_; - std::unordered_map local_mr_; + // binary tree + + struct cmp { + bool operator()(const std::pair &a, + const std::pair &b) const { + return a.first < b.first; + } + }; + // sort by uintptr_t + std::map, struct ibv_mr *, cmp> local_mr_; /* This is MAX_RECV_WR not MAX_SEND_WR, @@ -78,6 +82,9 @@ class Connection { */ boost::lockfree::spsc_queue send_buffers_{MAX_RECV_WR}; + // Receive buffers for RDMA operations, each buffer is 32 bytes + boost::lockfree::spsc_queue recv_buffers_{MAX_RECV_WR}; + // struct ibv_comp_channel *comp_channel_ = NULL; std::future cq_future_; // cq thread @@ -93,10 +100,14 @@ class Connection { void close_conn(); int init_connection(client_config_t config); int setup_rdma(client_config_t config); - int r_rdma_async(const std::vector &keys, const std::vector offsets, - int block_size, void *base_ptr, std::function callback); - int w_rdma_async(const std::vector &keys, const std::vector offsets, - int block_size, void *base_ptr, std::function callback); + + int r_rdma_async(const std::vector &keys, + const std::vector &local_address, + const std::vector &sizes, + std::function callback); + int w_rdma_async(const std::vector &keys, + const std::vector &local_address, + const std::vector &sizes, std::function callback); int w_tcp(const std::string &key, void *ptr, size_t size); std::vector *r_tcp(const std::string &key); @@ -107,15 +118,19 @@ class Connection { int exchange_conn_info(); - void post_recv_ack(rdma_info_base *info); + int post_recv_ack(rdma_info_base *info); void cq_handler(); // TODO: refactor to c++ style SendBuffer *get_send_buffer(); void release_send_buffer(SendBuffer *buffer); - SendBuffer *get_recv_buffer(); - void 
release_recv_buffer(SendBuffer *buffer); + RecvBuffer *get_recv_buffer(); + void release_recv_buffer(RecvBuffer *buffer); + + private: + bool mr_overlap(void *base_ptr, size_t ptr_region_size); + struct ibv_mr *mr_contains(void *base_ptr, size_t ptr_region_size); }; #endif // LIBINFINISTORE_H diff --git a/src/mempool.cpp b/src/mempool.cpp index 4017ff5..55e1480 100644 --- a/src/mempool.cpp +++ b/src/mempool.cpp @@ -14,12 +14,12 @@ MemoryPool::MemoryPool(size_t pool_size, size_t block_size, struct ibv_pd* pd) : pool_(nullptr), pool_size_(pool_size), block_size_(block_size), - pd_(pd), - mr_(nullptr), + total_blocks_(pool_size / block_size), last_search_position_(0), - allocated_blocks_(0) { - // calculate total blocks - total_blocks_ = pool_size_ / block_size_; + allocated_blocks_(0), + mr_(nullptr), + pd_(pd) { + // verify blocks calculation assert(pool_size % block_size == 0); INFO( diff --git a/src/meson.build b/src/meson.build index 490ed02..e944f71 100644 --- a/src/meson.build +++ b/src/meson.build @@ -9,6 +9,9 @@ default_options: [ 'debug=true' # -g ]) +add_project_arguments('-Wno-unused-parameter', language: 'cpp') + + python3 = import('python').find_installation('python3', pure: false) # C/C++ compiler interface @@ -16,7 +19,6 @@ cc = meson.get_compiler('cpp') fb_sources = [ 'meta_request.fbs', - 'allocate_response.fbs', 'delete_keys.fbs', 'get_match_last_index.fbs', 'tcp_payload_request.fbs' @@ -45,9 +47,9 @@ endforeach pybind_dep = dependency('pybind11', required: true) libuv_dep = dependency('libuv', required: true) fmt_dep = dependency('fmt', required: true) -boost_stack_dep = dependency('boost', modules: ['stacktrace_basic'], required: true) ibverbs_dep = dependency('libibverbs', required: true) - +dw_dep = dependency('libdw', required: true) +dl_dep = cc.find_library('dl', required: true) # Source files for the extension ext_sources = [ 'libinfinistore.cpp', @@ -65,6 +67,13 @@ ext_sources = [ # Build the Python extension module 
python3.extension_module('_infinistore', sources: ext_sources + generated_headers, - dependencies: [pybind_dep, libuv_dep, fmt_dep, boost_stack_dep, ibverbs_dep], + dependencies: [ + pybind_dep, + libuv_dep, + fmt_dep, + ibverbs_dep, + dl_dep, + dw_dep + ], install: true ) \ No newline at end of file diff --git a/src/meta_request.fbs b/src/meta_request.fbs index dd8a756..6302dfc 100644 --- a/src/meta_request.fbs +++ b/src/meta_request.fbs @@ -1,8 +1,9 @@ //RDMA read/write request +// allow len(blocks_size) == 1 and len(rkey) == 1 table RemoteMetaRequest { keys: [string]; - block_size: int; - rkey: uint; // rkey + block_size: [uint]; + rkey: [uint]; // rkey remote_addrs: [ulong]; // GPU addresses op: byte; } diff --git a/src/protocol.h b/src/protocol.h index db6f79d..6ec89ce 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -8,7 +8,6 @@ #include "flatbuffers/flatbuffers.h" // RDMA protocols -#include "allocate_response_generated.h" #include "meta_request_generated.h" // local TCP protocols @@ -60,6 +59,7 @@ std::string op_name(char op); #define RETRY 408 #define SYSTEM_ERROR 503 #define OUT_OF_MEMORY 507 +#define PARTIAL_SUCCESS 206 #define RETURN_CODE_SIZE sizeof(int) diff --git a/src/pybind.cpp b/src/pybind.cpp index 205d7b3..7eb2a7b 100644 --- a/src/pybind.cpp +++ b/src/pybind.cpp @@ -68,17 +68,52 @@ PYBIND11_MODULE(_infinistore, m) { [](Connection &self, const std::vector &keys, const std::vector offsets, int block_size, uintptr_t base_ptr, std::function callback) { - return self.w_rdma_async(keys, offsets, block_size, (void *)base_ptr, callback); + std::vector local_address; + for (size_t i = 0; i < keys.size(); i++) { + local_address.push_back(base_ptr + offsets[i]); + } + std::vector sizes(keys.size(), block_size); + + return self.w_rdma_async(keys, local_address, sizes, callback); }, py::call_guard(), "write rdma async") + .def( + "w_rdma_async2", + [](Connection &self, const std::vector &keys, + const std::vector &local_address, + const std::vector 
&block_sizes, std::function callback) { + return self.w_rdma_async(keys, local_address, block_sizes, callback); + }, + py::call_guard(), "write rdma async2") .def( "r_rdma_async", [](Connection &self, const std::vector &keys, const std::vector offsets, int block_size, uintptr_t base_ptr, - std::function callback) { - return self.r_rdma_async(keys, offsets, block_size, (void *)base_ptr, callback); + std::function)> py_callback) { + auto cpp_callback = [py_callback](unsigned int code, unsigned int payload[8]) { + std::vector py_payload(payload, payload + 8); + py_callback(code, py_payload); + }; + std::vector sizes(keys.size(), block_size); + std::vector local_address; + for (size_t i = 0; i < keys.size(); i++) { + local_address.push_back((uintptr_t)base_ptr + offsets[i]); + } + return self.r_rdma_async(keys, local_address, sizes, cpp_callback); }, py::call_guard(), "Read remote memory asynchronously") + .def( + "r_rdma_async2", + [](Connection &self, const std::vector &keys, + const std::vector &local_address, const std::vector &block_sizes, + std::function)> py_callback) { + auto cpp_callback = [py_callback](unsigned int code, unsigned int payload[8]) { + std::vector py_payload(payload, payload + 8); + py_callback(code, py_payload); + }; + return self.r_rdma_async(keys, local_address, block_sizes, cpp_callback); + }, + py::call_guard(), "Read remote memory asynchronously2") .def("init_connection", &Connection::init_connection, py::call_guard(), "init connection") .def("setup_rdma", &Connection::setup_rdma, py::call_guard(), diff --git a/src/utils.cpp b/src/utils.cpp index d435747..b288b70 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -9,11 +9,13 @@ #include #include -#include +#define BACKWARD_HAS_DW 1 #include #include #include +#include +#include "backward.hpp" #include "log.h" int send_exact(int socket, const void* buffer, size_t length) { @@ -93,10 +95,33 @@ void print_rdma_conn_info(rdma_conn_info_t* info, bool is_remote) { void signal_handler(int signum) 
{ INFO("Interrupt signal ({}) received.", signum); - boost::stacktrace::stacktrace st; + + using namespace backward; + StackTrace st; + st.load_here(32); + + TraceResolver tr; + tr.load_stacktrace(st); + std::ostringstream oss; - oss << st; - ERROR("Stacktrace:\n{}", oss.str()); + oss << "Detailed Stacktrace:\n"; + + for (size_t i = 0; i < st.size(); ++i) { + ResolvedTrace trace = tr.resolve(st[i]); + oss << "#" << i << " "; + + if (!trace.object_function.empty()) { + oss << trace.object_function; + } + + if (!trace.source.filename.empty()) { + oss << " at " << trace.source.filename << ":" << trace.source.line; + } + + oss << "\n"; + } + + ERROR("{}", oss.str()); exit(1); } @@ -113,3 +138,25 @@ template void print_vector(float* ptr, size_t size); template void print_vector(double* ptr, size_t size); template void print_vector(int* ptr, size_t size); template void print_vector(char* ptr, size_t size); + +// Buffer implementation +Buffer::Buffer(struct ibv_pd* pd, size_t size) : size_(size) { + if (posix_memalign(&buffer_, 4096, size) != 0) { + throw std::runtime_error("Failed to allocate buffer"); + } + + mr_ = ibv_reg_mr(pd, buffer_, size, IBV_ACCESS_LOCAL_WRITE); + if (!mr_) { + free(buffer_); + throw std::runtime_error("Failed to register memory region"); + } +} + +Buffer::~Buffer() { + if (mr_) { + ibv_dereg_mr(mr_); + } + if (buffer_) { + free(buffer_); + } +} diff --git a/src/utils.h b/src/utils.h index 42a1020..135c77f 100644 --- a/src/utils.h +++ b/src/utils.h @@ -9,6 +9,17 @@ #include "protocol.h" +// Generic RDMA buffer for both send and receive operations +struct Buffer { + void *buffer_ = NULL; + struct ibv_mr *mr_ = NULL; + size_t size_; + + Buffer(struct ibv_pd *pd, size_t size); + Buffer(const Buffer &) = delete; + ~Buffer(); +}; + int send_exact(int socket, const void *buffer, size_t length); int recv_exact(int socket, void *buffer, size_t length);