From 26f73c9f8a549228433682a62d140a4454769c23 Mon Sep 17 00:00:00 2001 From: Songhao Jia Date: Tue, 17 Mar 2026 10:34:14 -0700 Subject: [PATCH] prefetch data blob before .so loading Differential Revision: D96948202 --- backends/cuda/runtime/cuda_backend.cpp | 49 ++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp index 5960142d2b4..a6092c0b32d 100644 --- a/backends/cuda/runtime/cuda_backend.cpp +++ b/backends/cuda/runtime/cuda_backend.cpp @@ -13,6 +13,11 @@ #include #include #include +#if !defined(_WIN32) +#include +#include +#include +#endif #include #include @@ -275,6 +280,45 @@ class ET_EXPERIMENTAL CudaBackend final method_name.empty() ? "so_blob" : method_name + "_so_blob"; const NamedDataMap* named_data_map = context.get_named_data_map(); + ET_CHECK_OR_RETURN_ERROR( + named_data_map != nullptr, + Internal, + "CudaBackend requires a NamedDataMap for weight loading"); + + // Prefetch the weights blob — trigger async readahead so pages are + // resident by the time update_constants_from_blob memcpy's them. + // This overlaps disk I/O with the .so write + dlopen. + std::string weights_blob_key = + method_name.empty() ? "weights_blob" : method_name + "_weights_blob"; +#if !defined(_WIN32) + { + auto prefetch_buf = named_data_map->get_data(weights_blob_key.c_str()); + if (prefetch_buf.ok() && prefetch_buf->data() != nullptr) { + uintptr_t addr = reinterpret_cast(prefetch_buf->data()); + size_t page_size = getpagesize(); + uintptr_t aligned_addr = addr & ~(page_size - 1); + size_t aligned_size = prefetch_buf->size() + (addr - aligned_addr); + int ret = madvise( + reinterpret_cast(aligned_addr), + aligned_size, + MADV_WILLNEED); + if (ret != 0) { + ET_LOG( + Info, + "CudaBackend::init - madvise(MADV_WILLNEED) failed for %s: %s", + weights_blob_key.c_str(), + strerror(errno)); + } else { + ET_LOG( + Info, + "CudaBackend::init - Prefetching %s (%.1f MB)", + weights_blob_key.c_str(), + prefetch_buf->size() / (1024.0 * 1024.0)); + } + } + } +#endif + auto aoti_dso_buffer = named_data_map->get_data(so_blob_key.c_str()); ET_CHECK_OR_RETURN_ERROR( aoti_dso_buffer.ok(), @@ -338,9 +382,8 @@ class ET_EXPERIMENTAL CudaBackend final handle->container_handle = container_handle; - // Look into named data map for constant data - std::string weights_blob_key = - method_name.empty() ? "weights_blob" : method_name + "_weights_blob"; + // Look into named data map for constant data (key computed above for + // prefetch) auto buffer_res = named_data_map->get_data(weights_blob_key.c_str()); if (buffer_res.ok() && handle->update_constants_from_blob != nullptr) { ET_LOG(Info, "Found %s in named data map", weights_blob_key.c_str());