Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 59 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
cmake_minimum_required(VERSION 3.15...3.27)
project(libCacheSim-python)
set(DESCRIPTION "The libCacheSim Python Package")
set(PROJECT_WEB "http://cachemon.github.io/libCacheSim-python")
set(PROJECT_WEB "https://docs.libcachesim.com/python")

# Auto-initialize submodules if not already done
# Options from the libCacheSim library.
# Each switch is passed to the libCacheSim configure command (as -D<OPTION>=...)
# and also controls which optional dependencies this file looks up.
option(ENABLE_GLCACHE "enable group-learned cache" ON)
option(ENABLE_LRB "enable LRB" ON)
option(ENABLE_3L_CACHE "enable 3LCache" ON)

# Fall back to a Debug build when no build type was supplied by the caller.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Debug CACHE STRING "Debug" FORCE)
endif()

# Report the effective build type in the configure log.
message(STATUS "CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE}")

# 1. Auto-initialize submodules if not already done
find_package(Git QUIET)
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
# Check if submodule is initialized
Expand All @@ -18,33 +29,31 @@ if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
endif()
endif()

# Auto-build libCacheSim if needed
# 2. Auto-build libCacheSim if needed
set(LIBCACHESIM_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/libCacheSim")
if(NOT EXISTS "${LIBCACHESIM_SOURCE_DIR}/CMakeLists.txt")
message(FATAL_ERROR "libCacheSim submodule not found. Please run 'git submodule update --init --recursive'")
endif()

# Build libCacheSim first
set(LIBCACHESIM_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/libCacheSim/build")
if(NOT EXISTS "${LIBCACHESIM_BUILD_DIR}/export_vars.cmake")
message(STATUS "Building libCacheSim...")
execute_process(
COMMAND ${CMAKE_COMMAND} -S ${LIBCACHESIM_SOURCE_DIR} -B ${LIBCACHESIM_BUILD_DIR} -G Ninja
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE CMAKE_CONFIG_RESULT
)
if(NOT CMAKE_CONFIG_RESULT EQUAL "0")
message(FATAL_ERROR "Failed to configure libCacheSim")
endif()
message(STATUS "Building libCacheSim...")
execute_process(
COMMAND ${CMAKE_COMMAND} -S ${LIBCACHESIM_SOURCE_DIR} -B ${LIBCACHESIM_BUILD_DIR} -G Ninja -DENABLE_LRB=${ENABLE_LRB} -DENABLE_GLCACHE=${ENABLE_GLCACHE} -DENABLE_3L_CACHE=${ENABLE_3L_CACHE} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE CMAKE_CONFIG_RESULT
)
if(NOT CMAKE_CONFIG_RESULT EQUAL "0")
message(FATAL_ERROR "Failed to configure libCacheSim")
endif()

execute_process(
COMMAND ${CMAKE_COMMAND} --build ${LIBCACHESIM_BUILD_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE CMAKE_BUILD_RESULT
)
if(NOT CMAKE_BUILD_RESULT EQUAL "0")
message(FATAL_ERROR "Failed to build libCacheSim")
endif()
execute_process(
COMMAND ${CMAKE_COMMAND} --build ${LIBCACHESIM_BUILD_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE CMAKE_BUILD_RESULT
)
if(NOT CMAKE_BUILD_RESULT EQUAL "0")
message(FATAL_ERROR "Failed to build libCacheSim")
endif()

# Note(haocheng): now we still utilize the exported cache from
Expand Down Expand Up @@ -100,6 +109,34 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
find_package(pybind11 CONFIG REQUIRED)

set(optional_dependency_libs "")

# Find optional deps
if(ENABLE_GLCACHE)
find_package(xgboost REQUIRED)
include_directories(${XGBOOST_INCLUDE_DIR})
list(APPEND optional_dependency_libs xgboost::xgboost)
add_compile_definitions(ENABLE_GLCACHE=1)
message(STATUS "XGBOOST_INCLUDE_DIR=${XGBOOST_INCLUDE_DIR}")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Consider checking that XGBOOST_INCLUDE_DIR is actually set before passing it to include_directories(). If find_package(xgboost) exposes its headers only through the imported xgboost::xgboost target, the variable is empty and the call adds nothing.

if(ENABLE_GLCACHE)
    find_package(xgboost REQUIRED)
    if(XGBOOST_INCLUDE_DIR)
        include_directories(${XGBOOST_INCLUDE_DIR})
    endif()
    list(APPEND optional_dependency_libs xgboost::xgboost)
    add_compile_definitions(ENABLE_GLCACHE=1)
    message(STATUS "XGBOOST_INCLUDE_DIR=${XGBOOST_INCLUDE_DIR}")
endif()

endif()

# LRB and 3LCache both depend on LightGBM. Locate it once when either feature
# is enabled, so the library is appended to optional_dependency_libs a single
# time instead of once per enabled feature.
if(ENABLE_LRB OR ENABLE_3L_CACHE)
  find_path(LIGHTGBM_PATH LightGBM)
  if(NOT LIGHTGBM_PATH)
    message(FATAL_ERROR "LIGHTGBM_PATH not found")
  endif()
  # include_directories(${LIGHTGBM_PATH})

  find_library(LIGHTGBM_LIB _lightgbm)
  if(NOT LIGHTGBM_LIB)
    message(FATAL_ERROR "LIGHTGBM_LIB not found")
  endif()
  list(APPEND optional_dependency_libs ${LIGHTGBM_LIB})
endif()

# Define <FEATURE>=1 for every enabled LightGBM-backed feature.
foreach(FEATURE ENABLE_LRB ENABLE_3L_CACHE)
  if(${FEATURE})
    add_compile_definitions(${FEATURE}=1)
  endif()
endforeach()

# Include directories for dependencies
include_directories(${GLib_INCLUDE_DIRS})
include_directories(${GLib_CONFIG_INCLUDE_DIR})
Expand Down Expand Up @@ -160,6 +197,7 @@ target_link_libraries(libcachesim_python PRIVATE
pybind11::module
${GLib_LIBRARIES}
${ZSTD_LIBRARIES}
${optional_dependency_libs}
)

# Add platform-specific link options and libraries
Expand Down
22 changes: 15 additions & 7 deletions libcachesim/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ def __init__(
_cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata))
)

def insert(self, req: Request) -> Optional[CacheObject]:
return super().insert(req)

class TwoQ(CacheBase):
"""2Q replacement algorithm
Expand Down Expand Up @@ -360,7 +362,7 @@ def __init__(
update_weight: bool = True,
lru_weight: float = 0.5,
):
cache_specific_params = f"update-weight={update_weight}, lru-weight={lru_weight}"
cache_specific_params = f"update-weight={int(update_weight)}, lru-weight={lru_weight}"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

It's good to see the explicit conversion of update_weight to an integer. Without it, the f-string renders the bool as True/False, so the parameter string would read update-weight=True instead of update-weight=1. Converting first ensures the value passed to the underlying library is numeric and avoids unexpected behavior or parse errors.

cache_specific_params = f"update-weight={int(update_weight)}, lru-weight={lru_weight}"

super().__init__(
_cache=LeCaR_init(
_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params
Expand All @@ -383,7 +385,7 @@ class ClockPro(CacheBase):
"""Clock-Pro replacement algorithm

Special parameters:
init_req: initial reference count (default: 0)
init_ref: initial reference count (default: 0)
init_ratio_cold: initial ratio of cold pages (default: 1)
"""

Expand All @@ -393,10 +395,10 @@ def __init__(
default_ttl: int = 86400 * 300,
hashpower: int = 24,
consider_obj_metadata: bool = False,
init_req: int = 0,
init_ref: int = 0,
init_ratio_cold: float = 0.5,
):
cache_specific_params = f"init-req={init_req}, init-ratio-cold={init_ratio_cold}"
cache_specific_params = f"init-ref={init_ref}, init-ratio-cold={init_ratio_cold}"
super().__init__(
_cache=ClockPro_init(
_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params
Expand Down Expand Up @@ -451,13 +453,19 @@ def __init__(


class LRUProb(CacheBase):
"""LRU with Probabilistic Replacement (no special parameters)"""
"""LRU with Probabilistic Replacement

Special parameters:
prob: probability of promoting an object to the head of the queue (default: 0.5)
"""

def __init__(
self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False
self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False,
prob: float = 0.5,
):
cache_specific_params = f"prob={prob}"
super().__init__(
_cache=LRU_Prob_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata))
_cache=LRU_Prob_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params)
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ environment = { LCS_BUILD_DIR = "{project}/src/libCacheSim/build", MACOSX_DEPLOY
test-command = "python -c 'import libcachesim; print(\"Import successful\")'"

[tool.cibuildwheel.linux]
before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel"
before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel xgboost-devel lightgbm-devel"
before-build = "pip install pybind11 && git submodule update --init --recursive && python {project}/scripts/smart_build.py"

[tool.cibuildwheel.macos]
Expand Down
10 changes: 2 additions & 8 deletions scripts/install.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
git submodule update --init --recursive
# Sync submodules
git submodule update --recursive --remote
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Consider adding a check to ensure that the git submodule update command succeeds. This can be done by checking the exit code of the command and exiting the script with an error message if it fails.

Suggested change
git submodule update --recursive --remote
git submodule update --recursive --remote
if [ $? -ne 0 ]; then
echo "Error: git submodule update failed"
exit 1
fi


# Build the main libCacheSim C++ library first
echo "Building main libCacheSim library..."
pushd src/libCacheSim
bash scripts/install_dependency.sh
rm -rf build
cmake -G Ninja -B build
ninja -C build
popd

# Now build and install the Python binding
echo "Building Python binding..."
Expand Down
33 changes: 21 additions & 12 deletions src/export_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ auto make_cache_wrapper(const std::string& fn_name) {
cache_t* ptr = InitFn(cc_params, params_cstr);
return std::unique_ptr<cache_t, CacheDeleter>(ptr);
},
"cc_params"_a, "cache_specific_params"_a = "");
"cc_params"_a, "cache_specific_params"_a = "");
};
}

Expand All @@ -280,7 +280,8 @@ void export_cache(py::module& m) {
.def(
"find",
[](cache_t& self, const request_t& req, const bool update_cache) {
return self.find(&self, &req, update_cache);
cache_obj_t* obj = self.find(&self, &req, update_cache);
return py::cast(obj, py::return_value_policy::reference);
Comment on lines +283 to +284
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using py::cast with py::return_value_policy::reference returns a non-owning reference to the existing C++ object: Python does not take ownership and will never delete it. Note that this policy does not keep the C++ object alive; the cache remains responsible for its lifetime, so the returned object must not be used from Python after the cache has evicted or freed it.

Suggested change
cache_obj_t* obj = self.find(&self, &req, update_cache);
return py::cast(obj, py::return_value_policy::reference);
cache_obj_t* obj = self.find(&self, &req, update_cache);
return py::cast(obj, py::return_value_policy::reference);

},
"req"_a, "update_cache"_a = true)
.def(
Expand All @@ -289,16 +290,23 @@ void export_cache(py::module& m) {
return self.can_insert(&self, &req);
},
"req"_a)
.def(
"insert",
[](cache_t& self, const request_t& req) {
return self.insert(&self, &req);
},
"req"_a)
.def(
"insert",
[](cache_t& self, const request_t& req) -> std::optional<cache_obj_t*> {
cache_obj_t* inserted = self.insert(&self, &req);
if (inserted == nullptr) {
return std::nullopt;
}
return inserted;
},
"req"_a,
py::return_value_policy::reference // optional still respected
)

.def(
"need_eviction",
[](cache_t& self, const request_t& req) {
return self.need_eviction(&self, &req);
return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;
},
Comment on lines +309 to 310
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

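The condition self.get_occupied_byte(&self) + req.obj_size > self.cache_size is used to determine if eviction is needed: it is true only when the bytes already occupied plus the size of the incoming object exceed the cache size, so callers avoid unnecessary eviction attempts when there is enough space. Note that it replaces the call to the cache's own need_eviction hook with a generic size check.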

Suggested change
return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;
},
return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;

"req"_a)
.def(
Expand All @@ -316,7 +324,8 @@ void export_cache(py::module& m) {
.def(
"to_evict",
[](cache_t& self, const request_t& req) {
return self.to_evict(&self, &req);
cache_obj_t* obj = self.to_evict(&self, &req);
return py::cast(obj, py::return_value_policy::reference);
},
"req"_a)
.def("get_occupied_byte",
Expand Down Expand Up @@ -348,7 +357,7 @@ void export_cache(py::module& m) {
params->default_ttl = default_ttl;
params->hashpower = hashpower;
params->consider_obj_metadata = consider_obj_metadata;
return params;
return std::unique_ptr<common_cache_params_t, CommonCacheParamsDeleter>(params);
}),
"cache_size"_a, "default_ttl"_a = 86400 * 300, "hashpower"_a = 24,
"consider_obj_metadata"_a = false)
Expand Down Expand Up @@ -407,7 +416,7 @@ void export_cache(py::module& m) {
req->hv = hv;
req->next_access_vtime = next_access_vtime;
req->ttl = ttl;
return req;
return std::unique_ptr<request_t, RequestDeleter>(req);
}),
"obj_size"_a = 1, "op"_a = OP_NOP, "valid"_a = true, "obj_id"_a = 0,
"clock_time"_a = 0, "hv"_a = 0, "next_access_vtime"_a = -2,
Expand Down
Loading
Loading