cacheMon · haochengxia · Aug 2, 2025 · Aug 1, 2025 · Aug 1, 2025 · Aug 1, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,9 +1,20 @@
 cmake_minimum_required(VERSION 3.15...3.27)
 project(libCacheSim-python)
 set(DESCRIPTION "The libCacheSim Python Package")
-set(PROJECT_WEB "http://cachemon.github.io/libCacheSim-python")
+set(PROJECT_WEB "https://docs.libcachesim.com/python")
 
-# Auto-initialize submodules if not already done
+# Options from the libCacheSim library
+option(ENABLE_GLCACHE "enable group-learned cache" ON)
+option(ENABLE_LRB "enable LRB" ON)
+option(ENABLE_3L_CACHE "enable 3LCache" ON)
+
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug CACHE STRING "Debug" FORCE)
+endif()
+
+message(STATUS "CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE}")
+
+# 1. Auto-initialize submodules if not already done
 find_package(Git QUIET)
 if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
     # Check if submodule is initialized
@@ -18,33 +29,31 @@ if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
     endif()
 endif()
 
-# Auto-build libCacheSim if needed
+# 2. Auto-build libCacheSim if needed
 set(LIBCACHESIM_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/libCacheSim")
 if(NOT EXISTS "${LIBCACHESIM_SOURCE_DIR}/CMakeLists.txt")
     message(FATAL_ERROR "libCacheSim submodule not found. Please run 'git submodule update --init --recursive'")
 endif()
 
 # Build libCacheSim first
 set(LIBCACHESIM_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/libCacheSim/build")
-if(NOT EXISTS "${LIBCACHESIM_BUILD_DIR}/export_vars.cmake")
-    message(STATUS "Building libCacheSim...")
-    execute_process(
-        COMMAND ${CMAKE_COMMAND} -S ${LIBCACHESIM_SOURCE_DIR} -B ${LIBCACHESIM_BUILD_DIR} -G Ninja
-        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-        RESULT_VARIABLE CMAKE_CONFIG_RESULT
-    )
-    if(NOT CMAKE_CONFIG_RESULT EQUAL "0")
-        message(FATAL_ERROR "Failed to configure libCacheSim")
-    endif()
+message(STATUS "Building libCacheSim...")
+execute_process(
+    COMMAND ${CMAKE_COMMAND} -S ${LIBCACHESIM_SOURCE_DIR} -B ${LIBCACHESIM_BUILD_DIR} -G Ninja -DENABLE_LRB=${ENABLE_LRB} -DENABLE_GLCACHE=${ENABLE_GLCACHE} -DENABLE_3L_CACHE=${ENABLE_3L_CACHE} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    RESULT_VARIABLE CMAKE_CONFIG_RESULT
+)
+if(NOT CMAKE_CONFIG_RESULT EQUAL "0")
+    message(FATAL_ERROR "Failed to configure libCacheSim")
+endif()
 
-    execute_process(
-        COMMAND ${CMAKE_COMMAND} --build ${LIBCACHESIM_BUILD_DIR}
-        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-        RESULT_VARIABLE CMAKE_BUILD_RESULT
-    )
-    if(NOT CMAKE_BUILD_RESULT EQUAL "0")
-        message(FATAL_ERROR "Failed to build libCacheSim")
-    endif()
+execute_process(
+    COMMAND ${CMAKE_COMMAND} --build ${LIBCACHESIM_BUILD_DIR}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    RESULT_VARIABLE CMAKE_BUILD_RESULT
+)
+if(NOT CMAKE_BUILD_RESULT EQUAL "0")
+    message(FATAL_ERROR "Failed to build libCacheSim")
 endif()
 
 # Note(haocheng): now we still utilize the exported cache from 
@@ -100,6 +109,34 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
 find_package(pybind11 CONFIG REQUIRED)
 
+set(optional_dependency_libs "")
+
+# Find optional deps
+if(ENABLE_GLCACHE)
+    find_package(xgboost REQUIRED)
+    include_directories(${XGBOOST_INCLUDE_DIR})
+    list(APPEND optional_dependency_libs xgboost::xgboost)
+    add_compile_definitions(ENABLE_GLCACHE=1)
+    message(STATUS "XGBOOST_INCLUDE_DIR=${XGBOOST_INCLUDE_DIR}")
+endif()
+
+# LRB and 3LCache both need LightGBM; de-duplicate so it is linked once when both are ON.
+foreach(FEATURE ENABLE_LRB ENABLE_3L_CACHE)
+    if(${FEATURE})
+        find_path(LIGHTGBM_PATH LightGBM)
+        if(NOT LIGHTGBM_PATH)
+            message(FATAL_ERROR "LIGHTGBM_PATH not found")
+        endif()
+        find_library(LIGHTGBM_LIB _lightgbm)
+        if(NOT LIGHTGBM_LIB)
+            message(FATAL_ERROR "LIGHTGBM_LIB not found")
+        endif()
+        list(APPEND optional_dependency_libs ${LIGHTGBM_LIB})
+        list(REMOVE_DUPLICATES optional_dependency_libs)
+        add_compile_definitions(${FEATURE}=1)
+    endif()
+endforeach()
+
 # Include directories for dependencies
 include_directories(${GLib_INCLUDE_DIRS})
 include_directories(${GLib_CONFIG_INCLUDE_DIR})
@@ -160,6 +197,7 @@ target_link_libraries(libcachesim_python PRIVATE
     pybind11::module
     ${GLib_LIBRARIES}
     ${ZSTD_LIBRARIES}
+    ${optional_dependency_libs}
 )
 
 # Add platform-specific link options and libraries

diff --git a/libcachesim/cache.py b/libcachesim/cache.py
@@ -281,6 +281,8 @@ def __init__(
             _cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata))
         )
 
+    def insert(self, req: Request) -> Optional[CacheObject]:
+        return super().insert(req)
 
 class TwoQ(CacheBase):
     """2Q replacement algorithm
@@ -360,7 +362,7 @@ def __init__(
         update_weight: bool = True,
         lru_weight: float = 0.5,
     ):
-        cache_specific_params = f"update-weight={update_weight}, lru-weight={lru_weight}"
+        cache_specific_params = f"update-weight={int(update_weight)}, lru-weight={lru_weight}"
         super().__init__(
             _cache=LeCaR_init(
                 _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params
@@ -383,7 +385,7 @@ class ClockPro(CacheBase):
     """Clock-Pro replacement algorithm
 
     Special parameters:
-    init_req: initial reference count (default: 0)
+    init_ref: initial reference count (default: 0)
     init_ratio_cold: initial ratio of cold pages (default: 1)
     """
 
@@ -393,10 +395,10 @@ def __init__(
         default_ttl: int = 86400 * 300,
         hashpower: int = 24,
         consider_obj_metadata: bool = False,
-        init_req: int = 0,
+        init_ref: int = 0,
         init_ratio_cold: float = 0.5,
     ):
-        cache_specific_params = f"init-req={init_req}, init-ratio-cold={init_ratio_cold}"
+        cache_specific_params = f"init-ref={init_ref}, init-ratio-cold={init_ratio_cold}"
         super().__init__(
             _cache=ClockPro_init(
                 _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params
@@ -451,13 +453,19 @@ def __init__(
 
 
 class LRUProb(CacheBase):
-    """LRU with Probabilistic Replacement (no special parameters)"""
+    """LRU with Probabilistic Replacement
+
+    Special parameters:
+    prob: probability of promoting an object to the head of the queue (default: 0.5)
+    """
 
     def __init__(
-        self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False
+        self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False,
+        prob: float = 0.5,
     ):
+        cache_specific_params = f"prob={prob}"
         super().__init__(
-            _cache=LRU_Prob_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata))
+            _cache=LRU_Prob_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params)
         )
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -86,7 +86,7 @@ environment = { LCS_BUILD_DIR = "{project}/src/libCacheSim/build", MACOSX_DEPLOY
 test-command = "python -c 'import libcachesim; print(\"Import successful\")'"
 
 [tool.cibuildwheel.linux]
-before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel"
+before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y ninja-build cmake libzstd-devel glib2-devel xgboost-devel lightgbm-devel"
 before-build = "pip install pybind11 && git submodule update --init --recursive && python {project}/scripts/smart_build.py"
 
 [tool.cibuildwheel.macos]

diff --git a/scripts/install.sh b/scripts/install.sh
@@ -1,13 +1,7 @@
 git submodule update --init --recursive
+# Sync submodules
+git submodule update --recursive --remote
-git submodule update --recursive --remote
+git submodule update --recursive --remote
+if [ $? -ne 0 ]; then
+  echo "Error: git submodule update failed"
+  exit 1
+fi
-git submodule update --recursive --remote
+git submodule update --recursive --remote
+if [ $? -ne 0 ]; then
+  echo "Error: git submodule update failed"
+  exit 1
+fi
 
-# Build the main libCacheSim C++ library first
-echo "Building main libCacheSim library..."
-pushd src/libCacheSim
-bash scripts/install_dependency.sh
-rm -rf build
-cmake -G Ninja -B build
-ninja -C build
-popd
 
 # Now build and install the Python binding
 echo "Building Python binding..."

diff --git a/src/export_cache.cpp b/src/export_cache.cpp
@@ -256,7 +256,7 @@ auto make_cache_wrapper(const std::string& fn_name) {
           cache_t* ptr = InitFn(cc_params, params_cstr);
           return std::unique_ptr<cache_t, CacheDeleter>(ptr);
         },
-        "cc_params"_a, "cache_specific_params"_a = "");
+        "cc_params"_a, "cache_specific_params"_a = ""); 
   };
 }
 
@@ -280,7 +280,8 @@ void export_cache(py::module& m) {
       .def(
           "find",
           [](cache_t& self, const request_t& req, const bool update_cache) {
-            return self.find(&self, &req, update_cache);
+            cache_obj_t* obj = self.find(&self, &req, update_cache);
+            return py::cast(obj, py::return_value_policy::reference);
-            cache_obj_t* obj = self.find(&self, &req, update_cache);
-            return py::cast(obj, py::return_value_policy::reference);
+cache_obj_t* obj = self.find(&self, &req, update_cache);
+            return py::cast(obj, py::return_value_policy::reference);
-            cache_obj_t* obj = self.find(&self, &req, update_cache);
-            return py::cast(obj, py::return_value_policy::reference);
+cache_obj_t* obj = self.find(&self, &req, update_cache);
+            return py::cast(obj, py::return_value_policy::reference);
           },
           "req"_a, "update_cache"_a = true)
       .def(
@@ -289,16 +290,23 @@ void export_cache(py::module& m) {
             return self.can_insert(&self, &req);
           },
           "req"_a)
-      .def(
-          "insert",
-          [](cache_t& self, const request_t& req) {
-            return self.insert(&self, &req);
-          },
-          "req"_a)
+      .def(
+          "insert",
+          [](cache_t& self, const request_t& req) {
+            // insert() may return nullptr; pybind11 converts a null pointer
+            // to None. return_value_policy::reference keeps Python from
+            // taking ownership of the returned object, matching how the
+            // find and to_evict bindings return their cache_obj_t*.
+            cache_obj_t* inserted = self.insert(&self, &req);
+            return py::cast(inserted, py::return_value_policy::reference);
+          },
+          "req"_a)
       .def(
           "need_eviction",
           [](cache_t& self, const request_t& req) {
-            return self.need_eviction(&self, &req);
+            return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;
           },
-            return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;
-          },
+return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;
-            return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;
-          },
+return self.get_occupied_byte(&self) + req.obj_size > self.cache_size;
           "req"_a)
       .def(
@@ -316,7 +324,8 @@ void export_cache(py::module& m) {
       .def(
           "to_evict",
           [](cache_t& self, const request_t& req) {
-            return self.to_evict(&self, &req);
+            cache_obj_t* obj = self.to_evict(&self, &req);
+            return py::cast(obj, py::return_value_policy::reference);
           },
           "req"_a)
       .def("get_occupied_byte",
@@ -348,7 +357,7 @@ void export_cache(py::module& m) {
              params->default_ttl = default_ttl;
              params->hashpower = hashpower;
              params->consider_obj_metadata = consider_obj_metadata;
-             return params;
+             return std::unique_ptr<common_cache_params_t, CommonCacheParamsDeleter>(params);
            }),
            "cache_size"_a, "default_ttl"_a = 86400 * 300, "hashpower"_a = 24,
            "consider_obj_metadata"_a = false)
@@ -407,7 +416,7 @@ void export_cache(py::module& m) {
              req->hv = hv;
              req->next_access_vtime = next_access_vtime;
              req->ttl = ttl;
-             return req;
+             return std::unique_ptr<request_t, RequestDeleter>(req);
            }),
            "obj_size"_a = 1, "op"_a = OP_NOP, "valid"_a = true, "obj_id"_a = 0,
            "clock_time"_a = 0, "hv"_a = 0, "next_access_vtime"_a = -2,

diff --git a/src/libCacheSim b/src/libCacheSim
+36 −77		README.md
+ −		doc/assets/logo.jpg
+ −		doc/assets/logo_circle.png
+12 −12		libCacheSim/cache/eviction/3LCache/ThreeLCache.cpp
+4 −2		libCacheSim/cache/eviction/3LCache/ThreeLCache.hpp
+1 −1		libCacheSim/cache/eviction/3LCache/cache.h
+4 −3		libCacheSim/cache/eviction/GLCache/GLCache.c
+0 −1		libCacheSim/cache/eviction/GLCache/README
+2 −2		libCacheSim/cache/eviction/GLCache/train.c
+1 −1		libCacheSim/cache/eviction/LRB/cache.h
+12 −11		libCacheSim/cache/eviction/LRB/lrb.cpp
+1 −1		libCacheSim/cache/eviction/LRB/lrb.h