diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 74af5dd..7fd4104 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -8,6 +8,6 @@ jobs: env: SKIP: no-commit-to-branch steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v3 - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/run-test.yml b/.github/workflows/run-test.yml index 2567747..3e342bc 100644 --- a/.github/workflows/run-test.yml +++ b/.github/workflows/run-test.yml @@ -8,6 +8,7 @@ jobs: steps: - uses: actions/checkout@v4 with: + submodules: recursive fetch-depth: 0 - name: Setup Python uses: actions/setup-python@v5 @@ -17,7 +18,6 @@ jobs: run: | pip install --upgrade pip setuptools wheel sudo apt-get install lcov - pip install pybind11 numpy FLAGS="-fprofile-arcs -ftest-coverage" export CFLAGS="$FLAGS" export CXXFLAGS="$FLAGS" diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e95f803..6f98af7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -13,23 +13,19 @@ jobs: name: Build source distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 + - uses: actions/checkout@v4 - uses: actions/setup-python@v3 name: Install Python with: python-version: '3.13' - name: Build sdist - run: pip install pybind11 setuptools_scm && python setup.py sdist + run: pip install build && python -m build --sdist - uses: actions/upload-artifact@v4 with: path: dist/*.tar.gz build_wheels: name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - env: - CIBW_ENVIRONMENT: "${{ matrix.cibw.env || '' }}" + runs-on: ${{ matrix.runs-on }} strategy: matrix: include: @@ -41,29 +37,29 @@ jobs: runs-on: ubuntu-24.04-arm cibw: cflags: '' -# - os: windows-intel -# runs-on: windows-latest -# cibw: -# cflags: '' -# - os: windows-arm -# runs-on: windows-11-arm -# cibw: -# cflags: '' -# - os: macos-intel -# runs-on: 
macos-13 -# cibw: -# cflags: '' -# - os: macos-arm -# runs-on: macos-latest -# cibw: -# cflags: '' + - os: windows-intel + runs-on: windows-latest + cibw: + cflags: '' + - os: macos-intel + runs-on: macos-13 + cibw: + cflags: '' + - os: macos-arm + runs-on: macos-latest + cibw: + cflags: '' steps: - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 - name: Build wheels uses: pypa/cibuildwheel@v3.0.1 env: CIBW_PLATFORM: ${{ matrix.platform || 'auto' }} CFLAGS: ${{ matrix.cibw.cflags }} + CXXFLAGS: ${{ matrix.cibw.cflags }} - uses: actions/upload-artifact@v4 with: name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} @@ -71,26 +67,25 @@ jobs: upload_pypi: needs: [build_wheels, build_sdist] runs-on: ubuntu-latest + permissions: + id-token: write steps: - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: - name: artifact + pattern: cibw-* path: dist + merge-multiple: true - name: Publish package to TestPyPI - uses: pypa/gh-action-pypi-publish@master + uses: pypa/gh-action-pypi-publish@release/v1 with: - user: __token__ - password: ${{ secrets.TEST_PYPI_APITOKEN }} packages_dir: dist/ repository_url: https://test.pypi.org/legacy/ verbose: true skip_existing: true - name: Publish package to PyPI if: github.event_name == 'release' - uses: pypa/gh-action-pypi-publish@master + uses: pypa/gh-action-pypi-publish@release/v1 with: - user: __token__ - password: ${{ secrets.PYPI_APITOKEN }} packages_dir: dist/ verbose: true skip_existing: true diff --git a/.gitignore b/.gitignore index 0e75210..fe2ebf8 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ docs/build/ docs/source/api_reference/ .cache/ .coverage +src/irspack/_version.py diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e69de29 diff --git a/CMakeLists.txt b/CMakeLists.txt index 0cb2047..7411084 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,20 +1,46 @@ -cmake_minimum_required(VERSION 3.0.0) -project(rs_evaluation 
VERSION 0.1.0) -set(CMAKE_BUILD_TYPE DEBUG) -set(CMAKE_CXX_FLAGS "-std=c++11 -march=native -fPIC -O0 -g") +cmake_minimum_required(VERSION 3.15...3.27) +project(irspack VERSION 0.1.0) + +if (CMAKE_VERSION VERSION_LESS 3.18) + set(DEV_MODULE Development) +else() + set(DEV_MODULE Development.Module) +endif() + + +if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/eigen-3.4.0") + file(DOWNLOAD https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip "${CMAKE_CURRENT_BINARY_DIR}/eigen-3.4.0.zip") + file(ARCHIVE_EXTRACT INPUT "${CMAKE_CURRENT_BINARY_DIR}/eigen-3.4.0.zip") +endif() find_package(Threads REQUIRED) -include_directories(eigen-3.3.7 cpp_source) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +include_directories("${CMAKE_BINARY_DIR}/eigen-3.4.0" cpp_source) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CPACK_PROJECT_NAME ${PROJECT_NAME}) set(CPACK_PROJECT_VERSION ${PROJECT_VERSION}) include(CPack) -add_subdirectory(pybind11) -pybind11_add_module(_ials cpp_source/als/wrapper.cpp) -#pybind11_add_module(irspack.recommenders._knn cpp_source/knn/wrapper.cpp) -#pybind11_add_module(_util_cpp cpp_source/util.cpp) -#pybind11_add_module(irspack._evapuator cpp_source/evaluator.cpp) -#pybind11_add_module(irspack._rwr cpp_source/rws.cpp) + +# Detect the installed nanobind package and import it into CMake +find_package(Python 3.8 COMPONENTS Interpreter ${DEV_MODULE} REQUIRED) +execute_process( + COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT) +find_package(nanobind CONFIG REQUIRED) + +nanobind_add_module(_ials_core cpp_source/als/wrapper.cpp) +nanobind_add_module(_knn cpp_source/knn/wrapper.cpp) +nanobind_add_module(_util_cpp cpp_source/util.cpp) 
+nanobind_add_module(_core_evaluator cpp_source/evaluator.cpp) + +install(TARGETS _ials_core LIBRARY DESTINATION irspack/recommenders) +install(TARGETS _core_evaluator LIBRARY DESTINATION irspack/evaluation) +install(TARGETS _util_cpp LIBRARY DESTINATION irspack/utils) +install(TARGETS _knn LIBRARY DESTINATION irspack/recommenders) diff --git a/cpp_source/als/definitions.hpp b/cpp_source/als/definitions.hpp index ca975ab..2705fef 100644 --- a/cpp_source/als/definitions.hpp +++ b/cpp_source/als/definitions.hpp @@ -1,6 +1,5 @@ #include #include -#include namespace irspack { namespace ials { diff --git a/cpp_source/als/wrapper.cpp b/cpp_source/als/wrapper.cpp index f4e4853..7ea2b40 100644 --- a/cpp_source/als/wrapper.cpp +++ b/cpp_source/als/wrapper.cpp @@ -1,21 +1,19 @@ #include "IALSLearningConfig.hpp" #include "IALSTrainer.hpp" -#include "pybind11/cast.h" #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include -namespace py = pybind11; using namespace irspack::ials; -using std::vector; +using namespace nanobind; -PYBIND11_MODULE(_ials, m) { +NB_MODULE(_ials_core, m) { std::stringstream doc_stream; doc_stream << "irspack's core module for \"IALSRecommender\"." 
<< std::endl << "Built to use" << std::endl @@ -23,42 +21,37 @@ PYBIND11_MODULE(_ials, m) { m.doc() = doc_stream.str(); - py::enum_(m, "LossType") + nanobind::enum_(m, "LossType") .value("ORIGINAL", LossType::ORIGINAL) .value("IALSPP", LossType::IALSPP) .export_values(); - py::enum_(m, "SolverType") + nanobind::enum_(m, "SolverType") .value("CHOLESKY", SolverType::Cholesky) .value("CG", SolverType::CG) .value("IALSPP", SolverType::IALSPP) .export_values(); auto model_config = - py::class_(m, "IALSModelConfig") - .def(py::init()) - .def(py::pickle( - [](const IALSModelConfig &config) { - return py::make_tuple(config.K, config.alpha0, config.reg, - config.nu, config.init_stdev, - config.random_seed, config.loss_type); - }, - [](py::tuple t) { - if (t.size() != 7) - throw std::runtime_error("invalid state"); - - size_t K = t[0].cast(); - Real alpha0 = t[1].cast(); - Real reg = t[2].cast(); - Real nu = t[3].cast(); - Real init_stdev = t[4].cast(); - int random_seed = t[5].cast(); - LossType loss_type = t[6].cast(); - return IALSModelConfig(K, alpha0, reg, nu, init_stdev, - random_seed, loss_type); - })); - py::class_(m, "IALSModelConfigBuilder") - .def(py::init<>()) + nanobind::class_(m, "IALSModelConfig") + .def(nanobind::init()) + .def("__getstate__", + [](const IALSModelConfig &config) { + return nanobind::make_tuple( + config.K, config.alpha0, config.reg, config.nu, + config.init_stdev, config.random_seed, config.loss_type); + }) + .def("__setstate__", + [](IALSModelConfig &ials_model_config, + const std::tuple &state) { + new (&ials_model_config) IALSModelConfig( + std::get<0>(state), std::get<1>(state), std::get<2>(state), + std::get<3>(state), std::get<4>(state), std::get<5>(state), + std::get<6>(state)); + }); + nanobind::class_(m, "IALSModelConfigBuilder") + .def(nanobind::init<>()) .def("build", &IALSModelConfig::Builder::build) .def("set_K", &IALSModelConfig::Builder::set_K) .def("set_alpha0", &IALSModelConfig::Builder::set_alpha0) @@ -69,30 +62,28 @@ 
PYBIND11_MODULE(_ials, m) { .def("set_loss_type", &IALSModelConfig::Builder::set_loss_type); auto solver_config = - py::class_(m, "IALSSolverConfig") - .def(py::init()) - .def(py::pickle( - [](const SolverConfig &config) { - return py::make_tuple( + nanobind::class_(m, "IALSSolverConfig") + .def(nanobind::init()) + .def( + "__getstate__", [](const SolverConfig &config) { + return std::make_tuple( config.n_threads, config.solver_type, config.max_cg_steps, config.ialspp_subspace_dimension, config.ialspp_iteration); - }, - [](py::tuple t) { - if (t.size() != 5) - throw std::runtime_error("invalid state"); - - size_t n_threads = t[0].cast(); - SolverType solver_type = t[1].cast(); - size_t max_cg_steps = t[2].cast(); - size_t ialspp_subspace_dimension = t[3].cast(); - size_t ialspp_iteration = t[4].cast(); - return SolverConfig(n_threads, solver_type, max_cg_steps, - ialspp_subspace_dimension, - ialspp_iteration); - })); + } + ) + .def( + "__setstate__", [](SolverConfig &config, const std::tuple &state) { + new (&config) SolverConfig( + std::get<0>(state), + std::get<1>(state), + std::get<2>(state), + std::get<3>(state), + std::get<4>(state)); + } + ); - py::class_(m, "IALSSolverConfigBuilder") - .def(py::init<>()) + nanobind::class_(m, "IALSSolverConfigBuilder") + .def(nanobind::init<>()) .def("build", &SolverConfig::Builder::build) .def("set_n_threads", &SolverConfig::Builder::set_n_threads) .def("set_solver_type", &SolverConfig::Builder::set_solver_type) @@ -102,25 +93,23 @@ PYBIND11_MODULE(_ials, m) { .def("set_ialspp_iteration", &SolverConfig::Builder::set_ialspp_iteration); - py::class_(m, "IALSTrainer") - .def(py::init()) + nanobind::class_(m, "IALSTrainer") + .def(nanobind::init()) .def("step", &IALSTrainer::step) .def("user_scores", &IALSTrainer::user_scores) .def("transform_user", &IALSTrainer::transform_user) .def("transform_item", &IALSTrainer::transform_item) .def("compute_loss", &IALSTrainer::compute_loss) - .def_readwrite("user", &IALSTrainer::user) - 
.def_readwrite("item", &IALSTrainer::item) - .def(py::pickle( - [](const IALSTrainer &trainer) { - return py::make_tuple(trainer.config_, trainer.user, trainer.item); - }, - [](py::tuple t) { - if (t.size() != 3) - throw std::runtime_error("Invalid state!"); - IALSTrainer trainer(t[0].cast(), - t[1].cast(), - t[2].cast()); - return trainer; - })); + .def_rw("user", &IALSTrainer::user) + .def_rw("item", &IALSTrainer::item) + .def("__getstate__", [](const IALSTrainer & trainer) { + return std::make_tuple(trainer.config_, trainer.user, trainer.item); + }) + .def("__setstate__", [](IALSTrainer & trainer, const std::tuple & state) { + new (&trainer) IALSTrainer( + std::get<0>(state), std::get<1>(state), + std::get<2>(state) + + ); + }); } diff --git a/cpp_source/evaluator.cpp b/cpp_source/evaluator.cpp index 32e5b0d..02793b4 100644 --- a/cpp_source/evaluator.cpp +++ b/cpp_source/evaluator.cpp @@ -1,20 +1,26 @@ #include #include +#include #include #include #include +#include #include -#include #include -#include #include +#include #include -#include #include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "argcheck.hpp" @@ -416,48 +422,49 @@ Metrics evaluate_list_vs_list(const vector> &recommendation, } // namespace irspack -namespace py = pybind11; using namespace irspack; using namespace irspack::evaluation; using std::vector; -PYBIND11_MODULE(_core, m) { - py::class_(m, "Metrics") - .def(py::init()) +NB_MODULE(_core_evaluator, m) { + nanobind::class_(m, "Metrics") + .def(nanobind::init()) .def("merge", [](Metrics &this_, const Metrics &other) { this_.merge(other); }) .def("as_dict", &Metrics::as_dict); - py::class_(m, "EvaluatorCore") - .def(py::init> &>(), - py::arg("grount_truth"), py::arg("recommendable")) + nanobind::class_(m, "EvaluatorCore") + .def(nanobind::init> &>(), + nanobind::arg("grount_truth"), nanobind::arg("recommendable")) .def("get_metrics_f64", static_cast> &, size_t, size_t, 
size_t, bool)>(&EvaluatorCore::get_metrics), - py::arg("score_array"), py::arg("cutoff"), py::arg("offset"), - py::arg("n_threads"), py::arg("recall_with_cutoff") = false) + nanobind::arg("score_array"), nanobind::arg("cutoff"), nanobind::arg("offset"), + nanobind::arg("n_threads"), nanobind::arg("recall_with_cutoff") = false) .def("get_metrics_f32", static_cast> &, size_t, size_t, size_t, bool)>(&EvaluatorCore::get_metrics), - py::arg("score_array"), py::arg("cutoff"), py::arg("offset"), - py::arg("n_threads"), py::arg("recall_with_cutoff") = false) + nanobind::arg("score_array"), nanobind::arg("cutoff"), nanobind::arg("offset"), + nanobind::arg("n_threads"), nanobind::arg("recall_with_cutoff") = false) .def("get_ground_truth", &EvaluatorCore::get_ground_truth) .def("cache_X_as_set", &EvaluatorCore::cache_X_map) - .def(py::pickle( - [](const EvaluatorCore &evaluator) { - return py::make_tuple(evaluator.get_ground_truth(), - evaluator.get_recommendable_items()); - }, - [](py::tuple t) { - if (t.size() != 2) - throw std::runtime_error("invalid state"); - return EvaluatorCore(t[0].cast(), - t[1].cast>>()); - })); - + .def("__getstate__", [](const EvaluatorCore & evaluator) { + return std::make_tuple( + evaluator.get_ground_truth(), evaluator.get_recommendable_items() + ); + }) + .def("__setstate__", [](EvaluatorCore & evaluator, const std::tuple>>& state) { + new (&evaluator) EvaluatorCore( + std::get<0>(state), + std::get<1>(state) + + + ); + } + ); m.def("evaluate_list_vs_list", &evaluate_list_vs_list, - py::arg("recomemndations"), py::arg("grount_truths"), - py::arg("n_items"), py::arg("n_threads")); + nanobind::arg("recomemndations"), nanobind::arg("grount_truths"), + nanobind::arg("n_items"), nanobind::arg("n_threads")); } diff --git a/cpp_source/knn/knn.hpp b/cpp_source/knn/knn.hpp index fc3c8d2..32f4d5f 100644 --- a/cpp_source/knn/knn.hpp +++ b/cpp_source/knn/knn.hpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git 
a/cpp_source/knn/wrapper.cpp b/cpp_source/knn/wrapper.cpp index 7513b7e..0eca685 100644 --- a/cpp_source/knn/wrapper.cpp +++ b/cpp_source/knn/wrapper.cpp @@ -1,63 +1,61 @@ -#include "knn.hpp" #include "similarities.hpp" #include #include -#include -#include -#include -#include +#include +#include +#include +#include -namespace py = pybind11; using Real = double; -PYBIND11_MODULE(_knn, m) { - py::class_>(m, "CosineSimilarityComputer") - .def(py::init::CSRMatrix &, +NB_MODULE(_knn, m) { + nanobind::class_>(m, "CosineSimilarityComputer") + .def(nanobind::init::CSRMatrix &, Real, bool, size_t, size_t>(), - py::arg("X"), py::arg("shrinkage"), py::arg("normalize"), - py::arg("n_threads") = 1, py::arg("max_chunk_size") = 128) + nanobind::arg("X"), nanobind::arg("shrinkage"), nanobind::arg("normalize"), + nanobind::arg("n_threads") = 1, nanobind::arg("max_chunk_size") = 128) .def("compute_similarity", &KNN::CosineSimilarityComputer::compute_similarity); - py::class_>(m, + nanobind::class_>(m, "JaccardSimilarityComputer") - .def(py::init::CSRMatrix &, + .def(nanobind::init::CSRMatrix &, Real, size_t, size_t>(), - py::arg("X"), py::arg("shrinkage"), py::arg("n_threads") = 1, - py::arg("max_chunk_size") = 128) + nanobind::arg("X"), nanobind::arg("shrinkage"), nanobind::arg("n_threads") = 1, + nanobind::arg("max_chunk_size") = 128) .def("compute_similarity", &KNN::JaccardSimilarityComputer::compute_similarity); - py::class_>(m, "TverskyIndexComputer") - .def(py::init::CSRMatrix &, Real, + nanobind::class_>(m, "TverskyIndexComputer") + .def(nanobind::init::CSRMatrix &, Real, Real, Real, size_t, size_t>(), - py::arg("X"), py::arg("shrinkage"), py::arg("alpha"), - py::arg("beta"), py::arg("n_threads") = 1, - py::arg("max_chunk_size") = 128) + nanobind::arg("X"), nanobind::arg("shrinkage"), nanobind::arg("alpha"), + nanobind::arg("beta"), nanobind::arg("n_threads") = 1, + nanobind::arg("max_chunk_size") = 128) .def("compute_similarity", 
&KNN::TverskyIndexComputer::compute_similarity); - py::class_>( + nanobind::class_>( m, "AsymmetricSimilarityComputer") - .def(py::init< + .def(nanobind::init< const KNN::AsymmetricCosineSimilarityComputer::CSRMatrix &, Real, Real, size_t, size_t>(), - py::arg("X"), py::arg("shrinkage"), py::arg("alpha"), - py::arg("n_threads") = 1, py::arg("max_chunk_size") = 128) + nanobind::arg("X"), nanobind::arg("shrinkage"), nanobind::arg("alpha"), + nanobind::arg("n_threads") = 1, nanobind::arg("max_chunk_size") = 128) .def("compute_similarity", &KNN::AsymmetricCosineSimilarityComputer::compute_similarity); - py::class_>(m, "P3alphaComputer") - .def(py::init::CSRMatrix &, Real, size_t, + nanobind::class_>(m, "P3alphaComputer") + .def(nanobind::init::CSRMatrix &, Real, size_t, size_t>(), - py::arg("X"), py::arg("alpha") = 0, py::arg("n_threads") = 1, - py::arg("max_chunk_size") = 128) + nanobind::arg("X"), nanobind::arg("alpha") = 0, nanobind::arg("n_threads") = 1, + nanobind::arg("max_chunk_size") = 128) .def("compute_W", &KNN::P3alphaComputer::compute_W); - py::class_>(m, "RP3betaComputer") - .def(py::init::CSRMatrix &, Real, Real, + nanobind::class_>(m, "RP3betaComputer") + .def(nanobind::init::CSRMatrix &, Real, Real, size_t, size_t>(), - py::arg("X"), py::arg("alpha") = 0, py::arg("beta") = 0, - py::arg("n_threads") = 1, py::arg("max_chunk_size") = 128) + nanobind::arg("X"), nanobind::arg("alpha") = 0, nanobind::arg("beta") = 0, + nanobind::arg("n_threads") = 1, nanobind::arg("max_chunk_size") = 128) .def("compute_W", &KNN::RP3betaComputer::compute_W); } diff --git a/cpp_source/util.cpp b/cpp_source/util.cpp index d134422..f51be5e 100644 --- a/cpp_source/util.cpp +++ b/cpp_source/util.cpp @@ -1,15 +1,15 @@ #include "util.hpp" -#include "pybind11/cast.h" #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include + -namespace py = pybind11; using namespace irspack; -PYBIND11_MODULE(_util_cpp, m) { 
+NB_MODULE(_util_cpp, m) { m.def("remove_diagonal", &sparse_util::remove_diagonal); m.def("sparse_mm_threaded", &sparse_util::parallel_sparse_product); @@ -18,26 +18,26 @@ PYBIND11_MODULE(_util_cpp, m) { m.def("rowwise_train_test_split_by_fixed_n", &sparse_util::SplitFixedN::split); m.def("okapi_BM_25_weight", &sparse_util::okapi_BM_25_weight, - py::arg("X"), py::arg("k1") = 1.2, py::arg("b") = 0.75); - m.def("tf_idf_weight", &sparse_util::tf_idf_weight, py::arg("X"), - py::arg("smooth") = true); + nanobind::arg("X"), nanobind::arg("k1") = 1.2, nanobind::arg("b") = 0.75); + m.def("tf_idf_weight", &sparse_util::tf_idf_weight, nanobind::arg("X"), + nanobind::arg("smooth") = true); m.def("slim_weight_allow_negative", &sparse_util::SLIM, - py::arg("X"), py::arg("n_threads"), py::arg("n_iter"), - py::arg("l2_coeff"), py::arg("l1_coeff"), py::arg("tol"), - py::arg("top_k") = -1); + nanobind::arg("X"), nanobind::arg("n_threads"), nanobind::arg("n_iter"), + nanobind::arg("l2_coeff"), nanobind::arg("l1_coeff"), nanobind::arg("tol"), + nanobind::arg("top_k") = -1); m.def("slim_weight_positive_only", &sparse_util::SLIM, - py::arg("X"), py::arg("n_threads"), py::arg("n_iter"), - py::arg("l2_coeff"), py::arg("l1_coeff"), py::arg("tol"), - py::arg("top_k") = -1); + nanobind::arg("X"), nanobind::arg("n_threads"), nanobind::arg("n_iter"), + nanobind::arg("l2_coeff"), nanobind::arg("l1_coeff"), nanobind::arg("tol"), + nanobind::arg("top_k") = -1); m.def("retrieve_recommend_from_score_f64", - &sparse_util::retrieve_recommend_from_score, py::arg("score"), - py::arg("allowed_indices"), py::arg("cutoff"), - py::arg("n_threads") = 1); + &sparse_util::retrieve_recommend_from_score, nanobind::arg("score"), + nanobind::arg("allowed_indices"), nanobind::arg("cutoff"), + nanobind::arg("n_threads") = 1); m.def("retrieve_recommend_from_score_f32", - &sparse_util::retrieve_recommend_from_score, py::arg("score"), - py::arg("allowed_indices"), py::arg("cutoff"), - py::arg("n_threads") = 1); + 
&sparse_util::retrieve_recommend_from_score, nanobind::arg("score"), + nanobind::arg("allowed_indices"), nanobind::arg("cutoff"), + nanobind::arg("n_threads") = 1); } diff --git a/create_pb_stubs.sh b/create_pb_stubs.sh index 03b5564..252880e 100755 --- a/create_pb_stubs.sh +++ b/create_pb_stubs.sh @@ -1,21 +1,13 @@ #!/bin/bash modules=( \ -"irspack.recommenders._ials" \ -"irspack.evaluation._core" \ +"irspack.recommenders._ials_core" \ +"irspack.evaluation._core_evaluator" \ "irspack.recommenders._knn" \ "irspack.utils._util_cpp" ) for module_name in "${modules[@]}" do echo "Create stub for $module_name" - pybind11-stubgen -o stubs --no-setup-py "$module_name" output_path="src/$(echo "${module_name}" | sed 's/\./\//g').pyi" - input_path="stubs/$(echo "${module_name}" | sed 's/\./\//g')-stubs/__init__.pyi" - rm "${output_path}" - echo 'm: int -n: int -from numpy import float32 -' >> "${output_path}" - cat "${input_path}" >> "${output_path}" - black "${output_path}" + python -m "nanobind.stubgen" -m "$module_name" -o "$output_path" done diff --git a/py.typed b/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index 0b20321..84f4da3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,30 +1,69 @@ -[build-system] -requires = [ - "setuptools>=42", - "wheel", - "pybind11>=2.8.0", - "requests", - "setuptools_scm[toml]>=6.2", +[project] +license = "MIT" +dynamic = ["version"] +name = "irspack" +description = "Implicit feedback-based recommender systems, packed for practitioners." 
+authors = [ + { name = "Tomoki Ohtsuki", email = "tomoki.ohtsuki.19937@outlook.jp" }, +] +dependencies = [ + "httpx", + "gidgethub[httpx]>4.0.0", + "numpy >= 2.0", + "fastprogress >= 0.2", + "optuna>=2.5.0", + "pandas>=2.2.0", + "scikit-learn>=0.21.0", + "scipy>=1.0", + "colorlog>=4", + "pydantic>=2.0" ] -build-backend = "setuptools.build_meta" +[project.urls] +Homepage = "https://github.com/tohtsky/irspack" + +[tool.scikit-build] +# Protect the configuration against future changes in scikit-build-core +minimum-version = "0.4" +# Setuptools-style build caching in a local directory +build-dir = "build/{wheel_tag}" +# Build stable ABI wheels for CPython 3.12+ +wheel.py-api = "cp312" +metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" +sdist.include = ["src/irspack/_version.py"] + + +[build-system] +requires = ["scikit-build-core >=0.4.3", "nanobind >=1.3.2", "requests", "setuptools_scm[toml]>=8", "setuptools>=64", "wheel"] +build-backend = "scikit_build_core.build" + +[tool.setuptools_scm] +write_to = "src/irspack/_version.py" +local_scheme = "no-local-version" [tool.black] -ensure_newline_before_comments = true -force_grid_wrap = 0 -include_trailing_comma = true -line_length = 88 -multi_line_output = 3 -use_parentheses = true [tool.isort] ensure_newline_before_comments = true force_grid_wrap = 0 include_trailing_comma = true -known_third_party = ["pybind11"] +known_third_party = ["nanobind"] line_length = 88 multi_line_output = 3 use_parentheses = true [tool.pycln] all = true + +[tool.cibuildwheel] +build-verbosity = 1 +skip = ["cp38-*", "pp38-*"] +archs = ["auto64"] + +[tool.cibuildwheel.linux] +# Export a variable +environment-pass = ["CFLAGS", "CXXFLAGS"] + + +[tool.cibuildwheel.macos.environment] +MACOSX_DEPLOYMENT_TARGET = "10.14" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 13663fa..0000000 --- a/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -numpy >= 1.11 -fastprogress >= 0.2 -optuna>=2.5.0 
-pandas>=1.0.0 -scikit-learn>=0.21.0 -scipy>=1.0 -colorlog>=4 -pybind11>=2.4 -requests diff --git a/setup.py b/setup.py deleted file mode 100644 index 67179f7..0000000 --- a/setup.py +++ /dev/null @@ -1,92 +0,0 @@ -import os -from pathlib import Path -from typing import Any, List, Tuple - -from pybind11.setup_helpers import Pybind11Extension, build_ext -from setuptools import find_packages, setup - -SETUP_DIRECTORY = Path(__file__).resolve().parent - -with (SETUP_DIRECTORY / "Readme.md").open() as ifs: - LONG_DESCRIPTION = ifs.read() - -install_requires = ( - [ - "numpy>=1.12.0", - "fastprogress>=0.2", - "optuna>=2.5.0", - "pandas>=1.0.0", - "scipy>=1.0", - "colorlog>=4", - "pydantic>=1.8.2", - "typing_extensions>=3.10", - ], -) - - -class get_eigen_include(object): - EIGEN3_URL = "https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.zip" - EIGEN3_DIRNAME = "eigen-3.3.7" - - def __str__(self) -> str: - eigen_include_dir = os.environ.get("EIGEN3_INCLUDE_DIR", None) - - if eigen_include_dir is not None: - return eigen_include_dir - - target_dir = SETUP_DIRECTORY / self.EIGEN3_DIRNAME - if target_dir.exists(): - return target_dir.name - - download_target_dir = SETUP_DIRECTORY / "eigen3.zip" - import zipfile - - import requests - - response = requests.get(self.EIGEN3_URL, stream=True) - with download_target_dir.open("wb") as ofs: - for chunk in response.iter_content(chunk_size=1024): - ofs.write(chunk) - - with zipfile.ZipFile(download_target_dir) as ifs: - ifs.extractall() - - return target_dir.name - - -module_name_and_sources: List[Tuple[str, List[str]]] = [ - ("irspack.evaluation._core", ["cpp_source/evaluator.cpp"]), - ("irspack.recommenders._ials", ["cpp_source/als/wrapper.cpp"]), - ("irspack.recommenders._knn", ["cpp_source/knn/wrapper.cpp"]), - ("irspack.utils._util_cpp", ["cpp_source/util.cpp"]), -] -ext_modules = [ - Pybind11Extension(module_name, sources, include_dirs=[get_eigen_include()]) - for module_name, sources in module_name_and_sources -] - 
- -def local_scheme(version: Any) -> str: - return "" - - -setup( - name="irspack", - # version=get_version(), - url="https://irspack.readthedocs.io/", - use_scm_version={ - "local_scheme": local_scheme - }, # https://github.com/pypa/setuptools_scm/issues/342 - author="Tomoki Ohtsuki", - author_email="tomoki.otsuki129@gmail.com", - description="Implicit feedback-based recommender systems, packed for practitioners.", - long_description=LONG_DESCRIPTION, - long_description_content_type="text/markdown", - ext_modules=ext_modules, - install_requires=install_requires, - include_package_data=True, - cmdclass={"build_ext": build_ext}, - packages=find_packages("src"), - python_requires=">=3.7", - package_dir={"": "src"}, -) diff --git a/src/irspack/__init__.py b/src/irspack/__init__.py index 59ccfc4..1bb7be6 100644 --- a/src/irspack/__init__.py +++ b/src/irspack/__init__.py @@ -1,15 +1,9 @@ -try: - from importlib.metadata import PackageNotFoundError, version -except ImportError: - # Python < 3.8 - from importlib_metadata import PackageNotFoundError # type: ignore - from importlib_metadata import version # type: ignore +from importlib.metadata import PackageNotFoundError, version try: - __version__ = version("irspack") + from ._version import __version__ except PackageNotFoundError: # pragma: no cover - # package is not installed - pass # pragma: no cover + __version__ = "0.0.0" from .definitions import DenseScoreArray, InteractionMatrix, UserIndexArray from .evaluation import * diff --git a/src/irspack/evaluation/__init__.py b/src/irspack/evaluation/__init__.py index 08776e2..71c177f 100644 --- a/src/irspack/evaluation/__init__.py +++ b/src/irspack/evaluation/__init__.py @@ -1,4 +1,4 @@ -from ._core import EvaluatorCore, Metrics +from ._core_evaluator import EvaluatorCore, Metrics from .evaluator import ( METRIC_NAMES, Evaluator, diff --git a/src/irspack/evaluation/_core.pyi b/src/irspack/evaluation/_core.pyi deleted file mode 100644 index 659b27e..0000000 --- 
a/src/irspack/evaluation/_core.pyi +++ /dev/null @@ -1,55 +0,0 @@ -m: int -n: int -import typing - -import numpy -import scipy.sparse -from numpy import float32 - -import irspack.evaluation._core - -_Shape = typing.Tuple[int, ...] - -__all__ = ["EvaluatorCore", "Metrics", "evaluate_list_vs_list"] - -class EvaluatorCore: - def __getstate__(self) -> tuple: ... - def __init__( - self, - grount_truth: scipy.sparse.csr_matrix[numpy.float64], - recommendable: typing.List[typing.List[int]], - ) -> None: ... - def __setstate__(self, arg0: tuple) -> None: ... - def cache_X_as_set(self, arg0: int) -> None: ... - def get_ground_truth(self) -> scipy.sparse.csr_matrix[numpy.float64]: ... - def get_metrics_f32( - self, - score_array: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]], - cutoff: int, - offset: int, - n_threads: int, - recall_with_cutoff: bool = False, - ) -> Metrics: ... - def get_metrics_f64( - self, - score_array: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float64]], - cutoff: int, - offset: int, - n_threads: int, - recall_with_cutoff: bool = False, - ) -> Metrics: ... - pass - -class Metrics: - def __init__(self, arg0: int) -> None: ... - def as_dict(self) -> typing.Dict[str, float]: ... - def merge(self, arg0: Metrics) -> None: ... - pass - -def evaluate_list_vs_list( - recomemndations: typing.List[typing.List[int]], - grount_truths: typing.List[typing.List[int]], - n_items: int, - n_threads: int, -) -> Metrics: - pass diff --git a/src/irspack/evaluation/_core_evaluator.pyi b/src/irspack/evaluation/_core_evaluator.pyi new file mode 100644 index 0000000..3887692 --- /dev/null +++ b/src/irspack/evaluation/_core_evaluator.pyi @@ -0,0 +1,48 @@ +from collections.abc import Sequence +from typing import Annotated + +import scipy.sparse +from numpy.typing import ArrayLike + +class Metrics: + def __init__(self, arg: int, /) -> None: ... + def merge(self, arg: Metrics, /) -> None: ... + def as_dict(self) -> dict[str, float]: ... 
+ +class EvaluatorCore: + def __init__( + self, + grount_truth: scipy.sparse.csr_matrix[float], + recommendable: Sequence[Sequence[int]], + ) -> None: ... + def get_metrics_f64( + self, + score_array: Annotated[ArrayLike, dict(dtype="float64", shape=(None, None))], + cutoff: int, + offset: int, + n_threads: int, + recall_with_cutoff: bool = False, + ) -> Metrics: ... + def get_metrics_f32( + self, + score_array: Annotated[ArrayLike, dict(dtype="float32", shape=(None, None))], + cutoff: int, + offset: int, + n_threads: int, + recall_with_cutoff: bool = False, + ) -> Metrics: ... + def get_ground_truth(self) -> scipy.sparse.csr_matrix[float]: ... + def cache_X_as_set(self, arg: int, /) -> None: ... + def __getstate__( + self, + ) -> tuple[scipy.sparse.csr_matrix[float], list[list[int]]]: ... + def __setstate__( + self, arg: tuple[scipy.sparse.csr_matrix[float], Sequence[Sequence[int]]], / + ) -> None: ... + +def evaluate_list_vs_list( + recomemndations: Sequence[Sequence[int]], + grount_truths: Sequence[Sequence[int]], + n_items: int, + n_threads: int, +) -> Metrics: ... 
diff --git a/src/irspack/evaluation/evaluate_df_to_df.py b/src/irspack/evaluation/evaluate_df_to_df.py index c77530f..cb24550 100644 --- a/src/irspack/evaluation/evaluate_df_to_df.py +++ b/src/irspack/evaluation/evaluate_df_to_df.py @@ -3,7 +3,7 @@ import pandas as pd from .._threading import get_n_threads -from ._core import evaluate_list_vs_list +from ._core_evaluator import evaluate_list_vs_list def evaluate_recommendation_df( diff --git a/src/irspack/evaluation/evaluator.py b/src/irspack/evaluation/evaluator.py index 8b566ff..2fee6aa 100644 --- a/src/irspack/evaluation/evaluator.py +++ b/src/irspack/evaluation/evaluator.py @@ -8,7 +8,7 @@ from .._threading import get_n_threads from ..definitions import DenseScoreArray, InteractionMatrix -from ._core import EvaluatorCore, Metrics +from ._core_evaluator import EvaluatorCore, Metrics if TYPE_CHECKING: from ..recommenders.base import BaseRecommender diff --git a/src/irspack/recommenders/_ials.pyi b/src/irspack/recommenders/_ials.pyi deleted file mode 100644 index 83c62f7..0000000 --- a/src/irspack/recommenders/_ials.pyi +++ /dev/null @@ -1,191 +0,0 @@ -m: int -n: int - -"""irspack's core module for "IALSRecommender". -Built to use - SSE, SSE2""" -from __future__ import annotations - -import typing - -import numpy -import scipy.sparse - -import irspack.recommenders._ials - -__all__ = [ - "CG", - "CHOLESKY", - "IALSModelConfig", - "IALSModelConfigBuilder", - "IALSPP", - "IALSSolverConfig", - "IALSSolverConfigBuilder", - "IALSTrainer", - "LossType", - "ORIGINAL", - "SolverType", -] - -class IALSModelConfig: - def __getstate__(self) -> tuple: ... - def __init__( - self, - arg0: int, - arg1: float, - arg2: float, - arg3: float, - arg4: float, - arg5: int, - arg6: LossType, - ) -> None: ... - def __setstate__(self, arg0: tuple) -> None: ... - pass - -class IALSModelConfigBuilder: - def __init__(self) -> None: ... - def build(self) -> IALSModelConfig: ... - def set_K(self, arg0: int) -> IALSModelConfigBuilder: ... 
- def set_alpha0(self, arg0: float) -> IALSModelConfigBuilder: ... - def set_init_stdev(self, arg0: float) -> IALSModelConfigBuilder: ... - def set_loss_type(self, arg0: LossType) -> IALSModelConfigBuilder: ... - def set_nu(self, arg0: float) -> IALSModelConfigBuilder: ... - def set_random_seed(self, arg0: int) -> IALSModelConfigBuilder: ... - def set_reg(self, arg0: float) -> IALSModelConfigBuilder: ... - pass - -class IALSSolverConfig: - def __getstate__(self) -> tuple: ... - def __init__( - self, arg0: int, arg1: SolverType, arg2: int, arg3: int, arg4: int - ) -> None: ... - def __setstate__(self, arg0: tuple) -> None: ... - pass - -class IALSSolverConfigBuilder: - def __init__(self) -> None: ... - def build(self) -> IALSSolverConfig: ... - def set_ialspp_iteration(self, arg0: int) -> IALSSolverConfigBuilder: ... - def set_ialspp_subspace_dimension(self, arg0: int) -> IALSSolverConfigBuilder: ... - def set_max_cg_steps(self, arg0: int) -> IALSSolverConfigBuilder: ... - def set_n_threads(self, arg0: int) -> IALSSolverConfigBuilder: ... - def set_solver_type(self, arg0: SolverType) -> IALSSolverConfigBuilder: ... - pass - -class IALSTrainer: - def __getstate__(self) -> tuple: ... - def __init__( - self, arg0: IALSModelConfig, arg1: scipy.sparse.csr_matrix[numpy.float32] - ) -> None: ... - def __setstate__(self, arg0: tuple) -> None: ... - def compute_loss(self, arg0: IALSSolverConfig) -> float: ... - def step(self, arg0: IALSSolverConfig) -> None: ... - def transform_item( - self, arg0: scipy.sparse.csr_matrix[numpy.float32], arg1: IALSSolverConfig - ) -> numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]]: ... - def transform_user( - self, arg0: scipy.sparse.csr_matrix[numpy.float32], arg1: IALSSolverConfig - ) -> numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]]: ... - def user_scores( - self, arg0: int, arg1: int, arg2: IALSSolverConfig - ) -> numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]]: ... 
- @property - def item(self) -> numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]]: - """ - :type: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]] - """ - - @item.setter - def item( - self, arg0: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]] - ) -> None: - pass - - @property - def user(self) -> numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]]: - """ - :type: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]] - """ - - @user.setter - def user( - self, arg0: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]] - ) -> None: - pass - pass - -class LossType: - """ - Members: - - ORIGINAL - - IALSPP - """ - - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: int) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __repr__(self) -> str: ... - def __setstate__(self, state: int) -> None: ... - @property - def name(self) -> str: - """ - :type: str - """ - - @property - def value(self) -> int: - """ - :type: int - """ - IALSPP: irspack.recommenders._ials.LossType # value = - ORIGINAL: irspack.recommenders._ials.LossType # value = - __members__: dict # value = {'ORIGINAL': , 'IALSPP': } - pass - -class SolverType: - """ - Members: - - CHOLESKY - - CG - - IALSPP - """ - - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: int) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __repr__(self) -> str: ... - def __setstate__(self, state: int) -> None: ... 
- @property - def name(self) -> str: - """ - :type: str - """ - - @property - def value(self) -> int: - """ - :type: int - """ - CG: irspack.recommenders._ials.SolverType # value = - CHOLESKY: irspack.recommenders._ials.SolverType # value = - IALSPP: irspack.recommenders._ials.SolverType # value = - __members__: dict # value = {'CHOLESKY': , 'CG': , 'IALSPP': } - pass - -CG: irspack.recommenders._ials.SolverType # value = -CHOLESKY: irspack.recommenders._ials.SolverType # value = -IALSPP: irspack.recommenders._ials.SolverType # value = -ORIGINAL: irspack.recommenders._ials.LossType # value = diff --git a/src/irspack/recommenders/_ials_core.pyi b/src/irspack/recommenders/_ials_core.pyi new file mode 100644 index 0000000..7ba242a --- /dev/null +++ b/src/irspack/recommenders/_ials_core.pyi @@ -0,0 +1,121 @@ +import enum +from typing import Annotated + +import scipy.sparse +from numpy.typing import ArrayLike + +class LossType(enum.Enum): + ORIGINAL = 0 + + IALSPP = 1 + +ORIGINAL: LossType = LossType.ORIGINAL + +IALSPP: SolverType = SolverType.IALSPP + +class SolverType(enum.Enum): + CHOLESKY = 0 + + CG = 1 + + IALSPP = 2 + +CHOLESKY: SolverType = SolverType.CHOLESKY + +CG: SolverType = SolverType.CG + +class IALSModelConfig: + def __init__( + self, + arg0: int, + arg1: float, + arg2: float, + arg3: float, + arg4: float, + arg5: int, + arg6: LossType, + /, + ) -> None: ... + def __getstate__(self) -> tuple: ... + def __setstate__( + self, arg: tuple[int, float, float, float, float, int, LossType], / + ) -> None: ... + +class IALSModelConfigBuilder: + def __init__(self) -> None: ... + def build(self) -> IALSModelConfig: ... + def set_K(self, arg: int, /) -> IALSModelConfigBuilder: ... + def set_alpha0(self, arg: float, /) -> IALSModelConfigBuilder: ... + def set_reg(self, arg: float, /) -> IALSModelConfigBuilder: ... + def set_nu(self, arg: float, /) -> IALSModelConfigBuilder: ... + def set_init_stdev(self, arg: float, /) -> IALSModelConfigBuilder: ... 
+ def set_random_seed(self, arg: int, /) -> IALSModelConfigBuilder: ... + def set_loss_type(self, arg: LossType, /) -> IALSModelConfigBuilder: ... + +class IALSSolverConfig: + def __init__( + self, arg0: int, arg1: SolverType, arg2: int, arg3: int, arg4: int, / + ) -> None: ... + def __getstate__(self) -> tuple[int, SolverType, int, int, int]: ... + def __setstate__(self, arg: tuple[int, SolverType, int, int, int], /) -> None: ... + +class IALSSolverConfigBuilder: + def __init__(self) -> None: ... + def build(self) -> IALSSolverConfig: ... + def set_n_threads(self, arg: int, /) -> IALSSolverConfigBuilder: ... + def set_solver_type(self, arg: SolverType, /) -> IALSSolverConfigBuilder: ... + def set_max_cg_steps(self, arg: int, /) -> IALSSolverConfigBuilder: ... + def set_ialspp_subspace_dimension(self, arg: int, /) -> IALSSolverConfigBuilder: ... + def set_ialspp_iteration(self, arg: int, /) -> IALSSolverConfigBuilder: ... + +class IALSTrainer: + def __init__( + self, arg0: IALSModelConfig, arg1: scipy.sparse.csr_matrix[float], / + ) -> None: ... + def step(self, arg: IALSSolverConfig, /) -> None: ... + def user_scores( + self, arg0: int, arg1: int, arg2: IALSSolverConfig, / + ) -> Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")]: ... + def transform_user( + self, arg0: scipy.sparse.csr_matrix[float], arg1: IALSSolverConfig, / + ) -> Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")]: ... + def transform_item( + self, arg0: scipy.sparse.csr_matrix[float], arg1: IALSSolverConfig, / + ) -> Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")]: ... + def compute_loss(self, arg: IALSSolverConfig, /) -> float: ... + @property + def user( + self, + ) -> Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")]: ... + @user.setter + def user( + self, + arg: Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")], + /, + ) -> None: ... 
+ @property + def item( + self, + ) -> Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")]: ... + @item.setter + def item( + self, + arg: Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")], + /, + ) -> None: ... + def __getstate__( + self, + ) -> tuple[ + IALSModelConfig, + Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")], + Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")], + ]: ... + def __setstate__( + self, + arg: tuple[ + IALSModelConfig, + Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")], + Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")], + ], + /, + ) -> None: ... diff --git a/src/irspack/recommenders/_knn.pyi b/src/irspack/recommenders/_knn.pyi index 8da7123..37f0f90 100644 --- a/src/irspack/recommenders/_knn.pyi +++ b/src/irspack/recommenders/_knn.pyi @@ -1,103 +1,78 @@ -m: int -n: int -import typing - -import numpy import scipy.sparse -from numpy import float32 - -import irspack.recommenders._knn - -_Shape = typing.Tuple[int, ...] -__all__ = [ - "AsymmetricSimilarityComputer", - "CosineSimilarityComputer", - "JaccardSimilarityComputer", - "P3alphaComputer", - "RP3betaComputer", - "TverskyIndexComputer", -] - -class AsymmetricSimilarityComputer: +class CosineSimilarityComputer: def __init__( self, - X: scipy.sparse.csr_matrix[numpy.float64], + X: scipy.sparse.csr_matrix[float], shrinkage: float, - alpha: float, + normalize: bool, n_threads: int = 1, max_chunk_size: int = 128, ) -> None: ... def compute_similarity( - self, arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int - ) -> scipy.sparse.csr_matrix[numpy.float64]: ... - pass + self, arg0: scipy.sparse.csr_matrix[float], arg1: int, / + ) -> scipy.sparse.csr_matrix[float]: ... 
-class CosineSimilarityComputer: +class JaccardSimilarityComputer: def __init__( self, - X: scipy.sparse.csr_matrix[numpy.float64], + X: scipy.sparse.csr_matrix[float], shrinkage: float, - normalize: bool, n_threads: int = 1, max_chunk_size: int = 128, ) -> None: ... def compute_similarity( - self, arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int - ) -> scipy.sparse.csr_matrix[numpy.float64]: ... - pass + self, arg0: scipy.sparse.csr_matrix[float], arg1: int, / + ) -> scipy.sparse.csr_matrix[float]: ... -class JaccardSimilarityComputer: +class TverskyIndexComputer: def __init__( self, - X: scipy.sparse.csr_matrix[numpy.float64], + X: scipy.sparse.csr_matrix[float], shrinkage: float, + alpha: float, + beta: float, n_threads: int = 1, max_chunk_size: int = 128, ) -> None: ... def compute_similarity( - self, arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int - ) -> scipy.sparse.csr_matrix[numpy.float64]: ... - pass + self, arg0: scipy.sparse.csr_matrix[float], arg1: int, / + ) -> scipy.sparse.csr_matrix[float]: ... -class P3alphaComputer: +class AsymmetricSimilarityComputer: def __init__( self, - X: scipy.sparse.csr_matrix[numpy.float64], - alpha: float = 0, + X: scipy.sparse.csr_matrix[float], + shrinkage: float, + alpha: float, n_threads: int = 1, max_chunk_size: int = 128, ) -> None: ... - def compute_W( - self, arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int - ) -> scipy.sparse.csc_matrix[numpy.float64]: ... - pass + def compute_similarity( + self, arg0: scipy.sparse.csr_matrix[float], arg1: int, / + ) -> scipy.sparse.csr_matrix[float]: ... -class RP3betaComputer: +class P3alphaComputer: def __init__( self, - X: scipy.sparse.csr_matrix[numpy.float64], + X: scipy.sparse.csr_matrix[float], alpha: float = 0, - beta: float = 0, n_threads: int = 1, max_chunk_size: int = 128, ) -> None: ... def compute_W( - self, arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int - ) -> scipy.sparse.csc_matrix[numpy.float64]: ... 
- pass + self, arg0: scipy.sparse.csr_matrix[float], arg1: int, / + ) -> scipy.sparse.csc_matrix[float]: ... -class TverskyIndexComputer: +class RP3betaComputer: def __init__( self, - X: scipy.sparse.csr_matrix[numpy.float64], - shrinkage: float, - alpha: float, - beta: float, + X: scipy.sparse.csr_matrix[float], + alpha: float = 0, + beta: float = 0, n_threads: int = 1, max_chunk_size: int = 128, ) -> None: ... - def compute_similarity( - self, arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int - ) -> scipy.sparse.csr_matrix[numpy.float64]: ... - pass + def compute_W( + self, arg0: scipy.sparse.csr_matrix[float], arg1: int, / + ) -> scipy.sparse.csc_matrix[float]: ... diff --git a/src/irspack/recommenders/ials.py b/src/irspack/recommenders/ials.py index b7bf851..188d7b2 100644 --- a/src/irspack/recommenders/ials.py +++ b/src/irspack/recommenders/ials.py @@ -23,9 +23,9 @@ ParameterRange, UniformIntegerRange, ) -from ._ials import IALSModelConfigBuilder, IALSSolverConfigBuilder -from ._ials import IALSTrainer as CoreTrainer -from ._ials import LossType, SolverType +from ._ials_core import IALSModelConfigBuilder, IALSSolverConfigBuilder +from ._ials_core import IALSTrainer as CoreTrainer +from ._ials_core import LossType, SolverType from .base import ( BaseRecommenderWithItemEmbedding, BaseRecommenderWithUserEmbedding, diff --git a/src/irspack/utils/_util_cpp.pyi b/src/irspack/utils/_util_cpp.pyi index bfa8425..ad24c0d 100644 --- a/src/irspack/utils/_util_cpp.pyi +++ b/src/irspack/utils/_util_cpp.pyi @@ -1,98 +1,57 @@ -m: int -n: int -import typing +from collections.abc import Sequence +from typing import Annotated -import numpy import scipy.sparse -from numpy import float32 - -import irspack.utils._util_cpp - -_Shape = typing.Tuple[int, ...] 
- -__all__ = [ - "okapi_BM_25_weight", - "remove_diagonal", - "retrieve_recommend_from_score_f32", - "retrieve_recommend_from_score_f64", - "rowwise_train_test_split_by_fixed_n", - "rowwise_train_test_split_by_ratio", - "slim_weight_allow_negative", - "slim_weight_positive_only", - "sparse_mm_threaded", - "tf_idf_weight", -] - -def okapi_BM_25_weight( - X: scipy.sparse.csr_matrix[numpy.float64], k1: float = 1.2, b: float = 0.75 -) -> scipy.sparse.csr_matrix[numpy.float64]: - pass +from numpy.typing import ArrayLike def remove_diagonal( - arg0: scipy.sparse.csr_matrix[numpy.float64], -) -> scipy.sparse.csr_matrix[numpy.float64]: - pass - -def retrieve_recommend_from_score_f32( - score: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float32]], - allowed_indices: typing.List[typing.List[int]], - cutoff: int, - n_threads: int = 1, -) -> typing.List[typing.List[typing.Tuple[int, float]]]: - pass - -def retrieve_recommend_from_score_f64( - score: numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float64]], - allowed_indices: typing.List[typing.List[int]], - cutoff: int, - n_threads: int = 1, -) -> typing.List[typing.List[typing.Tuple[int, float]]]: - pass - -def rowwise_train_test_split_by_fixed_n( - arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int, arg2: int -) -> typing.Tuple[ - scipy.sparse.csr_matrix[numpy.float64], scipy.sparse.csr_matrix[numpy.float64] -]: - pass - + arg: scipy.sparse.csr_matrix[float], / +) -> scipy.sparse.csr_matrix[float]: ... +def sparse_mm_threaded( + arg0: scipy.sparse.csr_matrix[float], + arg1: scipy.sparse.csc_matrix[float], + arg2: int, + /, +) -> Annotated[ArrayLike, dict(dtype="float64", shape=(None, None), order="C")]: ... 
def rowwise_train_test_split_by_ratio( - arg0: scipy.sparse.csr_matrix[numpy.float64], arg1: int, arg2: float, arg3: bool -) -> typing.Tuple[ - scipy.sparse.csr_matrix[numpy.float64], scipy.sparse.csr_matrix[numpy.float64] -]: - pass - + arg0: scipy.sparse.csr_matrix[float], arg1: int, arg2: float, arg3: bool, / +) -> tuple[scipy.sparse.csr_matrix[float], scipy.sparse.csr_matrix[float]]: ... +def rowwise_train_test_split_by_fixed_n( + arg0: scipy.sparse.csr_matrix[float], arg1: int, arg2: int, / +) -> tuple[scipy.sparse.csr_matrix[float], scipy.sparse.csr_matrix[float]]: ... +def okapi_BM_25_weight( + X: scipy.sparse.csr_matrix[float], k1: float = 1.2, b: float = 0.75 +) -> scipy.sparse.csr_matrix[float]: ... +def tf_idf_weight( + X: scipy.sparse.csr_matrix[float], smooth: bool = True +) -> scipy.sparse.csr_matrix[float]: ... def slim_weight_allow_negative( - X: scipy.sparse.csr_matrix[numpy.float32], + X: scipy.sparse.csr_matrix[float], n_threads: int, n_iter: int, l2_coeff: float, l1_coeff: float, tol: float, top_k: int = -1, -) -> scipy.sparse.csc_matrix[numpy.float32]: - pass - +) -> scipy.sparse.csc_matrix[float]: ... def slim_weight_positive_only( - X: scipy.sparse.csr_matrix[numpy.float32], + X: scipy.sparse.csr_matrix[float], n_threads: int, n_iter: int, l2_coeff: float, l1_coeff: float, tol: float, top_k: int = -1, -) -> scipy.sparse.csc_matrix[numpy.float32]: - pass - -def sparse_mm_threaded( - arg0: scipy.sparse.csr_matrix[numpy.float64], - arg1: scipy.sparse.csc_matrix[numpy.float64], - arg2: int, -) -> numpy.ndarray[typing.Tuple[int, int], numpy.dtype[numpy.float64]]: - pass - -def tf_idf_weight( - X: scipy.sparse.csr_matrix[numpy.float64], smooth: bool = True -) -> scipy.sparse.csr_matrix[numpy.float64]: - pass +) -> scipy.sparse.csc_matrix[float]: ... 
+def retrieve_recommend_from_score_f64( + score: Annotated[ArrayLike, dict(dtype="float64", shape=(None, None), order="C")], + allowed_indices: Sequence[Sequence[int]], + cutoff: int, + n_threads: int = 1, +) -> list[list[tuple[int, float]]]: ... +def retrieve_recommend_from_score_f32( + score: Annotated[ArrayLike, dict(dtype="float32", shape=(None, None), order="C")], + allowed_indices: Sequence[Sequence[int]], + cutoff: int, + n_threads: int = 1, +) -> list[list[tuple[int, float]]]: ...