Adapt to the new ThreadingContext API of gemma.cpp

ufownl · ufownl · commit b78ed8a1ebeb · 2025-07-23T14:03:40.000+08:00
diff --git a/README.md b/README.md
@@ -68,13 +68,13 @@ end
 
 Show information of cgemma module.
 
-### cgemma.scheduler.config
+### cgemma.scheduler
 
-**syntax:** `<boolean>ok, <string>err = cgemma.scheduler.config(<table>options)`
+**syntax:** `<cgemma.scheduler>sched, <string>err = cgemma.scheduler([<table>options])`
 
-Configure the backend scheduler.
+Create a scheduler instance.
 
-A successful call returns `true`. Otherwise, it returns `false` and a string describing the error.
+A successful call returns a scheduler instance. Otherwise, it returns `nil` and a string describing the error.
 
 Available options and default values:
 
@@ -92,20 +92,12 @@ Available options and default values:
 }
 ```
 
-> [!NOTE]
-> This method can only be called for configuration before the backend scheduler initialization is triggered. If the backend scheduler is triggered without being configured, it will be initialized with default options.
-
 ### cgemma.scheduler.cpu\_topology
 
-**syntax:** `<string>desc, <string>err = cgemma.scheduler.cpu_topology()`
+**syntax:** `<string>desc = sched:cpu_topology()`
 
 Query CPU topology.
 
-A successful call returns the CPU topology information. Otherwise, it returns `nil` and a string describing the error.
-
-> [!NOTE]
-> Calling this method will trigger the backend scheduler initialization.
-
 ### cgemma.new
 
 **syntax:** `<cgemma.instance>inst, <string>err = cgemma.new(<table>options)`
@@ -123,13 +115,14 @@ Available options:
   map = -1,  -- Enable memory-mapping? (-1 means auto, 0 means no, 1 means yes)
   to_bf16 = -1,  -- Convert weights to bf16? (-1 means auto, 0 means no, 1 means yes)
   seed = 42,  -- Random seed. (default is random setting)
+  scheduler = sched_inst,  -- Scheduler instance; if not provided, a default
+                           -- scheduler will be attached.
   disabled_words = {...},  -- Words you don't want to generate.
 }
 ```
 
 > [!NOTE]
-> 1. If the weights file is not in the new single-file format, then `tokenizer` and `model` options are required;
-> 2. Calling this method will trigger the backend scheduler initialization.
+> If the weights file is not in the new single-file format, then the `tokenizer` option is required.
 
 ### cgemma.instance.disabled\_tokens
 
diff --git a/demo/src/app.lua b/demo/src/app.lua
@@ -1,11 +1,13 @@
-local ok, err = require("cgemma").scheduler.config(config().scheduler)
-if not ok then
+local sched, err = require("cgemma").scheduler(config().scheduler)
+if not sched then
   ngx.log(ngx.ERR, "cgemma error: ", err)
 end
 
 function gemma_inst()
   if not worker_gemma_inst then
-    local gemma, err = require("cgemma").new(config().gemma)
+    local gemma_cfg = config().gemma
+    gemma_cfg.scheduler = sched
+    local gemma, err = require("cgemma").new(gemma_cfg)
     if not gemma then
       ngx.log(ngx.ERR, "cgemma error: ", err)
       ngx.exit(ngx.HTTP_INTERNAL_SERVER_ERROR)
diff --git a/demo/src/init_kaggle.lua b/demo/src/init_kaggle.lua
@@ -1,6 +1,5 @@
 function config()
   return {
-    scheduler = {},
     gemma = {
       tokenizer = "tokenizer.spm",
       weights = "4b-it-sfp.sbs"
diff --git a/src/batch.cpp b/src/batch.cpp
@@ -106,7 +106,7 @@ gcpp::TimingInfo generate(cgemma::instance* inst, const std::vector<cgemma::sess
       .kv_cache = ctx.sess->kv_cache()
     });
   }
-  inst->model().GenerateBatch(cfg, queries, inst->env(), timing);
+  inst->model().GenerateBatch(cfg, queries, inst->matmul_env(), timing);
   return timing;
 }
 
diff --git a/src/cgemma.cpp b/src/cgemma.cpp
@@ -3,7 +3,6 @@
 #include "session.hpp"
 #include "image_tokens.hpp"
 #include "batch.hpp"
-#include "scheduler.hpp"
 #include <hwy/timer.h>
 #include <hwy/per_target.h>
 #include <hwy/targets.h>
@@ -41,10 +40,12 @@ int info(lua_State* L) {
 int luaopen_cgemma(lua_State* L) {
   constexpr const luaL_Reg entries[] = {
     {"info", info},
+    {"scheduler", cgemma::scheduler::create},
     {"new", cgemma::instance::create},
     {"batch", cgemma::batch},
     {nullptr, nullptr}
   };
+  cgemma::scheduler::declare(L);
   cgemma::instance::declare(L);
   cgemma::session::declare(L);
   cgemma::image_tokens::declare(L);
@@ -55,7 +56,5 @@ int luaopen_cgemma(lua_State* L) {
   lua_setfield(L, -2, "_NAME");
   lua_pushliteral(L, "1.0");
   lua_setfield(L, -2, "_VERSION");
-  cgemma::scheduler::declare(L);
-  lua_setfield(L, -2, "scheduler");
   return 1;
 }
diff --git a/src/image_tokens.cpp b/src/image_tokens.cpp
@@ -74,12 +74,14 @@ int create(lua_State* L) {
     gcpp::ImageTokens tks(
       "image_tokens",
       gcpp::Extents2D(model_cfg.vit_config.seq_len / (model_cfg.vit_config.pool_dim * model_cfg.vit_config.pool_dim), model_cfg.model_dim),
+      inst->threading_ctx().allocator,
       gcpp::MatPadding::kOdd
     );
+    tks.AllocateAndAttachRowPtrs(inst->matmul_env().row_ptrs);
     gcpp::RuntimeConfig cfg;
     cfg.gen = &inst->rnd();
     cfg.verbosity = 0;
-    inst->model().GenerateImageTokens(cfg, tks.Rows(), img, tks, inst->env());
+    inst->model().GenerateImageTokens(cfg, tks.Rows(), img, tks, inst->matmul_env());
     auto ud = lua_newuserdata(L, sizeof(gcpp::ImageTokens));
     new(ud) gcpp::ImageTokens(std::move(tks));
     luaL_getmetatable(L, name);
diff --git a/src/instance.cpp b/src/instance.cpp
@@ -32,15 +32,19 @@ int disabled_tokens(lua_State* L) {
 
 namespace cgemma {
 
-instance::instance(int argc, char* argv[], unsigned int seed)
+instance::instance(int argc, char* argv[], unsigned int seed, scheduler* sched)
   : args_(argc, argv)
-  , rnd_(seed) {
-  env_ = std::make_unique<gcpp::MatMulEnv>(gcpp::ThreadingContext::Get());
+  , rnd_(seed)
+  , sched_(sched) {
+  if (!sched_) {
+    default_sched_ = std::make_unique<scheduler>();
+    sched_ = default_sched_.get();
+  }
   // Disable heuristics loading weights into BF16
   gcpp::InferenceArgs infa;
   infa.prefill_tbatch_size = 0;
   infa.decode_qbatch_size = 0;
-  model_ = std::make_unique<gcpp::Gemma>(args_, infa, env_->ctx.pools);
+  model_ = std::make_unique<gcpp::Gemma>(args_, infa, threading_ctx());
 }
 
 bool instance::instruction_tuned() const {
@@ -120,8 +124,11 @@ int instance::create(lua_State* L) {
       seed = rd();
     }
     lua_pop(L, 1);
+    lua_getfield(L, 1, "scheduler");
+    auto sched = scheduler::to(L, -1);
+    lua_pop(L, 1);
     auto ud = lua_newuserdata(L, sizeof(instance));
-    auto inst = new(ud) instance(argc, argv, seed);
+    auto inst = new(ud) instance(argc, argv, seed, sched);
     luaL_getmetatable(L, name);
     lua_setmetatable(L, -2);
     lua_getfield(L, 1, "disabled_words");
diff --git a/src/instance.hpp b/src/instance.hpp
@@ -1,7 +1,7 @@
 #ifndef CGEMMA_INSTANCE_HPP
 #define CGEMMA_INSTANCE_HPP
 
-#include <lua.hpp>
+#include "scheduler.hpp"
 #include <gemma/gemma.h>
 #include <gemma/gemma_args.h>
 #include <unordered_set>
@@ -13,15 +13,14 @@ namespace cgemma {
 constexpr const int PAD_ID = 0;
 constexpr const int UNK_ID = 3;
 
-class session;
-
 class instance {
 public:
-  instance(int argc, char* argv[], unsigned int seed);
+  instance(int argc, char* argv[], unsigned int seed, scheduler* sched);
 
   const gcpp::LoaderArgs& args() const { return args_; }
   std::mt19937& rnd() { return rnd_; }
-  gcpp::MatMulEnv& env() const { return *env_; }
+  gcpp::ThreadingContext& threading_ctx() const { return sched_->threading_ctx(); }
+  gcpp::MatMulEnv& matmul_env() const { return sched_->matmul_env(); }
   gcpp::Gemma& model() const { return *model_; }
   const std::unordered_set<int>& disabled_tokens() const { return disabled_tokens_; }
   size_t max_tokens() const { return model_->GetModelConfig().max_seq_len; }
@@ -35,7 +34,8 @@ class instance {
 private:
   gcpp::LoaderArgs args_;
   std::mt19937 rnd_;
-  std::unique_ptr<gcpp::MatMulEnv> env_;
+  scheduler* sched_;
+  std::unique_ptr<scheduler> default_sched_;
   std::unique_ptr<gcpp::Gemma> model_;
   std::unordered_set<int> disabled_tokens_;
 };
diff --git a/src/scheduler.cpp b/src/scheduler.cpp
@@ -6,12 +6,61 @@ namespace {
 
 constexpr const char name[] = "cgemma.scheduler";
 
-int config(lua_State* L) {
-  if (gcpp::ThreadingContext::IsInitialized()) {
-    lua_pushnil(L);
-    lua_pushstring(L, "Scheduler had been initialized.");
-    return 2;
-  }
+int cpu_topology(lua_State* L) {
+  auto sched = cgemma::scheduler::check(L, 1);
+  lua_pushstring(L, sched->cpu_topology());
+  return 1;
+}
+
+int destroy(lua_State* L) {
+  cgemma::scheduler::check(L, 1)->~scheduler();
+  return 0;
+}
+
+}
+
+namespace cgemma {
+
+scheduler::scheduler()
+  : ctx_(args_)
+  , env_(ctx_) {
+  // nop
+}
+
+scheduler::scheduler(int args, char* argv[])
+  : args_(args, argv)
+  , ctx_(args_)
+  , env_(ctx_) {
+  // nop
+}
+
+void scheduler::declare(lua_State* L) {
+  constexpr const luaL_Reg metatable[] = {
+    {"__gc", destroy},
+    {nullptr, nullptr}
+  };
+  constexpr const luaL_Reg methods[] = {
+    {"cpu_topology", ::cpu_topology},
+    {nullptr, nullptr}
+  };
+  luaL_newmetatable(L, name);
+  luaL_register(L, nullptr, metatable);
+  lua_pushlstring(L, name, sizeof(name) - 1);
+  lua_setfield(L, -2, "_NAME");
+  lua_newtable(L);
+  luaL_register(L, nullptr, methods);
+  lua_setfield(L, -2, "__index");
+}
+
+scheduler* scheduler::to(lua_State* L, int index) {
+  return static_cast<scheduler*>(utils::userdata(L, index, name));
+}
+
+scheduler* scheduler::check(lua_State* L, int index) {
+  return static_cast<scheduler*>(luaL_checkudata(L, index, name));
+}
+
+int scheduler::create(lua_State* L) {
   constexpr const char* available_options[] = {
     "--num_threads", "--pin", "--bind",
     "--skip_packages", "--max_packages",
@@ -21,52 +70,32 @@ int config(lua_State* L) {
   constexpr const int n = sizeof(available_options) / sizeof(available_options[0]);
   int argc = 1;
   char* argv[n * 2 + 1] = {const_cast<char*>("lua-cgemma")};
-  luaL_checktype(L, 1, LUA_TTABLE);
-  for (auto opt: available_options) {
-    auto k = opt + 2;
-    lua_getfield(L, 1, k);
-    auto v = lua_tostring(L, -1);
-    if (v) {
-      argv[argc++] = const_cast<char*>(opt);
-      argv[argc++] = const_cast<char*>(v);
+  auto nargs = lua_gettop(L);
+  if (nargs > 0) {
+    luaL_checktype(L, 1, LUA_TTABLE);
+    for (auto opt: available_options) {
+      auto k = opt + 2;
+      lua_getfield(L, 1, k);
+      auto v = lua_tostring(L, -1);
+      if (v) {
+        argv[argc++] = const_cast<char*>(opt);
+        argv[argc++] = const_cast<char*>(v);
+      }
+      lua_pop(L, 1);
     }
-    lua_pop(L, 1);
   }
-  gcpp::ThreadingContext::SetArgs(gcpp::ThreadingArgs(argc, argv));
-  if (gcpp::ThreadingContext::IsInitialized()) {
-    lua_pushnil(L);
-    lua_pushstring(L, "Scheduler had been initialized.");
-    return 2;
-  }
-  lua_pushboolean(L, 1);
-  return 1;
-}
-
-int cpu_topology(lua_State* L) {
+  auto ud = lua_newuserdata(L, sizeof(scheduler));
   try {
-    lua_pushstring(L, gcpp::ThreadingContext::Get().topology.TopologyString());
+    new(ud) scheduler(argc, argv);
+    luaL_getmetatable(L, name);
+    lua_setmetatable(L, -2);
     return 1;
   } catch (const std::exception& e) {
+    lua_pop(L, 1);
     lua_pushnil(L);
     lua_pushstring(L, e.what());
     return 2;
   }
 }
 
 }
-
-namespace cgemma { namespace scheduler {
-
-void declare(lua_State* L) {
-  constexpr const luaL_Reg entries[] = {
-    {"config", config},
-    {"cpu_topology", cpu_topology},
-    {nullptr, nullptr}
-  };
-  lua_newtable(L);
-  luaL_register(L, nullptr, entries);
-  lua_pushliteral(L, "cgemma.scheduler");
-  lua_setfield(L, -2, "_NAME");
-}
-
-} }
diff --git a/src/scheduler.hpp b/src/scheduler.hpp
@@ -2,11 +2,31 @@
 #define CGEMMA_SCHEDULER_HPP
 
 #include <lua.hpp>
+#include <util/threading_context.h>
+#include <ops/matmul.h>
 
-namespace cgemma { namespace scheduler {
+namespace cgemma {
 
-void declare(lua_State* L);
+class scheduler {
+public:
+  scheduler();
+  scheduler(int argc, char* argv[]);
 
-} }
+  const char* cpu_topology() const { return ctx_.topology.TopologyString(); }
+  gcpp::ThreadingContext& threading_ctx() { return ctx_; }
+  gcpp::MatMulEnv& matmul_env() { return env_; }
+
+  static void declare(lua_State* L);
+  static scheduler* to(lua_State* L, int index);
+  static scheduler* check(lua_State* L, int index);
+  static int create(lua_State* L);
+
+private:
+  gcpp::ThreadingArgs args_;
+  gcpp::ThreadingContext ctx_;
+  gcpp::MatMulEnv env_;
+};
+
+}
 
 #endif  // CGEMMA_SCHEDULER_HPP
diff --git a/src/session.cpp b/src/session.cpp
diff --git a/tools/dump_prompt.lua b/tools/dump_prompt.lua

Original file line number	Diff line number	Diff line change
`@@ -106,7 +106,7 @@ gcpp::TimingInfo generate(cgemma::instance* inst, const std::vector<cgemma::sess`
`106`	`106`	`.kv_cache = ctx.sess->kv_cache()`
`107`	`107`	`});`
`108`	`108`	`}`
`109`		`- inst->model().GenerateBatch(cfg, queries, inst->env(), timing);`
	`109`	`+ inst->model().GenerateBatch(cfg, queries, inst->matmul_env(), timing);`
`110`	`110`	`return timing;`
`111`	`111`	`}`
`112`	`112`