Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit fd7c40d

Browse files
authored
feat: support running multiple engines at the same time (#891)
1 parent d579684 commit fd7c40d

File tree

4 files changed, with 66 additions and 48 deletions.

cortex-cpp/controllers/health.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
void health::asyncHandleHttpRequest(
55
const HttpRequestPtr &req,
66
std::function<void(const HttpResponsePtr &)> &&callback) {
7-
auto resp = cortex_utils::nitroHttpResponse();
7+
auto resp = cortex_utils::CreateCortexHttpResponse();
88
resp->setStatusCode(k200OK);
99
resp->setContentTypeCode(CT_TEXT_HTML);
1010
resp->setBody("cortex-cpp is alive!!!");

cortex-cpp/controllers/server.cc

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,16 @@ server::~server() {}
3232
void server::ChatCompletion(
3333
const HttpRequestPtr& req,
3434
std::function<void(const HttpResponsePtr&)>&& callback) {
35+
if (!HasFieldInReq(req, callback, "engine")) {
36+
return;
37+
}
38+
3539
auto engine_type =
3640
(*(req->getJsonObject())).get("engine", cur_engine_type_).asString();
3741
if (!IsEngineLoaded(engine_type)) {
3842
Json::Value res;
3943
res["message"] = "Engine is not loaded yet";
40-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
44+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
4145
resp->setStatusCode(k409Conflict);
4246
callback(resp);
4347
LOG_WARN << "Engine is not loaded yet";
@@ -69,7 +73,7 @@ void server::Embedding(const HttpRequestPtr& req,
6973
if (!IsEngineLoaded(engine_type)) {
7074
Json::Value res;
7175
res["message"] = "Engine is not loaded yet";
72-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
76+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
7377
resp->setStatusCode(k409Conflict);
7478
callback(resp);
7579
LOG_WARN << "Engine is not loaded yet";
@@ -91,12 +95,16 @@ void server::Embedding(const HttpRequestPtr& req,
9195
void server::UnloadModel(
9296
const HttpRequestPtr& req,
9397
std::function<void(const HttpResponsePtr&)>&& callback) {
98+
if (!HasFieldInReq(req, callback, "engine")) {
99+
return;
100+
}
101+
94102
auto engine_type =
95103
(*(req->getJsonObject())).get("engine", cur_engine_type_).asString();
96104
if (!IsEngineLoaded(engine_type)) {
97105
Json::Value res;
98106
res["message"] = "Engine is not loaded yet";
99-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
107+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
100108
resp->setStatusCode(k409Conflict);
101109
callback(resp);
102110
LOG_WARN << "Engine is not loaded yet";
@@ -107,7 +115,7 @@ void server::UnloadModel(
107115
->UnloadModel(
108116
req->getJsonObject(),
109117
[cb = std::move(callback)](Json::Value status, Json::Value res) {
110-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
118+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
111119
resp->setStatusCode(static_cast<drogon::HttpStatusCode>(
112120
status["status_code"].asInt()));
113121
cb(resp);
@@ -118,12 +126,16 @@ void server::UnloadModel(
118126
void server::ModelStatus(
119127
const HttpRequestPtr& req,
120128
std::function<void(const HttpResponsePtr&)>&& callback) {
129+
if (!HasFieldInReq(req, callback, "engine")) {
130+
return;
131+
}
132+
121133
auto engine_type =
122134
(*(req->getJsonObject())).get("engine", cur_engine_type_).asString();
123135
if (!IsEngineLoaded(engine_type)) {
124136
Json::Value res;
125137
res["message"] = "Engine is not loaded yet";
126-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
138+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
127139
resp->setStatusCode(k409Conflict);
128140
callback(resp);
129141
LOG_WARN << "Engine is not loaded yet";
@@ -135,7 +147,7 @@ void server::ModelStatus(
135147
->GetModelStatus(
136148
req->getJsonObject(),
137149
[cb = std::move(callback)](Json::Value status, Json::Value res) {
138-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
150+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
139151
resp->setStatusCode(static_cast<drogon::HttpStatusCode>(
140152
status["status_code"].asInt()));
141153
cb(resp);
@@ -145,35 +157,35 @@ void server::ModelStatus(
145157

146158
// Handles a "get models" request. In this diff rendering, lines whose gutter
// ends in "-" are the pre-change body (queries only cur_engine_type_) and
// lines whose gutter ends in "+" are the post-change body.
// Post-change behavior: replies 409 with "Engine is not loaded yet" when
// engines_ is empty; otherwise calls GetModels on every engine that reports
// IsSupported("GetModels"), appends each element of res["data"] to a single
// array, and replies 200 with {"data": [...], "object": "list"}.
void server::GetModels(const HttpRequestPtr& req,
147159
std::function<void(const HttpResponsePtr&)>&& callback) {
148-
if (!IsEngineLoaded(cur_engine_type_)) {
160+
if (engines_.empty()) {
149161
Json::Value res;
150162
res["message"] = "Engine is not loaded yet";
151-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
163+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
152164
resp->setStatusCode(k409Conflict);
153165
callback(resp);
154166
LOG_WARN << "Engine is not loaded yet";
155167
return;
156168
}
157169

158170
LOG_TRACE << "Start to get models";
159-
auto& en = std::get<EngineI*>(engines_[cur_engine_type_].engine);
160-
if (en->IsSupported("GetModels")) {
161-
en->GetModels(
162-
req->getJsonObject(),
163-
[cb = std::move(callback)](Json::Value status, Json::Value res) {
164-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
165-
resp->setStatusCode(static_cast<drogon::HttpStatusCode>(
166-
status["status_code"].asInt()));
167-
cb(resp);
168-
});
169-
} else {
170-
Json::Value res;
171-
res["message"] = "Method is not supported yet";
172-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
173-
resp->setStatusCode(k500InternalServerError);
174-
callback(resp);
175-
LOG_WARN << "Method is not supported yet";
171+
// Accumulates the "data" entries reported by all engines.
Json::Value resp_data(Json::arrayValue);
172+
for (auto const& [k, v] : engines_) {
173+
auto e = std::get<EngineI*>(v.engine);
174+
// Engines that do not support "GetModels" are skipped silently.
if (e->IsSupported("GetModels")) {
175+
e->GetModels(req->getJsonObject(),
176+
// NOTE(review): resp_data is captured by reference and read right after the
// loop, so this relies on the engine invoking the callback before GetModels
// returns — confirm. The engine-provided `status` is not inspected here.
[&resp_data](Json::Value status, Json::Value res) {
177+
for (auto r : res["data"]) {
178+
resp_data.append(r);
179+
}
180+
});
181+
}
176182
}
183+
Json::Value root;
184+
root["data"] = resp_data;
185+
root["object"] = "list";
186+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(root);
187+
resp->setStatusCode(drogon::HttpStatusCode::k200OK);
188+
callback(resp);
177189

178190
LOG_TRACE << "Done get models";
179191
}
@@ -193,7 +205,7 @@ void server::GetEngines(
193205
res["object"] = "list";
194206
res["data"] = engine_array;
195207

196-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
208+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
197209
callback(resp);
198210
}
199211

@@ -218,7 +230,7 @@ void server::FineTuning(
218230

219231
Json::Value res;
220232
res["message"] = "Could not load engine " + engine_type;
221-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
233+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
222234
resp->setStatusCode(k500InternalServerError);
223235
callback(resp);
224236
return;
@@ -236,15 +248,15 @@ void server::FineTuning(
236248
en->HandlePythonFileExecutionRequest(
237249
req->getJsonObject(),
238250
[cb = std::move(callback)](Json::Value status, Json::Value res) {
239-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
251+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
240252
resp->setStatusCode(static_cast<drogon::HttpStatusCode>(
241253
status["status_code"].asInt()));
242254
cb(resp);
243255
});
244256
} else {
245257
Json::Value res;
246258
res["message"] = "Method is not supported yet";
247-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
259+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
248260
resp->setStatusCode(k500InternalServerError);
249261
callback(resp);
250262
LOG_WARN << "Method is not supported yet";
@@ -259,8 +271,6 @@ void server::LoadModel(const HttpRequestPtr& req,
259271

260272
// We have not loaded engine yet, should load it before using it
261273
if (engines_.find(engine_type) == engines_.end()) {
262-
// We only use single engine so unload all engines before load new engine
263-
UnloadEngines();
264274
auto get_engine_path = [](std::string_view e) {
265275
if (e == kLlamaEngine) {
266276
return cortex_utils::kLlamaLibPath;
@@ -291,7 +301,7 @@ void server::LoadModel(const HttpRequestPtr& req,
291301

292302
Json::Value res;
293303
res["message"] = "Could not load engine " + engine_type;
294-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
304+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
295305
resp->setStatusCode(k500InternalServerError);
296306
callback(resp);
297307
return;
@@ -308,7 +318,7 @@ void server::LoadModel(const HttpRequestPtr& req,
308318
auto& en = std::get<EngineI*>(engines_[engine_type].engine);
309319
en->LoadModel(req->getJsonObject(), [cb = std::move(callback)](
310320
Json::Value status, Json::Value res) {
311-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
321+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
312322
resp->setStatusCode(
313323
static_cast<drogon::HttpStatusCode>(status["status_code"].asInt()));
314324
cb(resp);
@@ -345,15 +355,15 @@ void server::ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
345355
return n;
346356
};
347357

348-
auto resp = cortex_utils::nitroStreamResponse(chunked_content_provider,
349-
"chat_completions.txt");
358+
auto resp = cortex_utils::CreateCortexStreamResponse(chunked_content_provider,
359+
"chat_completions.txt");
350360
cb(resp);
351361
}
352362

353363
void server::ProcessNonStreamRes(std::function<void(const HttpResponsePtr&)> cb,
354364
SyncQueue& q) {
355365
auto [status, res] = q.wait_and_pop();
356-
auto resp = cortex_utils::nitroHttpJsonResponse(res);
366+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
357367
resp->setStatusCode(
358368
static_cast<drogon::HttpStatusCode>(status["status_code"].asInt()));
359369
cb(resp);
@@ -363,14 +373,20 @@ bool server::IsEngineLoaded(const std::string& e) {
363373
return engines_.find(e) != engines_.end();
364374
}
365375

366-
void server::UnloadEngines() {
367-
// We unload all engines except python engine
368-
for (auto it = engines_.begin(); it != engines_.end();) {
369-
if (it->first != kPythonRuntimeEngine) {
370-
it = engines_.erase(it);
371-
} else
372-
it++;
376+
// Checks that the request has a JSON body in which `field` is non-null.
// On failure it sends a 409 response whose "message" is
// "No <field> field in request body" through `callback`, logs a warning, and
// returns false. Returns true otherwise, without invoking `callback`.
bool server::HasFieldInReq(
377+
const HttpRequestPtr& req,
378+
std::function<void(const HttpResponsePtr&)>& callback,
379+
const std::string& field) {
380+
// A missing/unparsable JSON body (null `o`) is treated the same as a missing
// field.
// NOTE(review): if `*o` is a non-const Json::Value, operator[] may insert a
// null member for an absent key; isMember() would avoid touching the body —
// confirm.
if (auto o = req->getJsonObject(); !o || (*o)[field].isNull()) {
381+
Json::Value res;
382+
res["message"] = "No " + field + " field in request body";
383+
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
384+
// NOTE(review): a missing field is reported as 409 Conflict, the same status
// used for "Engine is not loaded yet"; 400 Bad Request may be a better fit —
// confirm with API consumers.
resp->setStatusCode(k409Conflict);
385+
callback(resp);
386+
LOG_WARN << "No " << field << " field in request body";
387+
return false;
373388
}
389+
return true;
374390
}
375391

376392
} // namespace inferences

cortex-cpp/controllers/server.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ class server : public drogon::HttpController<server>,
9999
SyncQueue& q);
100100
bool IsEngineLoaded(const std::string& e);
101101

102-
void UnloadEngines();
102+
bool HasFieldInReq(const HttpRequestPtr& req,
103+
std::function<void(const HttpResponsePtr&)>& callback,
104+
const std::string& field);
103105

104106
private:
105107
struct SyncQueue {

cortex-cpp/utils/cortex_utils.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ inline void nitro_logo() {
258258
std::cout << resetColor; // Reset color at the endreturn;
259259
}
260260

261-
inline drogon::HttpResponsePtr nitroHttpResponse() {
261+
inline drogon::HttpResponsePtr CreateCortexHttpResponse() {
262262
auto resp = drogon::HttpResponse::newHttpResponse();
263263
#ifdef ALLOW_ALL_CORS
264264
LOG_INFO << "Respond for all cors!";
@@ -267,7 +267,7 @@ inline drogon::HttpResponsePtr nitroHttpResponse() {
267267
return resp;
268268
}
269269

270-
inline drogon::HttpResponsePtr nitroHttpJsonResponse(const Json::Value& data) {
270+
inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse(const Json::Value& data) {
271271
auto resp = drogon::HttpResponse::newHttpJsonResponse(data);
272272
#ifdef ALLOW_ALL_CORS
273273
LOG_INFO << "Respond for all cors!";
@@ -277,7 +277,7 @@ inline drogon::HttpResponsePtr nitroHttpJsonResponse(const Json::Value& data) {
277277
return resp;
278278
};
279279

280-
inline drogon::HttpResponsePtr nitroStreamResponse(
280+
inline drogon::HttpResponsePtr CreateCortexStreamResponse(
281281
const std::function<std::size_t(char*, std::size_t)>& callback,
282282
const std::string& attachmentFileName = "") {
283283
auto resp = drogon::HttpResponse::newStreamResponse(

0 commit comments

Comments
 (0)