Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 4705a1b

Browse files
committed
chore: cleanup
1 parent af28b07 commit 4705a1b

File tree

2 files changed

+44 -43 lines changed

2 files changed

+44 -43 lines changed

engine/extensions/local-engine/local_engine.cc

Lines changed: 36 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -447,13 +447,10 @@ void LocalEngine::HandleEmbedding(std::shared_ptr<Json::Value> json_body,
447447
if (server_map_.find(model_id) != server_map_.end()) {
448448
auto& s = server_map_[model_id];
449449
auto url = url_parser::Url{
450-
.protocol = "http",
451-
.host = s.host + ":" + std::to_string(s.port),
452-
.pathParams =
453-
{
454-
"v1",
455-
"embeddings",
456-
},
450+
/*.protocol*/ "http",
451+
/*.host*/ s.host + ":" + std::to_string(s.port),
452+
/*.pathParams*/ {"v1", "embeddings"},
453+
/* .queries = */ {},
457454
};
458455

459456
auto response = curl_utils::SimplePostJson(url.ToFullPath(),
@@ -495,9 +492,10 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
495492
auto wait_for_server_up = [this](const std::string& model,
496493
const std::string& host, int port) {
497494
auto url = url_parser::Url{
498-
.protocol = "http",
499-
.host = host + ":" + std::to_string(port),
500-
.pathParams = {"health"},
495+
/*.protocol*/ "http",
496+
/*.host*/ host + ":" + std::to_string(port),
497+
/*.pathParams*/ {"health"},
498+
/*.queries*/ {},
501499
};
502500
while (server_map_.find(model) != server_map_.end()) {
503501
auto res = curl_utils::SimpleGet(url.ToFullPath());
@@ -519,6 +517,11 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
519517
server_map_[model_id].host = "127.0.0.1";
520518
server_map_[model_id].port = GenerateRandomInteger(39400, 39999);
521519
auto& s = server_map_[model_id];
520+
s.pre_prompt = json_body->get("pre_prompt", "").asString();
521+
s.user_prompt = json_body->get("user_prompt", "USER: ").asString();
522+
s.ai_prompt = json_body->get("ai_prompt", "ASSISTANT: ").asString();
523+
s.system_prompt =
524+
json_body->get("system_prompt", "ASSISTANT's RULE: ").asString();
522525
std::vector<std::string> params = ConvertJsonToParamsVector(*json_body);
523526
params.push_back("--host");
524527
params.push_back(s.host);
@@ -696,18 +699,24 @@ void LocalEngine::HandleOpenAiChatCompletion(
696699
// llama.cpp server only supports n = 1
697700
(*json_body)["n"] = 1;
698701

702+
auto url = url_parser::Url{
703+
/*.protocol*/ "http",
704+
/*.host*/ s.host + ":" + std::to_string(s.port),
705+
/*.pathParams*/ {"v1", "chat", "completions"},
706+
/*.queries*/ {},
707+
};
708+
699709
if (is_stream) {
700-
q_.RunInQueue([s, json_body, callback, model] {
710+
q_.RunInQueue([s, json_body, callback, model, url = std::move(url)] {
701711
auto curl = curl_easy_init();
702712
if (!curl) {
703713
CTL_WRN("Failed to initialize CURL");
704714
return;
705715
}
706716

707-
auto url = "http://" + s.host + ":" + std::to_string(s.port) +
708-
"/v1/chat/completions";
709-
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
717+
curl_easy_setopt(curl, CURLOPT_URL, url.ToFullPath().c_str());
710718
curl_easy_setopt(curl, CURLOPT_POST, 1L);
719+
CTL_INF(url.ToFullPath());
711720

712721
struct curl_slist* headers = nullptr;
713722
headers = curl_slist_append(headers, "Content-Type: application/json");
@@ -754,16 +763,6 @@ void LocalEngine::HandleOpenAiChatCompletion(
754763
});
755764

756765
} else {
757-
auto url = url_parser::Url{
758-
.protocol = "http",
759-
.host = s.host + ":" + std::to_string(s.port),
760-
.pathParams =
761-
{
762-
"v1",
763-
"chat",
764-
"completions",
765-
},
766-
};
767766
Json::Value result;
768767
// multiple choices
769768
for (int i = 0; i < n; i++) {
@@ -810,6 +809,8 @@ void LocalEngine::HandleOpenAiChatCompletion(
810809
}
811810
}
812811

812+
// (sang) duplicate code but it is easier to clean when
813+
// llama-server upstream is fully OpenAI API Compatible
813814
void LocalEngine::HandleNonOpenAiChatCompletion(
814815
std::shared_ptr<Json::Value> json_body, http_callback&& callback,
815816
const std::string& model) {
@@ -881,17 +882,23 @@ void LocalEngine::HandleNonOpenAiChatCompletion(
881882
(*json_body)["n"] = 1;
882883
int n_probs = json_body->get("n_probs", 0).asInt();
883884

885+
auto url = url_parser::Url{
886+
/*.protocol*/ "http",
887+
/*.host*/ s.host + ":" + std::to_string(s.port),
888+
/*.pathParams*/ {"v1", "completions"},
889+
/*.queries*/ {},
890+
};
891+
884892
if (is_stream) {
885-
q_.RunInQueue([s, json_body, callback, n_probs, model] {
893+
q_.RunInQueue([s, json_body, callback, n_probs, model,
894+
url = std::move(url)] {
886895
auto curl = curl_easy_init();
887896
if (!curl) {
888897
CTL_WRN("Failed to initialize CURL");
889898
return;
890899
}
891900

892-
auto url =
893-
"http://" + s.host + ":" + std::to_string(s.port) + "/v1/completions";
894-
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
901+
curl_easy_setopt(curl, CURLOPT_URL, url.ToFullPath().c_str());
895902
curl_easy_setopt(curl, CURLOPT_POST, 1L);
896903

897904
struct curl_slist* headers = nullptr;
@@ -939,15 +946,7 @@ void LocalEngine::HandleNonOpenAiChatCompletion(
939946
});
940947

941948
} else {
942-
auto url = url_parser::Url{
943-
.protocol = "http",
944-
.host = s.host + ":" + std::to_string(s.port),
945-
.pathParams =
946-
{
947-
"v1",
948-
"completions",
949-
},
950-
};
949+
951950
Json::Value result;
952951
int prompt_tokens = 0;
953952
int predicted_tokens = 0;

engine/services/engine_service.h

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -63,11 +63,10 @@ class EngineService : public EngineServiceI {
6363
std::shared_ptr<cortex::TaskQueue> q_ = nullptr;
6464

6565
public:
66-
EngineService(
67-
std::shared_ptr<DownloadService> download_service,
68-
std::shared_ptr<cortex::DylibPathManager> dylib_path_manager,
69-
std::shared_ptr<DatabaseService> db_service,
70-
std::shared_ptr<cortex::TaskQueue> q)
66+
EngineService(std::shared_ptr<DownloadService> download_service,
67+
std::shared_ptr<cortex::DylibPathManager> dylib_path_manager,
68+
std::shared_ptr<DatabaseService> db_service,
69+
std::shared_ptr<cortex::TaskQueue> q)
7170
: download_service_{download_service},
7271
dylib_path_manager_{dylib_path_manager},
7372
hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(),
@@ -77,7 +76,10 @@ class EngineService : public EngineServiceI {
7776
q_(q) {}
7877

7978
EngineService(std::shared_ptr<cortex::DylibPathManager> dylib_path_manager)
80-
: dylib_path_manager_(dylib_path_manager) {}
79+
: dylib_path_manager_(dylib_path_manager),
80+
hw_inf_{.sys_inf = system_info_utils::GetSystemInfo(),
81+
.cuda_driver_version =
82+
system_info_utils::GetDriverAndCudaVersion().second} {}
8183

8284
std::vector<EngineInfo> GetEngineInfoList() const;
8385

0 commit comments

Comments
 (0)