This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit cf1a2ca
chore: cleanup
1 parent: 220a974

16 files changed (+204, -163 lines)

engine/cli/command_line_parser.cc

Lines changed: 9 additions & 11 deletions
@@ -51,9 +51,7 @@ CommandLineParser::CommandLineParser()
       download_service_{std::make_shared<DownloadService>()},
       dylib_path_manager_{std::make_shared<cortex::DylibPathManager>()},
       db_service_{std::make_shared<DatabaseService>()},
-      engine_service_{std::make_shared<EngineService>(
-          download_service_, dylib_path_manager_, db_service_,
-          std::make_shared<cortex::TaskQueue>(1, "q"))} {}
+      engine_service_{std::make_shared<EngineService>(dylib_path_manager_)} {}
 
 bool CommandLineParser::SetupCommand(int argc, char** argv) {
   app_.usage("Usage:\n" + commands::GetCortexBinary() +
@@ -124,14 +122,14 @@ bool CommandLineParser::SetupCommand(int argc, char** argv) {
     }
   }
 #endif
-  // auto config = file_manager_utils::GetCortexConfig();
-  // if (!config.llamacppVersion.empty() &&
-  //     config.latestLlamacppRelease != config.llamacppVersion) {
-  //   CLI_LOG(
-  //       "\nNew llama.cpp version available: " << config.latestLlamacppRelease);
-  //   CLI_LOG("To update, run: " << commands::GetCortexBinary()
-  //                              << " engines update llama-cpp");
-  // }
+  auto config = file_manager_utils::GetCortexConfig();
+  if (!config.llamacppVersion.empty() &&
+      config.latestLlamacppRelease != config.llamacppVersion) {
+    CLI_LOG(
+        "\nNew llama.cpp version available: " << config.latestLlamacppRelease);
+    CLI_LOG("To update, run: " << commands::GetCortexBinary()
+                               << " engines update llama-cpp");
+  }
 
   return true;
 }
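Both call sites touched by this commit (here and in server_start_cmd.cc below) now construct EngineService from a single DylibPathManager. A minimal sketch of what such a constructor overload could look like; the declaration below is an assumption for illustration only, not taken from engine_service.h:

// Hypothetical sketch -- the real EngineService declaration is not part of this
// commit; this only illustrates the narrowed dependency seen at the call sites.
#include <memory>
#include <utility>

namespace cortex {
class DylibPathManager {};  // stand-in for the real class
}  // namespace cortex

class EngineService {
 public:
  // Assumed overload: the CLI passes only the DylibPathManager, keeping the
  // DownloadService/DatabaseService/TaskQueue dependencies out of the CLI path.
  explicit EngineService(std::shared_ptr<cortex::DylibPathManager> dylib_path_manager)
      : dylib_path_manager_(std::move(dylib_path_manager)) {}

  void RegisterEngineLibPath() { /* registers engine library dirs; body elided */ }

 private:
  std::shared_ptr<cortex::DylibPathManager> dylib_path_manager_;
};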

engine/cli/commands/cortex_upd_cmd.cc

Lines changed: 2 additions & 2 deletions
@@ -515,10 +515,10 @@ bool CortexUpdCmd::GetLinuxInstallScript(const std::string& v,
                                          const std::string& channel) {
   std::vector<std::string> path_list;
   if (channel == "nightly") {
-    path_list = {"menloresearch", "cortex.cpp", "dev", "engine",
+    path_list = {kMenloOrg, "cortex.cpp", "dev", "engine",
                  "templates", "linux", "install.sh"};
   } else {
-    path_list = {"menloresearch", "cortex.cpp", "main", "engine",
+    path_list = {kMenloOrg, "cortex.cpp", "main", "engine",
                  "templates", "linux", "install.sh"};
   }
   auto url_obj = url_parser::Url{

engine/cli/commands/cortex_upd_cmd.h

Lines changed: 2 additions & 2 deletions
@@ -79,9 +79,9 @@ inline std::vector<std::string> GetReleasePath() {
   if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) {
     return {"cortex", "latest", "version.json"};
   } else if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) {
-    return {"repos", "menloresearch", "cortex.cpp", "releases"};
+    return {"repos", kMenloOrg, "cortex.cpp", "releases"};
   } else {
-    return {"repos", "menloresearch", "cortex.cpp", "releases", "latest"};
+    return {"repos", kMenloOrg, "cortex.cpp", "releases", "latest"};
   }
 }
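The two files above (and engine_install_cmd.cc below) replace string literals with shared constants such as kMenloOrg, kLinuxOs, and kUbuntuOs. The commit does not show where these constants are defined; a plausible sketch, assuming a shared constants header:

// Hypothetical sketch -- the actual header and namespace for these constants
// are not shown in this diff; this only illustrates the literal-to-constant refactor.
inline constexpr auto kMenloOrg = "menloresearch";  // GitHub organization
inline constexpr auto kLinuxOs = "linux";
inline constexpr auto kUbuntuOs = "ubuntu";

Centralizing the literals keeps a future org or OS-name change in one place instead of scattered across download paths and release URLs.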

engine/cli/commands/engine_install_cmd.cc

Lines changed: 2 additions & 2 deletions
@@ -92,8 +92,8 @@ bool EngineInstallCmd::Exec(const std::string& engine,
         auto v_name = variant["name"].asString();
         if ((string_utils::StringContainsIgnoreCase(v_name,
                                                     hw_inf_.sys_inf->os) ||
-             (hw_inf_.sys_inf->os == "linux" &&
-              string_utils::StringContainsIgnoreCase(v_name, "ubuntu"))) &&
+             (hw_inf_.sys_inf->os == kLinuxOs &&
+              string_utils::StringContainsIgnoreCase(v_name, kUbuntuOs))) &&
             string_utils::StringContainsIgnoreCase(v_name,
                                                    hw_inf_.sys_inf->arch)) {
           variant_selections.push_back(variant["name"].asString());

engine/cli/commands/server_start_cmd.cc

Lines changed: 1 addition & 5 deletions
@@ -106,11 +106,7 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
 #else
   std::vector<std::string> commands;
   // Some engines requires to add lib search path before process being created
-  auto download_srv = std::make_shared<DownloadService>();
-  auto dylib_path_mng = std::make_shared<cortex::DylibPathManager>();
-  auto db_srv = std::make_shared<DatabaseService>();
-  EngineService(download_srv, dylib_path_mng, db_srv,
-                std::make_shared<cortex::TaskQueue>(1, "task_queue"))
+  EngineService(std::make_shared<cortex::DylibPathManager>())
       .RegisterEngineLibPath();
 
   std::string p = cortex_utils::GetCurrentPath() + "/" + exe;

engine/cli/main.cc

Lines changed: 4 additions & 3 deletions
@@ -148,14 +148,14 @@ int main(int argc, char* argv[]) {
                        std::chrono::hours(24);
     should_check_for_latest_llamacpp_version = now > last_check;
   }
-
-  if (false) {
+
+  if (should_check_for_latest_llamacpp_version) {
     std::thread t1([]() {
       // TODO: namh current we only check for llamacpp. Need to add support for other engine
       auto get_latest_version = []() -> cpp::result<std::string, std::string> {
         try {
           auto res = github_release_utils::GetReleaseByVersion(
-              "menloresearch", "cortex.llamacpp", "latest");
+              kGgmlOrg, kLlamaRepo, "latest");
           if (res.has_error()) {
             CTL_ERR("Failed to get latest llama.cpp version: " << res.error());
             return cpp::fail("Failed to get latest llama.cpp version: " +
@@ -171,6 +171,7 @@ int main(int argc, char* argv[]) {
       };
 
       auto res = get_latest_version();
+
       if (res.has_error()) {
         CTL_ERR("Failed to get latest llama.cpp version: " << res.error());
         return;
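The re-enabled branch only runs when the stored timestamp is more than 24 hours old. A minimal, standalone sketch of that std::chrono throttling pattern (variable names here are illustrative, not taken from the cortex codebase):

#include <chrono>
#include <iostream>

int main() {
  using Clock = std::chrono::system_clock;

  // Pretend the last check was recorded 30 hours ago (e.g. loaded from config).
  auto last_check = Clock::now() - std::chrono::hours(30);

  // Only check again once the stored timestamp is older than 24 hours.
  bool should_check = Clock::now() > last_check + std::chrono::hours(24);

  std::cout << (should_check ? "check for update" : "skip") << '\n';
  return 0;
}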

engine/extensions/local-engine/local_engine.cc

Lines changed: 39 additions & 40 deletions
@@ -447,13 +447,10 @@ void LocalEngine::HandleEmbedding(std::shared_ptr<Json::Value> json_body,
   if (server_map_.find(model_id) != server_map_.end()) {
     auto& s = server_map_[model_id];
     auto url = url_parser::Url{
-        .protocol = "http",
-        .host = s.host + ":" + std::to_string(s.port),
-        .pathParams =
-            {
-                "v1",
-                "embeddings",
-            },
+        /*.protocol*/ "http",
+        /*.host*/ s.host + ":" + std::to_string(s.port),
+        /*.pathParams*/ {"v1", "embeddings"},
+        /* .queries = */ {},
     };
 
     auto response = curl_utils::SimplePostJson(url.ToFullPath(),
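Throughout this file the url_parser::Url aggregates switch from designated initializers (.protocol = ...) to positional initialization with the field names kept as comments. Designated initializers are only standard from C++20 onward (before that they rely on compiler extensions), which is a plausible, though unstated, motivation. A small standalone illustration of the two forms, using a stand-in aggregate rather than the real url_parser::Url:

#include <string>
#include <vector>

// Stand-in aggregate; the real url_parser::Url has more machinery (e.g. ToFullPath()).
struct Url {
  std::string protocol;
  std::string host;
  std::vector<std::string> pathParams;
  std::vector<std::string> queries;
};

int main() {
  // C++20 designated initializers (the form being removed):
  // Url a{.protocol = "http", .host = "127.0.0.1:39400",
  //       .pathParams = {"v1", "embeddings"}, .queries = {}};

  // Positional aggregate initialization with field-name comments, valid well
  // before C++20, which is what the commit switches to:
  Url b{
      /*.protocol*/ "http",
      /*.host*/ "127.0.0.1:39400",
      /*.pathParams*/ {"v1", "embeddings"},
      /*.queries*/ {},
  };
  (void)b;
  return 0;
}

Keeping the field names as comments preserves readability while staying within pre-C++20 aggregate initialization.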
@@ -495,9 +492,10 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
   auto wait_for_server_up = [this](const std::string& model,
                                    const std::string& host, int port) {
     auto url = url_parser::Url{
-        .protocol = "http",
-        .host = host + ":" + std::to_string(port),
-        .pathParams = {"health"},
+        /*.protocol*/ "http",
+        /*.host*/ host + ":" + std::to_string(port),
+        /*.pathParams*/ {"health"},
+        /*.queries*/ {},
     };
     while (server_map_.find(model) != server_map_.end()) {
       auto res = curl_utils::SimpleGet(url.ToFullPath());
@@ -519,6 +517,11 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
   server_map_[model_id].host = "127.0.0.1";
   server_map_[model_id].port = GenerateRandomInteger(39400, 39999);
   auto& s = server_map_[model_id];
+  s.pre_prompt = json_body->get("pre_prompt", "").asString();
+  s.user_prompt = json_body->get("user_prompt", "USER: ").asString();
+  s.ai_prompt = json_body->get("ai_prompt", "ASSISTANT: ").asString();
+  s.system_prompt =
+      json_body->get("system_prompt", "ASSISTANT's RULE: ").asString();
   std::vector<std::string> params = ConvertJsonToParamsVector(*json_body);
   params.push_back("--host");
   params.push_back(s.host);
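The added lines seed the per-server prompt templates from the request body, falling back to defaults when a key is absent, using JsonCpp's Json::Value::get(key, default). A tiny standalone example of the same defaulting pattern:

#include <iostream>
#include <json/json.h>  // JsonCpp

int main() {
  Json::Value body;
  body["user_prompt"] = "User: ";  // present -> its value is used

  // Absent keys fall back to the provided default.
  auto user_prompt = body.get("user_prompt", "USER: ").asString();
  auto ai_prompt = body.get("ai_prompt", "ASSISTANT: ").asString();

  std::cout << user_prompt << "\n" << ai_prompt << "\n";
  return 0;
}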
@@ -530,21 +533,21 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
 
   std::vector<std::string> v;
   v.reserve(params.size() + 1);
-  auto engine_dir = engine_service_.GetEngineDirPath("llama.cpp");
+  auto engine_dir = engine_service_.GetEngineDirPath(kLlamaRepo);
   if (engine_dir.has_error()) {
     CTL_WRN(engine_dir.error());
     server_map_.erase(model_id);
     return;
   }
-  auto exe = (engine_dir.value().first / "llama-server").string();
+  auto exe = (engine_dir.value().first / kLlamaServer).string();
 
   v.push_back(exe);
   v.insert(v.end(), params.begin(), params.end());
   engine_service_.RegisterEngineLibPath();
 
   auto log_path =
       (file_manager_utils::GetCortexLogPath() / "logs" / "cortex.log").string();
-  CTL_INF("log: " << log_path);
+  CTL_DBG("log: " << log_path);
   auto result = cortex::process::SpawnProcess(v, log_path, log_path);
   if (result.has_error()) {
     CTL_ERR("Fail to spawn process. " << result.error());
@@ -696,18 +699,24 @@ void LocalEngine::HandleOpenAiChatCompletion(
   // llama.cpp server only supports n = 1
   (*json_body)["n"] = 1;
 
+  auto url = url_parser::Url{
+      /*.protocol*/ "http",
+      /*.host*/ s.host + ":" + std::to_string(s.port),
+      /*.pathParams*/ {"v1", "chat", "completions"},
+      /*.queries*/ {},
+  };
+
   if (is_stream) {
-    q_.RunInQueue([s, json_body, callback, model] {
+    q_.RunInQueue([s, json_body, callback, model, url = std::move(url)] {
       auto curl = curl_easy_init();
       if (!curl) {
         CTL_WRN("Failed to initialize CURL");
         return;
       }
 
-      auto url = "http://" + s.host + ":" + std::to_string(s.port) +
-                 "/v1/chat/completions";
-      curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+      curl_easy_setopt(curl, CURLOPT_URL, url.ToFullPath().c_str());
       curl_easy_setopt(curl, CURLOPT_POST, 1L);
+      CTL_INF(url.ToFullPath());
 
       struct curl_slist* headers = nullptr;
       headers = curl_slist_append(headers, "Content-Type: application/json");
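Hoisting the Url out of the branch and capturing it with url = std::move(url) means the queued task owns the URL instead of rebuilding the string inside the lambda. The init-capture (capture-by-move) used here is standard C++14; a minimal standalone example of the pattern:

#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::function<void()>> queue;

  std::string url = "http://127.0.0.1:39400/v1/chat/completions";

  // C++14 init-capture: move `url` into the closure so the queued task owns
  // its own copy even after the enclosing scope ends.
  queue.emplace_back([url = std::move(url)] {
    std::cout << "POST " << url << '\n';
  });

  for (auto& task : queue) task();  // run the queued work later
  return 0;
}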
@@ -754,16 +763,6 @@ void LocalEngine::HandleOpenAiChatCompletion(
     });
 
   } else {
-    auto url = url_parser::Url{
-        .protocol = "http",
-        .host = s.host + ":" + std::to_string(s.port),
-        .pathParams =
-            {
-                "v1",
-                "chat",
-                "completions",
-            },
-    };
     Json::Value result;
     // multiple choices
     for (int i = 0; i < n; i++) {
@@ -810,6 +809,8 @@
     }
   }
 
+// (sang) duplicate code but it is easier to clean when
+// llama-server upstream is fully OpenAI API Compatible
 void LocalEngine::HandleNonOpenAiChatCompletion(
     std::shared_ptr<Json::Value> json_body, http_callback&& callback,
     const std::string& model) {
@@ -881,17 +882,23 @@ void LocalEngine::HandleNonOpenAiChatCompletion(
   (*json_body)["n"] = 1;
   int n_probs = json_body->get("n_probs", 0).asInt();
 
+  auto url = url_parser::Url{
+      /*.protocol*/ "http",
+      /*.host*/ s.host + ":" + std::to_string(s.port),
+      /*.pathParams*/ {"v1", "completions"},
+      /*.queries*/ {},
+  };
+
   if (is_stream) {
-    q_.RunInQueue([s, json_body, callback, n_probs, model] {
+    q_.RunInQueue([s, json_body, callback, n_probs, model,
+                   url = std::move(url)] {
       auto curl = curl_easy_init();
       if (!curl) {
         CTL_WRN("Failed to initialize CURL");
         return;
       }
 
-      auto url =
-          "http://" + s.host + ":" + std::to_string(s.port) + "/v1/completions";
-      curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+      curl_easy_setopt(curl, CURLOPT_URL, url.ToFullPath().c_str());
       curl_easy_setopt(curl, CURLOPT_POST, 1L);
 
       struct curl_slist* headers = nullptr;
@@ -939,15 +946,7 @@
     });
 
   } else {
-    auto url = url_parser::Url{
-        .protocol = "http",
-        .host = s.host + ":" + std::to_string(s.port),
-        .pathParams =
-            {
-                "v1",
-                "completions",
-            },
-    };
+
     Json::Value result;
     int prompt_tokens = 0;
     int predicted_tokens = 0;
