This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 3789902

upgraded server with latest api

1 parent 6a970fe, commit 3789902

File tree

2 files changed: +768 −113 lines


controllers/llamaCPP.cc

Lines changed: 3 additions & 3 deletions
@@ -135,7 +135,7 @@ void llamaCPP::warmupModel() {
   pseudo["prompt"] = "Hello";
   pseudo["n_predict"] = 2;
   pseudo["stream"] = false;
-  const int task_id = llama.request_completion(pseudo, false, false);
+  const int task_id = llama.request_completion(pseudo, false, false, -1);
   std::string completion_text;
   task_result result = llama.next_result(task_id);
   if (!result.error && result.stop) {
@@ -292,7 +292,7 @@ void llamaCPP::chatCompletion(
   LOG_INFO << "Current completion text";
   LOG_INFO << formatted_output;
 #endif
-  const int task_id = llama.request_completion(data, false, false);
+  const int task_id = llama.request_completion(data, false, false, -1);
   LOG_INFO << "Resolved request for task_id:" << task_id;

   if (is_streamed) {
@@ -383,7 +383,7 @@ void llamaCPP::embedding(
     prompt = "";
   }
   const int task_id = llama.request_completion(
-      {{"prompt", prompt}, {"n_predict", 0}}, false, true);
+      {{"prompt", prompt}, {"n_predict", 0}}, false, true, -1);
   task_result result = llama.next_result(task_id);
   std::vector<float> embedding_result = result.result_json["embedding"];
   auto resp = nitro_utils::nitroHttpResponse();
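
The only functional change in this file is that every call to llama.request_completion now passes a fourth argument. Judging from these call sites, the extra parameter appears to be a multitask/group id introduced by the updated upstream llama.cpp server API, with -1 marking a standalone request. The sketch below is illustrative only: it uses a stub in place of the real server context, and the parameter name and payload type are assumptions, not taken from upstream.

#include <map>
#include <string>

// Hypothetical stand-in for the request payload (the real controller uses a
// JSON object); kept minimal so this sketch compiles on its own.
using json = std::map<std::string, std::string>;

// Stub mirroring the assumed post-upgrade signature: the trailing parameter
// is presumed to be a multitask/group id, with -1 meaning "no group".
int request_completion(const json& /*data*/, bool /*infill*/,
                       bool /*embedding*/, int /*multitask_id*/) {
  return 0;  // the real server would return a freshly assigned task id
}

int main() {
  json pseudo{{"prompt", "Hello"}};
  // Pre-upgrade call shape:  request_completion(pseudo, false, false);
  // Post-upgrade call shape: append -1 as the trailing id for a plain request.
  const int task_id = request_completion(pseudo, false, false, -1);
  return task_id;
}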
