@@ -170,6 +170,9 @@ void llamaCPP::chatCompletion(
170170 data[" cache_prompt" ] = true ;
171171 data[" n_keep" ] = -1 ;
172172
173+ // Passing load value
174+ data[" repeat_last_n" ] = this ->repeat_last_n ;
175+
173176 data[" stream" ] = (*jsonBody).get (" stream" , false ).asBool ();
174177 data[" n_predict" ] = (*jsonBody).get (" max_tokens" , 500 ).asInt ();
175178 data[" top_p" ] = (*jsonBody).get (" top_p" , 0.95 ).asFloat ();
@@ -376,7 +379,7 @@ void llamaCPP::loadModel(
     params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
     params.embedding = (*jsonBody).get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-    params.n_batch = (*jsonBody).get("n_batch",512).asInt();
+    params.n_batch = (*jsonBody).get("n_batch", 512).asInt();
     params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
     params.n_threads =
         (*jsonBody)
@@ -388,7 +391,8 @@ void llamaCPP::loadModel(
     this->ai_prompt = (*jsonBody).get("ai_prompt", "ASSISTANT: ").asString();
     this->system_prompt =
         (*jsonBody).get("system_prompt", "ASSISTANT's RULE: ").asString();
-    this->pre_prompt = (*jsonBody).get("pre_prompt","").asString();
+    this->pre_prompt = (*jsonBody).get("pre_prompt", "").asString();
+    this->repeat_last_n = (*jsonBody).get("repeat_last_n", 32).asInt();
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
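Taken together, the two hunks make the repetition-penalty window configurable: loadModel() reads repeat_last_n from the load request (defaulting to 32) and stores it on the controller, and chatCompletion() forwards that stored value into the per-request sampling data. Below is a minimal sketch, not part of this commit, of how a load request body carrying the new field could be built and read back with jsoncpp (the same Json::Value API the handler uses); llama_model_path and the example values are assumptions used only for illustration.

// Sketch only (not part of this commit): build a load-request body that sets
// the new repeat_last_n field and read it back the way loadModel() does.
// llama_model_path is an assumed field name; values are illustrative.
#include <json/json.h>
#include <iostream>

int main() {
  Json::Value body;
  body["llama_model_path"] = "/models/example.gguf"; // assumed field name
  body["ctx_len"] = 2048;
  body["repeat_last_n"] = 64; // apply the repeat penalty over the last 64 tokens

  // Mirrors the read in loadModel(): a missing key falls back to 32,
  // so existing clients keep the previous behaviour.
  int repeat_last_n = body.get("repeat_last_n", 32).asInt();

  Json::StreamWriterBuilder writer;
  std::cout << Json::writeString(writer, body) << "\n"
            << "repeat_last_n = " << repeat_last_n << std::endl;
  return 0;
}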