@@ -447,13 +447,10 @@ void LocalEngine::HandleEmbedding(std::shared_ptr<Json::Value> json_body,
447447 if (server_map_.find (model_id) != server_map_.end ()) {
448448 auto & s = server_map_[model_id];
449449 auto url = url_parser::Url{
450- .protocol = " http" ,
451- .host = s.host + " :" + std::to_string (s.port ),
452- .pathParams =
453- {
454- " v1" ,
455- " embeddings" ,
456- },
450+ /* .protocol*/ " http" ,
451+ /* .host*/ s.host + " :" + std::to_string (s.port ),
452+ /* .pathParams*/ {" v1" , " embeddings" },
453+ /* .queries = */ {},
457454 };
458455
459456 auto response = curl_utils::SimplePostJson (url.ToFullPath (),
@@ -495,9 +492,10 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
495492 auto wait_for_server_up = [this ](const std::string& model,
496493 const std::string& host, int port) {
497494 auto url = url_parser::Url{
498- .protocol = " http" ,
499- .host = host + " :" + std::to_string (port),
500- .pathParams = {" health" },
495+ /* .protocol*/ " http" ,
496+ /* .host*/ host + " :" + std::to_string (port),
497+ /* .pathParams*/ {" health" },
498+ /* .queries*/ {},
501499 };
502500 while (server_map_.find (model) != server_map_.end ()) {
503501 auto res = curl_utils::SimpleGet (url.ToFullPath ());
@@ -519,6 +517,11 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
519517 server_map_[model_id].host = " 127.0.0.1" ;
520518 server_map_[model_id].port = GenerateRandomInteger (39400 , 39999 );
521519 auto & s = server_map_[model_id];
520+ s.pre_prompt = json_body->get (" pre_prompt" , " " ).asString ();
521+ s.user_prompt = json_body->get (" user_prompt" , " USER: " ).asString ();
522+ s.ai_prompt = json_body->get (" ai_prompt" , " ASSISTANT: " ).asString ();
523+ s.system_prompt =
524+ json_body->get (" system_prompt" , " ASSISTANT's RULE: " ).asString ();
522525 std::vector<std::string> params = ConvertJsonToParamsVector (*json_body);
523526 params.push_back (" --host" );
524527 params.push_back (s.host );
@@ -696,18 +699,24 @@ void LocalEngine::HandleOpenAiChatCompletion(
696699 // llama.cpp server only supports n = 1
697700 (*json_body)[" n" ] = 1 ;
698701
702+ auto url = url_parser::Url{
703+ /* .protocol*/ " http" ,
704+ /* .host*/ s.host + " :" + std::to_string (s.port ),
705+ /* .pathParams*/ {" v1" , " chat" , " completions" },
706+ /* .queries*/ {},
707+ };
708+
699709 if (is_stream) {
700- q_.RunInQueue ([s, json_body, callback, model] {
710+ q_.RunInQueue ([s, json_body, callback, model, url = std::move (url) ] {
701711 auto curl = curl_easy_init ();
702712 if (!curl) {
703713 CTL_WRN (" Failed to initialize CURL" );
704714 return ;
705715 }
706716
707- auto url = " http://" + s.host + " :" + std::to_string (s.port ) +
708- " /v1/chat/completions" ;
709- curl_easy_setopt (curl, CURLOPT_URL, url.c_str ());
717+ curl_easy_setopt (curl, CURLOPT_URL, url.ToFullPath ().c_str ());
710718 curl_easy_setopt (curl, CURLOPT_POST, 1L );
719+ CTL_INF (url.ToFullPath ());
711720
712721 struct curl_slist * headers = nullptr ;
713722 headers = curl_slist_append (headers, " Content-Type: application/json" );
@@ -754,16 +763,6 @@ void LocalEngine::HandleOpenAiChatCompletion(
754763 });
755764
756765 } else {
757- auto url = url_parser::Url{
758- .protocol = " http" ,
759- .host = s.host + " :" + std::to_string (s.port ),
760- .pathParams =
761- {
762- " v1" ,
763- " chat" ,
764- " completions" ,
765- },
766- };
767766 Json::Value result;
768767 // multiple choices
769768 for (int i = 0 ; i < n; i++) {
@@ -810,6 +809,8 @@ void LocalEngine::HandleOpenAiChatCompletion(
810809 }
811810}
812811
812+ // (sang) This is duplicated code, but it will be easier to clean up once
813+ // the upstream llama-server is fully OpenAI API compatible.
813814void LocalEngine::HandleNonOpenAiChatCompletion (
814815 std::shared_ptr<Json::Value> json_body, http_callback&& callback,
815816 const std::string& model) {
@@ -881,17 +882,23 @@ void LocalEngine::HandleNonOpenAiChatCompletion(
881882 (*json_body)[" n" ] = 1 ;
882883 int n_probs = json_body->get (" n_probs" , 0 ).asInt ();
883884
885+ auto url = url_parser::Url{
886+ /* .protocol*/ " http" ,
887+ /* .host*/ s.host + " :" + std::to_string (s.port ),
888+ /* .pathParams*/ {" v1" , " completions" },
889+ /* .queries*/ {},
890+ };
891+
884892 if (is_stream) {
885- q_.RunInQueue ([s, json_body, callback, n_probs, model] {
893+ q_.RunInQueue ([s, json_body, callback, n_probs, model,
894+ url = std::move (url)] {
886895 auto curl = curl_easy_init ();
887896 if (!curl) {
888897 CTL_WRN (" Failed to initialize CURL" );
889898 return ;
890899 }
891900
892- auto url =
893- " http://" + s.host + " :" + std::to_string (s.port ) + " /v1/completions" ;
894- curl_easy_setopt (curl, CURLOPT_URL, url.c_str ());
901+ curl_easy_setopt (curl, CURLOPT_URL, url.ToFullPath ().c_str ());
895902 curl_easy_setopt (curl, CURLOPT_POST, 1L );
896903
897904 struct curl_slist * headers = nullptr ;
@@ -939,15 +946,7 @@ void LocalEngine::HandleNonOpenAiChatCompletion(
939946 });
940947
941948 } else {
942- auto url = url_parser::Url{
943- .protocol = " http" ,
944- .host = s.host + " :" + std::to_string (s.port ),
945- .pathParams =
946- {
947- " v1" ,
948- " completions" ,
949- },
950- };
949+
951950 Json::Value result;
952951 int prompt_tokens = 0 ;
953952 int predicted_tokens = 0 ;
0 commit comments