@@ -24,6 +24,7 @@
 #define CPPHTTPLIB_NO_EXCEPTIONS 1
 #endif
 
+#include "common/base.h"
 #include "utils/json.hpp"
 
 // auto generated files (update with ./deps.sh)
@@ -2510,45 +2511,42 @@ append_to_generated_text_from_generated_token_probs(llama_server_context &llama,
 using namespace drogon;
 
 namespace inferences {
-class llamaCPP : public drogon::HttpController<llamaCPP> {
+class llamaCPP : public drogon::HttpController<llamaCPP>, public ChatProvider {
 public:
-  llamaCPP() {
-    // Some default values for now below
-    log_disable(); // Disable the log to file feature, reduce bloat for
-                   // target
-                   // system ()
-  }
-
-  ~llamaCPP() { stopBackgroundTask(); }
+  llamaCPP();
+  ~llamaCPP();
   METHOD_LIST_BEGIN
   // list path definitions here;
-  METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
+  METHOD_ADD(llamaCPP::inference, "chat_completion", Post);
   METHOD_ADD(llamaCPP::embedding, "embedding", Post);
   METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
   METHOD_ADD(llamaCPP::unloadModel, "unloadmodel", Get);
   METHOD_ADD(llamaCPP::modelStatus, "modelstatus", Get);
 
   // Openai compatible path
-  ADD_METHOD_TO(llamaCPP::chatCompletion, "/v1/chat/completions", Post);
-  ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options);
+  ADD_METHOD_TO(llamaCPP::inference, "/v1/chat/completions", Post);
+  // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options); NOTE: preflight will be added back when browser support is properly planned
 
   ADD_METHOD_TO(llamaCPP::embedding, "/v1/embeddings", Post);
-  ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options);
+  // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options);
 
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
-  void chatCompletion(const HttpRequestPtr &req,
-                      std::function<void(const HttpResponsePtr &)> &&callback);
-  void handlePrelight(const HttpRequestPtr &req,
-                      std::function<void(const HttpResponsePtr &)> &&callback);
-  void embedding(const HttpRequestPtr &req,
-                 std::function<void(const HttpResponsePtr &)> &&callback);
-  void loadModel(const HttpRequestPtr &req,
-                 std::function<void(const HttpResponsePtr &)> &&callback);
-  void unloadModel(const HttpRequestPtr &req,
-                   std::function<void(const HttpResponsePtr &)> &&callback);
-  void modelStatus(const HttpRequestPtr &req,
-                   std::function<void(const HttpResponsePtr &)> &&callback);
+  void
+  inference(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  embedding(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  loadModel(const HttpRequestPtr &req,
+            std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  unloadModel(const HttpRequestPtr &req,
+              std::function<void(const HttpResponsePtr &)> &&callback) override;
+  void
+  modelStatus(const HttpRequestPtr &req,
+              std::function<void(const HttpResponsePtr &)> &&callback) override;
 
 private:
   llama_server_context llama;
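The constructor and destructor are now only declared in the header. A minimal sketch of the out-of-line definitions, assuming they move to the controller's .cc file and keep the behavior of the removed inline bodies (log_disable() in the constructor, stopBackgroundTask() in the destructor):

// Sketch only: placement in the .cc file is an assumption; the bodies
// mirror the inline code removed above.
llamaCPP::llamaCPP() {
  // Disable the log-to-file feature to reduce bloat on the target system.
  log_disable();
}

llamaCPP::~llamaCPP() { stopBackgroundTask(); }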
@@ -2569,8 +2567,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   std::string grammar_file_content;
 
   bool loadModelImpl(std::shared_ptr<Json::Value> jsonBody);
-  void
-  chatCompletionImpl(std::shared_ptr<Json::Value> jsonBody,
+  void inferenceImpl(std::shared_ptr<Json::Value> jsonBody,
                      std::function<void(const HttpResponsePtr &)> &callback);
   void embeddingImpl(std::shared_ptr<Json::Value> jsonBody,
                      std::function<void(const HttpResponsePtr &)> &callback);
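The new common/base.h and the ChatProvider base class are not shown in this diff. Because the five handlers are now marked override, ChatProvider presumably declares them as pure virtuals with matching signatures. A hypothetical sketch of such an interface, which the real header may well differ from:

// Hypothetical ChatProvider interface inferred from the override
// specifiers above; not the actual contents of common/base.h.
#pragma once
#include <drogon/HttpRequest.h>
#include <drogon/HttpResponse.h>
#include <functional>

class ChatProvider {
public:
  virtual ~ChatProvider() = default;
  virtual void
  inference(const drogon::HttpRequestPtr &req,
            std::function<void(const drogon::HttpResponsePtr &)> &&callback) = 0;
  virtual void
  embedding(const drogon::HttpRequestPtr &req,
            std::function<void(const drogon::HttpResponsePtr &)> &&callback) = 0;
  virtual void
  loadModel(const drogon::HttpRequestPtr &req,
            std::function<void(const drogon::HttpResponsePtr &)> &&callback) = 0;
  virtual void
  unloadModel(const drogon::HttpRequestPtr &req,
              std::function<void(const drogon::HttpResponsePtr &)> &&callback) = 0;
  virtual void
  modelStatus(const drogon::HttpRequestPtr &req,
              std::function<void(const drogon::HttpResponsePtr &)> &&callback) = 0;
};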