@@ -132,6 +132,15 @@ std::string create_return_json(const std::string &id, const std::string &model,
   return Json::writeString(writer, root);
 }

+llamaCPP::llamaCPP() {
+  // Some default values for now below
+  log_disable(); // Disable the log to file feature, reduce bloat for
+                 // target
+                 // system ()
+};
+
+llamaCPP::~llamaCPP() { stopBackgroundTask(); }
+
 void llamaCPP::warmupModel() {
   json pseudo;

@@ -148,29 +157,18 @@ void llamaCPP::warmupModel() {
   return;
 }

-void llamaCPP::handlePrelight(
-    const HttpRequestPtr &req,
-    std::function<void(const HttpResponsePtr &)> &&callback) {
-  auto resp = drogon::HttpResponse::newHttpResponse();
-  resp->setStatusCode(drogon::HttpStatusCode::k200OK);
-  resp->addHeader("Access-Control-Allow-Origin", "*");
-  resp->addHeader("Access-Control-Allow-Methods", "POST, OPTIONS");
-  resp->addHeader("Access-Control-Allow-Headers", "*");
-  callback(resp);
-}
-
-void llamaCPP::chatCompletion(
+void llamaCPP::inference(
     const HttpRequestPtr &req,
     std::function<void(const HttpResponsePtr &)> &&callback) {

   const auto &jsonBody = req->getJsonObject();
   // Check if model is loaded
   checkModelLoaded(callback);

-  chatCompletionImpl(jsonBody, callback);
+  inferenceImpl(jsonBody, callback);
 }

-void llamaCPP::chatCompletionImpl(
+void llamaCPP::inferenceImpl(
     std::shared_ptr<Json::Value> jsonBody,
     std::function<void(const HttpResponsePtr &)> &callback) {
