fix: add metadata

sangjanai · sangjanai · commit 099fe586b19e · 2025-01-20T17:38:47.000+07:00
diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
@@ -810,11 +810,11 @@ void Models::GetModelSources(
     resp->setStatusCode(k400BadRequest);
     callback(resp);
   } else {
-    auto const& info = res.value();
+    auto& info = res.value();
     Json::Value ret;
     Json::Value data(Json::arrayValue);
-    for (auto const& i : info) {
-      data.append(i);
+    for (auto& i : info) {
+      data.append(i.second.ToJson());
     }
     ret["data"] = data;
     auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
diff --git a/engine/database/models.cc b/engine/database/models.cc
@@ -304,4 +304,34 @@ cpp::result<std::vector<std::string>, std::string> Models::GetModels(
   }
 }
 
+cpp::result<std::vector<ModelEntry>, std::string>
+Models::GetDownloadableModels() const {
+  try {
+    std::vector<ModelEntry> res;
+    SQLite::Statement query(
+        db_,
+        "SELECT model_id, author_repo_id, branch_name, "
+        "path_to_model_yaml, model_alias, model_format, "
+        "model_source, status, engine, metadata FROM models "
+        "WHERE status = \"downloadable\"");
+    while (query.executeStep()) {
+      ModelEntry entry;
+      entry.model = query.getColumn(0).getString();
+      entry.author_repo_id = query.getColumn(1).getString();
+      entry.branch_name = query.getColumn(2).getString();
+      entry.path_to_model_yaml = query.getColumn(3).getString();
+      entry.model_alias = query.getColumn(4).getString();
+      entry.model_format = query.getColumn(5).getString();
+      entry.model_source = query.getColumn(6).getString();
+      entry.status = StringToStatus(query.getColumn(7).getString());
+      entry.engine = query.getColumn(8).getString();
+      entry.metadata = query.getColumn(9).getString();
+      res.push_back(entry);
+    }
+    return res;
+  } catch (const std::exception& e) {
+    return cpp::fail(e.what());
+  }
+}
+
 }  // namespace cortex::db
diff --git a/engine/database/models.h b/engine/database/models.h
@@ -10,7 +10,6 @@ namespace cortex::db {
 
 enum class ModelStatus { Remote, Downloaded, Downloadable };
 
-
 struct ModelEntry {
   std::string model;
   std::string author_repo_id;
@@ -60,6 +59,7 @@ class Models {
   cpp::result<std::vector<std::string>, std::string> GetModelSources() const;
   cpp::result<std::vector<std::string>, std::string> GetModels(
       const std::string& model_src) const;
+  cpp::result<std::vector<ModelEntry>, std::string> GetDownloadableModels() const;
 };
 
 }  // namespace cortex::db
diff --git a/engine/services/database_service.cc b/engine/services/database_service.cc
@@ -127,4 +127,9 @@ cpp::result<std::vector<std::string>, std::string> DatabaseService::GetModels(
     const std::string& model_src) const {
   return cortex::db::Models().GetModels(model_src);
 }
+
+cpp::result<std::vector<ModelEntry>, std::string>
+DatabaseService::GetDownloadableModels() const {
+  return cortex::db::Models().GetDownloadableModels();
+}
 // end models
diff --git a/engine/services/database_service.h b/engine/services/database_service.h
@@ -63,6 +63,8 @@ class DatabaseService {
   cpp::result<std::vector<std::string>, std::string> GetModelSources() const;
   cpp::result<std::vector<std::string>, std::string> GetModels(
       const std::string& model_src) const;
+  cpp::result<std::vector<ModelEntry>, std::string> GetDownloadableModels()
+      const;
 
  private:
 };
diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc
@@ -144,9 +144,25 @@ cpp::result<bool, std::string> ModelSourceService::RemoveModelSource(
   return true;
 }
 
-cpp::result<std::vector<std::string>, std::string>
+cpp::result<std::unordered_map<std::string, ModelSource>, std::string>
 ModelSourceService::GetModelSources() {
-  return db_service_->GetModelSources();
+  auto res = db_service_->GetDownloadableModels();
+  if (res.has_error()) {
+    return cpp::fail(res.error());
+  }
+  auto& models = res.value();
+  std::unordered_map<std::string, ModelSource> ms;
+  for (auto const& m : models) {
+    auto meta_json = json_helper::ParseJsonString(m.metadata);
+    ms[m.model_source].models.push_back({m.model, meta_json["size"].asUInt64()});
+    meta_json.removeMember("size");
+    if (ms[m.model_source].metadata.empty()) {
+      ms[m.model_source].metadata = json_helper::DumpJsonString(meta_json);
+    }
+    ms[m.model_source].id = m.model_source;
+    LOG_INFO << m.model;
+  }
+  return ms;
 }
 
 cpp::result<bool, std::string> ModelSourceService::AddHfOrg(
@@ -155,32 +171,17 @@ cpp::result<bool, std::string> ModelSourceService::AddHfOrg(
                                    author);
   if (res.has_value()) {
     auto models = ParseJsonString(res.value());
-    // Get models from db
-
-    auto model_list_before = db_service_->GetModels(model_source)
-                                 .value_or(std::vector<std::string>{});
-    std::unordered_set<std::string> updated_model_list;
     // Add new models
     for (auto const& m : models) {
       CTL_DBG(m.id);
+
       auto author_model = string_utils::SplitBy(m.id, "/");
       if (author_model.size() == 2) {
         auto const& author = author_model[0];
         auto const& model_name = author_model[1];
-        auto add_res = AddRepoSiblings(model_source, author, model_name)
-                           .value_or(std::unordered_set<std::string>{});
-        for (auto const& a : add_res) {
-          updated_model_list.insert(a);
-        }
-      }
-    }
-
-    // Clean up
-    for (auto const& mid : model_list_before) {
-      if (updated_model_list.find(mid) == updated_model_list.end()) {
-        if (auto del_res = db_service_->DeleteModelEntry(mid);
-            del_res.has_error()) {
-          CTL_INF(del_res.error());
+        auto r = AddHfRepo(model_source + "/" + model_name, author, model_name);
+        if (r.has_error()) {
+          CTL_WRN(r.error());
         }
       }
     }
@@ -231,8 +232,32 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source,
         "supported.");
   }
 
+  auto siblings_fs = hu::GetSiblingsFileSize(author, model_name);
+
+  if (siblings_fs.has_error()) {
+    return cpp::fail("Could not get siblings file size: " + author + "/" +
+                     model_name);
+  }
+
+  auto readme = hu::GetReadMe(author, model_name);
+  std::string desc;
+  if (!readme.has_error()) {
+    desc = readme.value();
+  }
+
+  auto meta_json = json_helper::ParseJsonString(repo_info->metadata);
+  for (auto& m : meta_json["siblings"]) {
+    m["size"] = siblings_fs.value()
+                    .file_sizes.at(m["rfilename"].asString())
+                    .size_in_bytes;
+  }
+  meta_json["description"] = desc;
+  LOG_DEBUG << meta_json.toStyledString();
+
   for (const auto& sibling : repo_info->siblings) {
     if (string_utils::EndsWith(sibling.rfilename, ".gguf")) {
+      meta_json["size"] =
+          siblings_fs.value().file_sizes.at(sibling.rfilename).size_in_bytes;
       std::string model_id =
           author + ":" + model_name + ":" + sibling.rfilename;
       cortex::db::ModelEntry e = {
@@ -245,7 +270,7 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source,
           .model_source = model_source,
           .status = cortex::db::ModelStatus::Downloadable,
           .engine = "llama-cpp",
-          .metadata = repo_info->metadata};
+          .metadata = json_helper::DumpJsonString(meta_json)};
       if (!db_service_->HasModel(model_id)) {
         if (auto add_res = db_service_->AddModelEntry(e); add_res.has_error()) {
           CTL_INF(add_res.error());
@@ -273,46 +298,16 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoOrg(
       "https://huggingface.co/api/models?author=cortexso");
   if (res.has_value()) {
     auto models = ParseJsonString(res.value());
-    // Get models from db
-
-    auto model_list_before = db_service_->GetModels(model_source)
-                                 .value_or(std::vector<std::string>{});
-    std::unordered_set<std::string> updated_model_list;
     for (auto const& m : models) {
       CTL_INF(m.id);
       auto author_model = string_utils::SplitBy(m.id, "/");
       if (author_model.size() == 2) {
         auto const& author = author_model[0];
         auto const& model_name = author_model[1];
-        auto branches = huggingface_utils::GetModelRepositoryBranches(
-            "cortexso", model_name);
-        if (branches.has_error()) {
-          CTL_INF(branches.error());
-          continue;
-        }
-
-        auto repo_info = hu::GetHuggingFaceModelRepoInfo(author, model_name);
-        if (repo_info.has_error()) {
-          CTL_INF(repo_info.error());
-          continue;
-        }
-        for (auto const& [branch, _] : branches.value()) {
-          CTL_INF(branch);
-          auto add_res = AddCortexsoRepoBranch(model_source, author, model_name,
-                                               branch, repo_info->metadata)
-                             .value_or(std::unordered_set<std::string>{});
-          for (auto const& a : add_res) {
-            updated_model_list.insert(a);
-          }
-        }
-      }
-    }
-    // Clean up
-    for (auto const& mid : model_list_before) {
-      if (updated_model_list.find(mid) == updated_model_list.end()) {
-        if (auto del_res = db_service_->DeleteModelEntry(mid);
-            del_res.has_error()) {
-          CTL_INF(del_res.error());
+        auto r = AddCortexsoRepo(model_source + "/" + model_name, author,
+                                 model_name);
+        if (r.has_error()) {
+          CTL_WRN(r.error());
         }
       }
     }
@@ -384,27 +379,32 @@ ModelSourceService::AddCortexsoRepoBranch(const std::string& model_source,
   }
 
   bool has_gguf = false;
+  uint64_t model_size = 0;
   for (const auto& value : result.value()) {
     auto path = value["path"].asString();
     if (path.find(".gguf") != std::string::npos) {
       has_gguf = true;
+      model_size = value["size"].asUInt64();
     }
   }
   if (!has_gguf) {
     CTL_INF("Only support gguf file format! - branch: " << branch);
     return {};
   } else {
+    auto meta_json = json_helper::ParseJsonString(metadata);
+    meta_json["size"] = model_size;
     std::string model_id = model_name + ":" + branch;
-    cortex::db::ModelEntry e = {.model = model_id,
-                                .author_repo_id = author,
-                                .branch_name = branch,
-                                .path_to_model_yaml = "",
-                                .model_alias = "",
-                                .model_format = "cortexso",
-                                .model_source = model_source,
-                                .status = cortex::db::ModelStatus::Downloadable,
-                                .engine = "llama-cpp",
-                                .metadata = metadata};
+    cortex::db::ModelEntry e = {
+        .model = model_id,
+        .author_repo_id = author,
+        .branch_name = branch,
+        .path_to_model_yaml = "",
+        .model_alias = "",
+        .model_format = "cortexso",
+        .model_source = model_source,
+        .status = cortex::db::ModelStatus::Downloadable,
+        .engine = "llama-cpp",
+        .metadata = json_helper::DumpJsonString(meta_json)};
     if (!db_service_->HasModel(model_id)) {
       CTL_INF("Adding model to db: " << model_name << ":" << branch);
       if (auto res = db_service_->AddModelEntry(e);
diff --git a/engine/services/model_source_service.h b/engine/services/model_source_service.h
@@ -5,6 +5,35 @@
 #include "services/database_service.h"
 #include "utils/result.hpp"
 
+struct ModelSourceInfo {
+  std::string id;
+  uint64_t size;
+  Json::Value ToJson() const {
+    Json::Value root;
+    root["id"] = id;
+    root["size"] = size;
+    return root;
+  }
+};
+
+struct ModelSource {
+  std::string id;
+  std::vector<ModelSourceInfo> models;
+  std::string metadata;
+
+  Json::Value ToJson() {
+    Json::Value root;
+    root["id"] = id;
+    Json::Value models_json;
+    for (auto const& m : models) {
+      models_json.append(m.ToJson());
+    }
+    root["models"] = models_json;
+    root["metadata"] = metadata;
+    return root;
+  };
+};
+
 class ModelSourceService {
  public:
   explicit ModelSourceService(std::shared_ptr<DatabaseService> db_service);
@@ -16,7 +45,8 @@ class ModelSourceService {
   cpp::result<bool, std::string> RemoveModelSource(
       const std::string& model_source);
 
-  cpp::result<std::vector<std::string>, std::string> GetModelSources();
+  cpp::result<std::unordered_map<std::string, ModelSource>, std::string>
+  GetModelSources();
 
  private:
   cpp::result<bool, std::string> AddHfOrg(const std::string& model_source,
diff --git a/engine/utils/huggingface_utils.h b/engine/utils/huggingface_utils.h