1414namespace hu = huggingface_utils;
1515
1616namespace {
// Lifetime, in seconds, of an entry in the model-source cache. While an entry
// for a repository is younger than this, a repeated AddModelSource request for
// that repository returns early instead of re-processing the source.
constexpr int kModeSourceCacheSecs = 600;
18+
/// @brief Builds the identifier of a repository model source.
///
/// The identifier is used as the key of the model-source cache and as the
/// repository name shown in error messages.
///
/// @param author_hub Hub author (first path segment of the source URL).
/// @param model_name Repository/model name (second path segment).
/// @return The two parts joined as "<author_hub>/<model_name>".
std::string GenSourceId(const std::string& author_hub,
                        const std::string& model_name) {
  return author_hub + "/" + model_name;
}
23+
1724std::vector<ModelInfo> ParseJsonString (const std::string& json_str) {
1825 std::vector<ModelInfo> models;
1926
@@ -79,19 +86,34 @@ cpp::result<bool, std::string> ModelSourceService::AddModelSource(
7986 }
8087
8188 if (auto is_org = r.pathParams .size () == 1 ; is_org) {
82- auto & author = r.pathParams [0 ];
83- if (author == " cortexso" ) {
84- return AddCortexsoOrg (model_source);
85- } else {
86- return AddHfOrg (model_source, author);
87- }
89+ return cpp::fail (" Only support repository model source, url: " +
90+ model_source);
91+ // TODO(sang)
92+ // auto& hub_author = r.pathParams[0];
93+ // if (hub_author == "cortexso") {
94+ // return AddCortexsoOrg(model_source);
95+ // } else {
96+ // return AddHfOrg(model_source, hub_author);
97+ // }
8898 } else { // Repo
89- auto const & author = r.pathParams [0 ];
99+ auto const & hub_author = r.pathParams [0 ];
90100 auto const & model_name = r.pathParams [1 ];
101+ // Return cache value
102+ if (auto key = GenSourceId (hub_author, model_name);
103+ src_cache_.find (key) != src_cache_.end ()) {
104+ auto now = std::chrono::system_clock::now ();
105+ if (std::chrono::duration_cast<std::chrono::seconds>(now -
106+ src_cache_.at (key))
107+ .count () < kModeSourceCacheSecs ) {
108+ CTL_DBG (" Return cache value for model source: " << model_source);
109+ return true ;
110+ }
111+ }
112+
91113 if (r.pathParams [0 ] == " cortexso" ) {
92- return AddCortexsoRepo (model_source, author , model_name);
114+ return AddCortexsoRepo (model_source, hub_author , model_name);
93115 } else {
94- return AddHfRepo (model_source, author , model_name);
116+ return AddHfRepo (model_source, hub_author , model_name);
95117 }
96118 }
97119 }
@@ -190,9 +212,9 @@ cpp::result<ModelSource, std::string> ModelSourceService::GetModelSource(
190212}
191213
192214cpp::result<std::vector<std::string>, std::string>
193- ModelSourceService::GetRepositoryList (std::string_view author ,
215+ ModelSourceService::GetRepositoryList (std::string_view hub_author ,
194216 std::string_view tag_filter) {
195- std::string as (author );
217+ std::string as (hub_author );
196218 auto get_repo_list = [this , &as, &tag_filter] {
197219 std::vector<std::string> repo_list;
198220 auto const & mis = cortexso_repos_.at (as);
@@ -227,9 +249,9 @@ ModelSourceService::GetRepositoryList(std::string_view author,
227249}
228250
229251cpp::result<bool , std::string> ModelSourceService::AddHfOrg (
230- const std::string& model_source, const std::string& author ) {
252+ const std::string& model_source, const std::string& hub_author ) {
231253 auto res = curl_utils::SimpleGet (" https://huggingface.co/api/models?author=" +
232- author );
254+ hub_author );
233255 if (res.has_value ()) {
234256 auto models = ParseJsonString (res.value ());
235257 // Add new models
@@ -238,9 +260,10 @@ cpp::result<bool, std::string> ModelSourceService::AddHfOrg(
238260
239261 auto author_model = string_utils::SplitBy (m.id , " /" );
240262 if (author_model.size () == 2 ) {
241- auto const & author = author_model[0 ];
263+ auto const & hub_author = author_model[0 ];
242264 auto const & model_name = author_model[1 ];
243- auto r = AddHfRepo (model_source + " /" + model_name, author, model_name);
265+ auto r =
266+ AddHfRepo (model_source + " /" + model_name, hub_author, model_name);
244267 if (r.has_error ()) {
245268 CTL_WRN (r.error ());
246269 }
@@ -253,14 +276,14 @@ cpp::result<bool, std::string> ModelSourceService::AddHfOrg(
253276}
254277
255278cpp::result<bool , std::string> ModelSourceService::AddHfRepo (
256- const std::string& model_source, const std::string& author ,
279+ const std::string& model_source, const std::string& hub_author ,
257280 const std::string& model_name) {
258281 // Get models from db
259282
260283 auto model_list_before = db_service_->GetModels (model_source)
261284 .value_or (std::vector<cortex::db::ModelEntry>{});
262285 std::unordered_set<std::string> updated_model_list;
263- auto add_res = AddRepoSiblings (model_source, author , model_name);
286+ auto add_res = AddRepoSiblings (model_source, hub_author , model_name);
264287 if (add_res.has_error ()) {
265288 return cpp::fail (add_res.error ());
266289 } else {
@@ -274,15 +297,17 @@ cpp::result<bool, std::string> ModelSourceService::AddHfRepo(
274297 }
275298 }
276299 }
300+ src_cache_[GenSourceId (hub_author, model_name)] =
301+ std::chrono::system_clock::now ();
277302 return true ;
278303}
279304
280305cpp::result<std::unordered_set<std::string>, std::string>
281306ModelSourceService::AddRepoSiblings (const std::string& model_source,
282- const std::string& author ,
307+ const std::string& hub_author ,
283308 const std::string& model_name) {
284309 std::unordered_set<std::string> res;
285- auto repo_info = hu::GetHuggingFaceModelRepoInfo (author , model_name);
310+ auto repo_info = hu::GetHuggingFaceModelRepoInfo (hub_author , model_name);
286311 if (repo_info.has_error ()) {
287312 return cpp::fail (repo_info.error ());
288313 }
@@ -293,14 +318,14 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source,
293318 " supported." );
294319 }
295320
296- auto siblings_fs = hu::GetSiblingsFileSize (author , model_name);
321+ auto siblings_fs = hu::GetSiblingsFileSize (hub_author , model_name);
297322
298323 if (siblings_fs.has_error ()) {
299- return cpp::fail (" Could not get siblings file size: " + author + " / " +
300- model_name);
324+ return cpp::fail (" Could not get siblings file size: " +
325+ GenSourceId (hub_author, model_name) );
301326 }
302327
303- auto readme = hu::GetReadMe (author , model_name);
328+ auto readme = hu::GetReadMe (hub_author , model_name);
304329 std::string desc;
305330 if (!readme.has_error ()) {
306331 desc = readme.value ();
@@ -326,10 +351,10 @@ ModelSourceService::AddRepoSiblings(const std::string& model_source,
326351 siblings_fs_v.file_sizes .at (sibling.rfilename ).size_in_bytes ;
327352 }
328353 std::string model_id =
329- author + " :" + model_name + " :" + sibling.rfilename ;
354+ hub_author + " :" + model_name + " :" + sibling.rfilename ;
330355 cortex::db::ModelEntry e = {
331356 .model = model_id,
332- .author_repo_id = author ,
357+ .author_repo_id = hub_author ,
333358 .branch_name = " main" ,
334359 .path_to_model_yaml = " " ,
335360 .model_alias = " " ,
@@ -369,9 +394,9 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoOrg(
369394 CTL_INF (m.id );
370395 auto author_model = string_utils::SplitBy (m.id , " /" );
371396 if (author_model.size () == 2 ) {
372- auto const & author = author_model[0 ];
397+ auto const & hub_author = author_model[0 ];
373398 auto const & model_name = author_model[1 ];
374- auto r = AddCortexsoRepo (model_source + " /" + model_name, author ,
399+ auto r = AddCortexsoRepo (model_source + " /" + model_name, hub_author ,
375400 model_name);
376401 if (r.has_error ()) {
377402 CTL_WRN (r.error ());
@@ -386,7 +411,7 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoOrg(
386411}
387412
388413cpp::result<bool , std::string> ModelSourceService::AddCortexsoRepo (
389- const std::string& model_source, const std::string& author ,
414+ const std::string& model_source, const std::string& hub_author ,
390415 const std::string& model_name) {
391416 auto begin = std::chrono::system_clock::now ();
392417 auto branches =
@@ -395,17 +420,23 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoRepo(
395420 return cpp::fail (branches.error ());
396421 }
397422
398- auto repo_info = hu::GetHuggingFaceModelRepoInfo (author , model_name);
423+ auto repo_info = hu::GetHuggingFaceModelRepoInfo (hub_author , model_name);
399424 if (repo_info.has_error ()) {
400425 return cpp::fail (repo_info.error ());
401426 }
402427
403- auto readme = hu::GetReadMe (author , model_name);
428+ auto readme = hu::GetReadMe (hub_author , model_name);
404429 std::string desc;
405430 if (!readme.has_error ()) {
406431 desc = readme.value ();
407432 }
408433
434+ auto author = hub_author;
435+ if (auto model_author = hu::GetModelAuthorCortexsoHub (model_name);
436+ model_author.has_value () && !model_author->empty ()) {
437+ author = *model_author;
438+ }
439+
409440 // Get models from db
410441 auto model_list_before = db_service_->GetModels (model_source)
411442 .value_or (std::vector<cortex::db::ModelEntry>{});
@@ -442,6 +473,8 @@ cpp::result<bool, std::string> ModelSourceService::AddCortexsoRepo(
442473 " Duration ms: " << std::chrono::duration_cast<std::chrono::milliseconds>(
443474 end - begin)
444475 .count ());
476+ src_cache_[GenSourceId (hub_author, model_name)] =
477+ std::chrono::system_clock::now ();
445478 return true ;
446479}
447480
0 commit comments