1+ #include " tokenizers_cpp.h"
2+
3+ // export LoadBlobJsonAndEncode(const std::string &, const std::string &, std::vector<int32_t> &)
4+ extern " C" __declspec(dllexport) void LoadBlobJsonAndEncode (const std::string& json_blob, const std::string& text, std::vector<int32_t >& token_ids)
5+ {
6+ auto tokenizer = tokenizers::Tokenizer::FromBlobJSON (json_blob);
7+ token_ids = tokenizer->Encode (text);
8+ return ;
9+ }
10+
11+ // export LoadBlobJsonAndEncodeBatch(const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
12+ extern " C" __declspec(dllexport) void LoadBlobJsonAndEncodeBatch (const std::string& json_blob, const std::vector<std::string>& texts, std::vector<std::vector<int32_t >>& token_ids_batch)
13+ {
14+ auto tokenizer = tokenizers::Tokenizer::FromBlobJSON (json_blob);
15+ token_ids_batch = tokenizer->EncodeBatch (texts);
16+ return ;
17+ }
18+
19+ // export LoadBlobSentencePieceAndEncode(const std::string &, const std::string &, std::vector<int32_t> &)
20+ extern " C" __declspec(dllexport) void LoadBlobSentencePieceAndEncode (const std::string& model_blob, const std::string& text, std::vector<int32_t >& token_ids)
21+ {
22+ auto tokenizer = tokenizers::Tokenizer::FromBlobSentencePiece (model_blob);
23+ token_ids = tokenizer->Encode (text);
24+ return ;
25+ }
26+
27+ // export LoadBlobSentencePieceAndEncodeBatch(const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
28+ extern " C" __declspec(dllexport) void LoadBlobSentencePieceAndEncodeBatch (const std::string& model_blob, const std::vector<std::string>& texts, std::vector<std::vector<int32_t >>& token_ids_batch)
29+ {
30+ auto tokenizer = tokenizers::Tokenizer::FromBlobSentencePiece (model_blob);
31+ token_ids_batch = tokenizer->EncodeBatch (texts);
32+ return ;
33+ }
34+
35+ // export LoadBlobRWKVWorldAndEncode(const std::string &, const std::string &, std::vector<int32_t> &)
36+ extern " C" __declspec(dllexport) void LoadBlobRWKVWorldAndEncode (const std::string& model_blob, const std::string& text, std::vector<int32_t >& token_ids)
37+ {
38+ auto tokenizer = tokenizers::Tokenizer::FromBlobRWKVWorld (model_blob);
39+ token_ids = tokenizer->Encode (text);
40+ return ;
41+ }
42+
43+ // export LoadBlobRWKVWorldAndEncodeBatch(const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
44+ extern " C" __declspec(dllexport) void LoadBlobRWKVWorldAndEncodeBatch (const std::string& model_blob, const std::vector<std::string>& texts, std::vector<std::vector<int32_t >>& token_ids_batch)
45+ {
46+ auto tokenizer = tokenizers::Tokenizer::FromBlobRWKVWorld (model_blob);
47+ token_ids_batch = tokenizer->EncodeBatch (texts);
48+ return ;
49+ }
50+
51+ // export LoadBlobByteLevelBPEAndEncode(const std::string &, const std::string &, const std::string &, const std::string &, std::vector<int32_t> &)
52+ extern " C" __declspec(dllexport) void LoadBlobByteLevelBPEAndEncode (const std::string& vocab_blob, const std::string& merges_blob, const std::string& added_tokens, const std::string& text, std::vector<int32_t >& token_ids)
53+ {
54+ auto tokenizer = tokenizers::Tokenizer::FromBlobByteLevelBPE (vocab_blob, merges_blob, added_tokens);
55+ token_ids = tokenizer->Encode (text);
56+ return ;
57+ }
58+
59+ // export LoadBlobByteLevelBPEAndEncodeBatch(const std::string &, const std::string &, const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
60+ extern " C" __declspec(dllexport) void LoadBlobByteLevelBPEAndEncodeBatch (const std::string& vocab_blob, const std::string& merges_blob, const std::string& added_tokens, const std::vector<std::string>& texts, std::vector<std::vector<int32_t >>& token_ids_batch)
61+ {
62+ auto tokenizer = tokenizers::Tokenizer::FromBlobByteLevelBPE (vocab_blob, merges_blob, added_tokens);
63+ token_ids_batch = tokenizer->EncodeBatch (texts);
64+ return ;
65+ }
0 commit comments