Skip to content

Commit c6d0f8f

Browse files
author
PARTH SHAH
committed
Add support for generating SHARED library with -DBUILD_SHARED_LIBS=ON flag
1 parent aae1209 commit c6d0f8f

File tree

2 files changed

+73
-1
lines changed

2 files changed

+73
-1
lines changed

CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Require CMake 3.19 or newer; policies are set as of 3.30 when available.
cmake_minimum_required(VERSION 3.19...3.30)
22
project(tokenizers_cpp C CXX)
33

4+
# Opt-in switch for building tokenizers_cpp as a shared library (default: OFF).
# NOTE(review): BUILD_SHARED_LIBS is also CMake's global default library type
# for add_library() calls without an explicit STATIC/SHARED keyword, so turning
# it on may affect other targets configured by this project - confirm intended.
option(BUILD_SHARED_LIBS "Build tokenizers_cpp as a shared library" OFF)
5+
46
# Build as strict C++17: the standard is required and compiler extensions are off.
set(CMAKE_CXX_STANDARD 17)
57
set(CMAKE_CXX_STANDARD_REQUIRED ON)
68
set(CMAKE_CXX_EXTENSIONS OFF)
@@ -157,7 +159,12 @@ set(
157159
src/huggingface_tokenizer.cc
158160
src/rwkv_world_tokenizer.cc
159161
)
160-
add_library(tokenizers_cpp STATIC ${TOKENIZER_CPP_SRCS})
162+
163+
# Choose the library type from BUILD_SHARED_LIBS. The shared variant also
# compiles src/static_exports.cc, which defines the exported entry points.
if(NOT BUILD_SHARED_LIBS)
  add_library(tokenizers_cpp STATIC ${TOKENIZER_CPP_SRCS})
else()
  add_library(tokenizers_cpp SHARED ${TOKENIZER_CPP_SRCS} src/static_exports.cc)
endif()
161168
# PRIVATE include paths: used only while compiling tokenizers_cpp itself and
# not propagated to targets that link against it.
target_include_directories(tokenizers_cpp PRIVATE sentencepiece/src)
162169
target_include_directories(tokenizers_cpp PRIVATE msgpack/include)
163170
# PUBLIC include path: also added to every target that links tokenizers_cpp.
target_include_directories(tokenizers_cpp PUBLIC ${TOKENIZERS_CPP_INCLUDE})

src/static_exports.cc

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#include "tokenizers_cpp.h"
2+
3+
// export LoadBlobJsonAndEncode(const std::string &, const std::string &, std::vector<int32_t> &)
4+
extern "C" __declspec(dllexport) void LoadBlobJsonAndEncode(const std::string& json_blob, const std::string& text, std::vector<int32_t>& token_ids)
5+
{
6+
auto tokenizer = tokenizers::Tokenizer::FromBlobJSON(json_blob);
7+
token_ids = tokenizer->Encode(text);
8+
return;
9+
}
10+
11+
// export LoadBlobJsonAndEncodeBatch(const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
12+
extern "C" __declspec(dllexport) void LoadBlobJsonAndEncodeBatch(const std::string& json_blob, const std::vector<std::string>& texts, std::vector<std::vector<int32_t>>& token_ids_batch)
13+
{
14+
auto tokenizer = tokenizers::Tokenizer::FromBlobJSON(json_blob);
15+
token_ids_batch = tokenizer->EncodeBatch(texts);
16+
return;
17+
}
18+
19+
// export LoadBlobSentencePieceAndEncode(const std::string &, const std::string &, std::vector<int32_t> &)
20+
extern "C" __declspec(dllexport) void LoadBlobSentencePieceAndEncode(const std::string& model_blob, const std::string& text, std::vector<int32_t>& token_ids)
21+
{
22+
auto tokenizer = tokenizers::Tokenizer::FromBlobSentencePiece(model_blob);
23+
token_ids = tokenizer->Encode(text);
24+
return;
25+
}
26+
27+
// export LoadBlobSentencePieceAndEncodeBatch(const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
28+
extern "C" __declspec(dllexport) void LoadBlobSentencePieceAndEncodeBatch(const std::string& model_blob, const std::vector<std::string>& texts, std::vector<std::vector<int32_t>>& token_ids_batch)
29+
{
30+
auto tokenizer = tokenizers::Tokenizer::FromBlobSentencePiece(model_blob);
31+
token_ids_batch = tokenizer->EncodeBatch(texts);
32+
return;
33+
}
34+
35+
// export LoadBlobRWKVWorldAndEncode(const std::string &, const std::string &, std::vector<int32_t> &)
36+
extern "C" __declspec(dllexport) void LoadBlobRWKVWorldAndEncode(const std::string& model_blob, const std::string& text, std::vector<int32_t>& token_ids)
37+
{
38+
auto tokenizer = tokenizers::Tokenizer::FromBlobRWKVWorld(model_blob);
39+
token_ids = tokenizer->Encode(text);
40+
return;
41+
}
42+
43+
// export LoadBlobRWKVWorldAndEncodeBatch(const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
44+
extern "C" __declspec(dllexport) void LoadBlobRWKVWorldAndEncodeBatch(const std::string& model_blob, const std::vector<std::string>& texts, std::vector<std::vector<int32_t>>& token_ids_batch)
45+
{
46+
auto tokenizer = tokenizers::Tokenizer::FromBlobRWKVWorld(model_blob);
47+
token_ids_batch = tokenizer->EncodeBatch(texts);
48+
return;
49+
}
50+
51+
// export LoadBlobByteLevelBPEAndEncode(const std::string &, const std::string &, const std::string &, const std::string &, std::vector<int32_t> &)
52+
extern "C" __declspec(dllexport) void LoadBlobByteLevelBPEAndEncode(const std::string& vocab_blob, const std::string& merges_blob, const std::string& added_tokens, const std::string& text, std::vector<int32_t>& token_ids)
53+
{
54+
auto tokenizer = tokenizers::Tokenizer::FromBlobByteLevelBPE(vocab_blob, merges_blob, added_tokens);
55+
token_ids = tokenizer->Encode(text);
56+
return;
57+
}
58+
59+
// export LoadBlobByteLevelBPEAndEncodeBatch(const std::string &, const std::string &, const std::string &, const std::vector<std::string> &, std::vector<std::vector<int32_t>> &)
60+
extern "C" __declspec(dllexport) void LoadBlobByteLevelBPEAndEncodeBatch(const std::string& vocab_blob, const std::string& merges_blob, const std::string& added_tokens, const std::vector<std::string>& texts, std::vector<std::vector<int32_t>>& token_ids_batch)
61+
{
62+
auto tokenizer = tokenizers::Tokenizer::FromBlobByteLevelBPE(vocab_blob, merges_blob, added_tokens);
63+
token_ids_batch = tokenizer->EncodeBatch(texts);
64+
return;
65+
}

0 commit comments

Comments
 (0)