File tree Expand file tree Collapse file tree 2 files changed +13
-0
lines changed
Expand file tree Collapse file tree 2 files changed +13
-0
lines changed Original file line number Diff line number Diff line change @@ -88,6 +88,8 @@ set(TOKENIZERS_CPP_CARGO_SOURCE_PATH ${TOKENIZERS_CPP_ROOT}/rust)
# Default MSGPACK_USE_BOOST to OFF before the msgpack subdirectory is added
# (presumably read by msgpack's own CMakeLists -- confirm there).
option(MSGPACK_USE_BOOST "Use Boost libraried" OFF)
add_subdirectory(msgpack)

# Opt-in switch for the SentencePiece tokenizer; disabled unless the user
# turns it on (e.g. -DMLC_ENABLE_SENTENCEPIECE_TOKENIZER=ON).
option(MLC_ENABLE_SENTENCEPIECE_TOKENIZER "Enable SentencePiece tokenizer" OFF)
92+
9193if (MSVC )
9294 set (TOKENIZERS_RUST_LIB "${TOKENIZERS_CPP_CARGO_BINARY_DIR} /tokenizers_c.lib" )
9395else ()
@@ -120,6 +122,9 @@ add_library(tokenizer_cpp_objs OBJECT ${TOKENIZER_CPP_SRCS})
# Usage requirements for the tokenizer object library.
# The sentencepiece and msgpack header directories are only needed to compile
# the library itself (PRIVATE); the directories in TOKENIZERS_CPP_INCLUDE are
# also propagated to anything that links against it (PUBLIC).
target_include_directories(tokenizer_cpp_objs PRIVATE sentencepiece/src)
target_include_directories(tokenizer_cpp_objs PRIVATE msgpack/include)
target_include_directories(tokenizer_cpp_objs PUBLIC ${TOKENIZERS_CPP_INCLUDE})

# Test the option as a boolean rather than comparing its text with "ON":
# a string comparison silently disables the feature when the user passes any
# other true constant (1, TRUE, YES, Y, on, ...).
if(MLC_ENABLE_SENTENCEPIECE_TOKENIZER)
  # PUBLIC so that consumers of the library see the same macro as its sources.
  target_compile_definitions(tokenizer_cpp_objs PUBLIC MLC_ENABLE_SENTENCEPIECE_TOKENIZER)
endif()

target_link_libraries(tokenizer_cpp_objs PRIVATE msgpack-cxx)
124129
125130# sentencepiece config
Original file line number Diff line number Diff line change 1010
1111namespace tokenizers {
1212
13+ #ifdef MLC_ENABLE_SENTENCEPIECE_TOKENIZER
1314class SentencePieceTokenizer : public Tokenizer {
1415 public:
1516 explicit SentencePieceTokenizer (const std::string& model_blob) {
@@ -46,4 +47,11 @@ class SentencePieceTokenizer : public Tokenizer {
4647std::unique_ptr<Tokenizer> Tokenizer::FromBlobSentencePiece (const std::string& model_blob) {
4748 return std::make_unique<SentencePieceTokenizer>(model_blob);
4849}
50+ #else
51+ std::unique_ptr<Tokenizer> Tokenizer::FromBlobSentencePiece (const std::string& model_blob) {
52+ assert (false );
53+ throw ;
54+ }
55+ #endif // MLC_ENABLE_SENTENCEPIECE_TOKENIZER
56+
4957} // namespace tokenizers
You can’t perform that action at this time.
0 commit comments