File tree Expand file tree Collapse file tree 3 files changed +19
-2
lines changed
Expand file tree Collapse file tree 3 files changed +19
-2
lines changed Original file line number Diff line number Diff line change @@ -15,7 +15,11 @@ namespace tokenizers {
1515 */
1616class HFTokenizer : public Tokenizer {
1717 public:
18- explicit HFTokenizer (TokenizerHandle handle) : handle_(handle) {}
18+ explicit HFTokenizer (TokenizerHandle handle) : handle_(handle) {
19+ #ifdef COMPILE_WASM_RUNTIME
20+ setenv("TOKENIZERS_PARALLELISM", "false", true);
21+ #endif
22+ }
1923
2024 HFTokenizer (const HFTokenizer&) = delete ;
2125 HFTokenizer (HFTokenizer&& other) { std::swap (other.handle_ , handle_); }
Original file line number Diff line number Diff line change @@ -5,7 +5,7 @@ rustup target add wasm32-unknown-emscripten
55
66mkdir -p build
77cd build
8- emcmake cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-O3"
8+ emcmake cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-O3 -DCOMPILE_WASM_RUNTIME"
99emmake make tokenizers_cpp tokenizers_c sentencepiece-static -j8
1010cd ..
1111
Original file line number Diff line number Diff line change @@ -48,8 +48,21 @@ async function testLlamaTokenizer() {
4848 }
4949}
5050
51+ // Without COMPILE_WASM_RUNTIME, this tokenizer triggers parallel processing, which leads to an error.
52+ async function testBertTokenizer ( ) {
53+ console . log ( "Bert Tokenizer" ) ;
54+ const modelBuffer = await ( await
55+ fetch ( "https://huggingface.co/Snowflake/snowflake-arctic-embed-l/raw/main/tokenizer.json" )
56+ ) . arrayBuffer ( ) ;
57+ const tok = await Tokenizer . fromJSON ( modelBuffer ) ;
58+ const text = "What is the capital of Canada?" ;
59+ const ids = tok . encode ( text ) ;
60+ console . log ( ids ) ;
61+ }
62+
5163async function main ( ) {
5264 await testJSONTokenizer ( )
65+ await testBertTokenizer ( ) ;
5366 await testLlamaTokenizer ( )
5467}
5568
You can’t perform that action at this time.
0 commit comments