From f17baa7d71a6dda04df67b1676fcabed9f35d2c6 Mon Sep 17 00:00:00 2001
From: sssshhhhhh <193317444+sssshhhhhh@users.noreply.github.com>
Date: Sat, 31 Jan 2026 22:48:01 +1100
Subject: [PATCH] Assume less about whisper vocab

---
 src/models/whisper.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/models/whisper.cc b/src/models/whisper.cc
index 7b071ace6..8c3612f1e 100644
--- a/src/models/whisper.cc
+++ b/src/models/whisper.cc
@@ -69,9 +69,12 @@ namespace ctranslate2 {
       _no_speech_id = vocabulary.to_id("<|nospeech|>");
       if (_no_speech_id == vocabulary.unk_id())
         _no_speech_id = vocabulary.to_id("<|nocaptions|>");
-      _is_multilingual = vocabulary.size() >= 51865;
+      _is_multilingual = vocabulary.to_id("") != vocabulary.unk_id();
       _n_mels = _encoder->input_size();
-      _num_languages = vocabulary.size() - 51765 - (_is_multilingual ? 1 : 0);
+      // vocab: text tokens..., <|endoftext|>, <|startoftranscript|>,
+      // lang tokens..., <|translate|>, <|transcribe|>, <|startoflm|>,
+      // <|startofprev|>, <|nospeech|>, <|notimestamps|>, time tokens...
+      _num_languages = _no_speech_id - _sot_id - 5;
     }
 
     StorageView WhisperReplica::encode(StorageView features, const bool to_cpu) {