From 1ab4db358431066d0df3704785518f30324a6635 Mon Sep 17 00:00:00 2001 From: Achyut Krishna Byanjankar Date: Sat, 18 Apr 2026 18:29:05 -0700 Subject: [PATCH] whisper : validate vocab size and per-token length when loading model whisper_model_load reads n_vocab (int32) and per-token length (uint32) directly from the model file with no bounds check. A malformed or fuzzed model (e.g. an 8-byte AFL++ finding) can set these to values that cause std::vector::resize to throw bad_alloc, which is uncaught and terminates the process with SIGABRT (signal 6) before any error is reported. Cap n_vocab at 2^20 tokens (real models top out around 52k) and each per-token length at 2^16 bytes. On violation, log a clear error message and return false so whisper_init_from_file_with_params_no_state can fail gracefully. Fixes #3674 --- src/whisper.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index 2f356da0f06..f61846951df 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -1587,14 +1587,22 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con // load vocab { + // Upper bounds for values read from untrusted model files. A malformed + // or fuzzed file can otherwise set these to values that make + // std::vector::resize throw (e.g. std::bad_alloc); the uncaught exception + // then terminates the process with SIGABRT during vocab construction. 
+ // ref: https://github.com/ggml-org/whisper.cpp/issues/3674 + constexpr int32_t max_n_vocab = 1 << 20; // ~1M tokens (largest real models are ~52k) + constexpr uint32_t max_word_len = 1 << 16; // 64 KiB per vocab token + int32_t n_vocab = 0; read_safe(loader, n_vocab); - //if (n_vocab != model.hparams.n_vocab) { - // WHISPER_LOG_ERROR("%s: invalid model file '%s' (bad vocab size %d != %d)\n", - // __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); - // return false; - //} + if (n_vocab < 0 || n_vocab > max_n_vocab) { + WHISPER_LOG_ERROR("%s: invalid vocab size %d (expected 0..%d); malformed model file\n", + __func__, n_vocab, max_n_vocab); + return false; + } std::string word; std::vector<char> tmp; @@ -1605,6 +1613,12 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con uint32_t len; read_safe(loader, len); + if (len > max_word_len) { + WHISPER_LOG_ERROR("%s: invalid vocab entry %d length %u (max %u); malformed model file\n", + __func__, i, len, max_word_len); + return false; + } + if (len > 0) { tmp.resize(len); loader->read(loader->context, &tmp[0], tmp.size()); // read to buffer