@@ -26,8 +26,11 @@ BlockManagerImpl::BlockManagerImpl(const Options& options)
2626 CHECK_GT (options.num_blocks (), 0 ) << " No blocks to allocate" ;
2727 CHECK_GT (options.block_size (), 0 ) << " Block size must be positive" ;
2828 if (options_.enable_prefix_cache ()) {
29- prefix_cache_ = create_prefix_cache (options.block_size (),
30- options.enable_cache_upload ());
29+ PrefixCache::Options prefix_cache_options;
30+ prefix_cache_options.block_size (options.block_size ())
31+ .enable_cache_upload (options.enable_cache_upload ())
32+ .enable_mm_prefix_cache (options.enable_mm_prefix_cache ());
33+ prefix_cache_ = create_prefix_cache (prefix_cache_options);
3134 CHECK (prefix_cache_) << " Failed to create prefix cache!" ;
3235 }
3336
@@ -122,14 +125,15 @@ bool BlockManagerImpl::has_enough_blocks(uint32_t num_blocks) {
122125}
123126
124127std::vector<Block> BlockManagerImpl::allocate_shared (
128+ Sequence* sequence,
125129 const Slice<int32_t >& tokens_ids,
126130 const Slice<Block>& existed_shared_blocks) {
127131 // only allocate shared blocks for prefill sequences
128132 if (options_.enable_prefix_cache ()) {
129133 AUTO_COUNTER (prefix_cache_latency_seconds_match);
130134
131135 std::vector<Block> shared_blocks =
132- prefix_cache_->match (tokens_ids, existed_shared_blocks);
136+ prefix_cache_->match (sequence, tokens_ids, existed_shared_blocks);
133137
134138 const size_t prefix_length =
135139 shared_blocks.empty () ? 0
@@ -148,13 +152,17 @@ std::vector<Block> BlockManagerImpl::allocate_shared(
148152 return {};
149153}
150154
151- void BlockManagerImpl::cache (const Slice<int32_t >& token_ids,
155+ void BlockManagerImpl::cache (Sequence* sequence,
156+ const Slice<int32_t >& token_ids,
152157 std::vector<Block>& blocks,
153158 size_t existed_shared_blocks_num) {
154159 if (options_.enable_prefix_cache ()) {
155160 AUTO_COUNTER (prefix_cache_latency_seconds_insert);
156161 // Add the kv cache to the prefix cache
157- prefix_cache_->insert (token_ids, blocks, existed_shared_blocks_num);
162+ prefix_cache_->insert (sequence,
163+ token_ids,
164+ blocks,
165+ existed_shared_blocks_num);
158166 }
159167}
160168
0 commit comments