Skip to content

Commit 6707274

Browse files
committed
Update prefix cache match to have total and match length
1 parent 5752e58 commit 6707274

File tree

4 files changed

+21
-19
lines changed

4 files changed

+21
-19
lines changed

pkg/epp/datalayer/plugins/data_types.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,28 @@ const (
2525
)
2626

2727
// PrefixCacheMatchInfo is the per-pod prefix cache match attribute. After this
// change it carries two raw integer counts (matchLength and totalBlocks)
// instead of a precomputed matchPercentage, leaving any ratio to the consumer.
type PrefixCacheMatchInfo struct {
28-
matchPercentage float64
28+
// matchLength is the match length supplied to the constructor.
matchLength int
29+
// totalBlocks is the total supplied to the constructor; it may be zero.
totalBlocks int
2930
}
3031

31-
// NewPrefixCacheMatchInfo returns a PrefixCacheMatchInfo holding matchLen as
// the match length and blockHashLen as the total block count. The values are
// stored as given with no validation, so a zero blockHashLen is preserved and
// callers that divide by TotalLength must guard against it.
func NewPrefixCacheMatchInfo(matchPercentage float64) *PrefixCacheMatchInfo {
32+
func NewPrefixCacheMatchInfo(matchLen int, blockHashLen int) *PrefixCacheMatchInfo {
3233
return &PrefixCacheMatchInfo{
33-
matchPercentage: matchPercentage,
34+
matchLength: matchLen,
35+
totalBlocks: blockHashLen,
3436
}
3537
}
3638

37-
func (p *PrefixCacheMatchInfo) MatchPercentage() float64 {
38-
return p.matchPercentage
39+
// MatchLength returns the match length recorded at construction.
func (p *PrefixCacheMatchInfo) MatchLength() int {
40+
return p.matchLength
41+
}
42+
43+
// TotalLength returns the total recorded at construction (the totalBlocks
// field). It can be zero, so check it before using it as a divisor.
func (p *PrefixCacheMatchInfo) TotalLength() int {
44+
return p.totalBlocks
3945
}
4046

4147
// Clone returns a newly allocated PrefixCacheMatchInfo with the same
// matchLength and totalBlocks as the receiver, typed as datalayer.Cloneable.
func (p *PrefixCacheMatchInfo) Clone() datalayer.Cloneable {
4248
return &PrefixCacheMatchInfo{
43-
matchPercentage: p.matchPercentage,
49+
matchLength: p.matchLength,
50+
totalBlocks: p.totalBlocks,
4451
}
4552
}

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ func (p *Plugin) Consumes() map[string]any {
215215
return map[string]any{}
216216
}
217217

218+
// PrepareRequestData hashes the prompt, looks up each pod's prefix match length, and stores the match length and total block count on the pod as an attribute.
218219
func (p *Plugin) PrepareRequestData(ctx context.Context, request *types.LLMRequest, pods []types.Pod) error {
219-
// pre score step, hashing prompt and find longest prefix match.
220220
hashes := hashPrompt(ctx, request, getBlockSize(pods, p.config), p.config.MaxPrefixBlocksToMatch)
221221
state := &SchedulingContextState{
222222
PrefixHashes: hashes,
@@ -228,15 +228,10 @@ func (p *Plugin) PrepareRequestData(ctx context.Context, request *types.LLMReque
228228
}
229229

230230
total := len(state.PrefixHashes)
231-
podScoreFunc := func(pod types.Pod) float64 {
232-
if total == 0 {
233-
return 0
234-
}
235-
matchLen := state.PrefixCacheServers[ServerID(pod.GetPod().NamespacedName)]
236-
return float64(matchLen) / float64(total)
237-
}
231+
238232
for _, pod := range pods {
239-
pod.Put(dplugins.PrefixCacheMatchInfoKey, dplugins.NewPrefixCacheMatchInfo(podScoreFunc(pod)))
233+
matchLen := state.PrefixCacheServers[ServerID(pod.GetPod().NamespacedName)]
234+
pod.Put(dplugins.PrefixCacheMatchInfoKey, dplugins.NewPrefixCacheMatchInfo(matchLen, total))
240235
}
241236
return nil
242237
}

pkg/epp/scheduling/framework/plugins/scorer/prefix_cache_match_scorer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func (s *PrefixCacheScorer) WithName(name string) *PrefixCacheScorer {
7070
}
7171

7272
func (s *PrefixCacheScorer) Score(_ context.Context, cycleState *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
73-
// calculate the scores of pods
73+
// Score each pod by its prefix cache match percentage: match length / total length * 100.
7474
scores := make(map[types.Pod]float64, len(pods))
7575

7676
for _, pod := range pods {
@@ -79,7 +79,7 @@ func (s *PrefixCacheScorer) Score(_ context.Context, cycleState *types.CycleStat
7979
scores[pod] = 0.0
8080
continue
8181
}
82-
scores[pod] = matchPercent.(*dplugins.PrefixCacheMatchInfo).MatchPercentage()
82+
scores[pod] = float64(matchPercent.(*dplugins.PrefixCacheMatchInfo).MatchLength()) / float64(matchPercent.(*dplugins.PrefixCacheMatchInfo).TotalLength()) * 100
8383
}
8484
return scores
8585
}

pkg/epp/scheduling/framework/plugins/scorer/prefix_cache_match_scorer_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@ func (p *mockPod) Keys() []string {
7070

7171
func TestPrefixCacheScorer_Score(t *testing.T) {
7272
pod1 := newMockPod()
73-
pod1.Put(dplugins.PrefixCacheMatchInfoKey, dplugins.NewPrefixCacheMatchInfo(50.0))
73+
pod1.Put(dplugins.PrefixCacheMatchInfoKey, dplugins.NewPrefixCacheMatchInfo(5, 10))
7474

7575
pod2 := newMockPod()
76-
pod2.Put(dplugins.PrefixCacheMatchInfoKey, dplugins.NewPrefixCacheMatchInfo(100.0))
76+
pod2.Put(dplugins.PrefixCacheMatchInfoKey, dplugins.NewPrefixCacheMatchInfo(10, 10))
7777

7878
pod3 := newMockPod()
7979

0 commit comments

Comments
 (0)