# CacheForge/models.py at main · dorodb-web22/CacheForge · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the CacheForge Environment.

CacheForge simulates a production multi-tier cache system (L1 / L2 / L3)
where an RL agent optimises caching strategies by tuning TTLs, resizing
tiers, choosing eviction policies, and shifting data between tiers.
"""

from typing import Dict, Literal

from openenv.core.env_server.types import Action, Observation
from pydantic import Field


# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------

class CacheforgeObservation(Observation):
    """
    Per-step snapshot of cache health handed back to the agent.

    Every field declared here has a default, and numeric fields carry
    range constraints:
        hit_rate           – share of requests served by any tier, in [0, 1]
        miss_rate          – share of requests that missed, in [0, 1];
                             described as 1 - hit_rate, but this class
                             declares no cross-field check enforcing it
        avg_latency        – average request latency in milliseconds, >= 0
        memory_usage       – memory used relative to max capacity; only the
                             lower bound (0) is constrained, so values above
                             1.0 are accepted
        request_rate       – number of requests processed in the step, >= 0
        hot_keys_ratio     – share of requests that hit hot keys, in [0, 1]
        cache_distribution – per-tier utilisation; the default contains the
                             keys "L1", "L2" and "L3", each at 0.0
    """

    hit_rate: float = Field(
        0.0,
        description="Cache hit rate across all tiers (0.0 – 1.0)",
        ge=0.0,
        le=1.0,
    )
    miss_rate: float = Field(
        1.0,
        description="Cache miss rate (1 - hit_rate)",
        ge=0.0,
        le=1.0,
    )
    avg_latency: float = Field(
        0.0,
        description="Average request latency in milliseconds",
        ge=0.0,
    )
    memory_usage: float = Field(
        0.0,
        description="Total memory usage as a fraction of max capacity (0.0 – 1.0+)",
        ge=0.0,
    )
    request_rate: int = Field(
        0,
        description="Number of requests processed in this step",
        ge=0,
    )
    hot_keys_ratio: float = Field(
        0.0,
        description="Fraction of requests that targeted hot keys",
        ge=0.0,
        le=1.0,
    )
    cache_distribution: Dict[str, float] = Field(
        description="Per-tier utilisation (items stored / tier capacity)",
        # A factory is used so each instance gets its own fresh dict.
        default_factory=lambda: dict.fromkeys(("L1", "L2", "L3"), 0.0),
    )


# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------

class CacheforgeAction(Action):
    """
    Cache re-configuration request the agent sends on each step.

    Four independent controls, each defaulting to a "no change" value:
        adjust_ttl      – integer TTL delta in seconds, limited to [-10, 10]
        resize_cache    – relative capacity delta, limited to [-0.2, 0.2]
        eviction_policy – one of "LRU", "LFU" or "FIFO" (default "LRU")
        tier_shift      – data migration between tiers: "L1→L2", "L2→L3",
                          or "none" (default)

    Both non-"none" tier_shift values point from a lower-numbered to a
    higher-numbered tier; no reverse direction is declared here.
    """

    adjust_ttl: int = Field(
        0,
        description="TTL adjustment in seconds (-10 to +10)",
        ge=-10,
        le=10,
    )
    resize_cache: float = Field(
        0.0,
        description="Relative cache capacity change (-0.2 to +0.2)",
        ge=-0.2,
        le=0.2,
    )
    eviction_policy: Literal["LRU", "LFU", "FIFO"] = Field(
        "LRU",
        description='Eviction policy: "LRU", "LFU", or "FIFO"',
    )
    tier_shift: Literal["L1→L2", "L2→L3", "none"] = Field(
        "none",
        description='Tier data migration: "L1→L2", "L2→L3", or "none"',
    )