diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 949a8a106..296e1c052 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -568,7 +568,7 @@ kimik2.5-fp4-mi355x-atom: - { tp: 4, conc-start: 4, conc-end: 128 } minimaxm2.5-fp8-mi355x-vllm: - image: vllm/vllm-openai-rocm:v0.19.0 + image: vllm/vllm-openai-rocm:v0.19.1 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: mi355x @@ -589,6 +589,13 @@ minimaxm2.5-fp8-mi355x-vllm: - { tp: 2, ep: 2, conc-start: 2, conc-end: 256 } - { tp: 4, ep: 4, conc-start: 4, conc-end: 512 } - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 } + agentic-coding: + - duration: 1800 + search-space: + - { tp: 4, offloading: none, conc-list: [2, 4, 8, 16, 32] } + - { tp: 4, offloading: lmcache_cpu, conc-list: [2, 4, 8, 16, 32] } + - { tp: 8, offloading: none, conc-list: [2, 4, 8, 16, 32] } + - { tp: 8, offloading: lmcache_cpu, conc-list: [2, 4, 8, 16, 32] } minimaxm2.5-fp8-mi355x-atom: image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2 @@ -662,7 +669,7 @@ minimaxm2.5-fp4-mi355x-vllm: - { tp: 4, conc-start: 4, conc-end: 64 } minimaxm2.5-fp8-mi300x-vllm: - image: vllm/vllm-openai-rocm:v0.16.0 + image: vllm/vllm-openai-rocm:v0.19.1 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: mi300x @@ -681,9 +688,16 @@ minimaxm2.5-fp8-mi300x-vllm: search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - { tp: 4, conc-start: 4, conc-end: 64 } + agentic-coding: + - duration: 1800 + search-space: + - { tp: 2, offloading: none, conc-list: [2, 4, 8, 16] } + - { tp: 2, offloading: lmcache_cpu, conc-list: [2, 4, 8, 16] } + - { tp: 8, offloading: none, conc-list: [2, 4, 8, 16, 32] } + - { tp: 8, offloading: lmcache_cpu, conc-list: [2, 4, 8, 16, 32] } minimaxm2.5-fp8-mi325x-vllm: - image: vllm/vllm-openai-rocm:v0.18.0 + image: vllm/vllm-openai-rocm:v0.19.1 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: mi325x @@ -702,6 +716,13 @@ minimaxm2.5-fp8-mi325x-vllm: search-space: - { tp: 2, conc-start: 4, conc-end: 64 } - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 } + agentic-coding: + - duration: 1800 + search-space: + - { tp: 2, offloading: none, conc-list: [2, 4, 8, 16] } + - { tp: 2, offloading: lmcache_cpu, conc-list: [2, 4, 8, 16] } + - { tp: 8, offloading: none, conc-list: [2, 4, 8, 16, 32] } + - { tp: 8, offloading: lmcache_cpu, conc-list: [2, 4, 8, 16, 32] } gptoss-fp4-mi300x-vllm: image: vllm/vllm-openai-rocm:v0.17.0 diff --git a/benchmarks/benchmark_lib.sh b/benchmarks/benchmark_lib.sh index 4c0c8642e..75c72316f 100644 --- a/benchmarks/benchmark_lib.sh +++ b/benchmarks/benchmark_lib.sh @@ -902,6 +902,19 @@ resolve_trace_source() { hf download --repo-type dataset "$dataset" } +install_lmcache_hip() { + # LMCache PyPI wheel ships CUDA-only c_ops.so; must build from source for ROCm. + # `pip install lmcache` ignores BUILD_WITH_HIP and installs the pre-built CUDA wheel. + # We must clone and build with --no-build-isolation to get the HIP c_ops.so. + local lmcache_dir + lmcache_dir="$(mktemp -d)/LMCache" + echo "Building LMCache from source with HIP support..." + git clone --depth 1 https://github.com/LMCache/LMCache.git "$lmcache_dir" + SETUPTOOLS_SCM_PRETEND_VERSION=0.4.4 BUILD_WITH_HIP=1 \ + agentic_pip_install -e "$lmcache_dir" --no-build-isolation + echo "LMCache HIP build complete." 
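+  # Optional sanity check (the module path is an assumption; adjust if LMCache
+  # relocates its extension): `python -c "import lmcache.c_ops"` should now pick up
+  # the freshly built HIP extension instead of the CUDA-only wheel's c_ops.so.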
+} + install_agentic_deps() { agentic_pip_install --quiet urllib3 requests 2>/dev/null || true agentic_pip_install -q -r "$AGENTIC_DIR/requirements.txt" diff --git a/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi300x.sh b/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi300x.sh new file mode 100755 index 000000000..47d17137a --- /dev/null +++ b/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi300x.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# Agentic trace replay benchmark for MiniMax-M2.5 FP8 on MI300X using vLLM. +# Supports LMCache CPU DRAM offloading for KV cache. +# +# Required env vars: +# MODEL, TP, CONC, OFFLOADING, TOTAL_CPU_DRAM_GB, RESULT_DIR + +source "$(dirname "$0")/../../benchmark_lib.sh" + +check_env_vars MODEL TP CONC OFFLOADING TOTAL_CPU_DRAM_GB RESULT_DIR + +PORT=${PORT:-8888} +DURATION=${DURATION:-1800} +MAX_DELAY=${MAX_DELAY:-60} +ADVANCE_MIN=${ADVANCE_MIN:-0.0} +ADVANCE_MAX=${ADVANCE_MAX:-0.7} +# Agentic matrix entries don't set max-model-len, so the workflow passes 0. +# ${:-DEFAULT} only fires on unset/empty, so handle 0 explicitly. +if [ -z "${MAX_MODEL_LEN:-}" ] || [ "$MAX_MODEL_LEN" = "0" ]; then + MAX_MODEL_LEN=131072 +fi + +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}" +fi + +if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi +rocm-smi + +# If the machine runs a MEC FW older than 177, RCCL cannot reclaim some memory. +# See https://rocm.docs.amd.com/en/docs-6.4.3/about/release-notes.html#amdgpu-driver-updates +version=`rocm-smi --showfw | grep MEC | head -n 1 | awk '{print $NF}'` +if [[ "$version" == "" || $version -lt 177 ]]; then + export HSA_NO_SCRATCH_RECLAIM=1 +fi + +# Ray compatibility in vLLM 0.14+ needs HIP_VISIBLE_DEVICES to match ROCR_VISIBLE_DEVICES +if [ -n "${ROCR_VISIBLE_DEVICES:-}" ]; then + export HIP_VISIBLE_DEVICES="$ROCR_VISIBLE_DEVICES" +fi + +export AMDGCN_USE_BUFFER_OPS=0 +export VLLM_ROCM_USE_AITER=1 +export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4 +export PYTHONNOUSERSITE=1 + +# ---- Resolve traces and install deps ---------------------------------------- +resolve_trace_source +install_agentic_deps + +# ---- Server config ---------------------------------------------------------- +SERVER_LOG="$RESULT_DIR/server.log" +mkdir -p "$RESULT_DIR" + +OFFLOAD_ARGS="" +PREFIX_CACHE_FLAG="--no-enable-prefix-caching" + +case "$OFFLOADING" in + none) + ;; + cpu) + OFFLOAD_ARGS="--kv_offloading_backend native --kv_offloading_size $TOTAL_CPU_DRAM_GB --disable-hybrid-kv-cache-manager" + ;; + lmcache_cpu) + # LMCache CPU DRAM offloading via LMCacheConnectorV1. + # Critical: PYTHONHASHSEED=0 is mandatory for cache key consistency + # across TP workers. Without it, hit rate is 0%. + install_lmcache_hip + export PYTHONHASHSEED=0 + export LMCACHE_LOCAL_CPU=true + export LMCACHE_CHUNK_SIZE=256 + export LMCACHE_MAX_LOCAL_CPU_SIZE=$((TOTAL_CPU_DRAM_GB / TP)) + # LMCache reuses vLLM's prefix cache hash function, so prefix caching + # must be enabled (unlike native CPU offloading). + PREFIX_CACHE_FLAG="--enable-prefix-caching" + OFFLOAD_ARGS="--kv-transfer-config {\"kv_connector\":\"LMCacheConnectorV1\",\"kv_role\":\"kv_both\"}" + ;; + *) + echo "Error: unsupported OFFLOADING value '$OFFLOADING' (expected one of: none, cpu, lmcache_cpu)" >&2 + exit 1 + ;; +esac + +echo "Starting vllm server..." 
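+# $PREFIX_CACHE_FLAG and $OFFLOAD_ARGS are expanded unquoted below so they word-split
+# into separate CLI arguments; the kv-transfer-config JSON is written without spaces,
+# so it survives the split as a single argument.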
+ +vllm serve $MODEL \ +--host 0.0.0.0 \ +--port $PORT \ +--trust-remote-code \ +--tool-call-parser minimax_m2 \ +--reasoning-parser minimax_m2 \ +--enable-auto-tool-choice \ +--attention-backend ROCM_AITER_UNIFIED_ATTN \ +--tensor-parallel-size=$TP \ +--gpu-memory-utilization 0.85 \ +--max-model-len $MAX_MODEL_LEN \ +--max-num-seqs $CONC \ +--block-size=64 \ +--kv-cache-dtype fp8 \ +$PREFIX_CACHE_FLAG \ +$OFFLOAD_ARGS > "$SERVER_LOG" 2>&1 & +SERVER_PID=$! +echo "Server PID: $SERVER_PID" + +wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" + +# ---- Run benchmark ---------------------------------------------------------- +build_replay_cmd "$RESULT_DIR" + +echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt" + +set -x +$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true +set +x + +write_agentic_result_json "$RESULT_DIR" + +# ---- Post-processing -------------------------------------------------------- +python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \ + "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true diff --git a/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi325x.sh b/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi325x.sh new file mode 100755 index 000000000..6090a4408 --- /dev/null +++ b/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi325x.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# Agentic trace replay benchmark for MiniMax-M2.5 FP8 on MI325X using vLLM. +# Supports LMCache CPU DRAM offloading for KV cache. +# +# Required env vars: +# MODEL, TP, CONC, OFFLOADING, TOTAL_CPU_DRAM_GB, RESULT_DIR + +source "$(dirname "$0")/../../benchmark_lib.sh" + +check_env_vars MODEL TP CONC OFFLOADING TOTAL_CPU_DRAM_GB RESULT_DIR + +PORT=${PORT:-8888} +DURATION=${DURATION:-1800} +MAX_DELAY=${MAX_DELAY:-60} +ADVANCE_MIN=${ADVANCE_MIN:-0.0} +ADVANCE_MAX=${ADVANCE_MAX:-0.7} +# Agentic matrix entries don't set max-model-len, so the workflow passes 0. +# ${:-DEFAULT} only fires on unset/empty, so handle 0 explicitly. +if [ -z "${MAX_MODEL_LEN:-}" ] || [ "$MAX_MODEL_LEN" = "0" ]; then + MAX_MODEL_LEN=131072 +fi + +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}" +fi + +if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi +rocm-smi + +# If the machine runs a MEC FW older than 177, RCCL cannot reclaim some memory. 
+# See https://rocm.docs.amd.com/en/docs-6.4.3/about/release-notes.html#amdgpu-driver-updates +version=`rocm-smi --showfw | grep MEC | head -n 1 | awk '{print $NF}'` +if [[ "$version" == "" || $version -lt 177 ]]; then + export HSA_NO_SCRATCH_RECLAIM=1 +fi + +# Ray compatibility in vLLM 0.14+ needs HIP_VISIBLE_DEVICES to match ROCR_VISIBLE_DEVICES +if [ -n "${ROCR_VISIBLE_DEVICES:-}" ]; then + export HIP_VISIBLE_DEVICES="$ROCR_VISIBLE_DEVICES" +fi + +export AMDGCN_USE_BUFFER_OPS=0 +export VLLM_ROCM_USE_AITER=1 +export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4 +export PYTHONNOUSERSITE=1 + +# ---- Resolve traces and install deps ---------------------------------------- +resolve_trace_source +install_agentic_deps + +# ---- Server config ---------------------------------------------------------- +SERVER_LOG="$RESULT_DIR/server.log" +mkdir -p "$RESULT_DIR" + +OFFLOAD_ARGS="" +PREFIX_CACHE_FLAG="--no-enable-prefix-caching" + +case "$OFFLOADING" in + none) + ;; + cpu) + OFFLOAD_ARGS="--kv_offloading_backend native --kv_offloading_size $TOTAL_CPU_DRAM_GB --disable-hybrid-kv-cache-manager" + ;; + lmcache_cpu) + # LMCache CPU DRAM offloading via LMCacheConnectorV1. + # Critical: PYTHONHASHSEED=0 is mandatory for cache key consistency + # across TP workers. Without it, hit rate is 0%. + install_lmcache_hip + export PYTHONHASHSEED=0 + export LMCACHE_LOCAL_CPU=true + export LMCACHE_CHUNK_SIZE=256 + export LMCACHE_MAX_LOCAL_CPU_SIZE=$((TOTAL_CPU_DRAM_GB / TP)) + # LMCache reuses vLLM's prefix cache hash function, so prefix caching + # must be enabled (unlike native CPU offloading). + PREFIX_CACHE_FLAG="--enable-prefix-caching" + OFFLOAD_ARGS="--kv-transfer-config {\"kv_connector\":\"LMCacheConnectorV1\",\"kv_role\":\"kv_both\"}" + ;; + *) + echo "Error: unsupported OFFLOADING value '$OFFLOADING' (expected one of: none, cpu, lmcache_cpu)" >&2 + exit 1 + ;; +esac + +echo "Starting vllm server..." + +vllm serve $MODEL \ +--host 0.0.0.0 \ +--port $PORT \ +--trust-remote-code \ +--tool-call-parser minimax_m2 \ +--reasoning-parser minimax_m2 \ +--enable-auto-tool-choice \ +--attention-backend ROCM_AITER_UNIFIED_ATTN \ +--tensor-parallel-size=$TP \ +--gpu-memory-utilization 0.85 \ +--max-model-len $MAX_MODEL_LEN \ +--max-num-seqs $CONC \ +--block-size=64 \ +--kv-cache-dtype fp8 \ +$PREFIX_CACHE_FLAG \ +$OFFLOAD_ARGS > "$SERVER_LOG" 2>&1 & +SERVER_PID=$! +echo "Server PID: $SERVER_PID" + +wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" + +# ---- Run benchmark ---------------------------------------------------------- +build_replay_cmd "$RESULT_DIR" + +echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt" + +set -x +$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true +set +x + +write_agentic_result_json "$RESULT_DIR" + +# ---- Post-processing -------------------------------------------------------- +python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \ + "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true diff --git a/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi355x.sh b/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi355x.sh new file mode 100755 index 000000000..dea4dec32 --- /dev/null +++ b/benchmarks/single_node/agentic/minimaxm2.5_fp8_mi355x.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +set -euo pipefail +set -x + +# Agentic trace replay benchmark for MiniMax-M2.5 FP8 on MI355X using vLLM. +# Supports LMCache CPU DRAM offloading for KV cache. 
+# +# Required env vars: +# MODEL, TP, CONC, OFFLOADING, TOTAL_CPU_DRAM_GB, RESULT_DIR + +source "$(dirname "$0")/../../benchmark_lib.sh" + +check_env_vars MODEL TP CONC OFFLOADING TOTAL_CPU_DRAM_GB RESULT_DIR + +PORT=${PORT:-8888} +DURATION=${DURATION:-1800} +MAX_DELAY=${MAX_DELAY:-60} +ADVANCE_MIN=${ADVANCE_MIN:-0.0} +ADVANCE_MAX=${ADVANCE_MAX:-0.7} +# Agentic matrix entries don't set max-model-len, so the workflow passes 0. +# ${:-DEFAULT} only fires on unset/empty, so handle 0 explicitly. +if [ -z "${MAX_MODEL_LEN:-}" ] || [ "$MAX_MODEL_LEN" = "0" ]; then + MAX_MODEL_LEN=131072 +fi + +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}" +fi + +if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi +rocm-smi + +# If the machine runs a MEC FW older than 177, RCCL cannot reclaim some memory. +# See https://rocm.docs.amd.com/en/docs-6.4.3/about/release-notes.html#amdgpu-driver-updates +version=`rocm-smi --showfw | grep MEC | head -n 1 | awk '{print $NF}'` +if [[ "$version" == "" || $version -lt 177 ]]; then + export HSA_NO_SCRATCH_RECLAIM=1 +fi + +# Ray compatibility in vLLM 0.14+ needs HIP_VISIBLE_DEVICES to match ROCR_VISIBLE_DEVICES +if [ -n "${ROCR_VISIBLE_DEVICES:-}" ]; then + export HIP_VISIBLE_DEVICES="$ROCR_VISIBLE_DEVICES" +fi + +export AMDGCN_USE_BUFFER_OPS=0 +export VLLM_ROCM_USE_AITER=1 +export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4 +export PYTHONNOUSERSITE=1 + +# ---- Resolve traces and install deps ---------------------------------------- +resolve_trace_source +install_agentic_deps + +# ---- Server config ---------------------------------------------------------- +SERVER_LOG="$RESULT_DIR/server.log" +mkdir -p "$RESULT_DIR" + +OFFLOAD_ARGS="" +PREFIX_CACHE_FLAG="--no-enable-prefix-caching" + +case "$OFFLOADING" in + none) + ;; + cpu) + OFFLOAD_ARGS="--kv_offloading_backend native --kv_offloading_size $TOTAL_CPU_DRAM_GB --disable-hybrid-kv-cache-manager" + ;; + lmcache_cpu) + # LMCache CPU DRAM offloading via LMCacheConnectorV1. + # Critical: PYTHONHASHSEED=0 is mandatory for cache key consistency + # across TP workers. Without it, hit rate is 0%. + install_lmcache_hip + export PYTHONHASHSEED=0 + export LMCACHE_LOCAL_CPU=true + export LMCACHE_CHUNK_SIZE=256 + export LMCACHE_MAX_LOCAL_CPU_SIZE=$((TOTAL_CPU_DRAM_GB / TP)) + # LMCache reuses vLLM's prefix cache hash function, so prefix caching + # must be enabled (unlike native CPU offloading). + PREFIX_CACHE_FLAG="--enable-prefix-caching" + OFFLOAD_ARGS="--kv-transfer-config {\"kv_connector\":\"LMCacheConnectorV1\",\"kv_role\":\"kv_both\"}" + ;; + *) + echo "Error: unsupported OFFLOADING value '$OFFLOADING' (expected one of: none, cpu, lmcache_cpu)" >&2 + exit 1 + ;; +esac + +echo "Starting vllm server..." + +vllm serve $MODEL \ +--host 0.0.0.0 \ +--port $PORT \ +--trust-remote-code \ +--tool-call-parser minimax_m2 \ +--reasoning-parser minimax_m2 \ +--enable-auto-tool-choice \ +--attention-backend ROCM_AITER_UNIFIED_ATTN \ +--tensor-parallel-size=$TP \ +--gpu-memory-utilization 0.85 \ +--max-model-len $MAX_MODEL_LEN \ +--max-num-seqs $CONC \ +--block-size=64 \ +--kv-cache-dtype fp8 \ +$PREFIX_CACHE_FLAG \ +$OFFLOAD_ARGS > "$SERVER_LOG" 2>&1 & +SERVER_PID=$! 
+echo "Server PID: $SERVER_PID" + +wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID" + +# ---- Run benchmark ---------------------------------------------------------- +build_replay_cmd "$RESULT_DIR" + +echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt" + +set -x +$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true +set +x + +write_agentic_result_json "$RESULT_DIR" + +# ---- Post-processing -------------------------------------------------------- +python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \ + "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true diff --git a/utils/matrix_logic/test_validation.py b/utils/matrix_logic/test_validation.py index 1274fd86a..ab3c4b51e 100644 --- a/utils/matrix_logic/test_validation.py +++ b/utils/matrix_logic/test_validation.py @@ -11,7 +11,12 @@ MultiNodeSeqLenConfig, SingleNodeMasterConfigEntry, MultiNodeMasterConfigEntry, + SingleNodeAgenticMatrixEntry, + AgenticCodingSearchSpaceEntry, + AgenticCodingConfig, + SingleNodeScenarios, validate_matrix_entry, + validate_agentic_matrix_entry, validate_master_config, validate_runner_config, load_config_files, @@ -875,3 +880,304 @@ def test_validation_runs_by_default(self, tmp_path): with pytest.raises(ValueError) as exc_info: load_runner_file(str(runner_file)) assert "must be a list" in str(exc_info.value) + + +# ============================================================================= +# Test AgenticCodingSearchSpaceEntry +# ============================================================================= + +class TestAgenticCodingSearchSpaceEntry: + """Tests for AgenticCodingSearchSpaceEntry model.""" + + def test_valid_with_offloading_none(self): + """Valid entry with offloading=none should pass.""" + entry = AgenticCodingSearchSpaceEntry(**{ + "tp": 8, + "offloading": "none", + "conc-list": [2, 4, 8, 16], + }) + assert entry.tp == 8 + assert entry.offloading == "none" + assert entry.conc_list == [2, 4, 8, 16] + + def test_valid_with_offloading_cpu(self): + """Valid entry with offloading=cpu should pass.""" + entry = AgenticCodingSearchSpaceEntry(**{ + "tp": 4, + "offloading": "cpu", + "conc-list": [4, 8], + }) + assert entry.offloading == "cpu" + + def test_valid_with_offloading_lmcache(self): + """Valid entry with offloading=lmcache_cpu should pass.""" + entry = AgenticCodingSearchSpaceEntry(**{ + "tp": 2, + "offloading": "lmcache_cpu", + "conc-list": [2, 4, 8, 16], + }) + assert entry.offloading == "lmcache_cpu" + assert entry.tp == 2 + + def test_valid_with_offloading_ssd(self): + """Valid entry with offloading=ssd should pass.""" + entry = AgenticCodingSearchSpaceEntry(**{ + "tp": 8, + "offloading": "ssd", + "conc-start": 4, + "conc-end": 32, + }) + assert entry.offloading == "ssd" + + def test_invalid_offloading_value(self): + """Invalid offloading value should fail.""" + with pytest.raises(Exception): + AgenticCodingSearchSpaceEntry(**{ + "tp": 8, + "offloading": "invalid", + "conc-list": [4], + }) + + def test_offloading_defaults_to_none(self): + """Offloading should default to none.""" + entry = AgenticCodingSearchSpaceEntry(**{ + "tp": 8, + "conc-list": [4, 8], + }) + assert entry.offloading == "none" + + def test_must_specify_tp_or_prefill_decode(self): + """Must specify either tp or both prefill and decode.""" + with pytest.raises(Exception) as exc_info: + AgenticCodingSearchSpaceEntry(**{ + "offloading": "lmcache_cpu", + "conc-list": [4], + }) + assert "must specify at least tp" in str(exc_info.value).lower() + + def 
test_tp_with_prefill_decode_allowed(self): + """tp can coexist with prefill/decode for disaggregated serving.""" + entry = AgenticCodingSearchSpaceEntry(**{ + "tp": 8, + "prefill": { + "num-worker": 1, "tp": 4, "ep": 4, "dp-attn": False, + }, + "decode": { + "num-worker": 1, "tp": 8, "ep": 8, "dp-attn": False, + }, + "conc-list": [4], + }) + assert entry.tp == 8 + assert entry.prefill.tp == 4 + assert entry.decode.tp == 8 + + def test_prefill_without_decode_rejected(self): + """Specifying only prefill without decode should fail.""" + with pytest.raises(Exception) as exc_info: + AgenticCodingSearchSpaceEntry(**{ + "tp": 8, + "prefill": { + "num-worker": 1, "tp": 4, "ep": 4, "dp-attn": False, + }, + "conc-list": [4], + }) + assert "both prefill and decode" in str(exc_info.value).lower() + + def test_decode_without_prefill_rejected(self): + """Specifying only decode without prefill should fail.""" + with pytest.raises(Exception) as exc_info: + AgenticCodingSearchSpaceEntry(**{ + "tp": 8, + "decode": { + "num-worker": 1, "tp": 8, "ep": 8, "dp-attn": False, + }, + "conc-list": [4], + }) + assert "both prefill and decode" in str(exc_info.value).lower() + + +# ============================================================================= +# Test SingleNodeAgenticMatrixEntry +# ============================================================================= + +class TestSingleNodeAgenticMatrixEntry: + """Tests for SingleNodeAgenticMatrixEntry model.""" + + @pytest.fixture + def valid_agentic_entry(self): + return { + "image": "vllm/vllm-openai-rocm:v0.19.1", + "model": "MiniMaxAI/MiniMax-M2.5", + "model-prefix": "minimaxm2.5", + "precision": "fp8", + "framework": "vllm", + "runner": "mi300x", + "tp": 2, + "ep": 1, + "dp-attn": False, + "conc": 8, + "offloading": "lmcache_cpu", + "duration": 1800, + "exp-name": "minimaxm2.5_tp2_conc8_offloadlmcache_cpu", + "scenario-type": "agentic-coding", + } + + def test_valid_lmcache_entry(self, valid_agentic_entry): + """Valid agentic entry with lmcache offloading should pass.""" + entry = SingleNodeAgenticMatrixEntry(**valid_agentic_entry) + assert entry.offloading == "lmcache_cpu" + assert entry.tp == 2 + assert entry.conc == 8 + assert entry.scenario_type == "agentic-coding" + + def test_valid_none_offloading(self, valid_agentic_entry): + """Valid agentic entry with no offloading should pass.""" + valid_agentic_entry["offloading"] = "none" + entry = SingleNodeAgenticMatrixEntry(**valid_agentic_entry) + assert entry.offloading == "none" + + def test_valid_cpu_offloading(self, valid_agentic_entry): + """Valid agentic entry with cpu offloading should pass.""" + valid_agentic_entry["offloading"] = "cpu" + entry = SingleNodeAgenticMatrixEntry(**valid_agentic_entry) + assert entry.offloading == "cpu" + + def test_invalid_offloading_rejected(self, valid_agentic_entry): + """Invalid offloading value should fail.""" + valid_agentic_entry["offloading"] = "gpu" + with pytest.raises(Exception): + SingleNodeAgenticMatrixEntry(**valid_agentic_entry) + + def test_missing_offloading_fails(self, valid_agentic_entry): + """Missing offloading field should fail (no default on matrix entry).""" + del valid_agentic_entry["offloading"] + with pytest.raises(Exception): + SingleNodeAgenticMatrixEntry(**valid_agentic_entry) + + def test_extra_field_forbidden(self, valid_agentic_entry): + """Extra fields should be rejected.""" + valid_agentic_entry["extra-field"] = "value" + with pytest.raises(Exception): + SingleNodeAgenticMatrixEntry(**valid_agentic_entry) + + def 
test_validate_agentic_matrix_entry_function(self, valid_agentic_entry): + """validate_agentic_matrix_entry should accept valid entry.""" + result = validate_agentic_matrix_entry(valid_agentic_entry) + assert result == valid_agentic_entry + + def test_validate_agentic_matrix_entry_invalid(self, valid_agentic_entry): + """validate_agentic_matrix_entry should reject invalid entry.""" + del valid_agentic_entry["tp"] + with pytest.raises(ValueError) as exc_info: + validate_agentic_matrix_entry(valid_agentic_entry) + assert "failed validation" in str(exc_info.value) + + +# ============================================================================= +# Test AgenticCodingConfig +# ============================================================================= + +class TestAgenticCodingConfig: + """Tests for AgenticCodingConfig model.""" + + def test_valid_with_lmcache_and_none(self): + """Config with both lmcache and none offloading entries should pass.""" + config = AgenticCodingConfig(**{ + "duration": 1800, + "search-space": [ + {"tp": 2, "offloading": "none", "conc-list": [2, 4, 8]}, + {"tp": 2, "offloading": "lmcache_cpu", "conc-list": [2, 4, 8]}, + ], + }) + assert config.duration == 1800 + assert len(config.search_space) == 2 + assert config.search_space[0].offloading == "none" + assert config.search_space[1].offloading == "lmcache_cpu" + + def test_duration_defaults_to_1800(self): + """Duration should default to 1800.""" + config = AgenticCodingConfig(**{ + "search-space": [ + {"tp": 8, "offloading": "none", "conc-list": [4]}, + ], + }) + assert config.duration == 1800 + + +# ============================================================================= +# Test Master Config with Agentic Scenarios +# ============================================================================= + +class TestMasterConfigWithAgentic: + """Tests for master config entries containing agentic-coding scenarios.""" + + def test_single_node_with_agentic_only(self): + """Single node config with only agentic-coding scenario should pass.""" + config = SingleNodeMasterConfigEntry(**{ + "image": "vllm/vllm-openai-rocm:v0.19.1", + "model": "MiniMaxAI/MiniMax-M2.5", + "model-prefix": "minimaxm2.5", + "precision": "fp8", + "framework": "vllm", + "runner": "mi300x", + "multinode": False, + "scenarios": { + "agentic-coding": [ + { + "duration": 1800, + "search-space": [ + {"tp": 2, "offloading": "lmcache_cpu", "conc-list": [2, 4, 8]}, + ], + } + ], + }, + }) + assert config.scenarios.agentic_coding is not None + assert len(config.scenarios.agentic_coding) == 1 + assert config.scenarios.agentic_coding[0].search_space[0].offloading == "lmcache_cpu" + + def test_single_node_with_both_scenarios(self): + """Single node config with both fixed-seq-len and agentic-coding should pass.""" + config = SingleNodeMasterConfigEntry(**{ + "image": "vllm/vllm-openai-rocm:v0.19.1", + "model": "MiniMaxAI/MiniMax-M2.5", + "model-prefix": "minimaxm2.5", + "precision": "fp8", + "framework": "vllm", + "runner": "mi300x", + "multinode": False, + "scenarios": { + "fixed-seq-len": [ + { + "isl": 1024, "osl": 1024, + "search-space": [{"tp": 2, "conc-start": 4, "conc-end": 64}], + } + ], + "agentic-coding": [ + { + "duration": 1800, + "search-space": [ + {"tp": 2, "offloading": "none", "conc-list": [2, 4, 8]}, + {"tp": 2, "offloading": "lmcache_cpu", "conc-list": [2, 4, 8]}, + ], + } + ], + }, + }) + assert config.scenarios.fixed_seq_len is not None + assert config.scenarios.agentic_coding is not None + + def test_scenarios_must_have_at_least_one(self): + 
"""Scenarios must have at least one scenario type.""" + with pytest.raises(Exception) as exc_info: + SingleNodeMasterConfigEntry(**{ + "image": "test", + "model": "test", + "model-prefix": "test", + "precision": "fp8", + "framework": "vllm", + "runner": "mi300x", + "multinode": False, + "scenarios": {}, + }) + assert "At least one scenario" in str(exc_info.value) diff --git a/utils/matrix_logic/validation.py b/utils/matrix_logic/validation.py index dd245aec7..385e7c75b 100644 --- a/utils/matrix_logic/validation.py +++ b/utils/matrix_logic/validation.py @@ -156,7 +156,7 @@ class SingleNodeAgenticMatrixEntry(BaseModel): ep: int dp_attn: bool = Field(alias=Fields.DP_ATTN.value) conc: int - offloading: Literal["none", "cpu", "ssd"] = Field(alias=Fields.OFFLOADING.value) + offloading: Literal["none", "cpu", "lmcache_cpu", "ssd"] = Field(alias=Fields.OFFLOADING.value) duration: int = Field(default=1800, alias=Fields.DURATION.value) exp_name: str = Field(alias=Fields.EXP_NAME.value) scenario_type: str = Field(alias=Fields.SCENARIO_TYPE.value) @@ -338,7 +338,7 @@ class AgenticCodingSearchSpaceEntry(BaseModel): default="none", alias=Fields.SPEC_DECODING.value) prefill: Optional[WorkerConfig] = None decode: Optional[WorkerConfig] = None - offloading: Literal["none", "cpu", "ssd"] = Field(default="none", alias=Fields.OFFLOADING.value) + offloading: Literal["none", "cpu", "lmcache_cpu", "ssd"] = Field(default="none", alias=Fields.OFFLOADING.value) conc_start: Optional[int] = Field(default=None, alias=Fields.CONC_START.value) conc_end: Optional[int] = Field(default=None, alias=Fields.CONC_END.value) conc_list: Optional[List[int]] = Field(default=None, alias=Fields.CONC_LIST.value) @@ -349,15 +349,13 @@ def validate_conc_fields(self): @model_validator(mode='after') def validate_topology_fields(self): - has_single_node = self.tp is not None - has_any_multinode_field = self.prefill is not None or self.decode is not None - has_complete_multinode = self.prefill is not None and self.decode is not None - if has_single_node: - valid = not has_any_multinode_field - else: - valid = has_complete_multinode - if not valid: - raise ValueError("Agentic search-space entries must specify either tp or both prefill and decode") + has_tp = self.tp is not None + has_prefill = self.prefill is not None + has_decode = self.decode is not None + if has_prefill != has_decode: + raise ValueError("Agentic search-space entries must specify both prefill and decode, not just one") + if not has_tp and not has_prefill: + raise ValueError("Agentic search-space entries must specify at least tp or both prefill and decode") return self