Skip to content

Commit 8cb5a33

Browse files
Copilot and chand1012 committed
feat(cache): implement prompt prefix cache stabilization from upstream PR anomalyco#14743
- Add OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION and OPENCODE_EXPERIMENTAL_CACHE_1H_TTL flags - Split system prompt into 2 blocks (stable/dynamic) for better cache reuse - Freeze date and instructions behind OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION flag - Remove Instance.directory from bash tool schema for cross-repo cache hits - Sort skill tools alphabetically for deterministic ordering - Add extended TTL support for first system cache marker - Add cache audit display in TUI sidebar behind OPENCODE_CACHE_AUDIT env var - Fix llama-server compatibility: join system blocks for non-Anthropic providers - Update tests for all changed functionality Co-authored-by: chand1012 <3521582+chand1012@users.noreply.github.com>
1 parent 3b361d6 commit 8cb5a33

15 files changed

Lines changed: 249 additions & 108 deletions

File tree

packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,17 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) {
5353
if (!last) return
5454
const total =
5555
last.tokens.input + last.tokens.output + last.tokens.reasoning + last.tokens.cache.read + last.tokens.cache.write
56+
const totalInput = last.tokens.input + last.tokens.cache.read + last.tokens.cache.write
5657
const model = sync.data.provider.find((x) => x.id === last.providerID)?.models[last.modelID]
5758
return {
5859
tokens: total.toLocaleString(),
5960
percentage: model?.limit.context ? Math.round((total / model.limit.context) * 100) : null,
61+
cacheHitPercent: totalInput > 0 ? ((last.tokens.cache.read / totalInput) * 100).toFixed(3) : null,
62+
cacheRead: last.tokens.cache.read,
63+
cacheWrite: last.tokens.cache.write,
64+
cacheNew: last.tokens.input,
65+
cacheInput: totalInput,
66+
cacheOutput: last.tokens.output,
6067
}
6168
})
6269

@@ -106,6 +113,19 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) {
106113
<text fg={theme.textMuted}>{context()?.percentage ?? 0}% used</text>
107114
<text fg={theme.textMuted}>{cost()} spent</text>
108115
</box>
116+
<Show when={process.env["OPENCODE_CACHE_AUDIT"] && context()?.cacheHitPercent != null}>
117+
<box>
118+
<text fg={theme.text}>
119+
<b>Cache Audit</b>
120+
</text>
121+
<text fg={theme.textMuted}>{context()!.cacheInput.toLocaleString()} input tokens</text>
122+
<text fg={theme.textMuted}> {context()!.cacheNew.toLocaleString()} new</text>
123+
<text fg={theme.textMuted}> {context()!.cacheRead.toLocaleString()} cache read</text>
124+
<text fg={theme.textMuted}> {context()!.cacheWrite.toLocaleString()} cache write</text>
125+
<text fg={theme.textMuted}>{context()!.cacheHitPercent}% hit rate</text>
126+
<text fg={theme.textMuted}>{context()!.cacheOutput.toLocaleString()} output tokens</text>
127+
</box>
128+
</Show>
109129
<Show when={mcpEntries().length > 0}>
110130
<box>
111131
<box

packages/opencode/src/flag/flag.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ export namespace Flag {
5959
export const OPENCODE_EXPERIMENTAL_PLAN_MODE = OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_PLAN_MODE")
6060
export const OPENCODE_EXPERIMENTAL_WORKSPACES = OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_WORKSPACES")
6161
export const OPENCODE_EXPERIMENTAL_MARKDOWN = !falsy("OPENCODE_EXPERIMENTAL_MARKDOWN")
62+
export const OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION = truthy("OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION")
63+
export const OPENCODE_EXPERIMENTAL_CACHE_1H_TTL = truthy("OPENCODE_EXPERIMENTAL_CACHE_1H_TTL")
6264
export const OPENCODE_MODELS_URL = process.env["OPENCODE_MODELS_URL"]
6365
export const OPENCODE_MODELS_PATH = process.env["OPENCODE_MODELS_PATH"]
6466
export const OPENCODE_DISABLE_CHANNEL_DB = truthy("OPENCODE_DISABLE_CHANNEL_DB")

packages/opencode/src/provider/transform.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,12 @@ export namespace ProviderTransform {
171171
return msgs
172172
}
173173

174-
function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
174+
function applyCaching(msgs: ModelMessage[], model: Provider.Model, extendedTTL?: boolean): ModelMessage[] {
175175
const system = msgs.filter((msg) => msg.role === "system").slice(0, 2)
176176
const final = msgs.filter((msg) => msg.role !== "system").slice(-2)
177177

178+
// Use 1h cache TTL on first system block (2x write cost vs 1.25x for default 5-min)
179+
const anthropicCache = extendedTTL ? { type: "ephemeral", ttl: "1h" } : { type: "ephemeral" }
178180
const providerOptions = {
179181
anthropic: {
180182
cacheControl: { type: "ephemeral" },
@@ -194,18 +196,21 @@ export namespace ProviderTransform {
194196
}
195197

196198
for (const msg of unique([...system, ...final])) {
199+
const options = msg === system[0]
200+
? { ...providerOptions, anthropic: { cacheControl: anthropicCache } }
201+
: providerOptions
197202
const useMessageLevelOptions = model.providerID === "anthropic" || model.providerID.includes("bedrock")
198203
const shouldUseContentOptions = !useMessageLevelOptions && Array.isArray(msg.content) && msg.content.length > 0
199204

200205
if (shouldUseContentOptions) {
201206
const lastContent = msg.content[msg.content.length - 1]
202207
if (lastContent && typeof lastContent === "object") {
203-
lastContent.providerOptions = mergeDeep(lastContent.providerOptions ?? {}, providerOptions)
208+
lastContent.providerOptions = mergeDeep(lastContent.providerOptions ?? {}, options)
204209
continue
205210
}
206211
}
207212

208-
msg.providerOptions = mergeDeep(msg.providerOptions ?? {}, providerOptions)
213+
msg.providerOptions = mergeDeep(msg.providerOptions ?? {}, options)
209214
}
210215

211216
return msgs
@@ -261,7 +266,7 @@ export namespace ProviderTransform {
261266
model.api.npm === "@ai-sdk/anthropic") &&
262267
model.api.npm !== "@ai-sdk/gateway"
263268
) {
264-
msgs = applyCaching(msgs, model)
269+
msgs = applyCaching(msgs, model, (options.extendedTTL as boolean) ?? Flag.OPENCODE_EXPERIMENTAL_CACHE_1H_TTL)
265270
}
266271

267272
// Remap providerOptions keys from stored providerID to expected SDK key

packages/opencode/src/session/instruction.ts

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,15 @@ export namespace InstructionPrompt {
7171

7272
export async function systemPaths() {
7373
const config = await Config.get()
74-
const paths = new Set<string>()
74+
const global = new Set<string>()
75+
const project = new Set<string>()
7576

7677
if (!Flag.OPENCODE_DISABLE_PROJECT_CONFIG) {
7778
for (const file of FILES) {
7879
const matches = await Filesystem.findUp(file, Instance.directory, Instance.worktree)
7980
if (matches.length > 0) {
8081
matches.forEach((p) => {
81-
paths.add(path.resolve(p))
82+
project.add(path.resolve(p))
8283
})
8384
break
8485
}
@@ -87,7 +88,7 @@ export namespace InstructionPrompt {
8788

8889
for (const file of globalFiles()) {
8990
if (await Filesystem.exists(file)) {
90-
paths.add(path.resolve(file))
91+
global.add(path.resolve(file))
9192
break
9293
}
9394
}
@@ -106,22 +107,29 @@ export namespace InstructionPrompt {
106107
}).catch(() => [])
107108
: await resolveRelative(instruction)
108109
matches.forEach((p) => {
109-
paths.add(path.resolve(p))
110+
project.add(path.resolve(p))
110111
})
111112
}
112113
}
113114

114-
return paths
115+
return { global, project }
115116
}
116117

117-
export async function system() {
118-
const config = await Config.get()
118+
export type SystemInstructions = { global: string[]; project: string[] }
119+
120+
let cached: SystemInstructions | undefined
121+
122+
export async function system(): Promise<SystemInstructions> {
123+
if (Flag.OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION && cached) return cached
124+
119125
const paths = await systemPaths()
126+
const config = await Config.get()
120127

121-
const files = Array.from(paths).map(async (p) => {
122-
const content = await Filesystem.readText(p).catch(() => "")
123-
return content ? "Instructions from: " + p + "\n" + content : ""
124-
})
128+
const readPaths = (set: Set<string>) =>
129+
Array.from(set).map(async (p) => {
130+
const content = await Filesystem.readText(p).catch(() => "")
131+
return content ? "Instructions from: " + p + "\n" + content : ""
132+
})
125133

126134
const urls: string[] = []
127135
if (config.instructions) {
@@ -138,7 +146,14 @@ export namespace InstructionPrompt {
138146
.then((x) => (x ? "Instructions from: " + url + "\n" + x : "")),
139147
)
140148

141-
return Promise.all([...files, ...fetches]).then((result) => result.filter(Boolean))
149+
const [global, project] = await Promise.all([
150+
Promise.all(readPaths(paths.global)).then((r) => r.filter(Boolean)),
151+
Promise.all([...readPaths(paths.project), ...fetches]).then((r) => r.filter(Boolean)),
152+
])
153+
154+
const result = { global, project }
155+
if (Flag.OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION) cached = result
156+
return result
142157
}
143158

144159
export function loaded(messages: MessageV2.WithParts[]) {
@@ -166,7 +181,8 @@ export namespace InstructionPrompt {
166181
}
167182

168183
export async function resolve(messages: MessageV2.WithParts[], filepath: string, messageID: string) {
169-
const system = await systemPaths()
184+
const paths = await systemPaths()
185+
const system = new Set([...paths.global, ...paths.project])
170186
const already = loaded(messages)
171187
const results: { filepath: string; content: string }[] = []
172188

packages/opencode/src/session/llm.ts

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ export namespace LLM {
3333
model: Provider.Model
3434
agent: Agent.Info
3535
system: string[]
36+
systemSplit?: number
3637
abort: AbortSignal
3738
messages: ModelMessage[]
3839
small?: boolean
@@ -64,20 +65,27 @@ export namespace LLM {
6465
])
6566
const isCodex = provider.id === "openai" && auth?.type === "oauth"
6667

67-
const system = []
68-
system.push(
69-
[
70-
// use agent prompt otherwise provider prompt
71-
// For Codex sessions, skip SystemPrompt.provider() since it's sent via options.instructions
72-
...(input.agent.prompt ? [input.agent.prompt] : isCodex ? [] : SystemPrompt.provider(input.model)),
73-
// any custom prompt passed into this call
74-
...input.system,
75-
// any custom prompt from last user message
76-
...(input.user.system ? [input.user.system] : []),
77-
]
78-
.filter((x) => x)
79-
.join("\n"),
80-
)
68+
// use agent prompt otherwise provider prompt
69+
// For Codex sessions, skip SystemPrompt.provider() since it's sent via options.instructions
70+
const prompt = input.agent.prompt ? [input.agent.prompt] : isCodex ? [] : SystemPrompt.provider(input.model)
71+
const split = input.systemSplit ?? input.system.length
72+
const system = [
73+
// block 1: provider/agent prompt + global instructions (stable across repos)
74+
[...prompt, ...input.system.slice(0, split)].filter(Boolean).join("\n"),
75+
// block 2: env + project instructions + any custom prompt from last user message (dynamic)
76+
[...input.system.slice(split), ...(input.user.system ? [input.user.system] : [])].filter(Boolean).join("\n"),
77+
].filter(Boolean)
78+
79+
// For non-Anthropic native API providers (OpenAI, OpenAI-compatible, llama-server, etc.),
80+
// join system blocks into a single message to avoid "system message must be at the beginning"
81+
// errors. Only Anthropic native API benefits from the 2-block split for cache marker placement.
82+
const isAnthropicNative = input.model.api.npm === "@ai-sdk/anthropic" ||
83+
input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
84+
if (!isAnthropicNative && system.length > 1) {
85+
const joined = system.join("\n")
86+
system.length = 0
87+
system.push(joined)
88+
}
8189

8290
const header = system[0]
8391
await Plugin.trigger(

packages/opencode/src/session/prompt.ts

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -650,13 +650,10 @@ export namespace SessionPrompt {
650650

651651
await Plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs })
652652

653-
// Build system prompt, adding structured output instruction if needed
654-
const skills = await SystemPrompt.skills(agent)
655-
const system = [
656-
...(await SystemPrompt.environment(model)),
657-
...(skills ? [skills] : []),
658-
...(await InstructionPrompt.system()),
659-
]
653+
// Build system prompt: global instructions first (stable), then env + project (dynamic)
654+
const instructions = await InstructionPrompt.system()
655+
const system = [...instructions.global, ...(await SystemPrompt.environment(model)), ...instructions.project]
656+
const systemSplit = instructions.global.length
660657
const format = lastUser.format ?? { type: "text" }
661658
if (format.type === "json_schema") {
662659
system.push(STRUCTURED_OUTPUT_SYSTEM_PROMPT)
@@ -668,6 +665,7 @@ export namespace SessionPrompt {
668665
abort,
669666
sessionID,
670667
system,
668+
systemSplit,
671669
messages: [
672670
...MessageV2.toModelMessages(msgs, model),
673671
...(isLastStep

packages/opencode/src/session/system.ts

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ import PROMPT_GEMINI from "./prompt/gemini.txt"
1010
import PROMPT_CODEX from "./prompt/codex_header.txt"
1111
import PROMPT_TRINITY from "./prompt/trinity.txt"
1212
import type { Provider } from "@/provider/provider"
13-
import type { Agent } from "@/agent/agent"
14-
import { PermissionNext } from "@/permission/next"
15-
import { Skill } from "@/skill"
13+
import { Flag } from "@/flag/flag"
1614

1715
export namespace SystemPrompt {
1816
export function instructions() {
@@ -29,18 +27,22 @@ export namespace SystemPrompt {
2927
return [PROMPT_ANTHROPIC_WITHOUT_TODO]
3028
}
3129

30+
let cachedDate: Date | undefined
31+
3232
export async function environment(model: Provider.Model) {
3333
const project = Instance.project
34+
const date = Flag.OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION
35+
? (cachedDate ??= new Date())
36+
: new Date()
3437
return [
3538
[
3639
`You are powered by the model named ${model.api.id}. The exact model ID is ${model.providerID}/${model.api.id}`,
3740
`Here is some useful information about the environment you are running in:`,
3841
`<env>`,
3942
` Working directory: ${Instance.directory}`,
40-
` Workspace root folder: ${Instance.worktree}`,
4143
` Is directory a git repo: ${project.vcs === "git" ? "yes" : "no"}`,
4244
` Platform: ${process.platform}`,
43-
` Today's date: ${new Date().toDateString()}`,
45+
` Today's date: ${date.toDateString()}`,
4446
`</env>`,
4547
`<directories>`,
4648
` ${
@@ -55,18 +57,4 @@ export namespace SystemPrompt {
5557
].join("\n"),
5658
]
5759
}
58-
59-
export async function skills(agent: Agent.Info) {
60-
if (PermissionNext.disabled(["skill"], agent.permission).has("skill")) return
61-
62-
const list = await Skill.available(agent)
63-
64-
return [
65-
"Skills provide specialized instructions and workflows for specific tasks.",
66-
"Use the skill tool to load a skill when a task matches its description.",
67-
// the agents seem to ingest the information about skills a bit better if we present a more verbose
68-
// version of them here and a less verbose version in tool description, rather than vice versa.
69-
Skill.fmt(list, { verbose: true }),
70-
].join("\n")
71-
}
7260
}

packages/opencode/src/skill/skill.ts

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@ import { Bus } from "@/bus"
1313
import { Session } from "@/session"
1414
import { Discovery } from "./discovery"
1515
import { Glob } from "../util/glob"
16-
import { pathToFileURL } from "url"
17-
import type { Agent } from "@/agent/agent"
18-
import { PermissionNext } from "@/permission/next"
1916

2017
export namespace Skill {
2118
const log = Log.create({ service: "skill" })
@@ -189,30 +186,4 @@ export namespace Skill {
189186
export async function dirs() {
190187
return state().then((x) => x.dirs)
191188
}
192-
193-
export async function available(agent?: Agent.Info) {
194-
const list = await all()
195-
if (!agent) return list
196-
return list.filter((skill) => PermissionNext.evaluate("skill", skill.name, agent.permission).action !== "deny")
197-
}
198-
199-
export function fmt(list: Info[], opts: { verbose: boolean }) {
200-
if (list.length === 0) {
201-
return "No skills are currently available."
202-
}
203-
if (opts.verbose) {
204-
return [
205-
"<available_skills>",
206-
...list.flatMap((skill) => [
207-
` <skill>`,
208-
` <name>${skill.name}</name>`,
209-
` <description>${skill.description}</description>`,
210-
` <location>${pathToFileURL(skill.location).href}</location>`,
211-
` </skill>`,
212-
]),
213-
"</available_skills>",
214-
].join("\n")
215-
}
216-
return ["## Available Skills", ...list.flatMap((skill) => `- **${skill.name}**: ${skill.description}`)].join("\n")
217-
}
218189
}

packages/opencode/src/tool/bash.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,17 @@ export const BashTool = Tool.define("bash", async () => {
5757
log.info("bash tool using shell", { shell })
5858

5959
return {
60-
description: DESCRIPTION.replaceAll("${directory}", Instance.directory)
61-
.replaceAll("${maxLines}", String(Truncate.MAX_LINES))
62-
.replaceAll("${maxBytes}", String(Truncate.MAX_BYTES)),
60+
description: DESCRIPTION.replaceAll("${maxLines}", String(Truncate.MAX_LINES)).replaceAll(
61+
"${maxBytes}",
62+
String(Truncate.MAX_BYTES),
63+
),
6364
parameters: z.object({
6465
command: z.string().describe("The command to execute"),
6566
timeout: z.number().describe("Optional timeout in milliseconds").optional(),
6667
workdir: z
6768
.string()
6869
.describe(
69-
`The working directory to run the command in. Defaults to ${Instance.directory}. Use this instead of 'cd' commands.`,
70+
`The working directory to run the command in. Defaults to the current working directory. Use this instead of 'cd' commands.`,
7071
)
7172
.optional(),
7273
description: z

packages/opencode/src/tool/bash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures.
22

3-
All commands run in ${directory} by default. Use the `workdir` parameter if you need to run a command in a different directory. AVOID using `cd <directory> && <command>` patterns - use `workdir` instead.
3+
All commands run in the current working directory by default. Use the `workdir` parameter if you need to run a command in a different directory. AVOID using `cd <directory> && <command>` patterns - use `workdir` instead.
44

55
IMPORTANT: This tool is for terminal operations like git, npm, docker, etc. DO NOT use it for file operations (reading, writing, editing, searching, finding files) - use the specialized tools for this instead.
66

0 commit comments

Comments (0)