Skip to content
20 changes: 20 additions & 0 deletions packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,17 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) {
if (!last) return
const total =
last.tokens.input + last.tokens.output + last.tokens.reasoning + last.tokens.cache.read + last.tokens.cache.write
const totalInput = last.tokens.input + last.tokens.cache.read + last.tokens.cache.write
const model = sync.data.provider.find((x) => x.id === last.providerID)?.models[last.modelID]
return {
tokens: total.toLocaleString(),
percentage: model?.limit.context ? Math.round((total / model.limit.context) * 100) : null,
cacheHitPercent: totalInput > 0 ? ((last.tokens.cache.read / totalInput) * 100).toFixed(3) : null,
cacheRead: last.tokens.cache.read,
cacheWrite: last.tokens.cache.write,
cacheNew: last.tokens.input,
cacheInput: totalInput,
cacheOutput: last.tokens.output,
}
})

Expand Down Expand Up @@ -106,6 +113,19 @@ export function Sidebar(props: { sessionID: string; overlay?: boolean }) {
<text fg={theme.textMuted}>{context()?.percentage ?? 0}% used</text>
<text fg={theme.textMuted}>{cost()} spent</text>
</box>
<Show when={process.env["OPENCODE_CACHE_AUDIT"] && context()?.cacheHitPercent != null}>
<box>
<text fg={theme.text}>
<b>Cache Audit</b>
</text>
<text fg={theme.textMuted}>{context()!.cacheInput.toLocaleString()} input tokens</text>
<text fg={theme.textMuted}> {context()!.cacheNew.toLocaleString()} new</text>
<text fg={theme.textMuted}> {context()!.cacheRead.toLocaleString()} cache read</text>
<text fg={theme.textMuted}> {context()!.cacheWrite.toLocaleString()} cache write</text>
<text fg={theme.textMuted}>{context()!.cacheHitPercent}% hit rate</text>
<text fg={theme.textMuted}>{context()!.cacheOutput.toLocaleString()} output tokens</text>
</box>
</Show>
<Show when={mcpEntries().length > 0}>
<box>
<box
Expand Down
2 changes: 2 additions & 0 deletions packages/opencode/src/flag/flag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ export namespace Flag {
export const OPENCODE_EXPERIMENTAL_PLAN_MODE = OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_PLAN_MODE")
export const OPENCODE_EXPERIMENTAL_WORKSPACES = OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_WORKSPACES")
export const OPENCODE_EXPERIMENTAL_MARKDOWN = !falsy("OPENCODE_EXPERIMENTAL_MARKDOWN")
// Experimental: keep dynamic system-prompt inputs stable for the process lifetime
// (instruction files are read once and memoized; the env block's date is frozen)
// so the prompt prefix stays byte-identical across calls and provider caches can hit.
export const OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION = truthy("OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION")
// Experimental: request the 1-hour prompt-cache TTL on the first system block
// instead of the default 5-minute one (2x write cost vs 1.25x — see provider/transform.ts).
export const OPENCODE_EXPERIMENTAL_CACHE_1H_TTL = truthy("OPENCODE_EXPERIMENTAL_CACHE_1H_TTL")
export const OPENCODE_MODELS_URL = process.env["OPENCODE_MODELS_URL"]
export const OPENCODE_MODELS_PATH = process.env["OPENCODE_MODELS_PATH"]
export const OPENCODE_DB = process.env["OPENCODE_DB"]
Expand Down
13 changes: 9 additions & 4 deletions packages/opencode/src/provider/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,12 @@ export namespace ProviderTransform {
return msgs
}

function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
function applyCaching(msgs: ModelMessage[], model: Provider.Model, extendedTTL?: boolean): ModelMessage[] {
const system = msgs.filter((msg) => msg.role === "system").slice(0, 2)
const final = msgs.filter((msg) => msg.role !== "system").slice(-2)

// Use 1h cache TTL on first system block (2x write cost vs 1.25x for default 5-min)
const anthropicCache = extendedTTL ? { type: "ephemeral", ttl: "1h" } : { type: "ephemeral" }
const providerOptions = {
anthropic: {
cacheControl: { type: "ephemeral" },
Expand All @@ -194,18 +196,21 @@ export namespace ProviderTransform {
}

for (const msg of unique([...system, ...final])) {
const options = msg === system[0]
? { ...providerOptions, anthropic: { cacheControl: anthropicCache } }
: providerOptions
const useMessageLevelOptions = model.providerID === "anthropic" || model.providerID.includes("bedrock")
const shouldUseContentOptions = !useMessageLevelOptions && Array.isArray(msg.content) && msg.content.length > 0

if (shouldUseContentOptions) {
const lastContent = msg.content[msg.content.length - 1]
if (lastContent && typeof lastContent === "object") {
lastContent.providerOptions = mergeDeep(lastContent.providerOptions ?? {}, providerOptions)
lastContent.providerOptions = mergeDeep(lastContent.providerOptions ?? {}, options)
continue
}
}

msg.providerOptions = mergeDeep(msg.providerOptions ?? {}, providerOptions)
msg.providerOptions = mergeDeep(msg.providerOptions ?? {}, options)
}

return msgs
Expand Down Expand Up @@ -261,7 +266,7 @@ export namespace ProviderTransform {
model.api.npm === "@ai-sdk/anthropic") &&
model.api.npm !== "@ai-sdk/gateway"
) {
msgs = applyCaching(msgs, model)
msgs = applyCaching(msgs, model, (options.extendedTTL as boolean) ?? Flag.OPENCODE_EXPERIMENTAL_CACHE_1H_TTL)
}

// Remap providerOptions keys from stored providerID to expected SDK key
Expand Down
9 changes: 9 additions & 0 deletions packages/opencode/src/session/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,15 @@ export namespace Session {
},
}

// OPENCODE_CACHE_AUDIT=1 enables per-call cache token accounting in the log
if (process.env["OPENCODE_CACHE_AUDIT"]) {
  // Effective input = new (uncached) tokens + tokens read from cache + tokens written to cache.
  const totalInputTokens = tokens.input + tokens.cache.read + tokens.cache.write
  // Hit rate = share of effective input served from cache; guard the divide-by-zero when input is empty.
  const cacheHitPercent = totalInputTokens > 0 ? ((tokens.cache.read / totalInputTokens) * 100).toFixed(1) : "0.0"
  // NOTE(review): assumes tokens.total may be absent, hence the ?? 0 fallback — confirm against the usage type.
  log.info(
    `[CACHE] ${input.model.id} input=${totalInputTokens} (cache_read=${tokens.cache.read} cache_write=${tokens.cache.write} new=${tokens.input}) hit=${cacheHitPercent}% output=${tokens.output} total=${tokens.total ?? 0}`,
  )
}

const costInfo =
input.model.cost?.experimentalOver200K && tokens.input + tokens.cache.read > 200_000
? input.model.cost.experimentalOver200K
Expand Down
42 changes: 29 additions & 13 deletions packages/opencode/src/session/instruction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,15 @@ export namespace InstructionPrompt {

export async function systemPaths() {
const config = await Config.get()
const paths = new Set<string>()
const global = new Set<string>()
const project = new Set<string>()

if (!Flag.OPENCODE_DISABLE_PROJECT_CONFIG) {
for (const file of FILES) {
const matches = await Filesystem.findUp(file, Instance.directory, Instance.worktree)
if (matches.length > 0) {
matches.forEach((p) => {
paths.add(path.resolve(p))
project.add(path.resolve(p))
})
break
}
Expand All @@ -87,7 +88,7 @@ export namespace InstructionPrompt {

for (const file of globalFiles()) {
if (await Filesystem.exists(file)) {
paths.add(path.resolve(file))
global.add(path.resolve(file))
break
}
}
Expand All @@ -106,22 +107,29 @@ export namespace InstructionPrompt {
}).catch(() => [])
: await resolveRelative(instruction)
matches.forEach((p) => {
paths.add(path.resolve(p))
project.add(path.resolve(p))
})
}
}

return paths
return { global, project }
}

export async function system() {
const config = await Config.get()
export type SystemInstructions = { global: string[]; project: string[] }

let cached: SystemInstructions | undefined

export async function system(): Promise<SystemInstructions> {
if (Flag.OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION && cached) return cached

const paths = await systemPaths()
const config = await Config.get()

const files = Array.from(paths).map(async (p) => {
const content = await Filesystem.readText(p).catch(() => "")
return content ? "Instructions from: " + p + "\n" + content : ""
})
const readPaths = (set: Set<string>) =>
Array.from(set).map(async (p) => {
const content = await Filesystem.readText(p).catch(() => "")
return content ? "Instructions from: " + p + "\n" + content : ""
})

const urls: string[] = []
if (config.instructions) {
Expand All @@ -138,7 +146,14 @@ export namespace InstructionPrompt {
.then((x) => (x ? "Instructions from: " + url + "\n" + x : "")),
)

return Promise.all([...files, ...fetches]).then((result) => result.filter(Boolean))
const [global, project] = await Promise.all([
Promise.all(readPaths(paths.global)).then((r) => r.filter(Boolean)),
Promise.all([...readPaths(paths.project), ...fetches]).then((r) => r.filter(Boolean)),
])

const result = { global, project }
if (Flag.OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION) cached = result
return result
}

export function loaded(messages: MessageV2.WithParts[]) {
Expand Down Expand Up @@ -166,7 +181,8 @@ export namespace InstructionPrompt {
}

export async function resolve(messages: MessageV2.WithParts[], filepath: string, messageID: string) {
const system = await systemPaths()
const paths = await systemPaths()
const system = new Set([...paths.global, ...paths.project])
const already = loaded(messages)
const results: { filepath: string; content: string }[] = []

Expand Down
29 changes: 15 additions & 14 deletions packages/opencode/src/session/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export namespace LLM {
agent: Agent.Info
permission?: Permission.Ruleset
system: string[]
systemSplit?: number
abort: AbortSignal
messages: ModelMessage[]
small?: boolean
Expand Down Expand Up @@ -67,19 +68,19 @@ export namespace LLM {
// TODO: move this to a proper hook
const isOpenaiOauth = provider.id === "openai" && auth?.type === "oauth"

const system: string[] = []
system.push(
[
// use agent prompt otherwise provider prompt
...(input.agent.prompt ? [input.agent.prompt] : SystemPrompt.provider(input.model)),
// any custom prompt passed into this call
...input.system,
// any custom prompt from last user message
...(input.user.system ? [input.user.system] : []),
]
.filter((x) => x)
.join("\n"),
)
const prompt = input.agent.prompt ? [input.agent.prompt] : SystemPrompt.provider(input.model)
const split = input.systemSplit ?? input.system.length
const shouldSplit = provider.options?.["splitSystemPrompt"] !== false
const system = shouldSplit
? [
// block 1: provider/agent prompt + global instructions (stable across repos)
[...prompt, ...input.system.slice(0, split)].filter(Boolean).join("\n"),
// block 2: env + project instructions + any custom prompt from last user message (dynamic)
[...input.system.slice(split), ...(input.user.system ? [input.user.system] : [])].filter(Boolean).join("\n"),
].filter(Boolean)
: [
[...prompt, ...input.system, ...(input.user.system ? [input.user.system] : [])].filter(Boolean).join("\n"),
].filter(Boolean)

const header = system[0]
await Plugin.trigger(
Expand All @@ -88,7 +89,7 @@ export namespace LLM {
{ system },
)
// rejoin to maintain 2-part structure for caching if header unchanged
if (system.length > 2 && system[0] === header) {
if (shouldSplit && system.length > 2 && system[0] === header) {
const rest = system.slice(1)
system.length = 0
system.push(header, rest.join("\n"))
Expand Down
11 changes: 8 additions & 3 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -652,13 +652,17 @@ export namespace SessionPrompt {

await Plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs })

// Build system prompt, adding structured output instruction if needed
// Build system prompt: global instructions + global skills first (stable), then env + project (dynamic)
const instructions = await InstructionPrompt.system()
const skills = await SystemPrompt.skills(agent)
const system = [
...instructions.global,
...(skills.global ? [skills.global] : []),
...(await SystemPrompt.environment(model)),
...(skills ? [skills] : []),
...(await InstructionPrompt.system()),
...(skills.project ? [skills.project] : []),
...instructions.project,
]
const systemSplit = instructions.global.length + (skills.global ? 1 : 0)
const format = lastUser.format ?? { type: "text" }
if (format.type === "json_schema") {
system.push(STRUCTURED_OUTPUT_SYSTEM_PROMPT)
Expand All @@ -671,6 +675,7 @@ export namespace SessionPrompt {
abort,
sessionID,
system,
systemSplit,
messages: [
...MessageV2.toModelMessages(msgs, model),
...(isLastStep
Expand Down
34 changes: 27 additions & 7 deletions packages/opencode/src/session/system.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type { Provider } from "@/provider/provider"
import type { Agent } from "@/agent/agent"
import { Permission } from "@/permission"
import { Skill } from "@/skill"
import { Flag } from "@/flag/flag"

export namespace SystemPrompt {
export function provider(model: Provider.Model) {
Expand All @@ -25,8 +26,13 @@ export namespace SystemPrompt {
return [PROMPT_DEFAULT]
}

let cachedDate: Date | undefined

export async function environment(model: Provider.Model) {
const project = Instance.project
const date = Flag.OPENCODE_EXPERIMENTAL_CACHE_STABILIZATION
? (cachedDate ??= new Date())
: new Date()
return [
[
`You are powered by the model named ${model.api.id}. The exact model ID is ${model.providerID}/${model.api.id}`,
Expand All @@ -36,7 +42,7 @@ export namespace SystemPrompt {
` Workspace root folder: ${Instance.worktree}`,
` Is directory a git repo: ${project.vcs === "git" ? "yes" : "no"}`,
` Platform: ${process.platform}`,
` Today's date: ${new Date().toDateString()}`,
` Today's date: ${date.toDateString()}`,
Copy link
Copy Markdown

@kamelkace kamelkace Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to change the wording here, to hint to the LLM that this isn't a live-updating value? Otherwise it might make some weird choices elsewhere in long-lived conversations. E.g.

Suggested change
` Today's date: ${date.toDateString()}`,
` Session started at: ${date.toDateString()}`,

Copy link
Copy Markdown
Author

@bhagirathsinh-vaghela bhagirathsinh-vaghela Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point — this wording is better when the date is frozen. I'm keeping "Today's date" in this PR for now since it's what all OpenCode users expect (at least in my experience, even if they aren't consciously aware of it), but I'm not against the change if maintainers agree.

Separately, I've been experimenting locally with a progressive disclosure approach — making the env block fully static, instructing the model to fetch cwd, date, platform, etc. via tool calls when needed. Eliminates the block 2 cache write entirely at the cost of an occasional extra round-trip.

Interesting finding in this approach: completely removing the env block tended to result in models not bothering to fetch the info at all and assuming values instead, which is non-deterministic. A static block with explicit "figure this out when needed" instructions worked much better, at least with Anthropic models.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separately, I've been experimenting locally with a progressive disclosure approach — making the env block fully static, instructing the model to fetch cwd, date, platform, etc. via tool calls when needed. [...] A static block with explicit "figure out when needed" instructions worked much better, at least with Anthropic models.

Hmm! I'll have to give that a shot when I patch from this PR later; I'm running locally against one of the Qwen3.5 models, so it'll be interesting data to see how they respond.

`</env>`,
`<directories>`,
` ${
Expand All @@ -52,17 +58,31 @@ export namespace SystemPrompt {
]
}

export async function skills(agent: Agent.Info) {
if (Permission.disabled(["skill"], agent.permission).has("skill")) return
export async function skills(agent: Agent.Info): Promise<{ global?: string; project?: string }> {
if (Permission.disabled(["skill"], agent.permission).has("skill")) return {}

const list = await Skill.available(agent)
const globalSkills = list.filter((s) => s.scope === "global")
const projectSkills = list.filter((s) => s.scope === "project")

return [
// the agents seem to ingest the information about skills a bit better if we present a more verbose
// version of them here and a less verbose version in tool description, rather than vice versa.
const preamble = [
"Skills provide specialized instructions and workflows for specific tasks.",
"Use the skill tool to load a skill when a task matches its description.",
// the agents seem to ingest the information about skills a bit better if we present a more verbose
// version of them here and a less verbose version in tool description, rather than vice versa.
Skill.fmt(list, { verbose: true }),
].join("\n")

const global = globalSkills.length > 0
? [preamble, Skill.fmt(globalSkills, { verbose: true })].join("\n")
: undefined

const project = projectSkills.length > 0
? [
...(globalSkills.length === 0 ? [preamble] : []),
Skill.fmt(projectSkills, { verbose: true }),
].join("\n")
: undefined

return { global, project }
}
}
Loading
Loading