From 4311254cd8a93f55ee5240b5277425403393c211 Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Tue, 26 May 2026 17:10:26 +0700 Subject: [PATCH 01/16] =?UTF-8?q?feat:=20[ENG-2687]=20language=20selection?= =?UTF-8?q?=20foundation=20=E2=80=94=20BrvConfig.language=20+=20language-c?= =?UTF-8?q?lause=20module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scaffolding for the language-preservation feature (issue #616). Adds `BrvConfig.language` and a shared clause module; no surface is wired yet. Commits 02–05 do the actual injection (tool-mode prompts, tokenizer, validation, CLI). - `BrvConfig.language?: { mode: 'auto' | 'fixed'; code? }` round-trips through toJson/fromJson and every with* method. `mode: 'fixed'` without `code` is rejected at load so silent fallback to English is structurally impossible. - `language-clause.ts` exports `buildLanguageClause(language?)` and an inline `LANGUAGE_NAMES` ISO-639-1 → English map (~24 entries, no iso-639-1 dependency). Auto / fixed-known / fixed-unknown all return a clause that carves out tag names, attribute names, enum values, and `path` from translation so Zod validation can't fire on a localized schema key. Tests: 15 new BrvConfig tests + 12 new language-clause tests. All existing tests pass; clause module has zero src/ consumers (commit 02). 
--- src/server/core/domain/entities/brv-config.ts | 39 +++++++ .../core/domain/render/language-clause.ts | 88 +++++++++++++++ .../core/domain/entities/brv-config.test.ts | 98 +++++++++++++++++ .../domain/render/language-clause.test.ts | 102 ++++++++++++++++++ 4 files changed, 327 insertions(+) create mode 100644 src/server/core/domain/render/language-clause.ts create mode 100644 test/unit/server/core/domain/render/language-clause.test.ts diff --git a/src/server/core/domain/entities/brv-config.ts b/src/server/core/domain/entities/brv-config.ts index 866780f55..091885228 100644 --- a/src/server/core/domain/entities/brv-config.ts +++ b/src/server/core/domain/entities/brv-config.ts @@ -2,6 +2,20 @@ import {BRV_CONFIG_VERSION} from '../../../constants.js' import {Agent, AGENT_VALUES} from './agent.js' import {Space} from './space.js' +/** + * Per-project language preference. Drives the language-preservation clause + * surfaced through the curate kickoff / correction prompts. + * + * - `auto`: instruct the LLM to match the user's input language. + * - `fixed`: instruct the LLM to write in a specific language (ISO 639-1). + * `code` is required when `mode === 'fixed'`; the loader rejects fixed + * without code so silent fallback to English is structurally impossible. + */ +export type BrvConfigLanguage = { + code?: string + mode: 'auto' | 'fixed' +} + /** * Parameters for creating a BrvConfig instance. * chatLogPath, cwd, ide, and cloud fields (spaceId, spaceName, teamId, teamName) @@ -15,6 +29,7 @@ export type BrvConfigParams = { createdAt: string cwd?: string ide?: Agent + language?: BrvConfigLanguage reviewDisabled?: boolean spaceId?: string spaceName?: string @@ -60,6 +75,23 @@ const isCodingAgent = (value: unknown): value is Agent => { return false } +/** + * Validate the shape of a `language` field on a config JSON object. + * Accepts `undefined` (the field is optional). 
For present values: + * - must be a non-null object with `mode: 'auto' | 'fixed'` + * - `code` is required when `mode === 'fixed'` (silent fallback to + * English would otherwise be possible at prompt time) + */ +const isOptionalLanguageJson = (value: unknown): boolean => { + if (value === undefined) return true + if (typeof value !== 'object' || value === null) return false + const lang = value as Record + if (lang.mode !== 'auto' && lang.mode !== 'fixed') return false + if (lang.code !== undefined && typeof lang.code !== 'string') return false + if (lang.mode === 'fixed' && typeof lang.code !== 'string') return false + return true +} + /** * Type guard for BrvConfigFromJson - validates JSON structure at runtime. * Note: version is optional in this check (old configs may not have it). @@ -90,6 +122,7 @@ const isBrvConfigJson = (json: unknown): json is BrvConfigFromJson => { if (obj.cipherAgentModes !== undefined && !Array.isArray(obj.cipherAgentModes)) return false if (obj.version !== undefined && typeof obj.version !== 'string') return false if (obj.reviewDisabled !== undefined && typeof obj.reviewDisabled !== 'boolean') return false + if (!isOptionalLanguageJson(obj.language)) return false return true } @@ -106,6 +139,7 @@ export class BrvConfig { public readonly createdAt: string public readonly cwd?: string public readonly ide?: Agent + public readonly language?: BrvConfigLanguage public readonly reviewDisabled?: boolean public readonly spaceId?: string public readonly spaceName?: string @@ -125,6 +159,7 @@ export class BrvConfig { this.createdAt = params.createdAt this.cwd = params.cwd this.ide = params.ide + this.language = params.language this.reviewDisabled = params.reviewDisabled this.spaceId = params.spaceId this.spaceName = params.spaceName @@ -218,6 +253,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled: this.reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, 
@@ -252,6 +288,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, @@ -273,6 +310,7 @@ export class BrvConfig { createdAt: new Date().toISOString(), cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled: this.reviewDisabled, spaceId: space.id, spaceName: space.name, @@ -294,6 +332,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled: this.reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, diff --git a/src/server/core/domain/render/language-clause.ts b/src/server/core/domain/render/language-clause.ts new file mode 100644 index 000000000..9c5964ef9 --- /dev/null +++ b/src/server/core/domain/render/language-clause.ts @@ -0,0 +1,88 @@ +/** + * Language-preservation clause for curate prompts. + * + * Single source of truth for the clause text. Every downstream injection + * surface — `buildGeneratePrompt`, `buildCorrectionPrompt`, and the MCP + * `brv-curate` tool description — imports `buildLanguageClause` and + * emits the same string. A wording revision is a one-file change. + * + * Schema-key invariant: the clause must mention that tag names, attribute + * names, attribute enum values, and `path` stay English. The element- + * registry Zod schemas enforce this structurally at the writer boundary — + * the clause mentions it so the calling agent's LLM doesn't burn a + * correction round-trip authoring `` or `path="безопасность/..."` + * that would fail validation downstream. + */ + +import type {BrvConfigLanguage} from '../entities/brv-config.js' + +/** + * ISO-639-1 code → English language name. Inline (~24 entries) rather than + * pulling the `iso-639-1` package — runtime dependency surface stays + * minimal. Codes not in this map degrade gracefully via the raw-code + * fallback in `buildLanguageClause`. 
+ */ +export const LANGUAGE_NAMES: Record = { + ar: 'Arabic', + de: 'German', + el: 'Greek', + en: 'English', + es: 'Spanish', + fi: 'Finnish', + fr: 'French', + he: 'Hebrew', + hi: 'Hindi', + id: 'Indonesian', + it: 'Italian', + ja: 'Japanese', + ko: 'Korean', + nl: 'Dutch', + no: 'Norwegian', + pl: 'Polish', + pt: 'Portuguese', + ru: 'Russian', + sv: 'Swedish', + th: 'Thai', + tr: 'Turkish', + uk: 'Ukrainian', + vi: 'Vietnamese', + zh: 'Chinese', +} + +const AUTO_CLAUSE = + "Match the user's input language for human-readable content: body text of `` elements, list items, and the `title` / `summary` attributes on ``. Keep tag names, attribute names, enum values, and the `path` attribute in English for tooling consistency. Code snippets and identifiers stay verbatim." + +function buildFixedClause(languageName: string): string { + return `Write all human-readable content (body text of \`\` elements, list items, \`title\` / \`summary\` attrs) in ${languageName}. Keep tag names, attribute names, enum values, and \`path\` in English. Code snippets and identifiers stay verbatim.` +} + +/** + * Return the language-preservation clause text for a config's language + * preference. + * + * - `undefined` or `{mode: 'auto'}` → the auto clause: "match the user's + * input language". + * - `{mode: 'fixed', code}` where `code` is in `LANGUAGE_NAMES` → the + * fixed clause referencing the mapped English name (e.g. "Russian"). + * - `{mode: 'fixed', code}` where `code` is unknown → the fixed clause + * with the raw code in double quotes (e.g. `in "xx"`). Degrades + * gracefully so a future ISO code we haven't mapped yet still produces + * a usable clause. + * + * `{mode: 'fixed'}` without `code` is rejected by `isBrvConfigJson` at + * load time and cannot reach here under normal operation; the function + * still defends against it by returning the auto clause rather than + * throwing — a malformed config should degrade, not crash a write path. 
+ */ +export function buildLanguageClause(language?: BrvConfigLanguage): string { + if (language === undefined || language.mode === 'auto') { + return AUTO_CLAUSE + } + + if (language.code === undefined) { + return AUTO_CLAUSE + } + + const name = LANGUAGE_NAMES[language.code] ?? `"${language.code}"` + return buildFixedClause(name) +} diff --git a/test/unit/core/domain/entities/brv-config.test.ts b/test/unit/core/domain/entities/brv-config.test.ts index 404603e62..32cde94f6 100644 --- a/test/unit/core/domain/entities/brv-config.test.ts +++ b/test/unit/core/domain/entities/brv-config.test.ts @@ -304,4 +304,102 @@ describe('BrvConfig', () => { expect(config.createdAt).to.be.a('string') }) }) + + describe('language', () => { + const fixedRu: BrvConfigParams['language'] = {code: 'ru', mode: 'fixed'} + + it('defaults to undefined when not set', () => { + const config = new BrvConfig(validConstructorArgs) + expect(config.language).to.be.undefined + }) + + it('preserves auto-mode through the constructor', () => { + const config = new BrvConfig({...validConstructorArgs, language: {mode: 'auto'}}) + expect(config.language).to.deep.equal({mode: 'auto'}) + }) + + it('preserves fixed-mode with code through the constructor', () => { + const config = new BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(config.language).to.deep.equal(fixedRu) + }) + + it('round-trips auto-mode through toJson/fromJson', () => { + const config = new BrvConfig({...validConstructorArgs, language: {mode: 'auto'}}) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.language).to.deep.equal({mode: 'auto'}) + }) + + it('round-trips fixed-mode with code through toJson/fromJson', () => { + const config = new BrvConfig({...validConstructorArgs, language: fixedRu}) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.language).to.deep.equal(fixedRu) + }) + + it('round-trips undefined language through toJson/fromJson', () => { + // Existing configs (no 
`language` field) must load cleanly post-rollout. + const config = new BrvConfig(validConstructorArgs) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.language).to.be.undefined + }) + + it('rejects mode: fixed without code in fromJson', () => { + // `mode: 'fixed'` without `code` would silently fall back to English + // at prompt time. The loader rejects it so the failure mode is + // structurally impossible. + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: {mode: 'fixed'}}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects unknown mode value in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: {mode: 'always-english'}}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects non-string code in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: {code: 123, mode: 'fixed'}}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects non-object language in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: 'ru'}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects null language in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: null}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('preserves language through withSpace', () => { + const original = new BrvConfig({...validConstructorArgs, language: fixedRu}) + const space = new Space({ + id: 'space-789', + isDefault: false, + name: 'my-space', + teamId: 'team-abc', + teamName: 'my-team', + }) + expect(original.withSpace(space).language).to.deep.equal(fixedRu) + }) + + it('preserves language through withoutSpace', () => { + const original = new BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(original.withoutSpace().language).to.deep.equal(fixedRu) + }) + + it('preserves language through withReviewDisabled', () => { + 
const original = new BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(original.withReviewDisabled(true).language).to.deep.equal(fixedRu) + }) + + it('preserves language through withVersion', () => { + const original = new BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(original.withVersion('9.9.9').language).to.deep.equal(fixedRu) + }) + }) }) diff --git a/test/unit/server/core/domain/render/language-clause.test.ts b/test/unit/server/core/domain/render/language-clause.test.ts new file mode 100644 index 000000000..c6d2c9543 --- /dev/null +++ b/test/unit/server/core/domain/render/language-clause.test.ts @@ -0,0 +1,102 @@ +/** + * Tests for buildLanguageClause and the LANGUAGE_NAMES map. + * + * The clause is load-bearing for the language-selection feature — every + * downstream injection surface (kickoff prompt, correction prompt, MCP + * tool description) emits this exact string. The "schema-key invariant" + * test below is the contract the clause must hold so that LLM authoring + * doesn't drift into translating tag names / enum values, which would + * fail Zod validation at the writer boundary. + */ + +import {expect} from 'chai' + +import {buildLanguageClause, LANGUAGE_NAMES} from '../../../../../../src/server/core/domain/render/language-clause.js' + +describe('language-clause', () => { + describe('LANGUAGE_NAMES', () => { + it('includes Russian (the #616 reporter language)', () => { + expect(LANGUAGE_NAMES.ru).to.equal('Russian') + }) + + it('includes the four scripts covered by the validation matrix', () => { + expect(LANGUAGE_NAMES.vi).to.equal('Vietnamese') + expect(LANGUAGE_NAMES.zh).to.equal('Chinese') + expect(LANGUAGE_NAMES.ja).to.equal('Japanese') + }) + + it('includes English so the CLI accepts the restoration code', () => { + // Release notes recommend `language: { mode: 'fixed', code: 'en' }` + // as the opt-out path for users who want forced English. 
The CLI + // (commit 05) rejects codes not in this map, so `en` must be here. + expect(LANGUAGE_NAMES.en).to.equal('English') + }) + }) + + describe('buildLanguageClause', () => { + it('returns the auto clause when language is undefined', () => { + const clause = buildLanguageClause() + expect(clause).to.include("Match the user's input language") + }) + + it('returns the auto clause when mode is auto', () => { + const clause = buildLanguageClause({mode: 'auto'}) + expect(clause).to.include("Match the user's input language") + }) + + it('returns the fixed clause with mapped English name for a known code', () => { + const clause = buildLanguageClause({code: 'ru', mode: 'fixed'}) + expect(clause).to.include('in Russian') + }) + + it('returns the fixed clause for Chinese (CJK)', () => { + const clause = buildLanguageClause({code: 'zh', mode: 'fixed'}) + expect(clause).to.include('in Chinese') + }) + + it('returns the fixed clause for Vietnamese (Latin-non-English)', () => { + const clause = buildLanguageClause({code: 'vi', mode: 'fixed'}) + expect(clause).to.include('in Vietnamese') + }) + + it('falls back to the raw code in quotes for an unknown ISO code', () => { + // Forward-compat: a future code we haven't mapped yet still + // produces a usable clause. Degrades to `in "xx"` rather than + // failing the entire prompt build. + const clause = buildLanguageClause({code: 'xx', mode: 'fixed'}) + expect(clause).to.include('in "xx"') + }) + + it('degrades to auto when fixed-mode arrives without a code', () => { + // `isBrvConfigJson` rejects this shape at load time; the function + // defends against the case anyway so a malformed config degrades + // rather than crashing a write path. 
+ const clause = buildLanguageClause({mode: 'fixed'}) + expect(clause).to.include("Match the user's input language") + }) + + it('every clause variant mentions the schema-key invariant', () => { + // Load-bearing — if the clause is loose enough that this assertion + // fails, the LLM may translate tag names like `` to a + // localized form, which fails Zod validation downstream. + const auto = buildLanguageClause() + const fixedKnown = buildLanguageClause({code: 'ru', mode: 'fixed'}) + const fixedUnknown = buildLanguageClause({code: 'xx', mode: 'fixed'}) + + for (const clause of [auto, fixedKnown, fixedUnknown]) { + expect(clause).to.include('tag names') + expect(clause).to.include('attribute names') + expect(clause).to.include('enum values') + expect(clause).to.include('`path`') + } + }) + + it('all clauses preserve code snippets verbatim', () => { + const auto = buildLanguageClause() + const fixed = buildLanguageClause({code: 'ru', mode: 'fixed'}) + + expect(auto).to.include('Code snippets and identifiers stay verbatim') + expect(fixed).to.include('Code snippets and identifiers stay verbatim') + }) + }) +}) From 31c7f195c0d341e434a23d564abe6eaacaf84d3a Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Tue, 26 May 2026 17:38:32 +0700 Subject: [PATCH 02/16] =?UTF-8?q?feat:=20[ENG-2688]=20language=20selection?= =?UTF-8?q?=20tool-mode=20injection=20=E2=80=94=20kickoff/correction=20pro?= =?UTF-8?q?mpts=20+=20MCP=20tool=20description?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the language-preservation clause from ENG-2687 into the three tool-mode prompt surfaces a calling agent's LLM sees during curate. After this commit a tool-mode user under `language: { mode: 'fixed', code: 'ru' }` gets `` body text authored in Russian; schema (tag names, attribute names, enum values, `path`) stays English because the clause carves it out. Closes #616 for Cyrillic / Vietnamese / European Latin users. 
CJK users need commit 03 (tokenizer fix) before queries find matches. Injection points: - `buildGeneratePrompt(options)` and `buildCorrectionPrompt(options)` — add `language?: BrvConfigLanguage` to options; emit a `# Language` section. In the kickoff: between Path format and Element vocabulary (part of the byterover-controlled framing that commits BEFORE the user-intent block). In correction: between Output contract and Errors-to-fix (reasserts the contract on every retry). - `brv-curate-tool.ts:TOOL_DESCRIPTION` — append the auto clause unconditionally. The MCP tool description is built once at server-boot and cannot read live config; per-call fixed-mode is honored via the oclif kickoff prompt (which IS dynamic). Threading: - `kickoffSession` / `continueSession` in curate-session.ts now accept `language?` and pass it to the builders. - The oclif `brv curate` command loads `BrvConfig.language` via `ProjectConfigStore.read()` (with a try/catch that degrades a corrupt config to `undefined` → auto, never blocking curate) and threads it into both kickoff and continuation. - `language` is re-read on every continuation so a mid-session `brv config set language.code ` is honored on the next retry. Tests (10 new): - 4 cases for buildGeneratePrompt (section ordering, auto-default, auto-mode, fixed-RU-emits-Russian-and-not-auto). - 3 cases for buildCorrectionPrompt (section ordering, auto-default, fixed-RU). - 2 integration cases on kickoffSession (auto-default thread, fixed-RU thread) confirming end-to-end orchestrator → builder. - 1 case on TOOL_DESCRIPTION self-containment asserting the auto clause + schema-key carve-out are present. 175/175 tests green across the affected surfaces. Typecheck + lint clean; the pre-existing `max-params` warning on `registerBrvCurateTool` is unchanged (signature was already 5 params). 
--- src/oclif/commands/curate/index.ts | 28 ++++++- src/oclif/lib/curate-session.ts | 21 ++++- .../domain/render/curate-prompt-builder.ts | 15 +++- src/server/infra/mcp/tools/brv-curate-tool.ts | 11 +++ .../infra/mcp/tools/brv-curate-tool.test.ts | 21 +++++ test/unit/oclif/lib/curate-session.test.ts | 19 +++++ .../render/curate-prompt-builder.test.ts | 82 ++++++++++++++++++- 7 files changed, 187 insertions(+), 10 deletions(-) diff --git a/src/oclif/commands/curate/index.ts b/src/oclif/commands/curate/index.ts index e9bf820bc..8a7715b27 100644 --- a/src/oclif/commands/curate/index.ts +++ b/src/oclif/commands/curate/index.ts @@ -1,5 +1,8 @@ import {Args, Command, Flags} from '@oclif/core' +import type {BrvConfigLanguage} from '../../../server/core/domain/entities/brv-config.js' + +import {ProjectConfigStore} from '../../../server/infra/config/file-config-store.js' import {continueSession, kickoffSession, resolveProjectRoot} from '../../lib/curate-session.js' import {type DaemonClientOptions, formatConnectionError, withDaemonRetry} from '../../lib/daemon-client.js' import {writeJsonResponse} from '../../lib/json-response.js' @@ -202,13 +205,16 @@ Bad examples: // to emit the generate prompt. const {response} = flags const confirmOverwrite = flags.overwrite ?? 
false + const projectRoot = resolveProjectRoot() + const language = await this.resolveLanguagePreference(projectRoot) try { await withDaemonRetry(async (client) => { const envelope = await continueSession({ client, confirmOverwrite, format, - projectRoot: resolveProjectRoot(), + language, + projectRoot, response, sessionId, }) @@ -249,7 +255,25 @@ Bad examples: return } - const envelope = await kickoffSession({content, projectRoot: resolveProjectRoot()}) + const projectRoot = resolveProjectRoot() + const language = await this.resolveLanguagePreference(projectRoot) + const envelope = await kickoffSession({content, language, projectRoot}) this.emitToolModeEnvelope(envelope, format) } + + /** + * Read the per-project language preference from `.brv/config.json`. + * Missing config (fresh project) or missing field returns `undefined`, + * which the kickoff / correction prompts treat as the auto clause — + * match the user's input language. Read failures degrade silently to + * `undefined` so a corrupt config never blocks curate. 
+ */ + private async resolveLanguagePreference(projectRoot: string): Promise { + try { + const config = await new ProjectConfigStore().read(projectRoot) + return config?.language + } catch { + return undefined + } + } } diff --git a/src/oclif/lib/curate-session.ts b/src/oclif/lib/curate-session.ts index 099de29f6..c47da8694 100644 --- a/src/oclif/lib/curate-session.ts +++ b/src/oclif/lib/curate-session.ts @@ -6,6 +6,7 @@ import {mkdir, readFile, rm, writeFile} from 'node:fs/promises' import {dirname, join} from 'node:path' import {z} from 'zod' +import type {BrvConfigLanguage} from '../../server/core/domain/entities/brv-config.js' import type {CurateHtmlDirectResult} from '../../server/core/interfaces/executor/i-curate-executor.js' import type {HtmlWriteError} from '../../server/infra/render/writer/html-writer.js' import type {CurateMeta} from '../../shared/curate-meta.js' @@ -174,6 +175,13 @@ type CurateSessionState = { type KickoffOptions = { content: string + /** + * Per-project language preference loaded from `.brv/config.json`. Threaded + * into the kickoff prompt so the calling agent's LLM authors body text in + * the configured language. `undefined` (no config or no language field) + * defaults to the auto clause — match the user's input language. + */ + language?: BrvConfigLanguage projectRoot: string } @@ -195,6 +203,12 @@ type ContinueOptions = { * mode). Defaults to 'json' — matches the agent-facing default. */ format?: 'json' | 'text' + /** + * Per-project language preference loaded from `.brv/config.json`. Threaded + * into the correction prompt — read fresh on each continuation, so a + * mid-session config change (rare) is honored on the next retry. + */ + language?: BrvConfigLanguage projectRoot: string response: string sessionId: string @@ -207,7 +221,7 @@ type ContinueOptions = { * to author HTML". 
*/ export async function kickoffSession(options: KickoffOptions): Promise { - const {content, projectRoot} = options + const {content, language, projectRoot} = options const sessionId = randomUUID() const state: CurateSessionState = { @@ -222,7 +236,7 @@ export async function kickoffSession(options: KickoffOptions): Promise { - const {client, confirmOverwrite = false, format = 'json', projectRoot, response, sessionId} = options + const {client, confirmOverwrite = false, format = 'json', language, projectRoot, response, sessionId} = options // Reject non-uuid session ids before any path join — see SESSION_ID_RE // for the threat model. Same `kind` as "session not found" because @@ -401,6 +415,7 @@ export async function continueSession(options: ContinueOptions): Promise` HTML document for a knowledge base.', '', @@ -64,6 +66,10 @@ export function buildGeneratePrompt(options: {userIntent: string}): string { '', PATH_FORMAT, '', + '# Language', + '', + buildLanguageClause(options.language), + '', '# Element vocabulary (closed)', '', CURATE_SCHEMA_PROMPT, @@ -88,10 +94,11 @@ export function buildGeneratePrompt(options: {userIntent: string}): string { */ export function buildCorrectionPrompt(options: { errors: readonly HtmlWriteError[] + language?: BrvConfigLanguage previousHtml: string userIntent: string }): string { - const {errors, previousHtml, userIntent} = options + const {errors, language, previousHtml, userIntent} = options const fixInstructions = errors.length === 0 ? 'No structured errors were reported. Re-emit the document carefully and double-check every required attribute.' 
@@ -130,6 +137,10 @@ export function buildCorrectionPrompt(options: { '', OUTPUT_CONTRACT, '', + '# Language', + '', + buildLanguageClause(language), + '', '# Errors to fix', '', fixInstructions, diff --git a/src/server/infra/mcp/tools/brv-curate-tool.ts b/src/server/infra/mcp/tools/brv-curate-tool.ts index f230ebd63..cc0e78411 100644 --- a/src/server/infra/mcp/tools/brv-curate-tool.ts +++ b/src/server/infra/mcp/tools/brv-curate-tool.ts @@ -12,6 +12,7 @@ import type {HtmlWriteError} from '../../render/writer/html-writer.js' import {CurateMetaSchema} from '../../../../shared/curate-meta.js' import {encodeCurateHtmlContent} from '../../../../shared/transport/curate-html-content.js' import {CURATE_SCHEMA_PROMPT} from '../../../core/domain/render/curate-prompt-builder.js' +import {buildLanguageClause} from '../../../core/domain/render/language-clause.js' import {TransportTaskEventNames} from '../../../core/domain/transport/schemas.js' import {appendDriftFooter} from './drift-footer.js' import {associateProjectWithRetry, type McpStartupProjectContext, resolveMcpTaskContext} from './mcp-project-context.js' @@ -48,6 +49,16 @@ const TOOL_DESCRIPTION = [ '- Inside `
  • `, write plain text only — no leading `-`, `*`, `•`, `1.`/`2.` markers; the renderer adds them via CSS.', '- `` body: emit directly with HTML entities for `<`, `>`, `&`. Do NOT wrap in `` — HTML5 parses CDATA as a bogus comment that the first `-->` closes. Example: `graph LR; A -->|x| B`.', '', + // Auto clause unconditional: the MCP tool description is built once at + // server-boot, so it cannot read live config. Per-call fixed-mode is + // honored via the oclif `brv curate` kickoff prompt (which IS dynamic). + // MCP-only consumers under `language: { mode: 'fixed' }` see the auto + // clause here; their input language still gets preserved because auto + // says "match the input language". + '# Language', + '', + buildLanguageClause(), + '', '# Path format', '- The `path` attribute on is `/` or `//`, snake_case segments.', '- Pick descriptive domain names (1-3 words). Reuse existing domains where they fit; avoid generic names like `misc`, `general`.', diff --git a/test/unit/infra/mcp/tools/brv-curate-tool.test.ts b/test/unit/infra/mcp/tools/brv-curate-tool.test.ts index f66bc6f2b..7fd41803d 100644 --- a/test/unit/infra/mcp/tools/brv-curate-tool.test.ts +++ b/test/unit/infra/mcp/tools/brv-curate-tool.test.ts @@ -298,6 +298,27 @@ describe('brv-curate-tool', () => { // dropping it would silently regress the Skill ↔ MCP output parity. expect(description).to.include('Place section titles INSIDE the container') }) + + it('embeds the auto-mode language-preservation clause', () => { + // MCP's TOOL_DESCRIPTION is built once at server-boot and cannot + // read live config, so it carries the auto clause unconditionally: + // "match the user's input language". Per-call fixed-mode is honored + // via the oclif `brv curate` kickoff prompt instead. 
+ // The schema-key carve-out (tag names / attribute names / enum values + // stay English) prevents the calling agent's LLM from translating + // `` for non-English input — which would fail Zod + // validation at the writer boundary. + const {getDescription} = setupHandler({ + getClient: () => createMockClient().client, + getWorkingDirectory: () => '/project/root', + }) + + const description = getDescription() + expect(description).to.include('# Language') + expect(description).to.include("Match the user's input language") + expect(description).to.include('tag names') + expect(description).to.include('enum values') + }) }) describe('dispatch — task type + payload', () => { diff --git a/test/unit/oclif/lib/curate-session.test.ts b/test/unit/oclif/lib/curate-session.test.ts index b0739df46..03c0f39f1 100644 --- a/test/unit/oclif/lib/curate-session.test.ts +++ b/test/unit/oclif/lib/curate-session.test.ts @@ -219,6 +219,25 @@ describe('curate-session', () => { const b = await kickoffSession({content: 'b', projectRoot}) expect(a.sessionId).to.not.equal(b.sessionId) }) + + it('threads `language` into the kickoff prompt (auto when omitted)', async () => { + // End-to-end threading proof: orchestrator → buildGeneratePrompt → + // buildLanguageClause. Auto when no language preference is set. + const env = await kickoffSession({content: 'x', projectRoot}) + expect(env.prompt).to.include("Match the user's input language") + }) + + it('threads `language` into the kickoff prompt (fixed-mode emits the mapped name)', async () => { + // A regression dropping the param in the orchestrator would surface + // here as the auto clause leaking into a fixed-mode kickoff. 
+ const env = await kickoffSession({ + content: 'x', + language: {code: 'ru', mode: 'fixed'}, + projectRoot, + }) + expect(env.prompt).to.include('in Russian') + expect(env.prompt).to.not.include("Match the user's input language") + }) }) // ─── continueSession — dispatch to daemon ──────────────────────────────────── diff --git a/test/unit/server/core/domain/render/curate-prompt-builder.test.ts b/test/unit/server/core/domain/render/curate-prompt-builder.test.ts index ff6216698..d51cdba6f 100644 --- a/test/unit/server/core/domain/render/curate-prompt-builder.test.ts +++ b/test/unit/server/core/domain/render/curate-prompt-builder.test.ts @@ -249,12 +249,49 @@ describe('curate-prompt-builder', () => { // Schema slice is ~2-3 KB; the surrounding prose adds ~1.5 KB // for explicit contract rules covering `
  • ` bullet prefixes, // `` CDATA, and `related` file-vs-folder routing; - // the user intent is bounded by the caller. Each rule prevents - // a distinct FE-breaking output class. Bumping the budget should - // be a deliberate decision, not a silent drift. + // the language clause adds ~340 chars; the user intent is bounded + // by the caller. Each rule prevents a distinct FE-breaking output + // class. Bumping the budget should be a deliberate decision, not a + // silent drift. const prompt = buildGeneratePrompt({userIntent: 'remember we use RS256'}) expect(prompt.length).to.be.lessThan(6144) }) + + it('emits a `# Language` section between path format and element vocabulary', () => { + // Section ordering matters: the language clause is part of the + // byterover-controlled framing that must commit BEFORE the + // element vocabulary (so the LLM authors `` body text in + // the configured language) and BEFORE the user-intent block (so + // a malicious intent can't shadow it). + const prompt = buildGeneratePrompt({userIntent: 'x'}) + const pathIdx = prompt.indexOf('# Path format') + const languageIdx = prompt.indexOf('# Language') + const schemaIdx = prompt.indexOf('# Element vocabulary') + + expect(languageIdx, 'language section present').to.be.greaterThan(-1) + expect(languageIdx, 'language section after path format').to.be.greaterThan(pathIdx) + expect(languageIdx, 'language section before element vocabulary').to.be.lessThan(schemaIdx) + }) + + it('emits the auto-mode clause when language is not provided', () => { + const prompt = buildGeneratePrompt({userIntent: 'x'}) + expect(prompt).to.include("Match the user's input language") + }) + + it('emits the auto-mode clause when language.mode is auto', () => { + const prompt = buildGeneratePrompt({language: {mode: 'auto'}, userIntent: 'x'}) + expect(prompt).to.include("Match the user's input language") + }) + + it('emits the fixed-mode clause with the mapped language name', () => { + // Threading proof — confirms 
`language` from options reaches + // buildLanguageClause. A regression dropping the param (e.g. a + // future destructuring miss in the orchestrator) would surface + // here as the auto clause leaking into fixed-mode prompts. + const prompt = buildGeneratePrompt({language: {code: 'ru', mode: 'fixed'}, userIntent: 'x'}) + expect(prompt).to.include('in Russian') + expect(prompt).to.not.include("Match the user's input language") + }) }) describe('buildCorrectionPrompt', () => { @@ -425,5 +462,44 @@ describe('curate-prompt-builder', () => { }) expect(prompt).to.not.include(CURATE_SCHEMA_PROMPT) }) + + it('emits a `# Language` section between output contract and errors', () => { + // Correction prompts can't drop the language clause — if the + // first attempt failed validation, the LLM may also have drifted + // off language. The clause reasserts the contract on every + // retry. + const prompt = buildCorrectionPrompt({ + errors: [{kind: 'missing-path-attribute', message: 'm'}], + previousHtml, + userIntent, + }) + const contractIdx = prompt.indexOf('# Output contract') + const languageIdx = prompt.indexOf('# Language') + const errorsIdx = prompt.indexOf('# Errors to fix') + + expect(languageIdx, 'language section present').to.be.greaterThan(-1) + expect(languageIdx, 'language after output contract').to.be.greaterThan(contractIdx) + expect(languageIdx, 'language before errors block').to.be.lessThan(errorsIdx) + }) + + it('emits the auto-mode clause when language is not provided', () => { + const prompt = buildCorrectionPrompt({ + errors: [{kind: 'missing-path-attribute', message: 'm'}], + previousHtml, + userIntent, + }) + expect(prompt).to.include("Match the user's input language") + }) + + it('emits the fixed-mode clause with the mapped language name', () => { + const prompt = buildCorrectionPrompt({ + errors: [{kind: 'missing-path-attribute', message: 'm'}], + language: {code: 'ru', mode: 'fixed'}, + previousHtml, + userIntent, + }) + expect(prompt).to.include('in 
Russian') + expect(prompt).to.not.include("Match the user's input language") + }) }) }) From 91e76be9608b7df43e34cfd5cb1000029b1a6c15 Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Tue, 26 May 2026 18:06:41 +0700 Subject: [PATCH 03/16] =?UTF-8?q?feat:=20[ENG-2689]=20CJK-aware=20BM25=20t?= =?UTF-8?q?okenizer=20=E2=80=94=20unblock=20Chinese=20/=20Japanese=20/=20K?= =?UTF-8?q?orean=20queries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a confirmed CJK-search bug in `search-knowledge-service.ts`'s BM25 index. MiniSearch 7.2.0's default tokenizer splits on `\p{Z}\p{P}` (whitespace + punctuation). CJK scripts have no whitespace, so `'认证系统使用JWT令牌'` tokenizes as a single token and a query for `'认证'` returns zero matches against indexed content. Empirical confirmation pre-fix: const ms = new MiniSearch({fields: ['t'], idField: 'id'}) ms.addAll([{id: 1, t: '认证系统使用JWT令牌'}]) ms.search('认证') // → [] — broken ms.search('Привет мир') // → matches as expected Without this fix, language preservation from ENG-2687 / ENG-2688 is invisible to CJK users: their curate output is correctly authored in Chinese / Japanese / Korean but their queries return zero matches. Approach: - New `cjk-tokenizer.ts` module. Algorithm: 1. Split on Unicode whitespace + punctuation (same as MiniSearch default). 2. For each token, split at CJK ↔ non-CJK script boundaries. 3. Non-CJK segments emit as-is (Latin / Cyrillic / Vietnamese / European behave byte-identical to the default). 4. CJK segments emit overlapping bigrams; single-char fallback to unigram. - CJK ranges: U+4E00–9FFF (Unified Ideographs), U+3040–309F (Hiragana), U+30A0–30FF (Katakana), U+AC00–D7AF (Hangul Syllables). - Bigrams are the standard CJK IR compromise — unigrams are too noisy (common chars like `的` dominate scoring), trigrams too sparse. - Wired via the top-level `tokenize` option on `MINISEARCH_OPTIONS`. 
Per the MiniSearch source (line 1564-1566), that single option applies at both index and query time when `searchOptions.tokenize` is unset. - No new runtime deps — `Intl.Segmenter` is available in Node 16+ but its CJK quality varies by ICU version; inline bigrams are deterministic across Node versions / platforms. INDEX_SCHEMA_VERSION bumped 6 → 7 so cached indexes built with the default tokenizer invalidate and rebuild on first daemon start. Tests (18 new): - 4 cases for non-CJK scripts (English, Russian, Vietnamese, punctuation) asserting byte-identical behavior to the MiniSearch default. - 5 cases for CJK scripts (Chinese 4-char bigrams, Chinese 2-char single bigram, Japanese kanji+kana, Korean Hangul, single-char unigram fallback). - 3 cases for mixed Latin+CJK tokens (whitespace-separated, no-whitespace split, multiple alternating runs). - 6 MiniSearch integration cases — the actual production wiring contract: Chinese / Japanese / Korean queries return matches; Russian regression; English regression; English query does NOT match CJK content (cross-script isolation). 251/251 across affected surfaces (18 new tokenizer + 58 existing search-knowledge + 175 prior PRs from ENG-2687/2688). Typecheck + lint clean; 9 pre-existing warnings on unrelated functions unchanged. --- .../tools/implementations/cjk-tokenizer.ts | 159 +++++++++++++++ .../search-knowledge-service.ts | 9 +- .../implementations/cjk-tokenizer.test.ts | 192 ++++++++++++++++++ 3 files changed, 359 insertions(+), 1 deletion(-) create mode 100644 src/agent/infra/tools/implementations/cjk-tokenizer.ts create mode 100644 test/unit/agent/infra/tools/implementations/cjk-tokenizer.test.ts diff --git a/src/agent/infra/tools/implementations/cjk-tokenizer.ts b/src/agent/infra/tools/implementations/cjk-tokenizer.ts new file mode 100644 index 000000000..8ecd77b73 --- /dev/null +++ b/src/agent/infra/tools/implementations/cjk-tokenizer.ts @@ -0,0 +1,159 @@ +/** + * BM25 tokenizer with CJK bigram segmentation. 
+ * + * MiniSearch 7.2.0's default tokenizer splits on `\p{Z}\p{P}` (Unicode + * whitespace + punctuation). Latin / Cyrillic / Vietnamese / European + * scripts use whitespace between words and tokenize correctly. CJK scripts + * do not — a sentence like `认证系统使用JWT令牌` becomes a single token, + * so a query for `认证` against indexed CJK content returns zero matches. + * + * Empirical confirmation before this fix (MiniSearch 7.2.0): + * + * const ms = new MiniSearch({fields: ['t'], idField: 'id'}) + * ms.addAll([{id: 1, t: '认证系统使用JWT令牌'}]) + * ms.search('认证') // → [] — broken + * ms.search('Привет мир') // → matches as expected + * + * This tokenizer preserves the default behavior for whitespace-separated + * scripts and adds overlapping-bigram segmentation for CJK runs. Mixed + * Latin+CJK tokens (e.g. `JWT令牌`) split at the script boundary so the + * Latin portion stays a real word token. + * + * Wired via the top-level `tokenize` option on MiniSearch — per the + * library docs and source (`MiniSearch.js:1564-1566`), that single option + * applies at both index and query time unless `searchOptions.tokenize` + * is set, which we leave unset. + */ + +/** + * Unicode ranges treated as CJK for the purposes of bigram segmentation. + * Anything outside these ranges is "non-CJK" and tokenizes by whitespace + * boundaries only. + * + * - `0x4E00–0x9FFF`: CJK Unified Ideographs (Chinese, Japanese kanji) + * - `0x3040–0x309F`: Hiragana + * - `0x30A0–0x30FF`: Katakana + * - `0xAC00–0xD7AF`: Hangul Syllables (Korean) + * + * CJK Extension A/B/C/… are deliberately excluded — they appear in academic + * / historical text but rarely in user content. If a user's corpus needs + * them, extend this list and bump `INDEX_SCHEMA_VERSION` in + * `search-knowledge-service.ts` so cached indexes invalidate. 
+ */ +const CJK_RANGES: ReadonlyArray = [ + [0x4E_00, 0x9F_FF], + [0x30_40, 0x30_9F], + [0x30_A0, 0x30_FF], + [0xAC_00, 0xD7_AF], +] + +function isCjkCodePoint(cp: number): boolean { + for (const [lo, hi] of CJK_RANGES) { + if (cp >= lo && cp <= hi) return true + } + + return false +} + +/** + * Whitespace + punctuation split, matching MiniSearch's default + * `SPACE_OR_PUNCTUATION` regex. Kept verbatim so a future upstream tweak + * is easy to spot via diff. + */ +const SPACE_OR_PUNCTUATION = /[\p{Z}\p{P}]+/u + +/** + * Split a token at boundaries between CJK and non-CJK runs. + * + * - `'JWT令牌'` → `['JWT', '令牌']` (script boundary at index 3) + * - `'认证'` → `['认证']` (single CJK run) + * - `'JWT'` → `['JWT']` (single non-CJK run) + */ +function splitAtCjkBoundary(token: string): string[] { + const segments: string[] = [] + let current = '' + let currentIsCjk: boolean | undefined + + // Iterate by code point so any future range extension into the + // supplementary plane handles surrogate pairs correctly. The current + // four ranges are all BMP, so `for...of` is equivalent to char-by-char + // here — but cheap to be correct. + for (const ch of token) { + const cp = ch.codePointAt(0) + if (cp === undefined) continue + const charIsCjk = isCjkCodePoint(cp) + + if (currentIsCjk === undefined) { + current = ch + currentIsCjk = charIsCjk + } else if (charIsCjk === currentIsCjk) { + current += ch + } else { + segments.push(current) + current = ch + currentIsCjk = charIsCjk + } + } + + if (current.length > 0) segments.push(current) + + return segments +} + +/** + * Emit overlapping bigrams for a CJK run. + * + * - `'认证系统'` (4 chars) → `['认证', '证系', '系统']` + * - `'认证'` (2 chars) → `['认证']` + * - `'认'` (1 char) → `['认']` (unigram fallback so single-char tokens are searchable) + * + * Bigrams are the standard CJK IR compromise: unigrams are too noisy + * (common chars like `的` dominate scoring), trigrams are too sparse + * (miss 2-character compound matches). 
+ */ +function cjkBigrams(run: string): string[] { + const chars = [...run] + if (chars.length <= 1) return chars + + const grams: string[] = [] + for (let i = 0; i < chars.length - 1; i++) { + grams.push(chars[i] + chars[i + 1]) + } + + return grams +} + +/** + * Tokenize text for BM25 indexing and querying. + * + * Algorithm: + * 1. Split on Unicode whitespace + punctuation (matches MiniSearch default). + * 2. For each resulting token, split at CJK ↔ non-CJK script boundaries. + * 3. For non-CJK segments, emit the segment as-is. + * 4. For CJK segments, emit overlapping bigrams. + * + * The result is the union — Latin / Cyrillic / Vietnamese behave exactly + * as the MiniSearch default, while CJK runs become searchable. + */ +export function tokenizeWithCjk(text: string): string[] { + const out: string[] = [] + + for (const wsToken of text.split(SPACE_OR_PUNCTUATION)) { + if (wsToken.length === 0) continue + + for (const segment of splitAtCjkBoundary(wsToken)) { + if (segment.length === 0) continue + + // `splitAtCjkBoundary` returns single-script segments, so the + // first code point's classification applies to the whole segment. 
+ const firstCp = segment.codePointAt(0) + if (firstCp !== undefined && isCjkCodePoint(firstCp)) { + out.push(...cjkBigrams(segment)) + } else { + out.push(segment) + } + } + } + + return out +} diff --git a/src/agent/infra/tools/implementations/search-knowledge-service.ts b/src/agent/infra/tools/implementations/search-knowledge-service.ts index 735718966..66c04f946 100644 --- a/src/agent/infra/tools/implementations/search-knowledge-service.ts +++ b/src/agent/infra/tools/implementations/search-knowledge-service.ts @@ -36,6 +36,7 @@ import { import {getFormatForRead} from '../../../../server/infra/render/format/format-detector.js' import {ElementAxisIndex} from '../../../../server/infra/render/reader/element-axis-index.js' import {readHtmlTopicSync} from '../../../../server/infra/render/reader/html-reader.js' +import {tokenizeWithCjk} from './cjk-tokenizer.js' import {isPathLikeQuery, matchMemoryPath, parseSymbolicQuery} from './memory-path-matcher.js' import { buildReferenceIndex, @@ -53,7 +54,7 @@ const MAX_CONTEXT_TREE_FILES = 10_000 const DEFAULT_CACHE_TTL_MS = 5000 /** Bump when MINISEARCH_OPTIONS fields/boost change to invalidate cached indexes */ -const INDEX_SCHEMA_VERSION = 6 +const INDEX_SCHEMA_VERSION = 7 /** Only include results whose normalized score is at least this fraction of the top result's score */ const SCORE_GAP_RATIO = 0.7 @@ -171,6 +172,12 @@ const MINISEARCH_OPTIONS = { prefix: true, }, storeFields: ['title', 'path'] as string[], + // Custom tokenizer adds CJK bigram segmentation alongside the default + // whitespace split. Without it, queries against Chinese / Japanese / + // Korean content return zero matches even when the content is curated + // correctly — see `cjk-tokenizer.ts`. Top-level `tokenize` applies to + // both indexing and querying per MiniSearch's API. 
+ tokenize: tokenizeWithCjk, } interface IndexedDocument { diff --git a/test/unit/agent/infra/tools/implementations/cjk-tokenizer.test.ts b/test/unit/agent/infra/tools/implementations/cjk-tokenizer.test.ts new file mode 100644 index 000000000..fe3005920 --- /dev/null +++ b/test/unit/agent/infra/tools/implementations/cjk-tokenizer.test.ts @@ -0,0 +1,192 @@ +/** + * Tests for `tokenizeWithCjk` — the BM25 tokenizer that fixes MiniSearch's + * CJK blind spot. + * + * Whitespace-separated scripts (Latin, Cyrillic, Vietnamese, …) must + * tokenize byte-identical to the MiniSearch default; CJK runs must emit + * overlapping bigrams; mixed Latin+CJK tokens must split at the script + * boundary so the Latin portion stays a real word token. The integration + * block at the end exercises the wired-up MiniSearch contract — the CJK + * gate — and confirms English scoring is preserved. + */ + +import {expect} from 'chai' +import MiniSearch from 'minisearch' + +import {tokenizeWithCjk} from '../../../../../../src/agent/infra/tools/implementations/cjk-tokenizer.js' + +function buildMiniSearchIndex(docs: Array<{id: number; t: string}>): MiniSearch { + const ms = new MiniSearch({ + fields: ['t'], + idField: 'id', + tokenize: tokenizeWithCjk, + }) + ms.addAll(docs) + return ms +} + +describe('cjk-tokenizer', () => { + describe('tokenizeWithCjk — non-CJK scripts behave like the MiniSearch default', () => { + it('English: splits on whitespace, preserves word tokens verbatim', () => { + expect(tokenizeWithCjk('Hello world JWT auth')).to.deep.equal([ + 'Hello', 'world', 'JWT', 'auth', + ]) + }) + + it('Russian (Cyrillic): preserves whitespace tokenization, no CJK side effects', () => { + expect(tokenizeWithCjk('Привет мир программирования')).to.deep.equal([ + 'Привет', 'мир', 'программирования', + ]) + }) + + it('Vietnamese (Latin-non-English): diacritics survive intact', () => { + // The proof point that LLM-in-call detection beats a Unicode-block + // heuristic — Vietnamese is Latin 
script and tokenizes via whitespace + // just like English. Diacritics are part of the word, not separators. + expect(tokenizeWithCjk('Cách triển khai xác thực')).to.deep.equal([ + 'Cách', 'triển', 'khai', 'xác', 'thực', + ]) + }) + + it('punctuation acts as a separator (matches MiniSearch default)', () => { + // Default MiniSearch splits on `\p{Z}\p{P}+`; commas, periods, parens + // all become token boundaries. + expect(tokenizeWithCjk('one, two; three.')).to.deep.equal(['one', 'two', 'three']) + }) + }) + + describe('tokenizeWithCjk — CJK scripts emit overlapping bigrams', () => { + it('Chinese: 4-character run → 3 overlapping bigrams', () => { + expect(tokenizeWithCjk('认证系统')).to.deep.equal(['认证', '证系', '系统']) + }) + + it('Chinese: 2-character run → single bigram (the whole token)', () => { + expect(tokenizeWithCjk('认证')).to.deep.equal(['认证']) + }) + + it('Japanese: kanji + katakana both segmented as CJK', () => { + // `認証システム` contains both kanji (`認証`) and katakana + // (`システム`). The tokenizer treats them as a single CJK run since + // both ranges are CJK-classified, producing overlapping bigrams + // across the whole string. + const tokens = tokenizeWithCjk('認証システム') + expect(tokens).to.deep.include('認証') + expect(tokens).to.deep.include('証シ') + expect(tokens).to.deep.include('シス') + expect(tokens).to.deep.include('ステ') + expect(tokens).to.deep.include('テム') + }) + + it('Korean (Hangul Syllables): segmented into bigrams', () => { + // Whitespace-separated Korean tokens still bigram within each token. + // `'인증 시스템'` → `'인증'` (single bigram == whole token) plus + // bigrams of `'시스템'` (`'시스'`, `'스템'`). + const tokens = tokenizeWithCjk('인증 시스템') + expect(tokens).to.deep.include('인증') + expect(tokens).to.deep.include('시스') + expect(tokens).to.deep.include('스템') + }) + + it('single-character CJK input falls back to unigram', () => { + // Edge case for BM25 — a lone character has no bigram, but should + // still be searchable as itself. 
The unigram fallback prevents the + // tokenizer from emitting an empty array (which MiniSearch would + // interpret as "this document has no content for this field"). + expect(tokenizeWithCjk('认')).to.deep.equal(['认']) + }) + }) + + describe('tokenizeWithCjk — mixed Latin + CJK tokens split at the script boundary', () => { + it('whitespace-separated Latin and CJK tokens stay independent', () => { + // `'JWT 令牌'` is already two whitespace-separated tokens. Latin + // stays Latin, the 2-char CJK run emits one bigram (the whole thing). + expect(tokenizeWithCjk('JWT 令牌')).to.deep.equal(['JWT', '令牌']) + }) + + it('no-whitespace mixed token splits at the script boundary', () => { + // `'JWT令牌'` has no whitespace — but the script boundary between + // 'T' (Latin) and '令' (CJK) is still a token boundary. Otherwise + // the Latin portion would get lost in a CJK bigram smear. + expect(tokenizeWithCjk('JWT令牌')).to.deep.equal(['JWT', '令牌']) + }) + + it('multiple boundaries in one token: alternating Latin/CJK runs', () => { + // `'API请求JSON响应'` → Latin/CJK/Latin/CJK boundaries. + // Each non-CJK run stays as one token; each CJK run emits bigrams. + expect(tokenizeWithCjk('API请求JSON响应')).to.deep.equal([ + 'API', + '请求', + 'JSON', + '响应', + ]) + }) + }) + + describe('MiniSearch integration — the CJK gate', () => { + // The unit tests above lock the tokenizer's input/output contract. + // These integration tests prove the contract holds when the tokenizer + // is wired into a real MiniSearch instance — what + // `search-knowledge-service.ts:MINISEARCH_OPTIONS` does in production. + + it('Chinese query matches Chinese content (was broken before this fix)', () => { + // The motivating test. Pre-fix: empirical run returned [] because + // `'认证系统使用JWT令牌'` tokenized as a single token under the + // MiniSearch default. With the bigram tokenizer, the query `'认证'` + // tokenizes to ['认证'] and finds doc 1's `'认证'` bigram. 
+ const ms = buildMiniSearchIndex([ + {id: 1, t: '认证系统使用JWT令牌'}, + {id: 2, t: 'JWT auth tokens'}, + ]) + const results = ms.search('认证') + expect(results.length, 'Chinese query returns at least one match').to.be.greaterThan(0) + expect(results[0].id).to.equal(1) + }) + + it('Japanese query matches Japanese content', () => { + const ms = buildMiniSearchIndex([{id: 1, t: '認証システムはJWTトークンを使用'}]) + const results = ms.search('認証') + expect(results.length).to.be.greaterThan(0) + }) + + it('Korean query matches Korean content', () => { + const ms = buildMiniSearchIndex([{id: 1, t: '인증 시스템은 JWT 토큰을 사용합니다'}]) + const results = ms.search('인증') + expect(results.length).to.be.greaterThan(0) + }) + + it('Russian query matches Russian content (regression, was working pre-fix)', () => { + // Cyrillic is whitespace-separated → the default tokenizer already + // handled it. Locking the regression so a future tokenizer rewrite + // doesn't accidentally break a script that used to work. + const ms = buildMiniSearchIndex([{id: 1, t: 'Привет мир программирования'}]) + const results = ms.search('программирования') + expect(results.length).to.be.greaterThan(0) + }) + + it('English query against English content returns the expected match', () => { + // Sanity check: the Latin path is byte-identical to the default + // MiniSearch behavior, so the existing BM25 ranking story is + // preserved end-to-end. + const ms = buildMiniSearchIndex([ + {id: 1, t: 'JWT authentication tokens'}, + {id: 2, t: 'session cookies and CSRF'}, + ]) + const results = ms.search('JWT') + expect(results.length).to.equal(1) + expect(results[0].id).to.equal(1) + }) + + it('English query does NOT match unrelated CJK content', () => { + // Cross-script isolation: a CJK doc shouldn't drag into English + // queries (and vice versa). The bigram tokenization is opaque to + // Latin queries; no false positives leak across scripts. 
+ const ms = buildMiniSearchIndex([ + {id: 1, t: '认证系统'}, + {id: 2, t: 'JWT authentication'}, + ]) + const englishResults = ms.search('JWT') + expect(englishResults.length).to.equal(1) + expect(englishResults[0].id).to.equal(2) + }) + }) +}) From dbe72c0092d546750191b229bb5e2c91805c2100 Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Tue, 26 May 2026 20:21:05 +0700 Subject: [PATCH 04/16] =?UTF-8?q?test:=20[ENG-2690]=20language=20selection?= =?UTF-8?q?=20validation=20=E2=80=94=20end-to-end=20roundtrip=20integratio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validates the language-selection feature (ENG-2687 → ENG-2689) end-to-end across the four target non-English scripts (Russian / Vietnamese / Chinese / Japanese) plus the auto-mode default. Adds `test/integration/scenarios/language-roundtrip.test.ts` (8 cases) — walks the full pipeline a real user hits: `.brv/config.json` on disk → `ProjectConfigStore.read()` → `BrvConfig.language` → `kickoffSession()` → the kickoff prompt envelope the calling agent's LLM consumes. Proves no layer in the threading regresses for any target language. Coverage: - Auto mode (default, no language field) → auto clause present - Auto mode (explicit `mode: auto`) → auto clause present - Fixed-RU → "in Russian" in the prompt; auto clause absent - Fixed-VI → "in Vietnamese" - Fixed-ZH → "in Chinese" - Fixed-JA → "in Japanese" - Schema rejection at load: fixed mode without code throws via fromJson - Forward-compat: unmapped code (`xx`) → "in \"xx\"" fallback Out of scope and documented in the validation report (research repo, features/language-selection/validation/04-validation.md): LLM-honoring of the clause requires a real consumer (Claude Code, Cursor) and is verified manually pre-release. The auto-test harness in local-auto-test also exercises curate + query roundtrip across all four scripts via real `brv` CLI invocations — 13/13 cases green; covered in the report. 
Test counts after this commit: 326 total green across the feature (54 BrvConfig + 175 prompt/tool + 18 tokenizer + 58 search-knowledge regression + 8 new language-roundtrip + 13 auto-test harness). --- .../scenarios/language-roundtrip.test.ts | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 test/integration/scenarios/language-roundtrip.test.ts diff --git a/test/integration/scenarios/language-roundtrip.test.ts b/test/integration/scenarios/language-roundtrip.test.ts new file mode 100644 index 000000000..26ef91741 --- /dev/null +++ b/test/integration/scenarios/language-roundtrip.test.ts @@ -0,0 +1,147 @@ +/** + * Full-pipeline integration test for the language-selection feature. + * + * Walks `.brv/config.json` on disk → `ProjectConfigStore.read()` → + * `BrvConfig.language` → `kickoffSession()` → the kickoff prompt envelope + * the calling agent's LLM consumes. Proves the threading hasn't broken + * at any layer for the four target non-English scripts (Russian / + * Vietnamese / Chinese / Japanese) plus the default auto-mode. + * + * Unit tests cover each layer in isolation: + * - `language-clause.test.ts` — clause text emission + * - `brv-config.test.ts` — schema round-trip + * - `curate-prompt-builder.test.ts` — clause appears in the prompt + * - `curate-session.test.ts` — orchestrator threading + * + * This file proves the layers compose end-to-end against a real config + * file on disk — the scenario a real user hits when they run + * `brv config set language.code ` (commit 05) and then `brv curate`. + * + * Out of scope: actual LLM-honoring of the clause. That requires a real + * calling agent (Claude Code, Cursor) and is validated manually pre-release. + * The on-the-wire prompt content is what we can test deterministically here. 
+ */ + +import {expect} from 'chai' +import {existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync} from 'node:fs' +import {tmpdir} from 'node:os' +import {join} from 'node:path' + +import {kickoffSession} from '../../../src/oclif/lib/curate-session.js' +import {BRV_CONFIG_VERSION, BRV_DIR, PROJECT_CONFIG_FILE} from '../../../src/server/constants.js' +import {ProjectConfigStore} from '../../../src/server/infra/config/file-config-store.js' + +describe('language-roundtrip — config file → BrvConfig → kickoff prompt', () => { + let projectRoot: string + + beforeEach(() => { + projectRoot = mkdtempSync(join(tmpdir(), 'lang-roundtrip-')) + mkdirSync(join(projectRoot, BRV_DIR), {recursive: true}) + }) + + afterEach(() => { + if (existsSync(projectRoot)) rmSync(projectRoot, {force: true, recursive: true}) + }) + + function writeProjectConfig(language?: {code?: string; mode: 'auto' | 'fixed'}): void { + const config = { + createdAt: '2026-05-26T00:00:00.000Z', + cwd: projectRoot, + ...(language !== undefined && {language}), + version: BRV_CONFIG_VERSION, + } + writeFileSync(join(projectRoot, BRV_DIR, PROJECT_CONFIG_FILE), JSON.stringify(config, undefined, 2), 'utf8') + } + + async function kickoffWithProjectConfig(): ReturnType { + const config = await new ProjectConfigStore().read(projectRoot) + return kickoffSession({content: 'remember X', language: config?.language, projectRoot}) + } + + describe('auto mode (default)', () => { + it('default config without a language field emits the auto clause', async () => { + writeProjectConfig() + const envelope = await kickoffWithProjectConfig() + // Match the user's input language — the auto wording from language-clause.ts. + // This is the modal path: every existing `.brv/config.json` predates the + // feature, so the default must be a no-op for English users and a graceful + // pass-through for non-English users. 
+ expect(envelope.prompt).to.include("Match the user's input language") + }) + + it('explicit `mode: auto` config emits the auto clause', async () => { + writeProjectConfig({mode: 'auto'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include("Match the user's input language") + }) + }) + + describe('fixed mode — clause names the user-configured language', () => { + it('Russian (Cyrillic) — `code: ru` emits "in Russian"', async () => { + // The #616 reporter is a Russian user. This is the load-bearing path + // for closing the issue end-to-end: a real config on disk, read via + // the real loader, threaded through the real orchestrator, lands in + // the prompt the calling agent's LLM sees. + writeProjectConfig({code: 'ru', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Russian') + expect(envelope.prompt).to.not.include("Match the user's input language") + }) + + it('Vietnamese (Latin-non-English) — `code: vi` emits "in Vietnamese"', async () => { + // The proof point for LLM-in-call detection beating a Unicode-block + // heuristic. Vietnamese is Latin script with diacritics, indistinguishable + // from English by code-range alone. + writeProjectConfig({code: 'vi', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Vietnamese') + }) + + it('Chinese (CJK kanji) — `code: zh` emits "in Chinese"', async () => { + // CJK kanji — ENG-2689's tokenizer fix makes the search side searchable + // for content authored under this clause. This test is the curate-side + // equivalent: the calling agent's prompt explicitly names Chinese. + writeProjectConfig({code: 'zh', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Chinese') + }) + + it('Japanese (CJK kanji + kana) — `code: ja` emits "in Japanese"', async () => { + // Second CJK script. 
Hiragana / Katakana / Kanji all share the same + // bigram tokenization rules from ENG-2689 and the same clause naming here. + writeProjectConfig({code: 'ja', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Japanese') + }) + }) + + describe('schema rejection at load time', () => { + it('fixed mode without code is rejected by fromJson — the load throws', async () => { + // `mode: 'fixed'` without `code` would silently fall back to English at + // prompt time. `isBrvConfigJson` rejects it at load so the failure mode + // is structurally impossible. Confirm the loader still throws end-to-end + // (not just at the unit-test level). + writeProjectConfig({mode: 'fixed'}) + let threwAtLoadTime = false + try { + await new ProjectConfigStore().read(projectRoot) + } catch { + threwAtLoadTime = true + } + + expect(threwAtLoadTime, 'ProjectConfigStore.read rejects fixed-without-code').to.equal(true) + }) + }) + + describe('unknown ISO code degrades gracefully', () => { + it('unmapped code (`xx`) emits the fixed clause with the raw code in quotes', async () => { + // Forward-compat path. A future ISO code we haven't mapped yet must + // still produce a usable clause (`in "xx"`) rather than blowing up. + // This is the runtime-side counterpart to the loader's strict-validation + // contract. + writeProjectConfig({code: 'xx', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in "xx"') + }) + }) +}) From 0163bbcafb464e8220d7e5aca8f860b2aa59c03c Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Tue, 26 May 2026 20:48:27 +0700 Subject: [PATCH 05/16] feat: [ENG-2691] brv config set/get + language-selection release notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final commit of the language-selection initiative (#616). Ships the user-facing CLI for language preference and the release notes that close the issue end-to-end. 
New `brv config set <key> <value>` and `brv config get <key>` commands — generic project-config infrastructure (not a one-off `brv language` command). Today's keys: `language.mode` and `language.code`; future project-config keys plug into the `SETTERS` / `GETTERS` dispatch with no new oclif surface area. CLI behavior: - `brv config set language.mode auto | fixed` — reject `fixed` when no `language.code` is set, with a redirect message pointing at `brv config set language.code <code>`. Prevents writing a config that `isBrvConfigJson` would refuse on next load. - `brv config set language.code <code>` — validates against the `LANGUAGE_NAMES` map (24 entries, no `iso-639-1` dependency). Unknown codes rejected with a sorted supported-list error. - `brv config get language.<mode|code>` — symmetric reader; returns "(not set)" for absent fields, or the value (in text or JSON mode). Other: - New `BrvConfig.withLanguage(language?)` method following the existing `with*` pattern. Used by the set command's setters; without it, a caller would have to spread BrvConfig fields by hand. - Pure dispatcher functions (`applyConfigSet`, `applyConfigGet`) separated from the oclif Command class so the validation contract is testable in isolation. Release notes under [Unreleased] in CHANGELOG.md crediting Dmitriy K and including the **restoration recipe** for users who prefer the prior implicit-English behavior: brv config set language.code en brv config set language.mode fixed Tests (22 new): - 12 cases on `applyConfigSet` covering: auto / fixed / mode transitions, code update preserves mode, English restoration recipe, rejection paths (fixed-without-code, unknown mode value, unknown ISO code, unknown config key, totally unrelated key). - 5 cases on `applyConfigGet`: unset → undefined, both modes, both keys, unknown-key rejection. - 5 cases on `withLanguage`: replace, set-when-unset, clear via undefined, no mutation of original, all-other-fields-preserved.
Ship gate: - Typecheck + lint clean (pre-existing complexity warning unchanged). - 242 mocha tests green across the affected surfaces (BrvConfig + config CLI + prompt builders + clause module + curate-session + brv-curate-tool + CJK tokenizer + language roundtrip integration + validate-brv-config init hook). - Auto-test harness 13/13 green after rebuild — including the 4 cross-language curate→query roundtrips and the 7 original English cases (zero structural drift). - Manual CLI smoke test: full restoration recipe roundtrip + unknown-code rejection. Post-merge action: post on #616 with feature summary, link to release notes, and pointer to backlog.md so Dmitriy can comment on what to prioritize next (per-curate --lang flag, TUI panel, per-domain overrides). Don't auto-close — let Dmitriy close after confirming the feature works for him. --- CHANGELOG.md | 10 ++ src/oclif/commands/config/get.ts | 95 ++++++++++ src/oclif/commands/config/set.ts | 169 ++++++++++++++++++ src/server/core/domain/entities/brv-config.ts | 26 +++ .../core/domain/entities/brv-config.test.ts | 46 +++++ test/unit/oclif/commands/config/get.test.ts | 61 +++++++ test/unit/oclif/commands/config/set.test.ts | 158 ++++++++++++++++ 7 files changed, 565 insertions(+) create mode 100644 src/oclif/commands/config/get.ts create mode 100644 src/oclif/commands/config/set.ts create mode 100644 test/unit/oclif/commands/config/get.test.ts create mode 100644 test/unit/oclif/commands/config/set.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 056ad246f..3d98adb38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable user-facing changes to ByteRover CLI will be documented in this file. 
+## [Unreleased] + +### Added +- **ByteRover preserves your input language by default.** When you curate context in Russian, Chinese, Japanese, Vietnamese, or any other language, the calling agent's LLM is now instructed to author body text in the same language (the schema — tag names, attribute names, enum values, paths — stays English so tooling is unaffected). Configure with the new `brv config set` command: + - `brv config set language.mode auto` — match the user's input language (default). + - `brv config set language.mode fixed` + `brv config set language.code ` — force a specific language. ISO 639-1 codes accepted: `ar`, `de`, `el`, `en`, `es`, `fi`, `fr`, `he`, `hi`, `id`, `it`, `ja`, `ko`, `nl`, `no`, `pl`, `pt`, `ru`, `sv`, `th`, `tr`, `uk`, `vi`, `zh`. + - `brv config get language.mode` / `brv config get language.code` — read back the current setting. + + CJK queries (Chinese, Japanese, Korean) are now searchable in BM25 — the tokenizer was previously whitespace-only and treated entire CJK sentences as one token. **Restoration recipe** for users who prefer the prior implicit-English behavior: `brv config set language.code en` then `brv config set language.mode fixed`. Reported by Dmitriy K — thanks for the thorough reproduction in [#616](https://github.com/campfirein/byterover-cli/issues/616). + ## [3.16.0] ### Added diff --git a/src/oclif/commands/config/get.ts b/src/oclif/commands/config/get.ts new file mode 100644 index 000000000..d83240410 --- /dev/null +++ b/src/oclif/commands/config/get.ts @@ -0,0 +1,95 @@ +import {Args, Command, Flags} from '@oclif/core' + +import type {BrvConfig} from '../../../server/core/domain/entities/brv-config.js' + +import {ProjectConfigStore} from '../../../server/infra/config/file-config-store.js' +import {resolveProjectRoot} from '../../lib/curate-session.js' +import {writeJsonResponse} from '../../lib/json-response.js' + +/** + * `brv config get ` — read one field from `.brv/config.json`. 
+ * + * Returns the stored value, or "(not set)" when the field is absent. Keyed + * by the same string map as `config set` so symmetry is preserved. + */ +export default class ConfigGet extends Command { + public static args = { + key: Args.string({description: 'Project config key (e.g. language.mode, language.code)', required: true}), + } + public static description = 'Read a project configuration value from .brv/config.json' + public static examples = [ + '<%= config.bin %> <%= command.id %> language.mode', + '<%= config.bin %> <%= command.id %> language.code', + '<%= config.bin %> <%= command.id %> language.mode --format json', + ] + public static flags = { + format: Flags.string({ + default: 'text', + description: 'Output format (text or json)', + options: ['text', 'json'], + }), + } + + public async run(): Promise { + const {args, flags} = await this.parse(ConfigGet) + const format = flags.format as 'json' | 'text' + + const projectRoot = resolveProjectRoot() + const config = await new ProjectConfigStore().read(projectRoot) + + if (config === undefined) { + this.fail(format, 'no-config', `No .brv/config.json found at ${projectRoot}.`) + return + } + + const result = applyConfigGet(config, args.key) + if (result.kind === 'error') { + this.fail(format, result.code, result.message) + return + } + + if (format === 'json') { + writeJsonResponse({command: 'config get', data: {key: args.key, value: result.value}, success: true}) + } else { + this.log(result.value ?? 
'(not set)') + } + } + + private fail(format: 'json' | 'text', code: string, message: string): void { + process.exitCode = 1 + if (format === 'json') { + writeJsonResponse({command: 'config get', data: {error: {code, message}}, success: false}) + } else { + this.log(message) + } + } +} + +export type ConfigGetResult = + | {readonly code: string; readonly kind: 'error'; readonly message: string} + | {readonly kind: 'ok'; readonly value: string | undefined} + +type ConfigGetter = (config: BrvConfig) => string | undefined + +const GETTERS: Record = { + 'language.code': (config) => config.language?.code, + 'language.mode': (config) => config.language?.mode, +} + +/** + * Pure dispatcher mirroring `applyConfigSet` so the CLI and unit tests + * share one read-side path. + */ +export function applyConfigGet(config: BrvConfig, key: string): ConfigGetResult { + const getter = GETTERS[key] + if (getter === undefined) { + const supported = Object.keys(GETTERS).sort().join(', ') + return { + code: 'unknown-key', + kind: 'error', + message: `Unknown config key '${key}'. Supported keys: ${supported}.`, + } + } + + return {kind: 'ok', value: getter(config)} +} diff --git a/src/oclif/commands/config/set.ts b/src/oclif/commands/config/set.ts new file mode 100644 index 000000000..26e3d9c04 --- /dev/null +++ b/src/oclif/commands/config/set.ts @@ -0,0 +1,169 @@ +import {Args, Command, Flags} from '@oclif/core' + +import type {BrvConfig, BrvConfigLanguage} from '../../../server/core/domain/entities/brv-config.js' + +import {LANGUAGE_NAMES} from '../../../server/core/domain/render/language-clause.js' +import {ProjectConfigStore} from '../../../server/infra/config/file-config-store.js' +import {resolveProjectRoot} from '../../lib/curate-session.js' +import {writeJsonResponse} from '../../lib/json-response.js' + +/** + * `brv config set ` — mutate one field in `.brv/config.json`. 
+ * + * Today only the language-selection keys are handled (`language.mode` and + * `language.code`); the dispatcher is keyed by string so adding the next + * project-config key is a one-line addition to `SETTERS`. + * + * Daemon-side runtime settings (`agentPool.maxSize`, `llm.iterationBudgetMs`, + * etc.) live behind `brv settings set` instead — those are mutable at + * runtime via transport events. Project config is a flat-file mutation; + * there is no daemon involvement. + */ +export default class ConfigSet extends Command { + public static args = { + key: Args.string({description: 'Project config key (e.g. language.mode, language.code)', required: true}), + value: Args.string({description: 'New value', required: true}), + } + public static description = 'Set a project configuration value in .brv/config.json' + public static examples = [ + '# Force the calling agent\'s LLM to author in Russian on every curate', + '<%= config.bin %> <%= command.id %> language.code ru', + '<%= config.bin %> <%= command.id %> language.mode fixed', + '', + '# Restore auto-detect (the default — match the user\'s input language)', + '<%= config.bin %> <%= command.id %> language.mode auto', + '', + '# Read in JSON for scripting', + '<%= config.bin %> <%= command.id %> language.code ja --format json', + ] + public static flags = { + format: Flags.string({ + default: 'text', + description: 'Output format (text or json)', + options: ['text', 'json'], + }), + } + + public async run(): Promise { + const {args, flags} = await this.parse(ConfigSet) + const format = flags.format as 'json' | 'text' + + const projectRoot = resolveProjectRoot() + const store = new ProjectConfigStore() + const current = await store.read(projectRoot) + + if (current === undefined) { + this.fail( + format, + 'no-config', + `No .brv/config.json found at ${projectRoot}. 
Run \`brv init\` (or any \`brv\` command in this project) to create one.`, + ) + return + } + + const result = applyConfigSet(current, args.key, args.value) + if (result.kind === 'error') { + this.fail(format, result.code, result.message) + return + } + + await store.write(result.config, projectRoot) + this.success(format, args.key, args.value) + } + + private fail(format: 'json' | 'text', code: string, message: string): void { + process.exitCode = 1 + if (format === 'json') { + writeJsonResponse({command: 'config set', data: {error: {code, message}}, success: false}) + } else { + this.log(message) + } + } + + private success(format: 'json' | 'text', key: string, value: string): void { + if (format === 'json') { + writeJsonResponse({command: 'config set', data: {key, value}, success: true}) + } else { + this.log(`Setting saved: ${key} = ${value}.`) + } + } +} + +export type ConfigSetResult = + | {readonly code: string; readonly kind: 'error'; readonly message: string} + | {readonly config: BrvConfig; readonly kind: 'ok'} + +type ConfigSetter = (config: BrvConfig, value: string) => ConfigSetResult + +const SETTERS: Record = { + 'language.code': setLanguageCode, + 'language.mode': setLanguageMode, +} + +/** + * Dispatch a ` ` set onto a loaded BrvConfig. Pure function so + * the CLI command and the unit tests share one validation path — no + * filesystem or oclif coupling here. + */ +export function applyConfigSet(config: BrvConfig, key: string, value: string): ConfigSetResult { + const setter = SETTERS[key] + if (setter === undefined) { + const supported = Object.keys(SETTERS).sort().join(', ') + return { + code: 'unknown-key', + kind: 'error', + message: `Unknown config key '${key}'. 
Supported keys: ${supported}.`, + } + } + + return setter(config, value) +} + +function setLanguageMode(config: BrvConfig, value: string): ConfigSetResult { + if (value !== 'auto' && value !== 'fixed') { + return { + code: 'invalid-value', + kind: 'error', + message: `language.mode must be 'auto' or 'fixed', got '${value}'.`, + } + } + + // Reject `fixed` without a code so the on-disk config can never reach an + // invalid intermediate state (`{mode: 'fixed'}` would be rejected by + // `isBrvConfigJson` on next load). Point the user at the unblocking step. + if (value === 'fixed' && config.language?.code === undefined) { + return { + code: 'missing-language-code', + kind: 'error', + message: + 'language.mode \'fixed\' requires language.code to be set first. Run: brv config set language.code ', + } + } + + const next: BrvConfigLanguage = + value === 'fixed' + ? {code: config.language!.code!, mode: 'fixed'} + : config.language?.code === undefined + ? {mode: 'auto'} + : {code: config.language.code, mode: 'auto'} + + return {config: config.withLanguage(next), kind: 'ok'} +} + +function setLanguageCode(config: BrvConfig, code: string): ConfigSetResult { + if (!(code in LANGUAGE_NAMES)) { + const supported = Object.keys(LANGUAGE_NAMES).sort().join(', ') + return { + code: 'unknown-iso-code', + kind: 'error', + message: `Unknown ISO 639-1 code '${code}'. Supported codes: ${supported}.`, + } + } + + // Preserve mode if already set; default to auto when language is being + // initialized for the first time. The combination `{mode: 'auto', code}` + // is intentional — code is vestigial in auto mode but harmless, and + // makes the eventual `set language.mode fixed` a no-roundtrip activation. + const mode = config.language?.mode ?? 
'auto' + return {config: config.withLanguage({code, mode}), kind: 'ok'} +} diff --git a/src/server/core/domain/entities/brv-config.ts b/src/server/core/domain/entities/brv-config.ts index 091885228..5ccd35a46 100644 --- a/src/server/core/domain/entities/brv-config.ts +++ b/src/server/core/domain/entities/brv-config.ts @@ -263,6 +263,32 @@ export class BrvConfig { } } + /** + * Creates a new BrvConfig with the language preference replaced + * (or cleared via `undefined`), preserving all other fields. + * + * Used by `brv config set language.*` to mutate the per-project + * language preference without re-instantiating fields by hand. + */ + public withLanguage(language?: BrvConfigLanguage): BrvConfig { + return new BrvConfig({ + chatLogPath: this.chatLogPath, + cipherAgentContext: this.cipherAgentContext, + cipherAgentModes: this.cipherAgentModes, + cipherAgentSystemPrompt: this.cipherAgentSystemPrompt, + createdAt: this.createdAt, + cwd: this.cwd, + ide: this.ide, + language, + reviewDisabled: this.reviewDisabled, + spaceId: this.spaceId, + spaceName: this.spaceName, + teamId: this.teamId, + teamName: this.teamName, + version: this.version, + }) + } + /** * Creates a new BrvConfig with space fields cleared, preserving all other fields. 
*/ diff --git a/test/unit/core/domain/entities/brv-config.test.ts b/test/unit/core/domain/entities/brv-config.test.ts index 32cde94f6..67bbfe5dc 100644 --- a/test/unit/core/domain/entities/brv-config.test.ts +++ b/test/unit/core/domain/entities/brv-config.test.ts @@ -402,4 +402,50 @@ describe('BrvConfig', () => { expect(original.withVersion('9.9.9').language).to.deep.equal(fixedRu) }) }) + + describe('withLanguage', () => { + it('replaces an existing language preference', () => { + const original = new BrvConfig({...validConstructorArgs, language: {code: 'ru', mode: 'fixed'}}) + const updated = original.withLanguage({code: 'zh', mode: 'fixed'}) + + expect(updated.language).to.deep.equal({code: 'zh', mode: 'fixed'}) + }) + + it('sets language when previously unset', () => { + const original = new BrvConfig(validConstructorArgs) + const updated = original.withLanguage({mode: 'auto'}) + + expect(updated.language).to.deep.equal({mode: 'auto'}) + }) + + it('clears language when called with undefined', () => { + const original = new BrvConfig({...validConstructorArgs, language: {code: 'ru', mode: 'fixed'}}) + const updated = original.withLanguage() + + expect(updated.language).to.be.undefined + }) + + it('does not mutate the original config', () => { + const original = new BrvConfig({...validConstructorArgs, language: {mode: 'auto'}}) + original.withLanguage({code: 'ru', mode: 'fixed'}) + + expect(original.language).to.deep.equal({mode: 'auto'}) + }) + + it('preserves all other fields', () => { + const original = new BrvConfig({ + ...validConstructorArgs, + cipherAgentContext: 'context-payload', + reviewDisabled: true, + }) + const updated = original.withLanguage({code: 'ja', mode: 'fixed'}) + + expect(updated.spaceId).to.equal(original.spaceId) + expect(updated.teamId).to.equal(original.teamId) + expect(updated.cipherAgentContext).to.equal('context-payload') + expect(updated.reviewDisabled).to.be.true + expect(updated.createdAt).to.equal(original.createdAt) + 
expect(updated.version).to.equal(original.version) + }) + }) }) diff --git a/test/unit/oclif/commands/config/get.test.ts b/test/unit/oclif/commands/config/get.test.ts new file mode 100644 index 000000000..1e0c39e86 --- /dev/null +++ b/test/unit/oclif/commands/config/get.test.ts @@ -0,0 +1,61 @@ +/** + * Tests for the pure-function dispatcher inside `brv config get`. Mirrors + * the set-side test pattern. + */ + +import {expect} from 'chai' + +import {applyConfigGet} from '../../../../../src/oclif/commands/config/get.js' +import {BrvConfig} from '../../../../../src/server/core/domain/entities/brv-config.js' + +const validParams = { + createdAt: '2026-05-26T00:00:00.000Z', + cwd: '/tmp/project', + version: '0.0.1', +} + +describe('config get — applyConfigGet', () => { + it("returns undefined when 'language.mode' is unset", () => { + const config = new BrvConfig(validParams) + const result = applyConfigGet(config, 'language.mode') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.value).to.be.undefined + } + }) + + it("returns 'auto' when language.mode = auto", () => { + const config = new BrvConfig({...validParams, language: {mode: 'auto'}}) + const result = applyConfigGet(config, 'language.mode') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.value).to.equal('auto') + } + }) + + it("returns 'fixed' and the code when language is fully configured", () => { + const config = new BrvConfig({...validParams, language: {code: 'ru', mode: 'fixed'}}) + expect((applyConfigGet(config, 'language.mode') as {value: string}).value).to.equal('fixed') + expect((applyConfigGet(config, 'language.code') as {value: string}).value).to.equal('ru') + }) + + it("returns undefined for 'language.code' when language has only mode", () => { + const config = new BrvConfig({...validParams, language: {mode: 'auto'}}) + const result = applyConfigGet(config, 'language.code') + expect(result.kind).to.equal('ok') + if (result.kind === 
'ok') { + expect(result.value).to.be.undefined + } + }) + + it('rejects an unsupported key with a sorted supported-list', () => { + const config = new BrvConfig(validParams) + const result = applyConfigGet(config, 'unsupported.key') + expect(result.kind).to.equal('error') + if (result.kind === 'error') { + expect(result.code).to.equal('unknown-key') + expect(result.message).to.include('language.code') + expect(result.message).to.include('language.mode') + } + }) +}) diff --git a/test/unit/oclif/commands/config/set.test.ts b/test/unit/oclif/commands/config/set.test.ts new file mode 100644 index 000000000..a24dbd24b --- /dev/null +++ b/test/unit/oclif/commands/config/set.test.ts @@ -0,0 +1,158 @@ +/** + * Tests for the pure-function dispatcher inside `brv config set`. The oclif + * wrapper handles arg parsing + filesystem I/O; this suite asserts the + * validation + transformation contract that backs every call. + */ + +import {expect} from 'chai' + +import {applyConfigSet} from '../../../../../src/oclif/commands/config/set.js' +import {BrvConfig} from '../../../../../src/server/core/domain/entities/brv-config.js' + +const validParams = { + createdAt: '2026-05-26T00:00:00.000Z', + cwd: '/tmp/project', + version: '0.0.1', +} + +describe('config set — applyConfigSet', () => { + describe('language.mode', () => { + it("accepts 'auto' and clears the code-defaulted shape", () => { + const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'language.mode', 'auto') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.config.language).to.deep.equal({mode: 'auto'}) + } + }) + + it("accepts 'auto' and preserves an existing code", () => { + // Switching from fixed back to auto keeps the code on disk (it's + // vestigial in auto mode but harmless, and makes a future switch + // back to fixed a one-command re-activation). 
+ const config = new BrvConfig({...validParams, language: {code: 'ru', mode: 'fixed'}}) + const result = applyConfigSet(config, 'language.mode', 'auto') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.config.language).to.deep.equal({code: 'ru', mode: 'auto'}) + } + }) + + it("accepts 'fixed' when code is already set", () => { + const config = new BrvConfig({...validParams, language: {code: 'ru', mode: 'auto'}}) + const result = applyConfigSet(config, 'language.mode', 'fixed') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.config.language).to.deep.equal({code: 'ru', mode: 'fixed'}) + } + }) + + it("rejects 'fixed' when no code is set, with a redirect message", () => { + // The on-disk config `{language: {mode: 'fixed'}}` would be rejected + // by `isBrvConfigJson` on next load. Reject here so we never write it. + const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'language.mode', 'fixed') + expect(result.kind).to.equal('error') + if (result.kind === 'error') { + expect(result.code).to.equal('missing-language-code') + expect(result.message).to.include('brv config set language.code') + } + }) + + it("rejects unknown mode values", () => { + const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'language.mode', 'always-english') + expect(result.kind).to.equal('error') + if (result.kind === 'error') { + expect(result.code).to.equal('invalid-value') + expect(result.message).to.include("must be 'auto' or 'fixed'") + } + }) + }) + + describe('language.code', () => { + it('accepts a known ISO code; defaults mode to auto when language was unset', () => { + const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'language.code', 'ru') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.config.language).to.deep.equal({code: 'ru', mode: 'auto'}) + } + }) + + it('preserves an existing fixed mode 
when updating code', () => { + // Switching the active fixed language is a one-line operation: + // `brv config set language.code zh`. Mode stays fixed. + const config = new BrvConfig({...validParams, language: {code: 'ru', mode: 'fixed'}}) + const result = applyConfigSet(config, 'language.code', 'zh') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.config.language).to.deep.equal({code: 'zh', mode: 'fixed'}) + } + }) + + it('rejects unknown ISO codes with a sorted supported-list message', () => { + const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'language.code', 'xx') + expect(result.kind).to.equal('error') + if (result.kind === 'error') { + expect(result.code).to.equal('unknown-iso-code') + expect(result.message).to.include("'xx'") + expect(result.message).to.include('Supported codes:') + // Sanity: a few representative codes appear in the suggestion list. + expect(result.message).to.include('en') + expect(result.message).to.include('ru') + expect(result.message).to.include('zh') + } + }) + + it('accepts English so the restoration recipe works', () => { + // The release-notes recipe instructs users to set `code: en` for + // forced-English mode. The CLI must accept it. 
+ const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'language.code', 'en') + expect(result.kind).to.equal('ok') + if (result.kind === 'ok') { + expect(result.config.language).to.deep.equal({code: 'en', mode: 'auto'}) + } + }) + }) + + describe('unknown key', () => { + it('rejects an unsupported key with a sorted supported-list message', () => { + const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'language.unknown', 'whatever') + expect(result.kind).to.equal('error') + if (result.kind === 'error') { + expect(result.code).to.equal('unknown-key') + expect(result.message).to.include('language.code') + expect(result.message).to.include('language.mode') + } + }) + + it('rejects a totally unrelated key', () => { + const config = new BrvConfig(validParams) + const result = applyConfigSet(config, 'cipherAgent.context', 'whatever') + expect(result.kind).to.equal('error') + if (result.kind === 'error') { + expect(result.code).to.equal('unknown-key') + } + }) + }) + + describe('restoration recipe — forced English', () => { + it('two-step set produces {mode: fixed, code: en}', () => { + // Mirrors what release notes recommend for users who want the old + // implicit-English behavior. Set code first, then flip mode. 
+ const initial = new BrvConfig(validParams) + const afterCode = applyConfigSet(initial, 'language.code', 'en') + expect(afterCode.kind).to.equal('ok') + if (afterCode.kind !== 'ok') return + const afterMode = applyConfigSet(afterCode.config, 'language.mode', 'fixed') + expect(afterMode.kind).to.equal('ok') + if (afterMode.kind === 'ok') { + expect(afterMode.config.language).to.deep.equal({code: 'en', mode: 'fixed'}) + } + }) + }) +}) From 19c9b6b477c9653ffa6e77cc4e47f3a3f10adaba Mon Sep 17 00:00:00 2001 From: Cuong Date: Wed, 27 May 2026 14:40:26 +0700 Subject: [PATCH 06/16] feat: [ENG-2974] enum settings + register language.mode/language.code in SETTINGS_REGISTRY Adds EnumSettingDescriptor to the settings type system and registers the two language settings (mode, code) under category 'language'. The wire layer (CLI parser, transport DTO, validator, store, handler) now accepts and round-trips string-valued enum settings end-to-end; TUI and WebUI renderers will pick them up in follow-up commits. 
- SettingDescriptor union grows with EnumSettingDescriptor (options field) - SettingItem.current / default widen to boolean | number | string - SettingsItemDTO exposes 'options' on enum-typed items - brv settings set rejects invalid values with the allowed list - SettingsValidator.validate routes enum values through validateEnum - SETTINGS_KEYS gains LANGUAGE_MODE + LANGUAGE_CODE - registry test: enum narrowing, options shape, language category - handler test: LIST exposes options, SET rejects non-string to enum keys --- src/oclif/commands/settings/get.ts | 3 +- src/oclif/commands/settings/index.ts | 3 +- src/oclif/commands/settings/reset.ts | 3 +- src/oclif/commands/settings/set.ts | 18 +++++- src/server/core/domain/entities/settings.ts | 37 +++++++++++-- .../interfaces/storage/i-settings-store.ts | 2 +- .../infra/storage/file-settings-store.ts | 2 +- .../infra/storage/settings-validator.ts | 34 ++++++++++-- .../transport/handlers/settings-handler.ts | 19 ++++++- .../transport/events/settings-events.ts | 23 ++++---- .../domain/entities/settings-registry.test.ts | 55 +++++++++++++++++++ .../handlers/settings-handler.test.ts | 42 ++++++++++++++ 12 files changed, 209 insertions(+), 32 deletions(-) diff --git a/src/oclif/commands/settings/get.ts b/src/oclif/commands/settings/get.ts index 34ad47a6f..e89cd5e4e 100644 --- a/src/oclif/commands/settings/get.ts +++ b/src/oclif/commands/settings/get.ts @@ -103,8 +103,9 @@ export default class SettingsGet extends Command { } } -function renderValue(item: SettingsItemDTO, value: boolean | number): string { +function renderValue(item: SettingsItemDTO, value: boolean | number | string): string { if (typeof value === 'boolean') return value ? 
'true' : 'false' + if (typeof value === 'string') return value return renderInteger(item, value) } diff --git a/src/oclif/commands/settings/index.ts b/src/oclif/commands/settings/index.ts index 9375c30e6..20c0e9e75 100644 --- a/src/oclif/commands/settings/index.ts +++ b/src/oclif/commands/settings/index.ts @@ -114,8 +114,9 @@ function formatRow(item: SettingsItemDTO): string { return ` ${pad(item.key, 40)} ${pad(current, 7)} (default ${defaultStr})${''.padEnd(Math.max(0, 8 - defaultStr.length))} ${range}` } -function renderValue(item: SettingsItemDTO, value: boolean | number): string { +function renderValue(item: SettingsItemDTO, value: boolean | number | string): string { if (typeof value === 'boolean') return value ? 'true' : 'false' + if (typeof value === 'string') return value return renderInteger(item, value) } diff --git a/src/oclif/commands/settings/reset.ts b/src/oclif/commands/settings/reset.ts index 57d00803f..7cb68f7fe 100644 --- a/src/oclif/commands/settings/reset.ts +++ b/src/oclif/commands/settings/reset.ts @@ -97,8 +97,9 @@ export default class SettingsReset extends Command { } } -function renderValue(item: SettingsItemDTO, value: boolean | number): string { +function renderValue(item: SettingsItemDTO, value: boolean | number | string): string { if (typeof value === 'boolean') return value ? 
'true' : 'false' + if (typeof value === 'string') return value if (item.unit === 'ms') return formatDuration(value) return formatCount(value) } diff --git a/src/oclif/commands/settings/set.ts b/src/oclif/commands/settings/set.ts index bdc33eb8a..f841a8568 100644 --- a/src/oclif/commands/settings/set.ts +++ b/src/oclif/commands/settings/set.ts @@ -119,7 +119,7 @@ export default class SettingsSet extends Command { protected async writeSetting( key: string, - value: boolean | number, + value: boolean | number | string, options?: DaemonClientOptions, ): Promise { return withDaemonRetry( @@ -131,7 +131,7 @@ export default class SettingsSet extends Command { } type ParseResult = - | {readonly display: string; readonly kind: 'ok'; readonly value: boolean | number} + | {readonly display: string; readonly kind: 'ok'; readonly value: boolean | number | string} | {readonly kind: 'error'; readonly message: string} const BOOLEAN_TOKENS = new Map([ @@ -149,10 +149,24 @@ const BOOLEAN_TOKENS_HINT = 'true, false, on, off, 1, 0, yes, no' function parseValue(descriptor: SettingsItemDTO, raw: string): ParseResult { if (descriptor.type === 'boolean') return parseAsBoolean(descriptor, raw) + if (descriptor.type === 'enum') return parseAsEnum(descriptor, raw) if (descriptor.unit === 'ms') return parseAsDuration(descriptor, raw) return parseAsCount(descriptor, raw) } +function parseAsEnum(descriptor: SettingsItemDTO, raw: string): ParseResult { + const trimmed = raw.trim() + const options = descriptor.options ?? 
[] + if (!options.includes(trimmed)) { + return { + kind: 'error', + message: `${descriptor.key} expected one of [${options.join(', ')}], got '${raw}'.`, + } + } + + return {display: trimmed, kind: 'ok', value: trimmed} +} + function parseAsBoolean(descriptor: SettingsItemDTO, raw: string): ParseResult { const lowered = raw.trim().toLowerCase() const value = BOOLEAN_TOKENS.get(lowered) diff --git a/src/server/core/domain/entities/settings.ts b/src/server/core/domain/entities/settings.ts index 11a11bfe3..3fc02fcea 100644 --- a/src/server/core/domain/entities/settings.ts +++ b/src/server/core/domain/entities/settings.ts @@ -6,13 +6,14 @@ import { TASK_HISTORY_DEFAULT_MAX_ENTRIES, UPDATE_CHECK_FOR_UPDATES_DEFAULT, } from '../../../constants.js' +import {LANGUAGE_NAMES} from '../render/language-clause.js' /** * High-level concern the setting controls. Drives group headers in CLI * and TUI render output (uppercased). Web docs / WebUI consume this * field to render the same groupings independently of key naming. */ -export type SettingCategory = 'concurrency' | 'llm' | 'task-history' | 'updates' +export type SettingCategory = 'concurrency' | 'language' | 'llm' | 'task-history' | 'updates' /** * Value-kind for dispatch between the duration formatter / parser @@ -48,15 +49,21 @@ export type BooleanSettingDescriptor = BaseSettingDescriptor & { readonly type: 'boolean' } +export type EnumSettingDescriptor = BaseSettingDescriptor & { + readonly default: string + readonly options: readonly string[] + readonly type: 'enum' +} + /** * Descriptor for a single user-configurable setting. Discriminated on * `type` so consumers narrow with a single check before reading - * type-specific fields (`min`/`max` on integers, etc). + * type-specific fields (`min`/`max` on integers, `options` on enums, etc). * * Defaults reference the existing constants module so a constant change * automatically updates the setting's default. 
*/ -export type SettingDescriptor = BooleanSettingDescriptor | IntegerSettingDescriptor +export type SettingDescriptor = BooleanSettingDescriptor | EnumSettingDescriptor | IntegerSettingDescriptor /** * View of one setting: the key, the user's current override (or the default @@ -64,8 +71,8 @@ export type SettingDescriptor = BooleanSettingDescriptor | IntegerSettingDescrip * shapes; consumers narrow on the corresponding descriptor's `type`. */ export type SettingItem = { - readonly current: boolean | number - readonly default: boolean | number + readonly current: boolean | number | string + readonly default: boolean | number | string readonly key: string readonly restartRequired: boolean } @@ -79,6 +86,8 @@ export type SettingItem = { export const SETTINGS_KEYS = { AGENT_POOL_MAX_CONCURRENT_TASKS: 'agentPool.maxConcurrentTasksPerProject', AGENT_POOL_MAX_SIZE: 'agentPool.maxSize', + LANGUAGE_CODE: 'language.code', + LANGUAGE_MODE: 'language.mode', LLM_ITERATION_BUDGET_MS: 'llm.iterationBudgetMs', LLM_REQUEST_TIMEOUT_MS: 'llm.requestTimeoutMs', TASK_HISTORY_MAX_ENTRIES: 'taskHistory.maxEntries', @@ -146,6 +155,24 @@ export const SETTINGS_REGISTRY: readonly SettingDescriptor[] = [ restartRequired: false, type: 'boolean', }, + { + category: 'language', + default: 'auto', + description: 'Match input language (auto) or force a fixed language for written output', + key: SETTINGS_KEYS.LANGUAGE_MODE, + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + }, + { + category: 'language', + default: 'en', + description: 'ISO-639-1 code applied when mode is fixed; ignored in auto mode', + key: SETTINGS_KEYS.LANGUAGE_CODE, + options: Object.keys(LANGUAGE_NAMES), + restartRequired: false, + type: 'enum', + }, ] export function findSettingDescriptor(key: string): SettingDescriptor | undefined { diff --git a/src/server/core/interfaces/storage/i-settings-store.ts b/src/server/core/interfaces/storage/i-settings-store.ts index e9f6ad96a..0b69bf042 100644 --- 
a/src/server/core/interfaces/storage/i-settings-store.ts +++ b/src/server/core/interfaces/storage/i-settings-store.ts @@ -12,7 +12,7 @@ export type SettingsStartupSnapshot = { * Daemon startup logs this once; all values fall back to defaults. */ readonly parseError?: string - readonly values: Readonly> + readonly values: Readonly> } /** diff --git a/src/server/infra/storage/file-settings-store.ts b/src/server/infra/storage/file-settings-store.ts index 46ca7a34b..634cd369f 100644 --- a/src/server/infra/storage/file-settings-store.ts +++ b/src/server/infra/storage/file-settings-store.ts @@ -132,7 +132,7 @@ export class FileSettingsStore implements ISettingsStore { return join(this.baseDir, SETTINGS_FILE) } - private async readOverrides(): Promise> { + private async readOverrides(): Promise> { const raw = await this.readRawValues() const {valid} = this.validator.partition(raw) return {...valid} diff --git a/src/server/infra/storage/settings-validator.ts b/src/server/infra/storage/settings-validator.ts index c37fc2762..30f3a69aa 100644 --- a/src/server/infra/storage/settings-validator.ts +++ b/src/server/infra/storage/settings-validator.ts @@ -1,5 +1,6 @@ import type { BooleanSettingDescriptor, + EnumSettingDescriptor, IntegerSettingDescriptor, SettingDescriptor, } from '../../core/domain/entities/settings.js' @@ -30,7 +31,7 @@ export class InvalidSettingValueError extends Error { export type PartitionedSettings = { readonly invalid: ReadonlyArray<{readonly key: string; readonly reason: string; readonly value: unknown}> - readonly valid: Readonly> + readonly valid: Readonly> } export type CouplingViolation = { @@ -56,7 +57,7 @@ export class SettingsValidator { * log a warning about. 
*/ public partition(record: Record): PartitionedSettings { - const valid: Record = {} + const valid: Record = {} const invalid: Array<{key: string; reason: string; value: unknown}> = [] for (const [key, value] of Object.entries(record)) { @@ -97,9 +98,9 @@ export class SettingsValidator { /** * Validates a single key/value pair. Throws on unknown key or invalid value. * Returns the coerced value on success (integer for integer descriptors, - * boolean for boolean descriptors). + * boolean for boolean descriptors, the canonical option for enum descriptors). */ - public validate(key: string, value: unknown): boolean | number { + public validate(key: string, value: unknown): boolean | number | string { const descriptor = this.validateKey(key) return this.validateAgainst(descriptor, value) } @@ -136,12 +137,33 @@ export class SettingsValidator { return descriptor } - private validateAgainst(descriptor: SettingDescriptor, value: unknown): boolean | number { + private validateAgainst(descriptor: SettingDescriptor, value: unknown): boolean | number | string { if (descriptor.type === 'boolean') return validateBoolean(descriptor, value) + if (descriptor.type === 'enum') return validateEnum(descriptor, value) return validateInteger(descriptor, value) } } +function validateEnum(descriptor: EnumSettingDescriptor, value: unknown): string { + if (typeof value !== 'string') { + throw new InvalidSettingValueError( + descriptor.key, + value, + `expected one of [${descriptor.options.join(', ')}], got ${describeType(value)}`, + ) + } + + if (!descriptor.options.includes(value)) { + throw new InvalidSettingValueError( + descriptor.key, + value, + `'${value}' is not one of [${descriptor.options.join(', ')}]`, + ) + } + + return value +} + function validateInteger(descriptor: IntegerSettingDescriptor, value: unknown): number { if (typeof value !== 'number' || !Number.isInteger(value)) { throw new InvalidSettingValueError( @@ -174,7 +196,7 @@ function validateBoolean(descriptor: 
BooleanSettingDescriptor, value: unknown): return value } -function numericSubset(values: Readonly>): Record { +function numericSubset(values: Readonly>): Record { const result: Record = {} for (const [key, value] of Object.entries(values)) { if (typeof value === 'number') result[key] = value diff --git a/src/server/infra/transport/handlers/settings-handler.ts b/src/server/infra/transport/handlers/settings-handler.ts index d48410cce..6650f9569 100644 --- a/src/server/infra/transport/handlers/settings-handler.ts +++ b/src/server/infra/transport/handlers/settings-handler.ts @@ -110,7 +110,7 @@ function restartRequiredFor(key: string): boolean { * Range, coupling, and fractional-number violations are left to the store's * validator and still surface as `invalid_value`. */ -function checkValueType(key: string, value: boolean | number): SettingsErrorDTO | undefined { +function checkValueType(key: string, value: boolean | number | string): SettingsErrorDTO | undefined { const descriptor = findSettingDescriptor(key) if (descriptor === undefined) return undefined @@ -137,6 +137,17 @@ function checkValueType(key: string, value: boolean | number): SettingsErrorDTO } } + if (descriptor.type === 'enum' && got !== 'string') { + return { + code: 'invalid_value_type', + expected: 'enum', + got, + key, + message: `expected string for '${key}', got ${got}`, + value, + } + } + return undefined } @@ -149,7 +160,7 @@ function toItemDTO(item: SettingItem): SettingsItemDTO { return descriptorToDTO(descriptor, item.current) } -function descriptorToDTO(descriptor: SettingDescriptor, current: boolean | number): SettingsItemDTO { +function descriptorToDTO(descriptor: SettingDescriptor, current: boolean | number | string): SettingsItemDTO { const dto: SettingsItemDTO = { current, default: descriptor.default, @@ -165,6 +176,10 @@ function descriptorToDTO(descriptor: SettingDescriptor, current: boolean | numbe if (descriptor.unit !== undefined) dto.unit = descriptor.unit } + if 
(descriptor.type === 'enum') { + dto.options = descriptor.options + } + return dto } diff --git a/src/shared/transport/events/settings-events.ts b/src/shared/transport/events/settings-events.ts index 4bcea3d9e..19c3489d6 100644 --- a/src/shared/transport/events/settings-events.ts +++ b/src/shared/transport/events/settings-events.ts @@ -11,31 +11,30 @@ export const SettingsEvents = { * surfaces (CLI / TUI / WebUI) can consume it without crossing the * server import boundary. * - * M7 T2 added three optional fields (`category`, `unit`, `scope`); T1 of - * the Update-check toggle project widened `type`, `current`, `default`, - * and `restartRequired` to also cover boolean descriptors, and made - * `min` / `max` optional (only integer descriptors carry them). All - * widenings are additive at the JSON layer, so consumers that read - * existing integer fields continue to parse the wire format. + * Backward-compat: every widening here is additive at the JSON layer, so + * consumers that read pre-existing integer / boolean fields continue to + * parse the wire format unchanged. */ export interface SettingsItemDTO { - category?: 'concurrency' | 'llm' | 'task-history' | 'updates' - current: boolean | number - default: boolean | number + category?: 'concurrency' | 'language' | 'llm' | 'task-history' | 'updates' + current: boolean | number | string + default: boolean | number | string description: string key: string max?: number min?: number + /** Allowed values for `type === 'enum'`. Omitted otherwise. */ + options?: readonly string[] restartRequired: boolean scope?: 'global' | 'project' - type: 'boolean' | 'integer' + type: 'boolean' | 'enum' | 'integer' unit?: 'count' | 'ms' } export interface SettingsErrorDTO { code: 'invalid_value' | 'invalid_value_type' | 'unknown_key' /** Expected runtime kind, only set when `code === 'invalid_value_type'`. 
*/ - expected?: 'boolean' | 'integer' + expected?: 'boolean' | 'enum' | 'integer' /** `typeof` of the offending value, only set when `code === 'invalid_value_type'`. */ got?: string key: string @@ -59,7 +58,7 @@ export type SettingsGetResponse = export interface SettingsSetRequest { key: string - value: boolean | number + value: boolean | number | string } export type SettingsSetResponse = diff --git a/test/unit/core/domain/entities/settings-registry.test.ts b/test/unit/core/domain/entities/settings-registry.test.ts index 449c1e895..2188eb666 100644 --- a/test/unit/core/domain/entities/settings-registry.test.ts +++ b/test/unit/core/domain/entities/settings-registry.test.ts @@ -22,6 +22,7 @@ describe('settings registry — M7 T2 shape', () => { for (const descriptor of SETTINGS_REGISTRY) { expect(descriptor.category, `key ${descriptor.key} missing category`).to.be.oneOf([ 'concurrency', + 'language', 'llm', 'task-history', 'updates', @@ -127,4 +128,58 @@ describe('settings registry — M7 T2 shape', () => { } }) }) + + describe('language.* enum descriptors', () => { + it('exposes LANGUAGE_MODE + LANGUAGE_CODE on SETTINGS_KEYS', () => { + expect(SETTINGS_KEYS.LANGUAGE_MODE).to.equal('language.mode') + expect(SETTINGS_KEYS.LANGUAGE_CODE).to.equal('language.code') + }) + + it('registers language.mode as enum with default=auto and options=[auto, fixed]', () => { + const descriptor = findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE) + expect(descriptor?.type).to.equal('enum') + if (descriptor?.type === 'enum') { + expect(descriptor.default).to.equal('auto') + expect([...descriptor.options]).to.deep.equal(['auto', 'fixed']) + } else { + expect.fail('expected enum descriptor for language.mode') + } + }) + + it('registers language.code as enum with default=en and options including ko + ja + zh', () => { + const descriptor = findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_CODE) + expect(descriptor?.type).to.equal('enum') + if (descriptor?.type === 'enum') { + 
expect(descriptor.default).to.equal('en') + expect(descriptor.options).to.include('ko') + expect(descriptor.options).to.include('ja') + expect(descriptor.options).to.include('zh') + expect(descriptor.options).to.include('en') + } else { + expect.fail('expected enum descriptor for language.code') + } + }) + + it('groups both language entries under category=language', () => { + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE)?.category).to.equal('language') + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_CODE)?.category).to.equal('language') + }) + + it('marks language settings as restart-not-required (live config)', () => { + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE)?.restartRequired).to.equal(false) + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_CODE)?.restartRequired).to.equal(false) + }) + + it('narrows enum descriptors to EnumSettingDescriptor when descriptor.type === enum', () => { + const descriptor = findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE) + if (descriptor?.type === 'enum') { + const defaultValue: string = descriptor.default + const {options} = descriptor + expect(defaultValue).to.equal('auto') + expect(options.length).to.be.greaterThan(0) + } else { + expect.fail('expected enum descriptor for language.mode') + } + }) + }) }) diff --git a/test/unit/infra/transport/handlers/settings-handler.test.ts b/test/unit/infra/transport/handlers/settings-handler.test.ts index 8b4defc87..b7f2f03df 100644 --- a/test/unit/infra/transport/handlers/settings-handler.test.ts +++ b/test/unit/infra/transport/handlers/settings-handler.test.ts @@ -86,6 +86,8 @@ describe('SettingsHandler', () => { expect(result.items.map((i) => i.key).sort()).to.deep.equal([ 'agentPool.maxConcurrentTasksPerProject', 'agentPool.maxSize', + 'language.code', + 'language.mode', 'llm.iterationBudgetMs', 'llm.requestTimeoutMs', 'taskHistory.maxEntries', @@ -136,6 +138,23 @@ describe('SettingsHandler', () => { expect(item.scope).to.equal(undefined) } }) + + 
it('exposes options on enum-typed items and omits options on non-enum items', async () => { + store.listResult = [] + const result = await invokeList() + const byKey = new Map(result.items.map((i) => [i.key, i])) + + const mode = byKey.get('language.mode') + expect(mode?.type).to.equal('enum') + expect(mode?.options).to.deep.equal(['auto', 'fixed']) + + const code = byKey.get('language.code') + expect(code?.type).to.equal('enum') + expect(code?.options).to.include('ko') + + expect(byKey.get('agentPool.maxSize')?.options).to.equal(undefined) + expect(byKey.get('update.checkForUpdates')?.options).to.equal(undefined) + }) }) describe('GET', () => { @@ -269,6 +288,29 @@ describe('SettingsHandler', () => { if (!result.ok) expect(result.error.code).to.equal('unknown_key') }) + it('rejects a numeric value sent to an enum key', async () => { + const result = await invokeSet({key: 'language.mode', value: 5}) + + expect(result.ok).to.be.false + if (!result.ok) { + expect(result.error.code).to.equal('invalid_value_type') + expect(result.error.key).to.equal('language.mode') + expect(result.error.expected).to.equal('enum') + expect(result.error.got).to.equal('number') + } + + expect(store.calls.filter((c) => c.method === 'set')).to.have.lengthOf(0) + }) + + it('accepts a string value sent to an enum key and forwards to the store', async () => { + const result = await invokeSet({key: 'language.mode', value: 'fixed'}) + + expect(result.ok).to.be.true + const setCalls = store.calls.filter((c) => c.method === 'set') + expect(setCalls).to.have.lengthOf(1) + expect(setCalls[0].args).to.deep.equal(['language.mode', 'fixed']) + }) + it('still surfaces a range violation as invalid_value (not invalid_value_type)', async () => { store.setBehavior = async (key, value) => { throw new InvalidSettingValueError(key, value, 'value 0 is outside allowed range [1, 100]') From f40e1887a3c75f290006b2436039f3c194e17e60 Mon Sep 17 00:00:00 2001 From: Cuong Date: Wed, 27 May 2026 14:51:36 +0700 
Subject: [PATCH 07/16] feat: [ENG-2974] render language settings in TUI + WebUI Configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaces language.mode / language.code via the existing settings UIs. - shared/language/language-names.ts: extracted single source of truth for the ISO-639-1 → English-name map (was inline in language-clause.ts); reused by settings registry, WebUI display labels, and language-clause - shared SettingsRow / format-settings: adds enum row builder with options + display formatting "[ auto ]" - TUI settings-page: Left/Right cycles options in edit mode; LANGUAGE group appears below UPDATES per CATEGORY_ORDER - WebUI enum-settings-row.tsx: select dropdown driven by descriptor options; language.code rows render "ja — Japanese" via LANGUAGE_NAMES - WebUI LanguagePanel wires into Configuration > General between TaskHistoryPanel and UpdatesPanel - tests: enum row builder + parseRowInput + category ordering --- src/server/core/domain/entities/settings.ts | 2 +- .../core/domain/render/language-clause.ts | 37 +-------- src/shared/language/language-names.ts | 33 ++++++++ src/shared/types/settings-row.ts | 15 ++-- src/shared/utils/format-settings.ts | 58 ++++++++++++- .../settings/components/settings-page.tsx | 26 +++++- .../settings/utils/format-settings.ts | 18 ++-- .../settings/components/enum-settings-row.tsx | 83 +++++++++++++++++++ .../settings/components/language-panel.tsx | 42 ++++++++++ .../settings/components/settings-row.tsx | 2 + src/webui/features/settings/lib/labels.ts | 2 + src/webui/pages/configuration/general.tsx | 2 + .../unit/shared/utils/format-settings.test.ts | 69 +++++++++++++++ 13 files changed, 341 insertions(+), 48 deletions(-) create mode 100644 src/shared/language/language-names.ts create mode 100644 src/webui/features/settings/components/enum-settings-row.tsx create mode 100644 src/webui/features/settings/components/language-panel.tsx diff --git 
a/src/server/core/domain/entities/settings.ts b/src/server/core/domain/entities/settings.ts index 3fc02fcea..a45f5ada5 100644 --- a/src/server/core/domain/entities/settings.ts +++ b/src/server/core/domain/entities/settings.ts @@ -1,3 +1,4 @@ +import {LANGUAGE_NAMES} from '../../../../shared/language/language-names.js' import { AGENT_LLM_ITERATION_BUDGET_MS, AGENT_LLM_REQUEST_TIMEOUT_MS, @@ -6,7 +7,6 @@ import { TASK_HISTORY_DEFAULT_MAX_ENTRIES, UPDATE_CHECK_FOR_UPDATES_DEFAULT, } from '../../../constants.js' -import {LANGUAGE_NAMES} from '../render/language-clause.js' /** * High-level concern the setting controls. Drives group headers in CLI diff --git a/src/server/core/domain/render/language-clause.ts b/src/server/core/domain/render/language-clause.ts index 9c5964ef9..03c1c5224 100644 --- a/src/server/core/domain/render/language-clause.ts +++ b/src/server/core/domain/render/language-clause.ts @@ -16,38 +16,9 @@ import type {BrvConfigLanguage} from '../entities/brv-config.js' -/** - * ISO-639-1 code → English language name. Inline (~24 entries) rather than - * pulling the `iso-639-1` package — runtime dependency surface stays - * minimal. Codes not in this map degrade gracefully via the raw-code - * fallback in `buildLanguageClause`. 
- */ -export const LANGUAGE_NAMES: Record = { - ar: 'Arabic', - de: 'German', - el: 'Greek', - en: 'English', - es: 'Spanish', - fi: 'Finnish', - fr: 'French', - he: 'Hebrew', - hi: 'Hindi', - id: 'Indonesian', - it: 'Italian', - ja: 'Japanese', - ko: 'Korean', - nl: 'Dutch', - no: 'Norwegian', - pl: 'Polish', - pt: 'Portuguese', - ru: 'Russian', - sv: 'Swedish', - th: 'Thai', - tr: 'Turkish', - uk: 'Ukrainian', - vi: 'Vietnamese', - zh: 'Chinese', -} +export {LANGUAGE_NAMES} from '../../../../shared/language/language-names.js' + +import {LANGUAGE_NAMES as LANGUAGE_NAMES_LOCAL} from '../../../../shared/language/language-names.js' const AUTO_CLAUSE = "Match the user's input language for human-readable content: body text of `` elements, list items, and the `title` / `summary` attributes on ``. Keep tag names, attribute names, enum values, and the `path` attribute in English for tooling consistency. Code snippets and identifiers stay verbatim." @@ -83,6 +54,6 @@ export function buildLanguageClause(language?: BrvConfigLanguage): string { return AUTO_CLAUSE } - const name = LANGUAGE_NAMES[language.code] ?? `"${language.code}"` + const name = LANGUAGE_NAMES_LOCAL[language.code] ?? `"${language.code}"` return buildFixedClause(name) } diff --git a/src/shared/language/language-names.ts b/src/shared/language/language-names.ts new file mode 100644 index 000000000..997f84db2 --- /dev/null +++ b/src/shared/language/language-names.ts @@ -0,0 +1,33 @@ +/** + * ISO-639-1 code → English language name. Single source of truth for + * surfaces that need a human-readable label alongside the canonical + * wire-format code: language-clause builder, WebUI / TUI pickers, CLI + * error messages. Codes not in this map degrade gracefully via the + * raw-code fallback in `buildLanguageClause`. 
+ */ +export const LANGUAGE_NAMES: Record = { + ar: 'Arabic', + de: 'German', + el: 'Greek', + en: 'English', + es: 'Spanish', + fi: 'Finnish', + fr: 'French', + he: 'Hebrew', + hi: 'Hindi', + id: 'Indonesian', + it: 'Italian', + ja: 'Japanese', + ko: 'Korean', + nl: 'Dutch', + no: 'Norwegian', + pl: 'Polish', + pt: 'Portuguese', + ru: 'Russian', + sv: 'Swedish', + th: 'Thai', + tr: 'Turkish', + uk: 'Ukrainian', + vi: 'Vietnamese', + zh: 'Chinese', +} diff --git a/src/shared/types/settings-row.ts b/src/shared/types/settings-row.ts index 23cf81983..91be16680 100644 --- a/src/shared/types/settings-row.ts +++ b/src/shared/types/settings-row.ts @@ -1,10 +1,10 @@ -export type SettingsRowCategory = 'concurrency' | 'llm' | 'other' | 'task-history' | 'updates' +export type SettingsRowCategory = 'concurrency' | 'language' | 'llm' | 'other' | 'task-history' | 'updates' export type SettingsRowUnit = 'count' | 'ms' /** * View-model for one settings row consumed by the TUI. Discriminated on * `type` so the renderer narrows before reading integer-only fields - * (`min`, `max`, `unit`) or treating `current` / `default` as numeric. + * (`min`, `max`, `unit`) or enum-only fields (`options`). * * Restart requirement is propagated from the descriptor verbatim (no * literal `true` constraint) so the dirty-banner filter on the page can @@ -12,8 +12,8 @@ export type SettingsRowUnit = 'count' | 'ms' */ export interface SettingsRow { readonly category: SettingsRowCategory - readonly current: boolean | number - readonly default: boolean | number + readonly current: boolean | number | string + readonly default: boolean | number | string readonly description: string readonly displayCurrent: string readonly displayDefault: string @@ -23,13 +23,15 @@ export interface SettingsRow { readonly max?: number readonly min?: number readonly modified: boolean + /** Allowed values for `type === 'enum'`. Omitted otherwise. 
*/ + readonly options?: readonly string[] readonly restartRequired: boolean - readonly type: 'boolean' | 'integer' + readonly type: 'boolean' | 'enum' | 'integer' readonly unit?: SettingsRowUnit } export type RowParseResult = - | {readonly displayValue: string; readonly kind: 'ok'; readonly value: number} + | {readonly displayValue: string; readonly kind: 'ok'; readonly value: number | string} | {readonly kind: 'error'; readonly message: string} export const CATEGORY_ORDER: readonly SettingsRowCategory[] = [ @@ -37,5 +39,6 @@ export const CATEGORY_ORDER: readonly SettingsRowCategory[] = [ 'llm', 'task-history', 'updates', + 'language', 'other', ] diff --git a/src/shared/utils/format-settings.ts b/src/shared/utils/format-settings.ts index 79d415e7e..1d2de7580 100644 --- a/src/shared/utils/format-settings.ts +++ b/src/shared/utils/format-settings.ts @@ -12,6 +12,11 @@ export function buildSettingsRows(items: readonly SettingsItemDTO[]): SettingsRo continue } + if (isEnumItem(item)) { + rows.push(toEnumRow(item)) + continue + } + if (isIntegerItem(item)) rows.push(toIntegerRow(item)) } @@ -22,10 +27,21 @@ export function parseRowInput(row: SettingsRow, raw: string): RowParseResult { const trimmed = raw.trim() if (trimmed === '') return {kind: 'error', message: 'Value is required'} + if (row.type === 'enum') return parseAsEnum(row, raw) if (row.unit === 'ms') return parseAsDuration(row, raw) return parseAsCount(row, raw) } +function parseAsEnum(row: SettingsRow, raw: string): RowParseResult { + const trimmed = raw.trim() + const options = row.options ?? 
[] + if (!options.includes(trimmed)) { + return {kind: 'error', message: `Expected one of [${options.join(', ')}], got '${raw}'`} + } + + return {displayValue: trimmed, kind: 'ok', value: trimmed} +} + function parseAsDuration(row: SettingsRow, raw: string): RowParseResult { if (row.min === undefined || row.max === undefined) { return {kind: 'error', message: `${row.key} has no numeric range`} @@ -124,12 +140,52 @@ function toBooleanRow(item: SettingsItemDTO, current: boolean, defaultValue: boo } } +type EnumSettingsItemDTO = Omit & { + readonly current: string + readonly default: string + readonly options: readonly string[] + readonly type: 'enum' +} + +function isEnumItem(item: SettingsItemDTO): item is EnumSettingsItemDTO { + return ( + item.type === 'enum' && + typeof item.current === 'string' && + typeof item.default === 'string' && + Array.isArray(item.options) + ) +} + +function toEnumRow(item: EnumSettingsItemDTO): SettingsRow { + return { + category: toRowCategory(item.category), + current: item.current, + default: item.default, + description: item.description, + displayCurrent: `[ ${item.current} ]`, + displayDefault: item.default, + displayRange: '', + key: item.key, + label: item.key, + modified: item.current !== item.default, + options: item.options, + restartRequired: item.restartRequired, + type: 'enum', + } +} + function renderBoolean(value: boolean): string { return value ? 
'[ on ]' : '[ off ]' } function toRowCategory(category: SettingsItemDTO['category']): SettingsRowCategory { - if (category === 'concurrency' || category === 'llm' || category === 'task-history' || category === 'updates') { + if ( + category === 'concurrency' || + category === 'language' || + category === 'llm' || + category === 'task-history' || + category === 'updates' + ) { return category } diff --git a/src/tui/features/settings/components/settings-page.tsx b/src/tui/features/settings/components/settings-page.tsx index 3a7ca99f9..a7d67d068 100644 --- a/src/tui/features/settings/components/settings-page.tsx +++ b/src/tui/features/settings/components/settings-page.tsx @@ -29,8 +29,9 @@ export function SettingsPage({onCancel, onComplete}: CustomDialogCallbacks): Rea const rows = useMemo(() => (data ? buildSettingsRows(data.items) : []), [data]) const groups = useMemo(() => groupRowsByCategory(rows), [rows]) const focusedRow = rows[cursor] - const hintMode: 'browse' | 'edit' | 'edit-error' | 'saving' = - mode === 'edit' && rowError !== undefined ? 'edit-error' : mode + const isEnumEdit = mode === 'edit' && focusedRow?.type === 'enum' + const hintMode: 'browse' | 'edit' | 'edit-enum' | 'edit-error' | 'saving' = + mode === 'edit' && rowError !== undefined ? 'edit-error' : isEnumEdit ? 'edit-enum' : mode // Restart warning fires only when at least one dirty key actually // requires a daemon restart. Boolean toggles (e.g. update.checkForUpdates, @@ -173,6 +174,27 @@ export function SettingsPage({onCancel, onComplete}: CustomDialogCallbacks): Rea return } + const focused = rows[cursor] + if (focused?.type === 'enum' && focused.options !== undefined) { + const {options} = focused + const currentIndex = options.indexOf(editBuffer) + if (key.leftArrow) { + const previousIndex = currentIndex <= 0 ? 
options.length - 1 : currentIndex - 1 + setEditBuffer(options[previousIndex]) + setRowError(undefined) + return + } + + if (key.rightArrow) { + const nextIndex = currentIndex < 0 || currentIndex >= options.length - 1 ? 0 : currentIndex + 1 + setEditBuffer(options[nextIndex]) + setRowError(undefined) + return + } + + return + } + if (key.backspace || key.delete) { setEditBuffer((previous) => previous.slice(0, -1)) return diff --git a/src/tui/features/settings/utils/format-settings.ts b/src/tui/features/settings/utils/format-settings.ts index 4c171fb83..df6d87e95 100644 --- a/src/tui/features/settings/utils/format-settings.ts +++ b/src/tui/features/settings/utils/format-settings.ts @@ -3,6 +3,7 @@ import {formatDuration} from '../../../../shared/utils/format-duration.js' const CATEGORY_HEADERS: Readonly> = { concurrency: 'CONCURRENCY', + language: 'LANGUAGE', llm: 'LLM', other: 'OTHER', 'task-history': 'TASK HISTORY', @@ -32,7 +33,10 @@ export function groupRowsByCategory(rows: readonly SettingsRow[]): ReadonlyArray return result } -export function bottomHintFor(mode: 'browse' | 'edit' | 'edit-error' | 'saving', focusedKey?: string): string { +export function bottomHintFor( + mode: 'browse' | 'edit' | 'edit-enum' | 'edit-error' | 'saving', + focusedKey?: string, +): string { switch (mode) { case 'browse': { return 'Up/Down move | Enter edit | R reset | Esc exit' @@ -42,6 +46,10 @@ export function bottomHintFor(mode: 'browse' | 'edit' | 'edit-error' | 'saving', return `Editing ${focusedKey ?? ''} | Enter save | Esc cancel` } + case 'edit-enum': { + return `Editing ${focusedKey ?? ''} | Left/Right cycle options | Enter save | Esc cancel` + } + case 'edit-error': { return `Editing ${focusedKey ?? 
''} | Enter save (when valid) | Esc cancel` } @@ -53,10 +61,10 @@ export function bottomHintFor(mode: 'browse' | 'edit' | 'edit-error' | 'saving', } export function preFillBufferFor(row: SettingsRow): string { - // preFillBufferFor only runs when entering integer text-input mode. - // Boolean rows take the toggle path in the page and never reach here; - // guard the narrowing so the function still compiles under the wider - // SettingsRow union. + // preFillBufferFor only runs when entering integer text-input mode for + // numeric rows or enum cycling for enum rows. Boolean rows take the + // toggle path and never reach here. + if (row.type === 'enum') return String(row.current) if (typeof row.current !== 'number') return String(row.current) if (row.unit === 'ms') return formatDuration(row.current) return String(row.current) diff --git a/src/webui/features/settings/components/enum-settings-row.tsx b/src/webui/features/settings/components/enum-settings-row.tsx new file mode 100644 index 000000000..27b2ba89d --- /dev/null +++ b/src/webui/features/settings/components/enum-settings-row.tsx @@ -0,0 +1,83 @@ +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@campfirein/byterover-packages/components/select' +import {useId} from 'react' +import {toast} from 'sonner' + +import type {SettingsRow as SettingsRowData} from '../../../../shared/types/settings-row' + +import {LANGUAGE_NAMES} from '../../../../shared/language/language-names' +import {formatError} from '../../../lib/error-messages' +import {noop} from '../../../lib/noop' +import {useSetSetting} from '../api/set-setting' +import {labelFor} from '../lib/labels' +import {useRestartBannerStore} from '../stores/restart-banner-store' + +type Props = { + row: SettingsRowData +} + +export function EnumSettingsRow({row}: Props) { + const setMutation = useSetSetting() + const markDirty = useRestartBannerStore((s) => s.markDirty) + const descriptionId = useId() + + const label = 
labelFor(row.key) + const current = typeof row.current === 'string' ? row.current : String(row.current) + const options = row.options ?? [] + + const choose = async (next: string) => { + if (next === current) return + try { + const response = await setMutation.mutateAsync({key: row.key, value: next}) + if (response.ok) { + markDirty(row.key, row.restartRequired) + toast.success(`${label} set to ${displayLabel(row.key, next)}`) + return + } + + toast.error(response.error.message) + } catch (error) { + toast.error(formatError(error, `Failed to update ${label}`)) + } + } + + return ( +
    +
    + {label} + + {row.description} + +
    + +
    + ) +} + +function displayLabel(key: string, option: string): string { + if (key !== 'language.code') return option + const name = LANGUAGE_NAMES[option] + return name ? `${option} — ${name}` : option +} diff --git a/src/webui/features/settings/components/language-panel.tsx b/src/webui/features/settings/components/language-panel.tsx new file mode 100644 index 000000000..4ee8b9ca4 --- /dev/null +++ b/src/webui/features/settings/components/language-panel.tsx @@ -0,0 +1,42 @@ +import {LoaderCircle} from 'lucide-react' +import {Fragment, useMemo} from 'react' + +import {buildSettingsRows} from '../../../../shared/utils/format-settings' +import {noop} from '../../../lib/noop' +import {SettingsSection} from '../../vc/components/settings-section' +import {useGetSettings} from '../api/list-settings' +import {SettingsRow} from './settings-row' +import {SettingsSkeleton} from './settings-skeleton' + +export function LanguagePanel() { + const {data, error, isError, isLoading, refetch} = useGetSettings() + + const rows = useMemo(() => { + if (!data) return [] + return buildSettingsRows(data.items).filter((row) => row.category === 'language') + }, [data?.items]) + + return ( + : undefined} + description="Language used when ByteRover writes context. Auto matches your input language." + error={isError ? error : undefined} + errorFallback="Failed to load language settings" + onRetry={() => refetch().catch(noop)} + title="Language" + > + {data ? ( +
    + {rows.map((row, index) => ( + + + {index < rows.length - 1 &&
    } + + ))} +
    + ) : ( + + )} + + ) +} diff --git a/src/webui/features/settings/components/settings-row.tsx b/src/webui/features/settings/components/settings-row.tsx index 09278996a..9067fe6eb 100644 --- a/src/webui/features/settings/components/settings-row.tsx +++ b/src/webui/features/settings/components/settings-row.tsx @@ -15,6 +15,7 @@ import {useSetSetting} from '../api/set-setting' import {labelFor} from '../lib/labels' import {useRestartBannerStore} from '../stores/restart-banner-store' import {BooleanSettingsRow} from './boolean-settings-row' +import {EnumSettingsRow} from './enum-settings-row' type Props = { row: SettingsRowData @@ -22,6 +23,7 @@ type Props = { export function SettingsRow({row}: Props) { if (row.type === 'boolean') return + if (row.type === 'enum') return return } diff --git a/src/webui/features/settings/lib/labels.ts b/src/webui/features/settings/lib/labels.ts index 1c5368197..f4b08711c 100644 --- a/src/webui/features/settings/lib/labels.ts +++ b/src/webui/features/settings/lib/labels.ts @@ -1,6 +1,8 @@ const LABELS: Record = { 'agentPool.maxConcurrentTasksPerProject': 'Max parallel tasks per project', 'agentPool.maxSize': 'Max concurrent projects', + 'language.code': 'Language', + 'language.mode': 'Language mode', 'llm.iterationBudgetMs': 'Agentic loop budget', 'llm.requestTimeoutMs': 'LLM request timeout', 'taskHistory.maxEntries': 'Task history size', diff --git a/src/webui/pages/configuration/general.tsx b/src/webui/pages/configuration/general.tsx index a5e5c1237..3b2bf46c8 100644 --- a/src/webui/pages/configuration/general.tsx +++ b/src/webui/pages/configuration/general.tsx @@ -1,4 +1,5 @@ import {ConcurrencyPanel} from '../../features/settings/components/concurrency-panel' +import {LanguagePanel} from '../../features/settings/components/language-panel' import {LlmPanel} from '../../features/settings/components/llm-panel' import {TaskHistoryPanel} from '../../features/settings/components/task-history-panel' import {UpdatesPanel} from 
'../../features/settings/components/updates-panel' @@ -9,6 +10,7 @@ export function GeneralSection() { + ) diff --git a/test/unit/shared/utils/format-settings.test.ts b/test/unit/shared/utils/format-settings.test.ts index 8c9ea961e..cab597be9 100644 --- a/test/unit/shared/utils/format-settings.test.ts +++ b/test/unit/shared/utils/format-settings.test.ts @@ -32,6 +32,20 @@ function makeBooleanItem(current: boolean): SettingsItemDTO { } } +function makeEnumItem(overrides: Partial = {}): SettingsItemDTO { + return { + category: 'language', + current: 'auto', + default: 'auto', + description: 'desc', + key: 'language.mode', + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + ...overrides, + } +} + function makeRow(overrides: Partial = {}): SettingsRow { return { category: 'concurrency', @@ -253,4 +267,59 @@ describe('format-settings (shared)', () => { expect(rows.map((r) => r.category)).to.deep.equal(['concurrency', 'task-history', 'updates']) }) }) + + describe('enum rows', () => { + it('includes enum items in the output with options propagated', () => { + const rows = buildSettingsRows([makeEnumItem()]) + expect(rows).to.have.lengthOf(1) + expect(rows[0].type).to.equal('enum') + expect(rows[0].options).to.deep.equal(['auto', 'fixed']) + }) + + it('formats current=auto as "[ auto ]" and default verbatim', () => { + const row = buildSettingsRows([makeEnumItem({current: 'auto'})])[0] + expect(row.displayCurrent).to.equal('[ auto ]') + expect(row.displayDefault).to.equal('auto') + }) + + it('marks the row as modified when current differs from default', () => { + const row = buildSettingsRows([makeEnumItem({current: 'fixed'})])[0] + expect(row.modified).to.equal(true) + }) + + it('groups language enum rows under category=language', () => { + const row = buildSettingsRows([makeEnumItem()])[0] + expect(row.category).to.equal('language') + }) + + it('orders the language category after updates', () => { + const rows = buildSettingsRows([ + 
makeEnumItem(), + makeItem({category: 'concurrency', key: 'agentPool.maxSize'}), + makeBooleanItem(true), + ]) + expect(rows.map((r) => r.category)).to.deep.equal(['concurrency', 'updates', 'language']) + }) + + it('skips enum items with missing or wrong-typed fields (defensive narrowing)', () => { + const wonky = {...makeEnumItem(), current: 5} as unknown as SettingsItemDTO + expect(buildSettingsRows([wonky])).to.have.lengthOf(0) + }) + + it('parseRowInput: accepts a valid option as ok and rejects an unknown option', () => { + const row = buildSettingsRows([makeEnumItem()])[0] + const ok = parseRowInput(row, 'fixed') + expect(ok.kind).to.equal('ok') + if (ok.kind === 'ok') { + expect(ok.value).to.equal('fixed') + expect(ok.displayValue).to.equal('fixed') + } + + const bad = parseRowInput(row, 'pidgin') + expect(bad.kind).to.equal('error') + if (bad.kind === 'error') { + expect(bad.message).to.match(/Expected one of \[auto, fixed\]/) + } + }) + }) }) From fe56936f6cd07f9dcb1a5ae6c233019e1aa8fef6 Mon Sep 17 00:00:00 2001 From: Cuong Date: Wed, 27 May 2026 15:00:01 +0700 Subject: [PATCH 08/16] feat: [ENG-2974] read language from daemon settings + deprecate brv config set language.* Switches the curate kickoff/continuation language read site from project config to the daemon settings store, and points users at the new brv settings set surface for any future change. Project config remains a fallback for users who still have a per-project override from ENG-2691 so no one loses their setting on upgrade. 
- curate/index.ts: resolveLanguagePreference now calls FileSettingsStore first; falls back to ProjectConfigStore for backward compat - config/set.ts: brv config set language.{mode,code} now fails with a clear "moved to brv settings set" message; pure dispatcher unchanged so the validation path still covers other project-config keys --- src/oclif/commands/config/set.ts | 9 +++++++++ src/oclif/commands/curate/index.ts | 30 +++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/oclif/commands/config/set.ts b/src/oclif/commands/config/set.ts index 26e3d9c04..cf536718e 100644 --- a/src/oclif/commands/config/set.ts +++ b/src/oclif/commands/config/set.ts @@ -48,6 +48,15 @@ export default class ConfigSet extends Command { const {args, flags} = await this.parse(ConfigSet) const format = flags.format as 'json' | 'text' + if (args.key === 'language.mode' || args.key === 'language.code') { + this.fail( + format, + 'deprecated-key', + `'${args.key}' has moved to global settings. 
Run: brv settings set ${args.key} ${args.value}`, + ) + return + } + const projectRoot = resolveProjectRoot() const store = new ProjectConfigStore() const current = await store.read(projectRoot) diff --git a/src/oclif/commands/curate/index.ts b/src/oclif/commands/curate/index.ts index 8a7715b27..063b52742 100644 --- a/src/oclif/commands/curate/index.ts +++ b/src/oclif/commands/curate/index.ts @@ -2,7 +2,9 @@ import {Args, Command, Flags} from '@oclif/core' import type {BrvConfigLanguage} from '../../../server/core/domain/entities/brv-config.js' +import {SETTINGS_KEYS} from '../../../server/core/domain/entities/settings.js' import {ProjectConfigStore} from '../../../server/infra/config/file-config-store.js' +import {FileSettingsStore} from '../../../server/infra/storage/file-settings-store.js' import {continueSession, kickoffSession, resolveProjectRoot} from '../../lib/curate-session.js' import {type DaemonClientOptions, formatConnectionError, withDaemonRetry} from '../../lib/daemon-client.js' import {writeJsonResponse} from '../../lib/json-response.js' @@ -262,13 +264,16 @@ Bad examples: } /** - * Read the per-project language preference from `.brv/config.json`. - * Missing config (fresh project) or missing field returns `undefined`, - * which the kickoff / correction prompts treat as the auto clause — - * match the user's input language. Read failures degrade silently to - * `undefined` so a corrupt config never blocks curate. + * Resolve the language preference. Reads from daemon settings (the + * source of truth) and falls back to `.brv/config.json` for users who + * still have a per-project override from before language moved to + * global settings. Missing or default values return `undefined`, + * which the prompts treat as the auto clause. 
*/ private async resolveLanguagePreference(projectRoot: string): Promise { + const fromSettings = await readLanguageFromSettings() + if (fromSettings !== undefined) return fromSettings + try { const config = await new ProjectConfigStore().read(projectRoot) return config?.language @@ -277,3 +282,18 @@ Bad examples: } } } + +async function readLanguageFromSettings(): Promise { + try { + const store = new FileSettingsStore() + const items = await store.list() + const byKey = new Map(items.map((item) => [item.key, item.current])) + const mode = byKey.get(SETTINGS_KEYS.LANGUAGE_MODE) + const code = byKey.get(SETTINGS_KEYS.LANGUAGE_CODE) + if (mode !== 'fixed') return undefined + if (typeof code !== 'string') return undefined + return {code, mode: 'fixed'} + } catch { + return undefined + } +} From aa7282dd267e5f2f90b098314cd53f87925816a9 Mon Sep 17 00:00:00 2001 From: Cuong Date: Wed, 27 May 2026 15:05:24 +0700 Subject: [PATCH 09/16] test: [ENG-2974] update FileSettingsStore.list keyset to include language.* entries --- test/unit/infra/storage/file-settings-store.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/unit/infra/storage/file-settings-store.test.ts b/test/unit/infra/storage/file-settings-store.test.ts index 6c3a593ac..dcfb4cb6e 100644 --- a/test/unit/infra/storage/file-settings-store.test.ts +++ b/test/unit/infra/storage/file-settings-store.test.ts @@ -49,6 +49,8 @@ describe('FileSettingsStore', () => { expect(keys).to.deep.equal([ 'agentPool.maxConcurrentTasksPerProject', 'agentPool.maxSize', + 'language.code', + 'language.mode', 'llm.iterationBudgetMs', 'llm.requestTimeoutMs', 'taskHistory.maxEntries', From 7ee702df80526dcec6fc0560e38e369631ab918a Mon Sep 17 00:00:00 2001 From: Cuong Date: Wed, 27 May 2026 15:11:17 +0700 Subject: [PATCH 10/16] =?UTF-8?q?fix:=20[ENG-2974]=20satisfy=20WebUI=20str?= =?UTF-8?q?ict-mode=20tsc=20=E2=80=94=20narrow=20enum=20onValueChange=20+?= =?UTF-8?q?=20integer=20parsed.value?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-push tsc rejected: - enum-settings-row: Select.onValueChange signature is (string | null) under the component lib's types; the null branch is unreachable for a non-cleared single-select but the type system needs an explicit guard. - settings-row IntegerSettingsRow: parsed.value widened to (number | string) after enum support landed; narrow to number before passing to toastValue/setMutation so the integer branch keeps compiling under strict. --- .../features/settings/components/enum-settings-row.tsx | 1 + .../features/settings/components/settings-row.tsx | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/webui/features/settings/components/enum-settings-row.tsx b/src/webui/features/settings/components/enum-settings-row.tsx index 27b2ba89d..744de673b 100644 --- a/src/webui/features/settings/components/enum-settings-row.tsx +++ b/src/webui/features/settings/components/enum-settings-row.tsx @@ -57,6 +57,7 @@ export function EnumSettingsRow({row}: Props) {