diff --git a/CHANGELOG.md b/CHANGELOG.md index d88e83482..c2e34eebb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ All notable user-facing changes to ByteRover CLI will be documented in this file. +## [Unreleased] + +### Added +- **ByteRover preserves your input language by default.** When you curate context in Russian, Chinese, Japanese, Vietnamese, or any other language, the calling agent's LLM is now instructed to author body text in the same language (the schema — tag names, attribute names, enum values, paths — stays English so tooling is unaffected). Configure with the new `brv config set` command: + - `brv config set language.mode auto` — match the user's input language (default). + - `brv config set language.mode fixed` + `brv config set language.code <code>` — force a specific language. ISO 639-1 codes accepted: `ar`, `de`, `el`, `en`, `es`, `fi`, `fr`, `he`, `hi`, `id`, `it`, `ja`, `ko`, `nl`, `no`, `pl`, `pt`, `ru`, `sv`, `th`, `tr`, `uk`, `vi`, `zh`. + - `brv config get language.mode` / `brv config get language.code` — read back the current setting. + + CJK queries (Chinese, Japanese, Korean) are now searchable in BM25 — the tokenizer previously split only on whitespace and punctuation, so an entire CJK sentence was treated as one token. **Restoration recipe** for users who prefer the prior implicit-English behavior: `brv config set language.code en` then `brv config set language.mode fixed`. Reported by Dmitriy K — thanks for the thorough reproduction in [#616](https://github.com/campfirein/byterover-cli/issues/616). 
## [3.16.1] ### Fixed diff --git a/src/agent/infra/agent/cipher-agent.ts b/src/agent/infra/agent/cipher-agent.ts index d4e57683a..880066f5a 100644 --- a/src/agent/infra/agent/cipher-agent.ts +++ b/src/agent/infra/agent/cipher-agent.ts @@ -21,9 +21,9 @@ import type {ToolProvider} from '../tools/tool-provider.js' import type {AgentConfig} from './agent-schemas.js' import type {ProviderUpdateConfig} from './provider-update-config.js' -import {SETTINGS_KEYS} from '../../../server/core/domain/entities/settings.js' import {TransportStateEventNames} from '../../../server/core/domain/transport/schemas.js' import {agentLog} from '../../../server/utils/process-logger.js' +import {SETTINGS_KEYS} from '../../../shared/types/settings-keys.js' import {getEffectiveMaxInputTokens, resolveRegistryProvider} from '../../core/domain/llm/index.js' import {STREAMING_EVENT_NAMES} from '../../core/domain/streaming/types.js' import {ToolName} from '../../core/domain/tools/constants.js' diff --git a/src/agent/infra/tools/implementations/cjk-tokenizer.ts b/src/agent/infra/tools/implementations/cjk-tokenizer.ts new file mode 100644 index 000000000..8ecd77b73 --- /dev/null +++ b/src/agent/infra/tools/implementations/cjk-tokenizer.ts @@ -0,0 +1,159 @@ +/** + * BM25 tokenizer with CJK bigram segmentation. + * + * MiniSearch 7.2.0's default tokenizer splits on `\p{Z}\p{P}` (Unicode + * whitespace + punctuation). Latin / Cyrillic / Vietnamese / European + * scripts use whitespace between words and tokenize correctly. CJK scripts + * do not — a sentence like `认证系统使用JWT令牌` becomes a single token, + * so a query for `认证` against indexed CJK content returns zero matches. 
+ * + * Empirical confirmation before this fix (MiniSearch 7.2.0): + * + * const ms = new MiniSearch({fields: ['t'], idField: 'id'}) + * ms.addAll([{id: 1, t: '认证系统使用JWT令牌'}]) + * ms.search('认证') // → [] — broken + * ms.search('Привет мир') // → matches as expected + * + * This tokenizer preserves the default behavior for whitespace-separated + * scripts and adds overlapping-bigram segmentation for CJK runs. Mixed + * Latin+CJK tokens (e.g. `JWT令牌`) split at the script boundary so the + * Latin portion stays a real word token. + * + * Wired via the top-level `tokenize` option on MiniSearch — per the + * library docs and source (`MiniSearch.js:1564-1566`), that single option + * applies at both index and query time unless `searchOptions.tokenize` + * is set, which we leave unset. + */ + +/** + * Unicode ranges treated as CJK for the purposes of bigram segmentation. + * Anything outside these ranges is "non-CJK" and tokenizes by whitespace + * boundaries only. + * + * - `0x4E00–0x9FFF`: CJK Unified Ideographs (Chinese, Japanese kanji) + * - `0x3040–0x309F`: Hiragana + * - `0x30A0–0x30FF`: Katakana + * - `0xAC00–0xD7AF`: Hangul Syllables (Korean) + * + * CJK Extension A/B/C/… are deliberately excluded — they appear in academic + * / historical text but rarely in user content. If a user's corpus needs + * them, extend this list and bump `INDEX_SCHEMA_VERSION` in + * `search-knowledge-service.ts` so cached indexes invalidate. + */ +const CJK_RANGES: ReadonlyArray = [ + [0x4E_00, 0x9F_FF], + [0x30_40, 0x30_9F], + [0x30_A0, 0x30_FF], + [0xAC_00, 0xD7_AF], +] + +function isCjkCodePoint(cp: number): boolean { + for (const [lo, hi] of CJK_RANGES) { + if (cp >= lo && cp <= hi) return true + } + + return false +} + +/** + * Whitespace + punctuation split, matching MiniSearch's default + * `SPACE_OR_PUNCTUATION` regex. Kept verbatim so a future upstream tweak + * is easy to spot via diff. 
+ */ +const SPACE_OR_PUNCTUATION = /[\p{Z}\p{P}]+/u + +/** + * Split a token at boundaries between CJK and non-CJK runs. + * + * - `'JWT令牌'` → `['JWT', '令牌']` (script boundary at index 3) + * - `'认证'` → `['认证']` (single CJK run) + * - `'JWT'` → `['JWT']` (single non-CJK run) + */ +function splitAtCjkBoundary(token: string): string[] { + const segments: string[] = [] + let current = '' + let currentIsCjk: boolean | undefined + + // Iterate by code point so any future range extension into the + // supplementary plane handles surrogate pairs correctly. The current + // four ranges are all BMP, so `for...of` is equivalent to char-by-char + // here — but cheap to be correct. + for (const ch of token) { + const cp = ch.codePointAt(0) + if (cp === undefined) continue + const charIsCjk = isCjkCodePoint(cp) + + if (currentIsCjk === undefined) { + current = ch + currentIsCjk = charIsCjk + } else if (charIsCjk === currentIsCjk) { + current += ch + } else { + segments.push(current) + current = ch + currentIsCjk = charIsCjk + } + } + + if (current.length > 0) segments.push(current) + + return segments +} + +/** + * Emit overlapping bigrams for a CJK run. + * + * - `'认证系统'` (4 chars) → `['认证', '证系', '系统']` + * - `'认证'` (2 chars) → `['认证']` + * - `'认'` (1 char) → `['认']` (unigram fallback so single-char tokens are searchable) + * + * Bigrams are the standard CJK IR compromise: unigrams are too noisy + * (common chars like `的` dominate scoring), trigrams are too sparse + * (miss 2-character compound matches). + */ +function cjkBigrams(run: string): string[] { + const chars = [...run] + if (chars.length <= 1) return chars + + const grams: string[] = [] + for (let i = 0; i < chars.length - 1; i++) { + grams.push(chars[i] + chars[i + 1]) + } + + return grams +} + +/** + * Tokenize text for BM25 indexing and querying. + * + * Algorithm: + * 1. Split on Unicode whitespace + punctuation (matches MiniSearch default). + * 2. 
For each resulting token, split at CJK ↔ non-CJK script boundaries. + * 3. For non-CJK segments, emit the segment as-is. + * 4. For CJK segments, emit overlapping bigrams. + * + * The result is the union — Latin / Cyrillic / Vietnamese behave exactly + * as the MiniSearch default, while CJK runs become searchable. + */ +export function tokenizeWithCjk(text: string): string[] { + const out: string[] = [] + + for (const wsToken of text.split(SPACE_OR_PUNCTUATION)) { + if (wsToken.length === 0) continue + + for (const segment of splitAtCjkBoundary(wsToken)) { + if (segment.length === 0) continue + + // `splitAtCjkBoundary` returns single-script segments, so the + // first code point's classification applies to the whole segment. + const firstCp = segment.codePointAt(0) + if (firstCp !== undefined && isCjkCodePoint(firstCp)) { + out.push(...cjkBigrams(segment)) + } else { + out.push(segment) + } + } + } + + return out +} diff --git a/src/agent/infra/tools/implementations/search-knowledge-service.ts b/src/agent/infra/tools/implementations/search-knowledge-service.ts index e45edb9d2..a650cef41 100644 --- a/src/agent/infra/tools/implementations/search-knowledge-service.ts +++ b/src/agent/infra/tools/implementations/search-knowledge-service.ts @@ -36,6 +36,7 @@ import { import {getFormatForRead} from '../../../../server/infra/render/format/format-detector.js' import {ElementAxisIndex} from '../../../../server/infra/render/reader/element-axis-index.js' import {readHtmlTopicSync} from '../../../../server/infra/render/reader/html-reader.js' +import {tokenizeWithCjk} from './cjk-tokenizer.js' import {isPathLikeQuery, matchMemoryPath, parseSymbolicQuery} from './memory-path-matcher.js' import { buildReferenceIndex, @@ -52,10 +53,7 @@ import { const MAX_CONTEXT_TREE_FILES = 10_000 const DEFAULT_CACHE_TTL_MS = 5000 -/** - * Bump when MINISEARCH_OPTIONS fields/boost change to invalidate cached indexes. - * v7 (ENG-3021): include `` alt + src in HTML topic indexed content. 
- */ +/** Bump when MINISEARCH_OPTIONS fields/boost change to invalidate cached indexes */ const INDEX_SCHEMA_VERSION = 7 /** Only include results whose normalized score is at least this fraction of the top result's score */ @@ -174,6 +172,12 @@ const MINISEARCH_OPTIONS = { prefix: true, }, storeFields: ['title', 'path'] as string[], + // Custom tokenizer adds CJK bigram segmentation alongside the default + // whitespace split. Without it, queries against Chinese / Japanese / + // Korean content return zero matches even when the content is curated + // correctly — see `cjk-tokenizer.ts`. Top-level `tokenize` applies to + // both indexing and querying per MiniSearch's API. + tokenize: tokenizeWithCjk, } interface IndexedDocument { diff --git a/src/oclif/commands/curate/index.ts b/src/oclif/commands/curate/index.ts index 2edcdedfb..1cbeb753d 100644 --- a/src/oclif/commands/curate/index.ts +++ b/src/oclif/commands/curate/index.ts @@ -1,7 +1,11 @@ import {Args, Command, Flags} from '@oclif/core' +import type {BrvConfigLanguage} from '../../../server/core/domain/entities/brv-config.js' import type {CurateSessionEnvelope} from '../../lib/curate-session.js' +import {ProjectConfigStore} from '../../../server/infra/config/file-config-store.js' +import {SettingsEvents, type SettingsListResponse} from '../../../shared/transport/events/settings-events.js' +import {SETTINGS_KEYS} from '../../../shared/types/settings-keys.js' import { continueSession, deleteCurateResponseFile, @@ -122,17 +126,19 @@ Bad examples: protected async dispatchContinuation(args: { confirmOverwrite: boolean format: 'json' | 'text' + language?: BrvConfigLanguage projectRoot: string response: string sessionId: string }): Promise { - const {confirmOverwrite, format, projectRoot, response, sessionId} = args + const {confirmOverwrite, format, language, projectRoot, response, sessionId} = args let envelope: CurateSessionEnvelope | undefined await withDaemonRetry(async (client) => { envelope = await 
continueSession({ client, confirmOverwrite, format, + language, projectRoot, response, sessionId, @@ -466,11 +472,16 @@ Bad examples: // this path so the agent retains the source it already paid an // LLM call to produce. const confirmOverwrite = flags.overwrite ?? false + // Read fresh per continuation — mirrors kickoff so a mid-session + // language change (rare) is honored on the next correction prompt. + // The same fallback chain applies (daemon settings → project config). + const language = await this.resolveLanguagePreference(projectRoot) let dispatchEnvelope: CurateSessionEnvelope try { dispatchEnvelope = await this.dispatchContinuation({ confirmOverwrite, format, + language, projectRoot, response, sessionId, @@ -550,9 +561,74 @@ Bad examples: return } - const envelope = await kickoffSession({content, projectRoot: resolveProjectRoot()}) + const projectRoot = resolveProjectRoot() + const language = await this.resolveLanguagePreference(projectRoot) + const envelope = await kickoffSession({content, language, projectRoot}) this.emitToolModeEnvelope(envelope, format) } + + /** + * Resolve the language preference. Daemon settings (the source of + * truth) take precedence; a per-project `.brv/config.json language` + * field acts as a fallback for users who configured language before + * it moved to global settings. + * + * Note on precedence: only daemon `mode: 'fixed'` short-circuits the + * fallback. An explicit daemon `mode: 'auto'` reads as "no opinion" + * and falls through to project config, so a stale project-config + * `fixed/X` will still win. This is intentional for the migration + * window — distinguishing "user explicitly chose auto" from "user + * never touched settings" needs raw-overrides access that the + * transport doesn't expose today, and the bug only manifests for + * users with a pre-existing per-project `language` field. Revisit + * once project-config language is fully sunset. 
+ */ + private async resolveLanguagePreference(projectRoot: string): Promise { + const fromSettings = await readLanguageFromSettings() + if (fromSettings !== undefined) return fromSettings + + try { + const config = await new ProjectConfigStore().read(projectRoot) + return config?.language + } catch { + return undefined + } + } +} + +/** + * Reads the language preference from daemon settings via the same + * `SettingsEvents.LIST` transport every other settings consumer uses. + * + * Exported (and accepts a `DaemonClientOptions`) so tests can drive + * `withDaemonRetry` with a stubbed transport client. Returns `undefined` + * on any non-fixed mode, missing/non-string code, or daemon error — + * callers should treat `undefined` as "no opinion" and fall back to + * project config / the auto clause. + * + * Uses a tight retry budget by default (1 retry, 0ms delay) because this + * runs on every `brv curate` kickoff: `withDaemonRetry`'s 10× retries × + * 1s default would block kickoff for ~9s when the daemon is unreachable + * before the catch trips the project-config fallback. The caller can + * override either field by passing it in `options`. 
+ */ +export async function readLanguageFromSettings( + options?: DaemonClientOptions, +): Promise { + try { + const response = await withDaemonRetry( + async (client) => client.requestWithAck(SettingsEvents.LIST), + {maxRetries: 1, retryDelayMs: 0, ...options}, + ) + const byKey = new Map(response.items.map((item) => [item.key, item.current])) + const mode = byKey.get(SETTINGS_KEYS.LANGUAGE_MODE) + const code = byKey.get(SETTINGS_KEYS.LANGUAGE_CODE) + if (mode !== 'fixed') return undefined + if (typeof code !== 'string') return undefined + return {code, mode: 'fixed'} + } catch { + return undefined + } } /** diff --git a/src/oclif/commands/settings/get.ts b/src/oclif/commands/settings/get.ts index 34ad47a6f..ed5f05942 100644 --- a/src/oclif/commands/settings/get.ts +++ b/src/oclif/commands/settings/get.ts @@ -82,6 +82,10 @@ export default class SettingsGet extends Command { this.log(` range: ${range}`) } + if (item.type === 'enum' && item.options !== undefined && item.options.length > 0) { + this.log(` allowed: ${item.options.join(', ')}`) + } + this.log(` scope: ${item.scope ?? 'global'}`) } @@ -99,12 +103,14 @@ export default class SettingsGet extends Command { if (item.category !== undefined) payload.category = item.category if (item.unit !== undefined) payload.unit = item.unit if (item.scope !== undefined) payload.scope = item.scope + if (item.type === 'enum' && item.options !== undefined) payload.options = item.options return payload } } -function renderValue(item: SettingsItemDTO, value: boolean | number): string { +function renderValue(item: SettingsItemDTO, value: boolean | number | string): string { if (typeof value === 'boolean') return value ? 
'true' : 'false' + if (typeof value === 'string') return value return renderInteger(item, value) } diff --git a/src/oclif/commands/settings/index.ts b/src/oclif/commands/settings/index.ts index 9375c30e6..2d0265613 100644 --- a/src/oclif/commands/settings/index.ts +++ b/src/oclif/commands/settings/index.ts @@ -5,23 +5,16 @@ import { type SettingsItemDTO, type SettingsListResponse, } from '../../../shared/transport/events/settings-events.js' +import { + CATEGORY_HEADERS, + CATEGORY_ORDER, + type SettingsRowCategory, + toRowCategory, +} from '../../../shared/types/settings-row.js' import {formatCount, formatDuration} from '../../../shared/utils/format-duration.js' import {type DaemonClientOptions, formatConnectionError, withDaemonRetry} from '../../lib/daemon-client.js' import {writeJsonResponse} from '../../lib/json-response.js' -type CategoryName = 'concurrency' | 'llm' | 'task-history' | 'updates' - -const CATEGORY_ORDER: readonly CategoryName[] = ['concurrency', 'llm', 'task-history', 'updates'] - -const CATEGORY_HEADERS: Readonly> = { - concurrency: 'CONCURRENCY', - llm: 'LLM', - 'task-history': 'TASK HISTORY', - updates: 'UPDATES', -} - -const OTHER_HEADER = 'OTHER' - export default class Settings extends Command { public static description = 'List user-configurable BRV settings. Changes apply after `brv restart`.' @@ -83,22 +76,15 @@ export default class Settings extends Command { this.log('') } - const otherRows = byCategory.get('__other__') - if (otherRows && otherRows.length > 0) { - this.log(OTHER_HEADER) - for (const row of otherRows) this.log(formatRow(row)) - this.log('') - } - this.log('Set: brv settings set ') this.log('Reset: brv settings reset ') } } -function groupByCategory(items: readonly SettingsItemDTO[]): Map { - const map = new Map() +function groupByCategory(items: readonly SettingsItemDTO[]): Map { + const map = new Map() for (const item of items) { - const bucket = item.category ?? 
'__other__' + const bucket: SettingsRowCategory = toRowCategory(item.category) const list = map.get(bucket) ?? [] list.push(item) map.set(bucket, list) @@ -114,8 +100,9 @@ function formatRow(item: SettingsItemDTO): string { return ` ${pad(item.key, 40)} ${pad(current, 7)} (default ${defaultStr})${''.padEnd(Math.max(0, 8 - defaultStr.length))} ${range}` } -function renderValue(item: SettingsItemDTO, value: boolean | number): string { +function renderValue(item: SettingsItemDTO, value: boolean | number | string): string { if (typeof value === 'boolean') return value ? 'true' : 'false' + if (typeof value === 'string') return value return renderInteger(item, value) } diff --git a/src/oclif/commands/settings/reset.ts b/src/oclif/commands/settings/reset.ts index 57d00803f..7cb68f7fe 100644 --- a/src/oclif/commands/settings/reset.ts +++ b/src/oclif/commands/settings/reset.ts @@ -97,8 +97,9 @@ export default class SettingsReset extends Command { } } -function renderValue(item: SettingsItemDTO, value: boolean | number): string { +function renderValue(item: SettingsItemDTO, value: boolean | number | string): string { if (typeof value === 'boolean') return value ? 
'true' : 'false' + if (typeof value === 'string') return value if (item.unit === 'ms') return formatDuration(value) return formatCount(value) } diff --git a/src/oclif/commands/settings/set.ts b/src/oclif/commands/settings/set.ts index bdc33eb8a..f841a8568 100644 --- a/src/oclif/commands/settings/set.ts +++ b/src/oclif/commands/settings/set.ts @@ -119,7 +119,7 @@ export default class SettingsSet extends Command { protected async writeSetting( key: string, - value: boolean | number, + value: boolean | number | string, options?: DaemonClientOptions, ): Promise { return withDaemonRetry( @@ -131,7 +131,7 @@ export default class SettingsSet extends Command { } type ParseResult = - | {readonly display: string; readonly kind: 'ok'; readonly value: boolean | number} + | {readonly display: string; readonly kind: 'ok'; readonly value: boolean | number | string} | {readonly kind: 'error'; readonly message: string} const BOOLEAN_TOKENS = new Map([ @@ -149,10 +149,24 @@ const BOOLEAN_TOKENS_HINT = 'true, false, on, off, 1, 0, yes, no' function parseValue(descriptor: SettingsItemDTO, raw: string): ParseResult { if (descriptor.type === 'boolean') return parseAsBoolean(descriptor, raw) + if (descriptor.type === 'enum') return parseAsEnum(descriptor, raw) if (descriptor.unit === 'ms') return parseAsDuration(descriptor, raw) return parseAsCount(descriptor, raw) } +function parseAsEnum(descriptor: SettingsItemDTO, raw: string): ParseResult { + const trimmed = raw.trim() + const options = descriptor.options ?? 
[] + if (!options.includes(trimmed)) { + return { + kind: 'error', + message: `${descriptor.key} expected one of [${options.join(', ')}], got '${raw}'.`, + } + } + + return {display: trimmed, kind: 'ok', value: trimmed} +} + function parseAsBoolean(descriptor: SettingsItemDTO, raw: string): ParseResult { const lowered = raw.trim().toLowerCase() const value = BOOLEAN_TOKENS.get(lowered) diff --git a/src/oclif/lib/curate-session.ts b/src/oclif/lib/curate-session.ts index 53d8cbee9..7905238cc 100644 --- a/src/oclif/lib/curate-session.ts +++ b/src/oclif/lib/curate-session.ts @@ -6,6 +6,7 @@ import {lstat, mkdir, readFile, rm, unlink, writeFile} from 'node:fs/promises' import {dirname, join} from 'node:path' import {z} from 'zod' +import type {BrvConfigLanguage} from '../../server/core/domain/entities/brv-config.js' import type {CurateHtmlDirectResult} from '../../server/core/interfaces/executor/i-curate-executor.js' import type {HtmlWriteError} from '../../server/infra/render/writer/html-writer.js' import type {CurateMeta} from '../../shared/curate-meta.js' @@ -174,6 +175,13 @@ type CurateSessionState = { type KickoffOptions = { content: string + /** + * Per-project language preference loaded from `.brv/config.json`. Threaded + * into the kickoff prompt so the calling agent's LLM authors body text in + * the configured language. `undefined` (no config or no language field) + * defaults to the auto clause — match the user's input language. + */ + language?: BrvConfigLanguage projectRoot: string } @@ -195,6 +203,12 @@ type ContinueOptions = { * mode). Defaults to 'json' — matches the agent-facing default. */ format?: 'json' | 'text' + /** + * Per-project language preference loaded from `.brv/config.json`. Threaded + * into the correction prompt — read fresh on each continuation, so a + * mid-session config change (rare) is honored on the next retry. 
+ */ + language?: BrvConfigLanguage projectRoot: string response: string sessionId: string @@ -207,7 +221,7 @@ type ContinueOptions = { * to author HTML". */ export async function kickoffSession(options: KickoffOptions): Promise { - const {content, projectRoot} = options + const {content, language, projectRoot} = options const sessionId = randomUUID() const state: CurateSessionState = { @@ -222,7 +236,7 @@ export async function kickoffSession(options: KickoffOptions): Promise { - const {client, confirmOverwrite = false, format = 'json', projectRoot, response, sessionId} = options + const {client, confirmOverwrite = false, format = 'json', language, projectRoot, response, sessionId} = options // Reject non-uuid session ids before any path join — see SESSION_ID_RE // for the threat model. Same `kind` as "session not found" because @@ -556,6 +570,7 @@ export async function continueSession(options: ContinueOptions): Promise { return false } +/** + * Validate the shape of a `language` field on a config JSON object. + * Accepts `undefined` (the field is optional). For present values: + * - must be a non-null object with `mode: 'auto' | 'fixed'` + * - `code` is required when `mode === 'fixed'` (silent fallback to + * English would otherwise be possible at prompt time) + */ +const isOptionalLanguageJson = (value: unknown): boolean => { + if (value === undefined) return true + if (typeof value !== 'object' || value === null) return false + const lang = value as Record + if (lang.mode !== 'auto' && lang.mode !== 'fixed') return false + if (lang.code !== undefined && typeof lang.code !== 'string') return false + if (lang.mode === 'fixed' && typeof lang.code !== 'string') return false + return true +} + /** * Type guard for BrvConfigFromJson - validates JSON structure at runtime. * Note: version is optional in this check (old configs may not have it). 
@@ -90,6 +122,7 @@ const isBrvConfigJson = (json: unknown): json is BrvConfigFromJson => { if (obj.cipherAgentModes !== undefined && !Array.isArray(obj.cipherAgentModes)) return false if (obj.version !== undefined && typeof obj.version !== 'string') return false if (obj.reviewDisabled !== undefined && typeof obj.reviewDisabled !== 'boolean') return false + if (!isOptionalLanguageJson(obj.language)) return false return true } @@ -106,6 +139,7 @@ export class BrvConfig { public readonly createdAt: string public readonly cwd?: string public readonly ide?: Agent + public readonly language?: BrvConfigLanguage public readonly reviewDisabled?: boolean public readonly spaceId?: string public readonly spaceName?: string @@ -125,6 +159,7 @@ export class BrvConfig { this.createdAt = params.createdAt this.cwd = params.cwd this.ide = params.ide + this.language = params.language this.reviewDisabled = params.reviewDisabled this.spaceId = params.spaceId this.spaceName = params.spaceName @@ -218,6 +253,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled: this.reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, @@ -252,6 +288,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, @@ -273,6 +310,7 @@ export class BrvConfig { createdAt: new Date().toISOString(), cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled: this.reviewDisabled, spaceId: space.id, spaceName: space.name, @@ -294,6 +332,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + language: this.language, reviewDisabled: this.reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, diff --git a/src/server/core/domain/entities/settings.ts b/src/server/core/domain/entities/settings.ts index 11a11bfe3..b44bf0991 100644 --- 
a/src/server/core/domain/entities/settings.ts +++ b/src/server/core/domain/entities/settings.ts @@ -1,3 +1,5 @@ +import {LANGUAGE_NAMES} from '../../../../shared/language/language-names.js' +import {SETTINGS_KEYS} from '../../../../shared/types/settings-keys.js' import { AGENT_LLM_ITERATION_BUDGET_MS, AGENT_LLM_REQUEST_TIMEOUT_MS, @@ -12,7 +14,7 @@ import { * and TUI render output (uppercased). Web docs / WebUI consume this * field to render the same groupings independently of key naming. */ -export type SettingCategory = 'concurrency' | 'llm' | 'task-history' | 'updates' +export type SettingCategory = 'concurrency' | 'language' | 'llm' | 'task-history' | 'updates' /** * Value-kind for dispatch between the duration formatter / parser @@ -48,15 +50,21 @@ export type BooleanSettingDescriptor = BaseSettingDescriptor & { readonly type: 'boolean' } +export type EnumSettingDescriptor = BaseSettingDescriptor & { + readonly default: string + readonly options: readonly string[] + readonly type: 'enum' +} + /** * Descriptor for a single user-configurable setting. Discriminated on * `type` so consumers narrow with a single check before reading - * type-specific fields (`min`/`max` on integers, etc). + * type-specific fields (`min`/`max` on integers, `options` on enums, etc). * * Defaults reference the existing constants module so a constant change * automatically updates the setting's default. */ -export type SettingDescriptor = BooleanSettingDescriptor | IntegerSettingDescriptor +export type SettingDescriptor = BooleanSettingDescriptor | EnumSettingDescriptor | IntegerSettingDescriptor /** * View of one setting: the key, the user's current override (or the default @@ -64,26 +72,12 @@ export type SettingDescriptor = BooleanSettingDescriptor | IntegerSettingDescrip * shapes; consumers narrow on the corresponding descriptor's `type`. 
*/ export type SettingItem = { - readonly current: boolean | number - readonly default: boolean | number + readonly current: boolean | number | string + readonly default: boolean | number | string readonly key: string readonly restartRequired: boolean } -/** - * Single source of truth for setting key names. Importers must reference - * these constants instead of inline string literals so a rename of one - * key is a typecheck error at every call site (validator, bootstrap, - * agent snapshot read, CLI tests). - */ -export const SETTINGS_KEYS = { - AGENT_POOL_MAX_CONCURRENT_TASKS: 'agentPool.maxConcurrentTasksPerProject', - AGENT_POOL_MAX_SIZE: 'agentPool.maxSize', - LLM_ITERATION_BUDGET_MS: 'llm.iterationBudgetMs', - LLM_REQUEST_TIMEOUT_MS: 'llm.requestTimeoutMs', - TASK_HISTORY_MAX_ENTRIES: 'taskHistory.maxEntries', - UPDATE_CHECK_FOR_UPDATES: 'update.checkForUpdates', -} as const export const SETTINGS_REGISTRY: readonly SettingDescriptor[] = [ { @@ -146,6 +140,24 @@ export const SETTINGS_REGISTRY: readonly SettingDescriptor[] = [ restartRequired: false, type: 'boolean', }, + { + category: 'language', + default: 'auto', + description: 'Match input language (auto) or force a fixed language for written output', + key: SETTINGS_KEYS.LANGUAGE_MODE, + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + }, + { + category: 'language', + default: 'en', + description: 'ISO-639-1 code applied when mode is fixed; ignored in auto mode', + key: SETTINGS_KEYS.LANGUAGE_CODE, + options: Object.keys(LANGUAGE_NAMES), + restartRequired: false, + type: 'enum', + }, ] export function findSettingDescriptor(key: string): SettingDescriptor | undefined { diff --git a/src/server/core/domain/render/curate-prompt-builder.ts b/src/server/core/domain/render/curate-prompt-builder.ts index ea0a7ef52..8b2e5edf5 100644 --- a/src/server/core/domain/render/curate-prompt-builder.ts +++ b/src/server/core/domain/render/curate-prompt-builder.ts @@ -1,7 +1,9 @@ import type 
{HtmlWriteError} from '../../../infra/render/writer/html-writer.js' +import type {BrvConfigLanguage} from '../entities/brv-config.js' import {ELEMENT_REGISTRY} from '../../../infra/render/elements/registry.js' import {ELEMENT_NAMES} from './element-types.js' +import {buildLanguageClause} from './language-clause.js' /** * Curate-prompt builder for tool mode. @@ -52,7 +54,7 @@ export const CURATE_SCHEMA_PROMPT: string = buildSchemaPrompt() * agent ingested (READMEs, files, prior chat) so it cannot be * trusted as plain text. */ -export function buildGeneratePrompt(options: {userIntent: string}): string { +export function buildGeneratePrompt(options: {language?: BrvConfigLanguage; userIntent: string}): string { return [ 'You are authoring a `` HTML document for a knowledge base.', '', @@ -64,6 +66,10 @@ export function buildGeneratePrompt(options: {userIntent: string}): string { '', PATH_FORMAT, '', + '# Language', + '', + buildLanguageClause(options.language), + '', '# Element vocabulary (closed)', '', CURATE_SCHEMA_PROMPT, @@ -88,10 +94,11 @@ export function buildGeneratePrompt(options: {userIntent: string}): string { */ export function buildCorrectionPrompt(options: { errors: readonly HtmlWriteError[] + language?: BrvConfigLanguage previousHtml: string userIntent: string }): string { - const {errors, previousHtml, userIntent} = options + const {errors, language, previousHtml, userIntent} = options const fixInstructions = errors.length === 0 ? 'No structured errors were reported. Re-emit the document carefully and double-check every required attribute.' 
@@ -130,6 +137,10 @@ export function buildCorrectionPrompt(options: { '', OUTPUT_CONTRACT, '', + '# Language', + '', + buildLanguageClause(language), + '', '# Errors to fix', '', fixInstructions, diff --git a/src/server/core/domain/render/language-clause.ts b/src/server/core/domain/render/language-clause.ts new file mode 100644 index 000000000..a417f2c6f --- /dev/null +++ b/src/server/core/domain/render/language-clause.ts @@ -0,0 +1,57 @@ +/** + * Language-preservation clause for curate prompts. + * + * Single source of truth for the clause text. Every downstream injection + * surface — `buildGeneratePrompt`, `buildCorrectionPrompt`, and the MCP + * `brv-curate` tool description — imports `buildLanguageClause` and + * emits the same string. A wording revision is a one-file change. + * + * Schema-key invariant: the clause must mention that tag names, attribute + * names, attribute enum values, and `path` stay English. The element- + * registry Zod schemas enforce this structurally at the writer boundary — + * the clause mentions it so the calling agent's LLM doesn't burn a + * correction round-trip authoring `` or `path="безопасность/..."` + * that would fail validation downstream. + */ + +import type {BrvConfigLanguage} from '../entities/brv-config.js' + +import {LANGUAGE_NAMES} from '../../../../shared/language/language-names.js' + +const AUTO_CLAUSE = + "Match the user's input language for human-readable content: body text of `` elements, list items, and the `title` / `summary` attributes on ``. Keep tag names, attribute names, enum values, and the `path` attribute in English for tooling consistency. Code snippets and identifiers stay verbatim." + +function buildFixedClause(languageName: string): string { + return `Write all human-readable content (body text of \`\` elements, list items, \`title\` / \`summary\` attrs) in ${languageName}. Keep tag names, attribute names, enum values, and \`path\` in English. 
Code snippets and identifiers stay verbatim.` +} + +/** + * Return the language-preservation clause text for a config's language + * preference. + * + * - `undefined` or `{mode: 'auto'}` → the auto clause: "match the user's + * input language". + * - `{mode: 'fixed', code}` where `code` is in `LANGUAGE_NAMES` → the + * fixed clause referencing the mapped English name (e.g. "Russian"). + * - `{mode: 'fixed', code}` where `code` is unknown → the fixed clause + * with the raw code in double quotes (e.g. `in "xx"`). Degrades + * gracefully so a future ISO code we haven't mapped yet still produces + * a usable clause. + * + * `{mode: 'fixed'}` without `code` is rejected by `isBrvConfigJson` at + * load time and cannot reach here under normal operation; the function + * still defends against it by returning the auto clause rather than + * throwing — a malformed config should degrade, not crash a write path. + */ +export function buildLanguageClause(language?: BrvConfigLanguage): string { + if (language === undefined || language.mode === 'auto') { + return AUTO_CLAUSE + } + + if (language.code === undefined) { + return AUTO_CLAUSE + } + + const name = LANGUAGE_NAMES[language.code] ?? `"${language.code}"` + return buildFixedClause(name) +} diff --git a/src/server/core/interfaces/storage/i-settings-store.ts b/src/server/core/interfaces/storage/i-settings-store.ts index e9f6ad96a..0b69bf042 100644 --- a/src/server/core/interfaces/storage/i-settings-store.ts +++ b/src/server/core/interfaces/storage/i-settings-store.ts @@ -12,7 +12,7 @@ export type SettingsStartupSnapshot = { * Daemon startup logs this once; all values fall back to defaults. 
*/ readonly parseError?: string - readonly values: Readonly> + readonly values: Readonly> } /** diff --git a/src/server/infra/daemon/settings-bootstrap.ts b/src/server/infra/daemon/settings-bootstrap.ts index 3afa78577..8c2f0aed8 100644 --- a/src/server/infra/daemon/settings-bootstrap.ts +++ b/src/server/infra/daemon/settings-bootstrap.ts @@ -1,11 +1,11 @@ import type {ISettingsStore, SettingsStartupSnapshot} from '../../core/interfaces/storage/i-settings-store.js' +import {SETTINGS_KEYS} from '../../../shared/types/settings-keys.js' import { AGENT_MAX_CONCURRENT_TASKS, AGENT_POOL_MAX_SIZE, TASK_HISTORY_DEFAULT_MAX_ENTRIES, } from '../../constants.js' -import {SETTINGS_KEYS} from '../../core/domain/entities/settings.js' /** * Daemon-side resolved view of every settings key the bootstrap path diff --git a/src/server/infra/mcp/tools/brv-curate-tool.ts b/src/server/infra/mcp/tools/brv-curate-tool.ts index f230ebd63..cc0e78411 100644 --- a/src/server/infra/mcp/tools/brv-curate-tool.ts +++ b/src/server/infra/mcp/tools/brv-curate-tool.ts @@ -12,6 +12,7 @@ import type {HtmlWriteError} from '../../render/writer/html-writer.js' import {CurateMetaSchema} from '../../../../shared/curate-meta.js' import {encodeCurateHtmlContent} from '../../../../shared/transport/curate-html-content.js' import {CURATE_SCHEMA_PROMPT} from '../../../core/domain/render/curate-prompt-builder.js' +import {buildLanguageClause} from '../../../core/domain/render/language-clause.js' import {TransportTaskEventNames} from '../../../core/domain/transport/schemas.js' import {appendDriftFooter} from './drift-footer.js' import {associateProjectWithRetry, type McpStartupProjectContext, resolveMcpTaskContext} from './mcp-project-context.js' @@ -48,6 +49,16 @@ const TOOL_DESCRIPTION = [ '- Inside `
  • `, write plain text only — no leading `-`, `*`, `•`, `1.`/`2.` markers; the renderer adds them via CSS.', '- `` body: emit directly with HTML entities for `<`, `>`, `&`. Do NOT wrap in `` — HTML5 parses CDATA as a bogus comment that the first `-->` closes. Example: `graph LR; A -->|x| B`.', '', + // Auto clause unconditional: the MCP tool description is built once at + // server-boot, so it cannot read live config. Per-call fixed-mode is + // honored via the oclif `brv curate` kickoff prompt (which IS dynamic). + // MCP-only consumers under `language: { mode: 'fixed' }` see the auto + // clause here; their input language still gets preserved because auto + // says "match the input language". + '# Language', + '', + buildLanguageClause(), + '', '# Path format', '- The `path` attribute on is `/` or `//`, snake_case segments.', '- Pick descriptive domain names (1-3 words). Reuse existing domains where they fit; avoid generic names like `misc`, `general`.', diff --git a/src/server/infra/storage/file-settings-store.ts b/src/server/infra/storage/file-settings-store.ts index 46ca7a34b..c7adff9be 100644 --- a/src/server/infra/storage/file-settings-store.ts +++ b/src/server/infra/storage/file-settings-store.ts @@ -17,7 +17,8 @@ type SettingsFile = { * the file may legitimately retain pre-existing invalid entries that * `reset` is forbidden from collateral-damaging (the daemon startup * loader handles those via warnings). `set` writes only validated - * numeric values; partition() filters at read time. + * values (booleans, integers within range, or canonical enum options + * — see SettingsValidator); partition() filters at read time. */ readonly values: Record readonly version: string @@ -63,7 +64,7 @@ export class FileSettingsStore implements ISettingsStore { current: overrides[key] ?? 
descriptor.default, default: descriptor.default, key: descriptor.key, - restartRequired: true, + restartRequired: descriptor.restartRequired, } } @@ -73,7 +74,7 @@ export class FileSettingsStore implements ISettingsStore { current: overrides[descriptor.key] ?? descriptor.default, default: descriptor.default, key: descriptor.key, - restartRequired: true, + restartRequired: descriptor.restartRequired, })) } @@ -132,7 +133,7 @@ export class FileSettingsStore implements ISettingsStore { return join(this.baseDir, SETTINGS_FILE) } - private async readOverrides(): Promise> { + private async readOverrides(): Promise> { const raw = await this.readRawValues() const {valid} = this.validator.partition(raw) return {...valid} diff --git a/src/server/infra/storage/settings-validator.ts b/src/server/infra/storage/settings-validator.ts index c37fc2762..3f4b8313a 100644 --- a/src/server/infra/storage/settings-validator.ts +++ b/src/server/infra/storage/settings-validator.ts @@ -1,10 +1,12 @@ import type { BooleanSettingDescriptor, + EnumSettingDescriptor, IntegerSettingDescriptor, SettingDescriptor, } from '../../core/domain/entities/settings.js' -import {findSettingDescriptor, SETTINGS_KEYS} from '../../core/domain/entities/settings.js' +import {SETTINGS_KEYS} from '../../../shared/types/settings-keys.js' +import {findSettingDescriptor} from '../../core/domain/entities/settings.js' export class UnknownSettingKeyError extends Error { public readonly key: string @@ -30,7 +32,7 @@ export class InvalidSettingValueError extends Error { export type PartitionedSettings = { readonly invalid: ReadonlyArray<{readonly key: string; readonly reason: string; readonly value: unknown}> - readonly valid: Readonly> + readonly valid: Readonly> } export type CouplingViolation = { @@ -56,7 +58,7 @@ export class SettingsValidator { * log a warning about. 
*/ public partition(record: Record): PartitionedSettings { - const valid: Record = {} + const valid: Record = {} const invalid: Array<{key: string; reason: string; value: unknown}> = [] for (const [key, value] of Object.entries(record)) { @@ -97,9 +99,9 @@ export class SettingsValidator { /** * Validates a single key/value pair. Throws on unknown key or invalid value. * Returns the coerced value on success (integer for integer descriptors, - * boolean for boolean descriptors). + * boolean for boolean descriptors, the canonical option for enum descriptors). */ - public validate(key: string, value: unknown): boolean | number { + public validate(key: string, value: unknown): boolean | number | string { const descriptor = this.validateKey(key) return this.validateAgainst(descriptor, value) } @@ -136,12 +138,33 @@ export class SettingsValidator { return descriptor } - private validateAgainst(descriptor: SettingDescriptor, value: unknown): boolean | number { + private validateAgainst(descriptor: SettingDescriptor, value: unknown): boolean | number | string { if (descriptor.type === 'boolean') return validateBoolean(descriptor, value) + if (descriptor.type === 'enum') return validateEnum(descriptor, value) return validateInteger(descriptor, value) } } +function validateEnum(descriptor: EnumSettingDescriptor, value: unknown): string { + if (typeof value !== 'string') { + throw new InvalidSettingValueError( + descriptor.key, + value, + `expected one of [${descriptor.options.join(', ')}], got ${describeType(value)}`, + ) + } + + if (!descriptor.options.includes(value)) { + throw new InvalidSettingValueError( + descriptor.key, + value, + `'${value}' is not one of [${descriptor.options.join(', ')}]`, + ) + } + + return value +} + function validateInteger(descriptor: IntegerSettingDescriptor, value: unknown): number { if (typeof value !== 'number' || !Number.isInteger(value)) { throw new InvalidSettingValueError( @@ -174,7 +197,7 @@ function validateBoolean(descriptor: 
BooleanSettingDescriptor, value: unknown): return value } -function numericSubset(values: Readonly>): Record { +function numericSubset(values: Readonly>): Record { const result: Record = {} for (const [key, value] of Object.entries(values)) { if (typeof value === 'number') result[key] = value diff --git a/src/server/infra/transport/handlers/settings-handler.ts b/src/server/infra/transport/handlers/settings-handler.ts index d48410cce..6650f9569 100644 --- a/src/server/infra/transport/handlers/settings-handler.ts +++ b/src/server/infra/transport/handlers/settings-handler.ts @@ -110,7 +110,7 @@ function restartRequiredFor(key: string): boolean { * Range, coupling, and fractional-number violations are left to the store's * validator and still surface as `invalid_value`. */ -function checkValueType(key: string, value: boolean | number): SettingsErrorDTO | undefined { +function checkValueType(key: string, value: boolean | number | string): SettingsErrorDTO | undefined { const descriptor = findSettingDescriptor(key) if (descriptor === undefined) return undefined @@ -137,6 +137,17 @@ function checkValueType(key: string, value: boolean | number): SettingsErrorDTO } } + if (descriptor.type === 'enum' && got !== 'string') { + return { + code: 'invalid_value_type', + expected: 'enum', + got, + key, + message: `expected string for '${key}', got ${got}`, + value, + } + } + return undefined } @@ -149,7 +160,7 @@ function toItemDTO(item: SettingItem): SettingsItemDTO { return descriptorToDTO(descriptor, item.current) } -function descriptorToDTO(descriptor: SettingDescriptor, current: boolean | number): SettingsItemDTO { +function descriptorToDTO(descriptor: SettingDescriptor, current: boolean | number | string): SettingsItemDTO { const dto: SettingsItemDTO = { current, default: descriptor.default, @@ -165,6 +176,10 @@ function descriptorToDTO(descriptor: SettingDescriptor, current: boolean | numbe if (descriptor.unit !== undefined) dto.unit = descriptor.unit } + if 
(descriptor.type === 'enum') { + dto.options = descriptor.options + } + return dto } diff --git a/src/server/templates/skill/onboarding.md b/src/server/templates/skill/onboarding.md index a145d89f2..de03c018f 100644 --- a/src/server/templates/skill/onboarding.md +++ b/src/server/templates/skill/onboarding.md @@ -5,7 +5,7 @@ description: "Use when the user asks for a tour, intro, or overview of ByteRover # ByteRover Onboarding Tour -A 90-second guided introduction. Three agent messages total: **learn → demonstrate → wrap**. +A 90-second guided introduction. Three beats — **learn → demonstrate → wrap** — with **one question per turn**. The learn beat asks the language question first (its own message), then the persona interview. The tour teaches the user that ByteRover remembers facts about them and their work — and that this memory is local, private, and starts shaping the agent's behavior immediately. @@ -22,7 +22,9 @@ If the user already knows ByteRover and asks a specific question, do NOT run the ## Budget -Three **agent** messages, with natural user turns in between. Roughly 90 seconds end-to-end. Do not exceed three agent messages. Do not feature-dump. +Three **agent** beats — **learn → demonstrate → wrap** — with natural user turns in between. Roughly 90 seconds end-to-end. Do not feature-dump. + +**Ask one question per turn — never two in the same message.** The learn beat (Message 1) has two short asks: the **language question first**, on its own, then the **persona interview** as a separate message only after the user has answered the language question. Do NOT bundle them. Asking both at once forces the user to juggle two unrelated answers and buries the language choice. The agent does NOT auto-fire the next message. Each message ends and waits for the user to respond — even a one-word "ok" or "go" is enough. This gives the user space to look at the artifact (or the web UI URL) before the next step. 
If the user asks a clarifying question instead of acknowledging, answer it briefly and resume the tour at the next agent turn — the question doesn't burn a tour-message slot. @@ -46,7 +48,31 @@ Example: Keep it to **one sentence**. Don't enumerate features. Don't explain mechanics (the save/retrieve loop is named later in this message, after the demo). The primer answers "what is this thing?" — nothing more. -Then run a **quick interview** — one combined open question that asks both about their work **and** about the pain that brought them to ByteRover. Do not present a form, a list of options, or a multiple-choice menu. Let the user answer however feels natural. +**Then ask which written language to save their memory in — and ask ONLY this.** This is the tour's **first question**, and it gets its own message. Right after the trust opener and the one-line primer, ask the language question, then **stop and wait** — do NOT also ask the persona/interview question in the same message (that comes next, after the user answers). One question per turn. Frame it warmly (reuse the primer's "I'll … by default" cadence so trust → orient → language reads as one continuous breath), offer 2-3 example languages at most, and never print the full supported-language list. This is NOT a heading and NOT a confirmation form — just one open line. + +Example (the whole of Message 1's first turn — trust, primer, then this and nothing more): + +> "One quick thing before we dive in: what language should I save your memory in? I'll match however you write to me by default — or just name one (Vietnamese, 日本語, Spanish…) and I'll keep everything in that." + +**Then stop and wait for their answer.** Do not ask the interview question yet. + +**When the user answers, act on it before you ask anything else** (set the language first, so the saved artifact later renders in it): + +- **English, blank, or "match me / doesn't matter / whatever"** → run nothing. 
`auto` mode already matches the user's input language, so the saved artifact comes out in their language with zero commands. Don't announce "leaving it on auto" — just move to the interview (next message). +- **A supported non-default language** → map the free-text answer (English name, an endonym like "tiếng việt" / "日本語", or a bare code) to its exact lowercase ISO-639-1 code yourself, then silently run the two commands below. At most one natural acknowledgement ("Done — I'll write everything in Vietnamese."). No confirmation form. +- **An unsupported or unresolvable language** → acknowledge in one half-sentence, fall back to auto (run nothing), keep moving. Never dump the supported list. + +Supported codes (anything else → auto): `ar de el en es fi fr he hi id it ja ko nl no pl pt ru sv th tr uk vi zh` + +```bash +# code FIRST, then mode — a rejected code then leaves mode at its safe `auto` default +brv settings set language.code --format json +brv settings set language.mode fixed --format json +``` + +Treat the language as set only when BOTH calls return `success: true` (`{"data":{"restartRequired":false,"value":""},"success":true}`). Because `restartRequired` is `false`, it applies live to the very next `brv curate` — do NOT tell the user to restart. On failure the error is nested at `data.error` (e.g. `{"data":{"error":{"code":"invalid_value",...}},"success":false}`), never a top-level `error`. If the `language.code` call fails, STOP: don't run the `mode` call, don't retry a guessed code, don't surface the JSON — because `mode` was never flipped, the tour just continues in `auto`. + +**Now — as the next message, only after the language question is answered and set — run a quick interview.** This is Message 1's second ask, on its own. Lead with at most a one-line acknowledgement of the language (if you set one), then ask **one combined open question** about both their work **and** the pain that brought them to ByteRover. 
Do not present a form, a list of options, or a multiple-choice menu. Do not re-ask about language here. Let the user answer however feels natural. Example phrasing: @@ -283,12 +309,14 @@ If the user invokes the tour again later, run it again — there is no state tra ## What NOT To Do -- Do NOT extend past 3 messages. -- Do NOT present a form, multiple-choice menu, or rigid field list. Ask one open question. +- Do NOT extend past the three beats (learn → demonstrate → wrap). Asking the language question and the persona interview as two short turns within the learn beat is expected; adding new beats or feature-dumping is not. +- Do NOT ask the language question and the persona interview in the same message. Language first, on its own turn; the interview comes only after the user answers it. +- Do NOT present a form, multiple-choice menu, or rigid field list. Ask one open question. This includes the language line: offer 2-3 example languages at most, never print the 23-language list, and never re-confirm the language with a follow-up form. - Do NOT drill down if the user gives a short answer. Save what they shared. - Do NOT skip the trust statement in Message 1. It is the foundation of the user's willingness to share. - Do NOT explain the architecture, the daemon, connector types, or the full command list. -- Do NOT prompt for an LLM provider, login, or any configuration. The tour runs with zero setup. +- Do NOT prompt for an LLM provider, login, or any other configuration — the tour runs with zero setup. **The single exception is the written-language question in Message 1**: you may ask which language to save memory in, and — ONLY if the user explicitly names a supported non-default language — silently run `brv settings set language.code ` then `brv settings set language.mode fixed` (code first, both `--format json`) before the first `brv curate`. That one GLOBAL `language.*` setting is the only configuration the tour ever touches; it applies live (no restart). 
If the user picks English or says "match me / doesn't matter," run nothing — auto mode already matches their input language. +- Do NOT make the language step heavy. Map the user's free-text answer to an exact lowercase ISO-639-1 code yourself (the enum is case-sensitive — emit `vi`, not `VI` or `vie`). If the named language isn't among the 23 supported, or the `language.code` set returns `invalid_value` (error nested at `data.error`), acknowledge in one half-sentence, fall back to auto, and keep the tour moving — never dump the supported list, never retry a guessed code, never surface the raw error JSON. `brv settings set` is GLOBAL (every project), unlike the per-project context tree; only mention that if the user asks. - Do NOT skip the persona-shaped tailoring in Message 2 in favor of a generic "here's how retrieve works" explanation. The tailored example IS the value demo. - Do NOT tailor with hollow phrases like "As a Rust developer, you'll love…" or "Since you work on a CLI, you might want to…" — these read as templated personalization and erode trust faster than no tailoring at all. The tailored example must reference something **specific** the user said, paired with a **specific** action the agent will take. - Do NOT turn the visible artifact in Message 1 into a confirmation step. No "Does this look right?" prompts. The artifact is shown so the user *feels* what was captured, not so they validate it. diff --git a/src/shared/language/language-names.ts b/src/shared/language/language-names.ts new file mode 100644 index 000000000..997f84db2 --- /dev/null +++ b/src/shared/language/language-names.ts @@ -0,0 +1,33 @@ +/** + * ISO-639-1 code → English language name. Single source of truth for + * surfaces that need a human-readable label alongside the canonical + * wire-format code: language-clause builder, WebUI / TUI pickers, CLI + * error messages. Codes not in this map degrade gracefully via the + * raw-code fallback in `buildLanguageClause`. 
+ */ +export const LANGUAGE_NAMES: Record = { + ar: 'Arabic', + de: 'German', + el: 'Greek', + en: 'English', + es: 'Spanish', + fi: 'Finnish', + fr: 'French', + he: 'Hebrew', + hi: 'Hindi', + id: 'Indonesian', + it: 'Italian', + ja: 'Japanese', + ko: 'Korean', + nl: 'Dutch', + no: 'Norwegian', + pl: 'Polish', + pt: 'Portuguese', + ru: 'Russian', + sv: 'Swedish', + th: 'Thai', + tr: 'Turkish', + uk: 'Ukrainian', + vi: 'Vietnamese', + zh: 'Chinese', +} diff --git a/src/shared/transport/events/settings-events.ts b/src/shared/transport/events/settings-events.ts index 4bcea3d9e..19c3489d6 100644 --- a/src/shared/transport/events/settings-events.ts +++ b/src/shared/transport/events/settings-events.ts @@ -11,31 +11,30 @@ export const SettingsEvents = { * surfaces (CLI / TUI / WebUI) can consume it without crossing the * server import boundary. * - * M7 T2 added three optional fields (`category`, `unit`, `scope`); T1 of - * the Update-check toggle project widened `type`, `current`, `default`, - * and `restartRequired` to also cover boolean descriptors, and made - * `min` / `max` optional (only integer descriptors carry them). All - * widenings are additive at the JSON layer, so consumers that read - * existing integer fields continue to parse the wire format. + * Backward-compat: every widening here is additive at the JSON layer, so + * consumers that read pre-existing integer / boolean fields continue to + * parse the wire format unchanged. */ export interface SettingsItemDTO { - category?: 'concurrency' | 'llm' | 'task-history' | 'updates' - current: boolean | number - default: boolean | number + category?: 'concurrency' | 'language' | 'llm' | 'task-history' | 'updates' + current: boolean | number | string + default: boolean | number | string description: string key: string max?: number min?: number + /** Allowed values for `type === 'enum'`. Omitted otherwise. 
*/ + options?: readonly string[] restartRequired: boolean scope?: 'global' | 'project' - type: 'boolean' | 'integer' + type: 'boolean' | 'enum' | 'integer' unit?: 'count' | 'ms' } export interface SettingsErrorDTO { code: 'invalid_value' | 'invalid_value_type' | 'unknown_key' /** Expected runtime kind, only set when `code === 'invalid_value_type'`. */ - expected?: 'boolean' | 'integer' + expected?: 'boolean' | 'enum' | 'integer' /** `typeof` of the offending value, only set when `code === 'invalid_value_type'`. */ got?: string key: string @@ -59,7 +58,7 @@ export type SettingsGetResponse = export interface SettingsSetRequest { key: string - value: boolean | number + value: boolean | number | string } export type SettingsSetResponse = diff --git a/src/shared/types/settings-keys.ts b/src/shared/types/settings-keys.ts new file mode 100644 index 000000000..4ff61fb5c --- /dev/null +++ b/src/shared/types/settings-keys.ts @@ -0,0 +1,22 @@ +/** + * Canonical user-configurable settings key registry. + * + * Lives in `shared/` so every consumer — server (`SETTINGS_REGISTRY`, + * `FileSettingsStore`, `SettingsValidator`, `settings-bootstrap`), agent + * (`cipher-agent`), oclif (`brv curate`), TUI / WebUI — can reference + * one canonical constant. A rename here becomes a typecheck error at + * every call site, preventing silent drift between the registry, the + * persisted overrides file, and the UI surfaces. 
+ */ +export const SETTINGS_KEYS = { + AGENT_POOL_MAX_CONCURRENT_TASKS: 'agentPool.maxConcurrentTasksPerProject', + AGENT_POOL_MAX_SIZE: 'agentPool.maxSize', + LANGUAGE_CODE: 'language.code', + LANGUAGE_MODE: 'language.mode', + LLM_ITERATION_BUDGET_MS: 'llm.iterationBudgetMs', + LLM_REQUEST_TIMEOUT_MS: 'llm.requestTimeoutMs', + TASK_HISTORY_MAX_ENTRIES: 'taskHistory.maxEntries', + UPDATE_CHECK_FOR_UPDATES: 'update.checkForUpdates', +} as const + +export type SettingsKey = (typeof SETTINGS_KEYS)[keyof typeof SETTINGS_KEYS] diff --git a/src/shared/types/settings-row.ts b/src/shared/types/settings-row.ts index 23cf81983..f27ace029 100644 --- a/src/shared/types/settings-row.ts +++ b/src/shared/types/settings-row.ts @@ -1,10 +1,10 @@ -export type SettingsRowCategory = 'concurrency' | 'llm' | 'other' | 'task-history' | 'updates' +export type SettingsRowCategory = 'concurrency' | 'language' | 'llm' | 'other' | 'task-history' | 'updates' export type SettingsRowUnit = 'count' | 'ms' /** * View-model for one settings row consumed by the TUI. Discriminated on * `type` so the renderer narrows before reading integer-only fields - * (`min`, `max`, `unit`) or treating `current` / `default` as numeric. + * (`min`, `max`, `unit`) or enum-only fields (`options`). * * Restart requirement is propagated from the descriptor verbatim (no * literal `true` constraint) so the dirty-banner filter on the page can @@ -12,8 +12,8 @@ export type SettingsRowUnit = 'count' | 'ms' */ export interface SettingsRow { readonly category: SettingsRowCategory - readonly current: boolean | number - readonly default: boolean | number + readonly current: boolean | number | string + readonly default: boolean | number | string readonly description: string readonly displayCurrent: string readonly displayDefault: string @@ -23,13 +23,15 @@ export interface SettingsRow { readonly max?: number readonly min?: number readonly modified: boolean + /** Allowed values for `type === 'enum'`. Omitted otherwise. 
*/ + readonly options?: readonly string[] readonly restartRequired: boolean - readonly type: 'boolean' | 'integer' + readonly type: 'boolean' | 'enum' | 'integer' readonly unit?: SettingsRowUnit } export type RowParseResult = - | {readonly displayValue: string; readonly kind: 'ok'; readonly value: number} + | {readonly displayValue: string; readonly kind: 'ok'; readonly value: number | string} | {readonly kind: 'error'; readonly message: string} export const CATEGORY_ORDER: readonly SettingsRowCategory[] = [ @@ -37,5 +39,45 @@ export const CATEGORY_ORDER: readonly SettingsRowCategory[] = [ 'llm', 'task-history', 'updates', + 'language', 'other', ] + +/** + * Display label shown above each category's row group in the TUI settings + * page and in `brv settings list` text output. Adding a new category here + * (and a corresponding entry in `CATEGORY_ORDER`) is the only edit needed + * for both consumers to render it — `oclif/commands/settings/index.ts` + * and `tui/features/settings/utils/format-settings.ts` both import from + * this module, so the surfaces never drift. + */ +export const CATEGORY_HEADERS: Readonly> = { + concurrency: 'CONCURRENCY', + language: 'LANGUAGE', + llm: 'LLM', + other: 'OTHER', + 'task-history': 'TASK HISTORY', + updates: 'UPDATES', +} + +const CATEGORY_SET: ReadonlySet = new Set(CATEGORY_ORDER) + +/** + * Type guard derived from `CATEGORY_ORDER` so the membership check never + * drifts from the canonical list. Adding a new category to + * `SettingsRowCategory` + `CATEGORY_ORDER` + `CATEGORY_HEADERS` is the + * only edit needed — this guard picks it up automatically. + */ +export function isSettingsRowCategory(value: unknown): value is SettingsRowCategory { + return typeof value === 'string' && CATEGORY_SET.has(value) +} + +/** + * Folds an arbitrary incoming category value into a canonical + * `SettingsRowCategory`. 
Unknown / missing categories fall through to + * `'other'` so an unexpected category emitted by the daemon still renders + * under the OTHER header instead of getting silently dropped. + */ +export function toRowCategory(category: unknown): SettingsRowCategory { + return isSettingsRowCategory(category) ? category : 'other' +} diff --git a/src/shared/utils/format-settings.ts b/src/shared/utils/format-settings.ts index 79d415e7e..087deeaf0 100644 --- a/src/shared/utils/format-settings.ts +++ b/src/shared/utils/format-settings.ts @@ -1,7 +1,7 @@ import type {SettingsItemDTO} from '../transport/events/settings-events.js' -import type {RowParseResult, SettingsRow, SettingsRowCategory, SettingsRowUnit} from '../types/settings-row.js' +import type {RowParseResult, SettingsRow, SettingsRowUnit} from '../types/settings-row.js' -import {CATEGORY_ORDER} from '../types/settings-row.js' +import {CATEGORY_ORDER, toRowCategory} from '../types/settings-row.js' import {formatCount, formatDuration, parseDuration} from './format-duration.js' export function buildSettingsRows(items: readonly SettingsItemDTO[]): SettingsRow[] { @@ -12,6 +12,11 @@ export function buildSettingsRows(items: readonly SettingsItemDTO[]): SettingsRo continue } + if (isEnumItem(item)) { + rows.push(toEnumRow(item)) + continue + } + if (isIntegerItem(item)) rows.push(toIntegerRow(item)) } @@ -22,10 +27,21 @@ export function parseRowInput(row: SettingsRow, raw: string): RowParseResult { const trimmed = raw.trim() if (trimmed === '') return {kind: 'error', message: 'Value is required'} + if (row.type === 'enum') return parseAsEnum(row, raw) if (row.unit === 'ms') return parseAsDuration(row, raw) return parseAsCount(row, raw) } +function parseAsEnum(row: SettingsRow, raw: string): RowParseResult { + const trimmed = raw.trim() + const options = row.options ?? 
[] + if (!options.includes(trimmed)) { + return {kind: 'error', message: `Expected one of [${options.join(', ')}], got '${raw}'`} + } + + return {displayValue: trimmed, kind: 'ok', value: trimmed} +} + function parseAsDuration(row: SettingsRow, raw: string): RowParseResult { if (row.min === undefined || row.max === undefined) { return {kind: 'error', message: `${row.key} has no numeric range`} @@ -124,16 +140,42 @@ function toBooleanRow(item: SettingsItemDTO, current: boolean, defaultValue: boo } } -function renderBoolean(value: boolean): string { - return value ? '[ on ]' : '[ off ]' +type EnumSettingsItemDTO = Omit & { + readonly current: string + readonly default: string + readonly options: readonly string[] + readonly type: 'enum' } -function toRowCategory(category: SettingsItemDTO['category']): SettingsRowCategory { - if (category === 'concurrency' || category === 'llm' || category === 'task-history' || category === 'updates') { - return category +function isEnumItem(item: SettingsItemDTO): item is EnumSettingsItemDTO { + return ( + item.type === 'enum' && + typeof item.current === 'string' && + typeof item.default === 'string' && + Array.isArray(item.options) + ) +} + +function toEnumRow(item: EnumSettingsItemDTO): SettingsRow { + return { + category: toRowCategory(item.category), + current: item.current, + default: item.default, + description: item.description, + displayCurrent: `[ ${item.current} ]`, + displayDefault: item.default, + displayRange: '', + key: item.key, + label: item.key, + modified: item.current !== item.default, + options: item.options, + restartRequired: item.restartRequired, + type: 'enum', } +} - return 'other' +function renderBoolean(value: boolean): string { + return value ? 
'[ on ]' : '[ off ]' } function formatIntegerRange(item: IntegerSettingsItemDTO, unit: SettingsRowUnit): string { diff --git a/src/tui/features/settings/components/settings-page.tsx b/src/tui/features/settings/components/settings-page.tsx index 3a7ca99f9..a7d67d068 100644 --- a/src/tui/features/settings/components/settings-page.tsx +++ b/src/tui/features/settings/components/settings-page.tsx @@ -29,8 +29,9 @@ export function SettingsPage({onCancel, onComplete}: CustomDialogCallbacks): Rea const rows = useMemo(() => (data ? buildSettingsRows(data.items) : []), [data]) const groups = useMemo(() => groupRowsByCategory(rows), [rows]) const focusedRow = rows[cursor] - const hintMode: 'browse' | 'edit' | 'edit-error' | 'saving' = - mode === 'edit' && rowError !== undefined ? 'edit-error' : mode + const isEnumEdit = mode === 'edit' && focusedRow?.type === 'enum' + const hintMode: 'browse' | 'edit' | 'edit-enum' | 'edit-error' | 'saving' = + mode === 'edit' && rowError !== undefined ? 'edit-error' : isEnumEdit ? 'edit-enum' : mode // Restart warning fires only when at least one dirty key actually // requires a daemon restart. Boolean toggles (e.g. update.checkForUpdates, @@ -173,6 +174,27 @@ export function SettingsPage({onCancel, onComplete}: CustomDialogCallbacks): Rea return } + const focused = rows[cursor] + if (focused?.type === 'enum' && focused.options !== undefined) { + const {options} = focused + const currentIndex = options.indexOf(editBuffer) + if (key.leftArrow) { + const previousIndex = currentIndex <= 0 ? options.length - 1 : currentIndex - 1 + setEditBuffer(options[previousIndex]) + setRowError(undefined) + return + } + + if (key.rightArrow) { + const nextIndex = currentIndex < 0 || currentIndex >= options.length - 1 ? 
0 : currentIndex + 1 + setEditBuffer(options[nextIndex]) + setRowError(undefined) + return + } + + return + } + if (key.backspace || key.delete) { setEditBuffer((previous) => previous.slice(0, -1)) return diff --git a/src/tui/features/settings/utils/format-settings.ts b/src/tui/features/settings/utils/format-settings.ts index 4c171fb83..537413479 100644 --- a/src/tui/features/settings/utils/format-settings.ts +++ b/src/tui/features/settings/utils/format-settings.ts @@ -1,14 +1,11 @@ -import {CATEGORY_ORDER, type SettingsRow, type SettingsRowCategory} from '../../../../shared/types/settings-row.js' +import { + CATEGORY_HEADERS, + CATEGORY_ORDER, + type SettingsRow, + type SettingsRowCategory, +} from '../../../../shared/types/settings-row.js' import {formatDuration} from '../../../../shared/utils/format-duration.js' -const CATEGORY_HEADERS: Readonly> = { - concurrency: 'CONCURRENCY', - llm: 'LLM', - other: 'OTHER', - 'task-history': 'TASK HISTORY', - updates: 'UPDATES', -} - export function groupRowsByCategory(rows: readonly SettingsRow[]): ReadonlyArray<{ readonly category: SettingsRowCategory readonly header: string @@ -32,7 +29,10 @@ export function groupRowsByCategory(rows: readonly SettingsRow[]): ReadonlyArray return result } -export function bottomHintFor(mode: 'browse' | 'edit' | 'edit-error' | 'saving', focusedKey?: string): string { +export function bottomHintFor( + mode: 'browse' | 'edit' | 'edit-enum' | 'edit-error' | 'saving', + focusedKey?: string, +): string { switch (mode) { case 'browse': { return 'Up/Down move | Enter edit | R reset | Esc exit' @@ -42,6 +42,10 @@ export function bottomHintFor(mode: 'browse' | 'edit' | 'edit-error' | 'saving', return `Editing ${focusedKey ?? ''} | Enter save | Esc cancel` } + case 'edit-enum': { + return `Editing ${focusedKey ?? ''} | Left/Right cycle options | Enter save | Esc cancel` + } + case 'edit-error': { return `Editing ${focusedKey ?? 
''} | Enter save (when valid) | Esc cancel` } @@ -53,10 +57,10 @@ export function bottomHintFor(mode: 'browse' | 'edit' | 'edit-error' | 'saving', } export function preFillBufferFor(row: SettingsRow): string { - // preFillBufferFor only runs when entering integer text-input mode. - // Boolean rows take the toggle path in the page and never reach here; - // guard the narrowing so the function still compiles under the wider - // SettingsRow union. + // preFillBufferFor only runs when entering integer text-input mode for + // numeric rows or enum cycling for enum rows. Boolean rows take the + // toggle path and never reach here. + if (row.type === 'enum') return String(row.current) if (typeof row.current !== 'number') return String(row.current) if (row.unit === 'ms') return formatDuration(row.current) return String(row.current) diff --git a/src/webui/features/settings/components/enum-settings-row.tsx b/src/webui/features/settings/components/enum-settings-row.tsx new file mode 100644 index 000000000..f857358f9 --- /dev/null +++ b/src/webui/features/settings/components/enum-settings-row.tsx @@ -0,0 +1,85 @@ +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@campfirein/byterover-packages/components/select' +import {useId} from 'react' +import {toast} from 'sonner' + +import type {SettingsRow as SettingsRowData} from '../../../../shared/types/settings-row' + +import {LANGUAGE_NAMES} from '../../../../shared/language/language-names' +import {SETTINGS_KEYS} from '../../../../shared/types/settings-keys' +import {formatError} from '../../../lib/error-messages' +import {noop} from '../../../lib/noop' +import {useSetSetting} from '../api/set-setting' +import {labelFor} from '../lib/labels' +import {useRestartBannerStore} from '../stores/restart-banner-store' + +type Props = { + row: SettingsRowData +} + +export function EnumSettingsRow({row}: Props) { + const setMutation = useSetSetting() + const markDirty = useRestartBannerStore((s) => 
s.markDirty) + const descriptionId = useId() + + const label = labelFor(row.key) + const current = typeof row.current === 'string' ? row.current : String(row.current) + const options = row.options ?? [] + + const choose = async (next: string) => { + if (next === current) return + try { + const response = await setMutation.mutateAsync({key: row.key, value: next}) + if (response.ok) { + markDirty(row.key, row.restartRequired) + toast.success(`${label} set to ${displayLabel(row.key, next)}`) + return + } + + toast.error(response.error.message) + } catch (error) { + toast.error(formatError(error, `Failed to update ${label}`)) + } + } + + return ( +
    +
    + {label} + + {row.description} + +
    + +
    + ) +} + +function displayLabel(key: string, option: string): string { + if (key !== SETTINGS_KEYS.LANGUAGE_CODE) return option + const name = LANGUAGE_NAMES[option] + return name ? `${option} — ${name}` : option +} diff --git a/src/webui/features/settings/components/language-panel.tsx b/src/webui/features/settings/components/language-panel.tsx new file mode 100644 index 000000000..4ee8b9ca4 --- /dev/null +++ b/src/webui/features/settings/components/language-panel.tsx @@ -0,0 +1,42 @@ +import {LoaderCircle} from 'lucide-react' +import {Fragment, useMemo} from 'react' + +import {buildSettingsRows} from '../../../../shared/utils/format-settings' +import {noop} from '../../../lib/noop' +import {SettingsSection} from '../../vc/components/settings-section' +import {useGetSettings} from '../api/list-settings' +import {SettingsRow} from './settings-row' +import {SettingsSkeleton} from './settings-skeleton' + +export function LanguagePanel() { + const {data, error, isError, isLoading, refetch} = useGetSettings() + + const rows = useMemo(() => { + if (!data) return [] + return buildSettingsRows(data.items).filter((row) => row.category === 'language') + }, [data?.items]) + + return ( + : undefined} + description="Language used when ByteRover writes context. Auto matches your input language." + error={isError ? error : undefined} + errorFallback="Failed to load language settings" + onRetry={() => refetch().catch(noop)} + title="Language" + > + {data ? ( +
    + {rows.map((row, index) => ( + + + {index < rows.length - 1 &&
    } + + ))} +
    + ) : ( + + )} + + ) +} diff --git a/src/webui/features/settings/components/settings-row.tsx b/src/webui/features/settings/components/settings-row.tsx index 09278996a..8bed08ef5 100644 --- a/src/webui/features/settings/components/settings-row.tsx +++ b/src/webui/features/settings/components/settings-row.tsx @@ -15,6 +15,7 @@ import {useSetSetting} from '../api/set-setting' import {labelFor} from '../lib/labels' import {useRestartBannerStore} from '../stores/restart-banner-store' import {BooleanSettingsRow} from './boolean-settings-row' +import {EnumSettingsRow} from './enum-settings-row' type Props = { row: SettingsRowData @@ -22,6 +23,7 @@ type Props = { export function SettingsRow({row}: Props) { if (row.type === 'boolean') return + if (row.type === 'enum') return return } @@ -54,6 +56,11 @@ function IntegerSettingsRow({row}: Props) { return } + if (typeof parsed.value !== 'number') { + setError(`Expected an integer for ${row.key}`) + return + } + if (parsed.value === row.current) { setError(undefined) setBuffer(String(parsed.value)) @@ -62,11 +69,12 @@ function IntegerSettingsRow({row}: Props) { } setError(undefined) - const response = await setMutation.mutateAsync({key: row.key, value: parsed.value}) + const numericValue: number = parsed.value + const response = await setMutation.mutateAsync({key: row.key, value: numericValue}) if (response.ok) { markDirty(row.key, row.restartRequired) isUserEditingRef.current = false - toast.success(`${label} set to ${toastValue(parsed.value)}`) + toast.success(`${label} set to ${toastValue(numericValue)}`) return } diff --git a/src/webui/features/settings/lib/labels.ts b/src/webui/features/settings/lib/labels.ts index 1c5368197..3c3a8935b 100644 --- a/src/webui/features/settings/lib/labels.ts +++ b/src/webui/features/settings/lib/labels.ts @@ -1,10 +1,14 @@ +import {SETTINGS_KEYS} from '../../../../shared/types/settings-keys.js' + const LABELS: Record = { - 'agentPool.maxConcurrentTasksPerProject': 'Max parallel tasks per 
project', - 'agentPool.maxSize': 'Max concurrent projects', - 'llm.iterationBudgetMs': 'Agentic loop budget', - 'llm.requestTimeoutMs': 'LLM request timeout', - 'taskHistory.maxEntries': 'Task history size', - 'update.checkForUpdates': 'Check for updates at startup', + [SETTINGS_KEYS.AGENT_POOL_MAX_CONCURRENT_TASKS]: 'Max parallel tasks per project', + [SETTINGS_KEYS.AGENT_POOL_MAX_SIZE]: 'Max concurrent projects', + [SETTINGS_KEYS.LANGUAGE_CODE]: 'Language', + [SETTINGS_KEYS.LANGUAGE_MODE]: 'Language mode', + [SETTINGS_KEYS.LLM_ITERATION_BUDGET_MS]: 'Agentic loop budget', + [SETTINGS_KEYS.LLM_REQUEST_TIMEOUT_MS]: 'LLM request timeout', + [SETTINGS_KEYS.TASK_HISTORY_MAX_ENTRIES]: 'Task history size', + [SETTINGS_KEYS.UPDATE_CHECK_FOR_UPDATES]: 'Check for updates at startup', } export function labelFor(key: string): string { diff --git a/src/webui/pages/configuration/general.tsx b/src/webui/pages/configuration/general.tsx index a5e5c1237..3b2bf46c8 100644 --- a/src/webui/pages/configuration/general.tsx +++ b/src/webui/pages/configuration/general.tsx @@ -1,4 +1,5 @@ import {ConcurrencyPanel} from '../../features/settings/components/concurrency-panel' +import {LanguagePanel} from '../../features/settings/components/language-panel' import {LlmPanel} from '../../features/settings/components/llm-panel' import {TaskHistoryPanel} from '../../features/settings/components/task-history-panel' import {UpdatesPanel} from '../../features/settings/components/updates-panel' @@ -9,6 +10,7 @@ export function GeneralSection() { + ) diff --git a/test/commands/curate/read-language-from-settings.test.ts b/test/commands/curate/read-language-from-settings.test.ts new file mode 100644 index 000000000..49578bd2e --- /dev/null +++ b/test/commands/curate/read-language-from-settings.test.ts @@ -0,0 +1,141 @@ +import type {ConnectionResult, ITransportClient} from '@campfirein/brv-transport-client' + +import {DaemonSpawnError} from '@campfirein/brv-transport-client' +import {expect} from 'chai' 
+import sinon, {restore, stub} from 'sinon' + +import {readLanguageFromSettings} from '../../../src/oclif/commands/curate/index.js' + +describe('readLanguageFromSettings', () => { + let mockClient: sinon.SinonStubbedInstance + let mockConnector: sinon.SinonStub<[], Promise> + + beforeEach(() => { + mockClient = { + connect: stub().resolves(), + disconnect: stub().resolves(), + getClientId: stub().returns('test-client-id'), + getDaemonVersion: stub(), + getState: stub().returns('connected'), + isConnected: stub().resolves(true), + joinRoom: stub().resolves(), + leaveRoom: stub().resolves(), + on: stub().returns(() => {}), + once: stub(), + onStateChange: stub().returns(() => {}), + request: stub() as unknown as ITransportClient['request'], + requestWithAck: stub() as unknown as ITransportClient['requestWithAck'], + } as unknown as sinon.SinonStubbedInstance + + mockConnector = stub<[], Promise>().resolves({ + client: mockClient as unknown as ITransportClient, + projectRoot: '/test/project', + }) + }) + + afterEach(() => { + restore() + }) + + it('returns {mode: "fixed", code} when daemon settings have mode=fixed and a string code', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({ + items: [ + { + category: 'language', + current: 'fixed', + default: 'auto', + description: '', + key: 'language.mode', + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + }, + { + category: 'language', + current: 'ja', + default: 'en', + description: '', + key: 'language.code', + options: ['en', 'ja'], + restartRequired: false, + type: 'enum', + }, + ], + }) + + const result = await readLanguageFromSettings({ + maxRetries: 1, + retryDelayMs: 0, + transportConnector: mockConnector, + }) + + expect(result).to.eql({code: 'ja', mode: 'fixed'}) + }) + + it('returns undefined when daemon settings have mode=auto (regardless of code)', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({ + items: [ + { + category: 'language', + 
current: 'auto', + default: 'auto', + description: '', + key: 'language.mode', + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + }, + { + category: 'language', + current: 'ja', + default: 'en', + description: '', + key: 'language.code', + options: ['en', 'ja'], + restartRequired: false, + type: 'enum', + }, + ], + }) + + const result = await readLanguageFromSettings({ + maxRetries: 1, + retryDelayMs: 0, + transportConnector: mockConnector, + }) + + expect(result).to.equal(undefined) + }) + + it('returns undefined when the daemon connection throws', async () => { + mockConnector.rejects(new Error('connection failed')) + + const result = await readLanguageFromSettings({ + maxRetries: 1, + retryDelayMs: 0, + transportConnector: mockConnector, + }) + + expect(result).to.equal(undefined) + }) + + it('uses a tight retry budget by default — no long kickoff delay when daemon is unreachable', async () => { + // withDaemonRetry's default is MAX_RETRIES=10 × DEFAULT_RETRY_DELAY_MS=1000ms, which would + // make every `brv curate` kickoff sit through ~9s of retries before the catch returns + // undefined and the project-config fallback runs. readLanguageFromSettings should override + // the defaults to {maxRetries: 1, retryDelayMs: 0} so a missing daemon trips the fallback + // immediately. + mockConnector.rejects(new DaemonSpawnError('daemon not running')) + + const result = await readLanguageFromSettings({ + transportConnector: mockConnector, + // intentionally no maxRetries / retryDelayMs — verify the tight default + }) + + expect(result).to.equal(undefined) + // `withDaemonRetry` treats `maxRetries` as total attempts (`for attempt <= maxRetries`), so + // {maxRetries: 1} means a single attempt. With the prior MAX_RETRIES=10 default the connector + // would have been called 10 times; the cap below catches any regression to the loose default. 
+ expect(mockConnector.callCount).to.be.at.most(2) + }) +}) diff --git a/test/commands/settings/get.test.ts b/test/commands/settings/get.test.ts index cbf38219c..ea7d62b9d 100644 --- a/test/commands/settings/get.test.ts +++ b/test/commands/settings/get.test.ts @@ -218,4 +218,96 @@ describe('brv settings get', () => { const description = SettingsGet.description ?? '' expect(description).to.match(/restart/i) }) + + describe('enum rows', () => { + it('prints an "allowed:" line listing the enum options in text mode', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + requestStub.resolves({ + category: 'language', + current: 'fixed', + default: 'auto', + description: 'mode', + key: 'language.mode', + ok: true, + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + }) + + await createCommand('language.mode').run() + const output = loggedMessages.join('\n') + + expect(output).to.include('language.mode') + expect(output).to.match(/current:\s*fixed/) + expect(output).to.match(/default:\s*auto/) + expect(output).to.match(/allowed:\s*auto,\s*fixed/) + // Numeric range column is meaningless for enums. 
+ expect(output).to.not.match(/range:/) + }) + + it('surfaces the enum options array in JSON mode', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + requestStub.resolves({ + category: 'language', + current: 'ja', + default: 'en', + description: 'code', + key: 'language.code', + ok: true, + options: ['ar', 'en', 'ja', 'ru', 'vi', 'zh'], + restartRequired: false, + type: 'enum', + }) + + await createJsonCommand('language.code').run() + + const json = parseJsonOutput() + expect(json.success).to.be.true + expect(json.data).to.have.property('options').that.deep.equals(['ar', 'en', 'ja', 'ru', 'vi', 'zh']) + expect(json.data).to.have.property('type', 'enum') + }) + + it('omits the "allowed:" line in text mode for non-enum keys', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + requestStub.resolves({ + category: 'concurrency', + current: 25, + default: 10, + description: 'desc', + key: 'agentPool.maxSize', + max: 100, + min: 1, + ok: true, + restartRequired: true, + type: 'integer', + }) + + await createCommand('agentPool.maxSize').run() + const output = loggedMessages.join('\n') + expect(output).to.not.match(/allowed:/) + }) + + it('omits the "options" field in JSON mode for non-enum keys', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + requestStub.resolves({ + category: 'concurrency', + current: 25, + default: 10, + description: 'desc', + key: 'agentPool.maxSize', + max: 100, + min: 1, + ok: true, + restartRequired: true, + type: 'integer', + }) + + await createJsonCommand('agentPool.maxSize').run() + + const json = parseJsonOutput() + expect(json.success).to.be.true + expect(json.data).to.not.have.property('options') + expect(json.data).to.have.property('type', 'integer') + }) + }) }) diff --git a/test/commands/settings/index.test.ts b/test/commands/settings/index.test.ts index 5a5597152..ea19445d5 100644 --- a/test/commands/settings/index.test.ts +++ 
b/test/commands/settings/index.test.ts @@ -7,6 +7,15 @@ import sinon, {restore, stub} from 'sinon' import Settings from '../../../src/oclif/commands/settings/index.js' import {SettingsEvents} from '../../../src/shared/transport/events/settings-events.js' +import {CATEGORY_HEADERS, CATEGORY_ORDER} from '../../../src/shared/types/settings-row.js' + +function findHeaderIndex(messages: readonly string[], header: string): number { + for (const [i, m] of messages.entries()) { + if (m.includes(header)) return i + } + + return -1 +} class TestableSettings extends Settings { private readonly mockConnector: () => Promise @@ -375,4 +384,177 @@ describe('brv settings (index)', () => { expect(output).to.not.include('UPDATES') }) }) + + describe('enum rows (LANGUAGE group)', () => { + it('renders a LANGUAGE section with both enum rows when the daemon returns language items', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + requestStub.resolves({ + items: [ + { + category: 'concurrency', + current: 10, + default: 10, + description: 'h', + key: 'agentPool.maxSize', + max: 100, + min: 1, + restartRequired: true, + type: 'integer', + }, + { + category: 'language', + current: 'fixed', + default: 'auto', + description: 'Match input language (auto) or force a fixed language for written output', + key: 'language.mode', + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + }, + { + category: 'language', + current: 'ja', + default: 'en', + description: 'ISO-639-1 code applied when mode is fixed; ignored in auto mode', + key: 'language.code', + options: ['ar', 'de', 'el', 'en', 'es', 'ja', 'ko', 'ru', 'vi', 'zh'], + restartRequired: false, + type: 'enum', + }, + ], + }) + + await createCommand().run() + const output = loggedMessages.join('\n') + + expect(output, 'LANGUAGE header must appear').to.include('LANGUAGE') + + const modeRow = loggedMessages.find((m) => m.includes('language.mode')) + expect(modeRow, 'row for 
language.mode').to.exist + expect(modeRow).to.include('fixed') + expect(modeRow).to.match(/default\s*auto/) + + const codeRow = loggedMessages.find((m) => m.includes('language.code')) + expect(codeRow, 'row for language.code').to.exist + expect(codeRow).to.include('ja') + expect(codeRow).to.match(/default\s*en/) + + // Numeric range column is meaningless for enums and must not be rendered. + expect(modeRow).to.not.match(/\d+\s*-\s*\d+/) + expect(codeRow).to.not.match(/\d+\s*-\s*\d+/) + }) + + it('omits the LANGUAGE section entirely when the daemon returns no language items', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + requestStub.resolves({ + items: [ + { + category: 'concurrency', + current: 10, + default: 10, + description: 'h', + key: 'agentPool.maxSize', + max: 100, + min: 1, + restartRequired: true, + type: 'integer', + }, + ], + }) + + await createCommand().run() + const output = loggedMessages.join('\n') + + expect(output).to.not.include('LANGUAGE') + }) + + it('renders LANGUAGE after UPDATES, matching the shared CATEGORY_ORDER', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + requestStub.resolves({ + items: [ + { + category: 'language', + current: 'auto', + default: 'auto', + description: '', + key: 'language.mode', + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + }, + { + category: 'updates', + current: true, + default: true, + description: '', + key: 'update.checkForUpdates', + restartRequired: false, + type: 'boolean', + }, + ], + }) + + await createCommand().run() + + const updatesIdx = loggedMessages.findIndex((m) => m.includes('UPDATES')) + const languageIdx = loggedMessages.findIndex((m) => m.includes('LANGUAGE')) + expect(updatesIdx).to.be.greaterThan(-1) + expect(languageIdx).to.be.greaterThan(-1) + expect(updatesIdx).to.be.lessThan(languageIdx) + }) + }) + + describe('canonical category coverage (regression guard for future categories)', () => { + 
it('renders every category in the shared CATEGORY_ORDER when the daemon returns one item per category', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + // Synthesize one item per canonical category. Adding a new category to + // SettingsRowCategory + CATEGORY_ORDER + CATEGORY_HEADERS without updating + // the print loop should fail this test. + const items = CATEGORY_ORDER.map((category, idx) => ({ + category, + current: 'placeholder', + default: 'placeholder', + description: '', + key: `canonical.${category}.item.${idx}`, + options: ['placeholder'], + restartRequired: false, + type: 'enum' as const, + })) + requestStub.resolves({items}) + + await createCommand().run() + const output = loggedMessages.join('\n') + + for (const category of CATEGORY_ORDER) { + const header = CATEGORY_HEADERS[category] + expect(output, `header for ${category}`).to.include(header) + } + }) + + it('renders headers in CATEGORY_ORDER sequence', async () => { + const requestStub = mockClient.requestWithAck as sinon.SinonStub + const items = CATEGORY_ORDER.map((category, idx) => ({ + category, + current: 'placeholder', + default: 'placeholder', + description: '', + key: `canonical.${category}.item.${idx}`, + options: ['placeholder'], + restartRequired: false, + type: 'enum' as const, + })) + requestStub.resolves({items}) + + await createCommand().run() + + const indices: number[] = [] + for (const category of CATEGORY_ORDER) { + indices.push(findHeaderIndex(loggedMessages, CATEGORY_HEADERS[category])) + } + + for (let i = 1; i < indices.length; i++) { + expect(indices[i], `${CATEGORY_ORDER[i]} after ${CATEGORY_ORDER[i - 1]}`).to.be.greaterThan(indices[i - 1]) + } + }) + }) }) diff --git a/test/integration/scenarios/language-roundtrip.test.ts b/test/integration/scenarios/language-roundtrip.test.ts new file mode 100644 index 000000000..811bf9946 --- /dev/null +++ b/test/integration/scenarios/language-roundtrip.test.ts @@ -0,0 +1,149 @@ +/** + * Full-pipeline 
integration test for the language-selection feature. + * + * Walks `.brv/config.json` on disk → `ProjectConfigStore.read()` → + * `BrvConfig.language` → `kickoffSession()` → the kickoff prompt envelope + * the calling agent's LLM consumes. Proves the threading hasn't broken + * at any layer for the four target non-English languages (Russian / + * Vietnamese / Chinese / Japanese) plus the default auto-mode. + * + * Unit tests cover each layer in isolation: + * - `language-clause.test.ts` — clause text emission + * - `brv-config.test.ts` — schema round-trip + * - `curate-prompt-builder.test.ts` — clause appears in the prompt + * - `curate-session.test.ts` — orchestrator threading + * + * This file proves the layers compose end-to-end against a real config + * file on disk — the legacy per-project fallback tier of + * `resolveLanguagePreference`. The canonical write surface is now + * `brv settings set language.code` followed by an ISO 639-1 code; this test still exercises the + * `.brv/config.json` fallback used by mid-migration users. + * + * Out of scope: actual LLM-honoring of the clause. That requires a real + * calling agent (Claude Code, Cursor) and is validated manually pre-release. + * The on-the-wire prompt content is what we can test deterministically here.
+ */ + +import {expect} from 'chai' +import {existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync} from 'node:fs' +import {tmpdir} from 'node:os' +import {join} from 'node:path' + +import {kickoffSession} from '../../../src/oclif/lib/curate-session.js' +import {BRV_CONFIG_VERSION, BRV_DIR, PROJECT_CONFIG_FILE} from '../../../src/server/constants.js' +import {ProjectConfigStore} from '../../../src/server/infra/config/file-config-store.js' + +describe('language-roundtrip — config file → BrvConfig → kickoff prompt', () => { + let projectRoot: string + + beforeEach(() => { + projectRoot = mkdtempSync(join(tmpdir(), 'lang-roundtrip-')) + mkdirSync(join(projectRoot, BRV_DIR), {recursive: true}) + }) + + afterEach(() => { + if (existsSync(projectRoot)) rmSync(projectRoot, {force: true, recursive: true}) + }) + + function writeProjectConfig(language?: {code?: string; mode: 'auto' | 'fixed'}): void { + const config = { + createdAt: '2026-05-26T00:00:00.000Z', + cwd: projectRoot, + ...(language !== undefined && {language}), + version: BRV_CONFIG_VERSION, + } + writeFileSync(join(projectRoot, BRV_DIR, PROJECT_CONFIG_FILE), JSON.stringify(config, undefined, 2), 'utf8') + } + + async function kickoffWithProjectConfig(): ReturnType { + const config = await new ProjectConfigStore().read(projectRoot) + return kickoffSession({content: 'remember X', language: config?.language, projectRoot}) + } + + describe('auto mode (default)', () => { + it('default config without a language field emits the auto clause', async () => { + writeProjectConfig() + const envelope = await kickoffWithProjectConfig() + // Match the user's input language — the auto wording from language-clause.ts. + // This is the modal path: every existing `.brv/config.json` predates the + // feature, so the default must be a no-op for English users and a graceful + // pass-through for non-English users. 
+ expect(envelope.prompt).to.include("Match the user's input language") + }) + + it('explicit `mode: auto` config emits the auto clause', async () => { + writeProjectConfig({mode: 'auto'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include("Match the user's input language") + }) + }) + + describe('fixed mode — clause names the user-configured language', () => { + it('Russian (Cyrillic) — `code: ru` emits "in Russian"', async () => { + // The #616 reporter is a Russian user. This is the load-bearing path + // for closing the issue end-to-end: a real config on disk, read via + // the real loader, threaded through the real orchestrator, lands in + // the prompt the calling agent's LLM sees. + writeProjectConfig({code: 'ru', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Russian') + expect(envelope.prompt).to.not.include("Match the user's input language") + }) + + it('Vietnamese (Latin-non-English) — `code: vi` emits "in Vietnamese"', async () => { + // The proof point for LLM-in-call detection beating a Unicode-block + // heuristic. Vietnamese is Latin script with diacritics, indistinguishable + // from English by code-range alone. + writeProjectConfig({code: 'vi', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Vietnamese') + }) + + it('Chinese (CJK kanji) — `code: zh` emits "in Chinese"', async () => { + // CJK kanji — ENG-2689's tokenizer fix makes the search side searchable + // for content authored under this clause. This test is the curate-side + // equivalent: the calling agent's prompt explicitly names Chinese. + writeProjectConfig({code: 'zh', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Chinese') + }) + + it('Japanese (CJK kanji + kana) — `code: ja` emits "in Japanese"', async () => { + // Second CJK script. 
Hiragana / Katakana / Kanji all share the same + // bigram tokenization rules from ENG-2689 and the same clause naming here. + writeProjectConfig({code: 'ja', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in Japanese') + }) + }) + + describe('schema rejection at load time', () => { + it('fixed mode without code is rejected by fromJson — the load throws', async () => { + // `mode: 'fixed'` without `code` would silently fall back to English at + // prompt time. `isBrvConfigJson` rejects it at load so the failure mode + // is structurally impossible. Confirm the loader still throws end-to-end + // (not just at the unit-test level). + writeProjectConfig({mode: 'fixed'}) + let threwAtLoadTime = false + try { + await new ProjectConfigStore().read(projectRoot) + } catch { + threwAtLoadTime = true + } + + expect(threwAtLoadTime, 'ProjectConfigStore.read rejects fixed-without-code').to.equal(true) + }) + }) + + describe('unknown ISO code degrades gracefully', () => { + it('unmapped code (`xx`) emits the fixed clause with the raw code in quotes', async () => { + // Forward-compat path. A future ISO code we haven't mapped yet must + // still produce a usable clause (`in "xx"`) rather than blowing up. + // This is the runtime-side counterpart to the loader's strict-validation + // contract. + writeProjectConfig({code: 'xx', mode: 'fixed'}) + const envelope = await kickoffWithProjectConfig() + expect(envelope.prompt).to.include('in "xx"') + }) + }) +}) diff --git a/test/unit/agent/infra/tools/implementations/cjk-tokenizer.test.ts b/test/unit/agent/infra/tools/implementations/cjk-tokenizer.test.ts new file mode 100644 index 000000000..fe3005920 --- /dev/null +++ b/test/unit/agent/infra/tools/implementations/cjk-tokenizer.test.ts @@ -0,0 +1,192 @@ +/** + * Tests for `tokenizeWithCjk` — the BM25 tokenizer that fixes MiniSearch's + * CJK blind spot. 
+ * + * Whitespace-separated scripts (Latin, Cyrillic, Vietnamese, …) must + * tokenize byte-identical to the MiniSearch default; CJK runs must emit + * overlapping bigrams; mixed Latin+CJK tokens must split at the script + * boundary so the Latin portion stays a real word token. The integration + * block at the end exercises the wired-up MiniSearch contract — the CJK + * gate — and confirms English scoring is preserved. + */ + +import {expect} from 'chai' +import MiniSearch from 'minisearch' + +import {tokenizeWithCjk} from '../../../../../../src/agent/infra/tools/implementations/cjk-tokenizer.js' + +function buildMiniSearchIndex(docs: Array<{id: number; t: string}>): MiniSearch { + const ms = new MiniSearch({ + fields: ['t'], + idField: 'id', + tokenize: tokenizeWithCjk, + }) + ms.addAll(docs) + return ms +} + +describe('cjk-tokenizer', () => { + describe('tokenizeWithCjk — non-CJK scripts behave like the MiniSearch default', () => { + it('English: splits on whitespace, preserves word tokens verbatim', () => { + expect(tokenizeWithCjk('Hello world JWT auth')).to.deep.equal([ + 'Hello', 'world', 'JWT', 'auth', + ]) + }) + + it('Russian (Cyrillic): preserves whitespace tokenization, no CJK side effects', () => { + expect(tokenizeWithCjk('Привет мир программирования')).to.deep.equal([ + 'Привет', 'мир', 'программирования', + ]) + }) + + it('Vietnamese (Latin-non-English): diacritics survive intact', () => { + // The proof point that LLM-in-call detection beats a Unicode-block + // heuristic — Vietnamese is Latin script and tokenizes via whitespace + // just like English. Diacritics are part of the word, not separators. + expect(tokenizeWithCjk('Cách triển khai xác thực')).to.deep.equal([ + 'Cách', 'triển', 'khai', 'xác', 'thực', + ]) + }) + + it('punctuation acts as a separator (matches MiniSearch default)', () => { + // Default MiniSearch splits on `\p{Z}\p{P}+`; commas, periods, parens + // all become token boundaries. 
+ expect(tokenizeWithCjk('one, two; three.')).to.deep.equal(['one', 'two', 'three']) + }) + }) + + describe('tokenizeWithCjk — CJK scripts emit overlapping bigrams', () => { + it('Chinese: 4-character run → 3 overlapping bigrams', () => { + expect(tokenizeWithCjk('认证系统')).to.deep.equal(['认证', '证系', '系统']) + }) + + it('Chinese: 2-character run → single bigram (the whole token)', () => { + expect(tokenizeWithCjk('认证')).to.deep.equal(['认证']) + }) + + it('Japanese: kanji + katakana both segmented as CJK', () => { + // `認証システム` contains both kanji (`認証`) and katakana + // (`システム`). The tokenizer treats them as a single CJK run since + // both ranges are CJK-classified, producing overlapping bigrams + // across the whole string. + const tokens = tokenizeWithCjk('認証システム') + expect(tokens).to.deep.include('認証') + expect(tokens).to.deep.include('証シ') + expect(tokens).to.deep.include('シス') + expect(tokens).to.deep.include('ステ') + expect(tokens).to.deep.include('テム') + }) + + it('Korean (Hangul Syllables): segmented into bigrams', () => { + // Whitespace-separated Korean tokens still bigram within each token. + // `'인증 시스템'` → `'인증'` (single bigram == whole token) plus + // bigrams of `'시스템'` (`'시스'`, `'스템'`). + const tokens = tokenizeWithCjk('인증 시스템') + expect(tokens).to.deep.include('인증') + expect(tokens).to.deep.include('시스') + expect(tokens).to.deep.include('스템') + }) + + it('single-character CJK input falls back to unigram', () => { + // Edge case for BM25 — a lone character has no bigram, but should + // still be searchable as itself. The unigram fallback prevents the + // tokenizer from emitting an empty array (which MiniSearch would + // interpret as "this document has no content for this field"). 
+ expect(tokenizeWithCjk('认')).to.deep.equal(['认']) + }) + }) + + describe('tokenizeWithCjk — mixed Latin + CJK tokens split at the script boundary', () => { + it('whitespace-separated Latin and CJK tokens stay independent', () => { + // `'JWT 令牌'` is already two whitespace-separated tokens. Latin + // stays Latin, the 2-char CJK run emits one bigram (the whole thing). + expect(tokenizeWithCjk('JWT 令牌')).to.deep.equal(['JWT', '令牌']) + }) + + it('no-whitespace mixed token splits at the script boundary', () => { + // `'JWT令牌'` has no whitespace — but the script boundary between + // 'T' (Latin) and '令' (CJK) is still a token boundary. Otherwise + // the Latin portion would get lost in a CJK bigram smear. + expect(tokenizeWithCjk('JWT令牌')).to.deep.equal(['JWT', '令牌']) + }) + + it('multiple boundaries in one token: alternating Latin/CJK runs', () => { + // `'API请求JSON响应'` → Latin/CJK/Latin/CJK boundaries. + // Each non-CJK run stays as one token; each CJK run emits bigrams. + expect(tokenizeWithCjk('API请求JSON响应')).to.deep.equal([ + 'API', + '请求', + 'JSON', + '响应', + ]) + }) + }) + + describe('MiniSearch integration — the CJK gate', () => { + // The unit tests above lock the tokenizer's input/output contract. + // These integration tests prove the contract holds when the tokenizer + // is wired into a real MiniSearch instance — what + // `search-knowledge-service.ts:MINISEARCH_OPTIONS` does in production. + + it('Chinese query matches Chinese content (was broken before this fix)', () => { + // The motivating test. Pre-fix: empirical run returned [] because + // `'认证系统使用JWT令牌'` tokenized as a single token under the + // MiniSearch default. With the bigram tokenizer, the query `'认证'` + // tokenizes to ['认证'] and finds doc 1's `'认证'` bigram. 
+ const ms = buildMiniSearchIndex([ + {id: 1, t: '认证系统使用JWT令牌'}, + {id: 2, t: 'JWT auth tokens'}, + ]) + const results = ms.search('认证') + expect(results.length, 'Chinese query returns at least one match').to.be.greaterThan(0) + expect(results[0].id).to.equal(1) + }) + + it('Japanese query matches Japanese content', () => { + const ms = buildMiniSearchIndex([{id: 1, t: '認証システムはJWTトークンを使用'}]) + const results = ms.search('認証') + expect(results.length).to.be.greaterThan(0) + }) + + it('Korean query matches Korean content', () => { + const ms = buildMiniSearchIndex([{id: 1, t: '인증 시스템은 JWT 토큰을 사용합니다'}]) + const results = ms.search('인증') + expect(results.length).to.be.greaterThan(0) + }) + + it('Russian query matches Russian content (regression, was working pre-fix)', () => { + // Cyrillic is whitespace-separated → the default tokenizer already + // handled it. Locking the regression so a future tokenizer rewrite + // doesn't accidentally break a script that used to work. + const ms = buildMiniSearchIndex([{id: 1, t: 'Привет мир программирования'}]) + const results = ms.search('программирования') + expect(results.length).to.be.greaterThan(0) + }) + + it('English query against English content returns the expected match', () => { + // Sanity check: the Latin path is byte-identical to the default + // MiniSearch behavior, so the existing BM25 ranking story is + // preserved end-to-end. + const ms = buildMiniSearchIndex([ + {id: 1, t: 'JWT authentication tokens'}, + {id: 2, t: 'session cookies and CSRF'}, + ]) + const results = ms.search('JWT') + expect(results.length).to.equal(1) + expect(results[0].id).to.equal(1) + }) + + it('English query does NOT match unrelated CJK content', () => { + // Cross-script isolation: a CJK doc shouldn't drag into English + // queries (and vice versa). The bigram tokenization is opaque to + // Latin queries; no false positives leak across scripts. 
+ const ms = buildMiniSearchIndex([ + {id: 1, t: '认证系统'}, + {id: 2, t: 'JWT authentication'}, + ]) + const englishResults = ms.search('JWT') + expect(englishResults.length).to.equal(1) + expect(englishResults[0].id).to.equal(2) + }) + }) +}) diff --git a/test/unit/core/domain/entities/brv-config.test.ts b/test/unit/core/domain/entities/brv-config.test.ts index 404603e62..6e28ec374 100644 --- a/test/unit/core/domain/entities/brv-config.test.ts +++ b/test/unit/core/domain/entities/brv-config.test.ts @@ -304,4 +304,103 @@ describe('BrvConfig', () => { expect(config.createdAt).to.be.a('string') }) }) + + describe('language', () => { + const fixedRu: BrvConfigParams['language'] = {code: 'ru', mode: 'fixed'} + + it('defaults to undefined when not set', () => { + const config = new BrvConfig(validConstructorArgs) + expect(config.language).to.be.undefined + }) + + it('preserves auto-mode through the constructor', () => { + const config = new BrvConfig({...validConstructorArgs, language: {mode: 'auto'}}) + expect(config.language).to.deep.equal({mode: 'auto'}) + }) + + it('preserves fixed-mode with code through the constructor', () => { + const config = new BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(config.language).to.deep.equal(fixedRu) + }) + + it('round-trips auto-mode through toJson/fromJson', () => { + const config = new BrvConfig({...validConstructorArgs, language: {mode: 'auto'}}) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.language).to.deep.equal({mode: 'auto'}) + }) + + it('round-trips fixed-mode with code through toJson/fromJson', () => { + const config = new BrvConfig({...validConstructorArgs, language: fixedRu}) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.language).to.deep.equal(fixedRu) + }) + + it('round-trips undefined language through toJson/fromJson', () => { + // Existing configs (no `language` field) must load cleanly post-rollout. 
+ const config = new BrvConfig(validConstructorArgs) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.language).to.be.undefined + }) + + it('rejects mode: fixed without code in fromJson', () => { + // `mode: 'fixed'` without `code` would silently fall back to English + // at prompt time. The loader rejects it so the failure mode is + // structurally impossible. + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: {mode: 'fixed'}}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects unknown mode value in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: {mode: 'always-english'}}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects non-string code in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: {code: 123, mode: 'fixed'}}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects non-object language in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: 'ru'}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('rejects null language in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, language: null}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('preserves language through withSpace', () => { + const original = new BrvConfig({...validConstructorArgs, language: fixedRu}) + const space = new Space({ + id: 'space-789', + isDefault: false, + name: 'my-space', + teamId: 'team-abc', + teamName: 'my-team', + }) + expect(original.withSpace(space).language).to.deep.equal(fixedRu) + }) + + it('preserves language through withoutSpace', () => { + const original = new BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(original.withoutSpace().language).to.deep.equal(fixedRu) + }) + + it('preserves language through withReviewDisabled', () => { + const original = new 
BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(original.withReviewDisabled(true).language).to.deep.equal(fixedRu) + }) + + it('preserves language through withVersion', () => { + const original = new BrvConfig({...validConstructorArgs, language: fixedRu}) + expect(original.withVersion('9.9.9').language).to.deep.equal(fixedRu) + }) + }) + }) diff --git a/test/unit/core/domain/entities/settings-registry.test.ts b/test/unit/core/domain/entities/settings-registry.test.ts index 449c1e895..38d96b89c 100644 --- a/test/unit/core/domain/entities/settings-registry.test.ts +++ b/test/unit/core/domain/entities/settings-registry.test.ts @@ -1,10 +1,7 @@ import {expect} from 'chai' -import { - findSettingDescriptor, - SETTINGS_KEYS, - SETTINGS_REGISTRY, -} from '../../../../../src/server/core/domain/entities/settings.js' +import {findSettingDescriptor, SETTINGS_REGISTRY} from '../../../../../src/server/core/domain/entities/settings.js' +import {SETTINGS_KEYS} from '../../../../../src/shared/types/settings-keys.js' function integerMaxOf(key: string): number { const descriptor = findSettingDescriptor(key) @@ -22,6 +19,7 @@ describe('settings registry — M7 T2 shape', () => { for (const descriptor of SETTINGS_REGISTRY) { expect(descriptor.category, `key ${descriptor.key} missing category`).to.be.oneOf([ 'concurrency', + 'language', 'llm', 'task-history', 'updates', @@ -127,4 +125,58 @@ describe('settings registry — M7 T2 shape', () => { } }) }) + + describe('language.* enum descriptors', () => { + it('exposes LANGUAGE_MODE + LANGUAGE_CODE on SETTINGS_KEYS', () => { + expect(SETTINGS_KEYS.LANGUAGE_MODE).to.equal('language.mode') + expect(SETTINGS_KEYS.LANGUAGE_CODE).to.equal('language.code') + }) + + it('registers language.mode as enum with default=auto and options=[auto, fixed]', () => { + const descriptor = findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE) + expect(descriptor?.type).to.equal('enum') + if (descriptor?.type === 'enum') { + 
expect(descriptor.default).to.equal('auto') + expect([...descriptor.options]).to.deep.equal(['auto', 'fixed']) + } else { + expect.fail('expected enum descriptor for language.mode') + } + }) + + it('registers language.code as enum with default=en and options including ko + ja + zh', () => { + const descriptor = findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_CODE) + expect(descriptor?.type).to.equal('enum') + if (descriptor?.type === 'enum') { + expect(descriptor.default).to.equal('en') + expect(descriptor.options).to.include('ko') + expect(descriptor.options).to.include('ja') + expect(descriptor.options).to.include('zh') + expect(descriptor.options).to.include('en') + } else { + expect.fail('expected enum descriptor for language.code') + } + }) + + it('groups both language entries under category=language', () => { + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE)?.category).to.equal('language') + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_CODE)?.category).to.equal('language') + }) + + it('marks language settings as restart-not-required (live config)', () => { + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE)?.restartRequired).to.equal(false) + expect(findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_CODE)?.restartRequired).to.equal(false) + }) + + it('narrows enum descriptors to EnumSettingDescriptor when descriptor.type === enum', () => { + const descriptor = findSettingDescriptor(SETTINGS_KEYS.LANGUAGE_MODE) + if (descriptor?.type === 'enum') { + const defaultValue: string = descriptor.default + const {options} = descriptor + expect(defaultValue).to.equal('auto') + expect(options.length).to.be.greaterThan(0) + } else { + expect.fail('expected enum descriptor for language.mode') + } + }) + }) }) diff --git a/test/unit/infra/connectors/skill/skill-connector.test.ts b/test/unit/infra/connectors/skill/skill-connector.test.ts index ceccd3b80..d8edca630 100644 --- a/test/unit/infra/connectors/skill/skill-connector.test.ts +++ 
b/test/unit/infra/connectors/skill/skill-connector.test.ts @@ -236,6 +236,34 @@ describe('SkillConnector', () => { expect(troubleshootingContent).to.include('does NOT invoke any LLM') }) + it('should create onboarding.md documenting the language-selection step', async () => { + const agent = 'Claude Code' as const + const {projectPath} = SKILL_CONNECTOR_CONFIGS[agent] + await skillConnector.install(agent) + + const skillDir = path.join(testDir, projectPath, BRV_SKILL_NAME) + const onboardingContent = await readFile(path.join(skillDir, 'onboarding.md'), 'utf8') + + expect(onboardingContent).to.include('brv settings set language.code') + expect(onboardingContent).to.include('brv settings set language.mode fixed') + expect(onboardingContent).to.include('--format json') + expect(onboardingContent).to.include('invalid_value') + expect(onboardingContent).to.include('data.error') + expect(onboardingContent).to.include('single exception') + // restartRequired:false → the tour must never tell the user to restart + expect(onboardingContent).to.not.include('brv restart') + // code must be set before mode, so a rejected code leaves mode at its safe `auto` default + expect(onboardingContent.indexOf('language.code')).to.be.lessThan( + onboardingContent.indexOf('language.mode fixed'), + ) + // trust opener still precedes the language ask + expect(onboardingContent.indexOf('stays on your machine')).to.be.lessThan( + onboardingContent.indexOf('what language should I save your memory in'), + ) + // never dump the supported-language table as a user-facing menu + expect(onboardingContent).to.not.match(/ar Arabic, de German/) + }) + it('should inject the OpenClaw block into the default agent workspace (agents.defaults.workspace), not the agentDir', async () => { const openClawStateDir = path.join(testDir, 'openclaw-state') const openClawConfigPath = path.join(openClawStateDir, 'openclaw.json') diff --git a/test/unit/infra/mcp/tools/brv-curate-tool.test.ts 
b/test/unit/infra/mcp/tools/brv-curate-tool.test.ts index f66bc6f2b..7fd41803d 100644 --- a/test/unit/infra/mcp/tools/brv-curate-tool.test.ts +++ b/test/unit/infra/mcp/tools/brv-curate-tool.test.ts @@ -298,6 +298,27 @@ describe('brv-curate-tool', () => { // dropping it would silently regress the Skill ↔ MCP output parity. expect(description).to.include('Place section titles INSIDE the container') }) + + it('embeds the auto-mode language-preservation clause', () => { + // MCP's TOOL_DESCRIPTION is built once at server-boot and cannot + // read live config, so it carries the auto clause unconditionally: + // "match the user's input language". Per-call fixed-mode is honored + // via the oclif `brv curate` kickoff prompt instead. + // The schema-key carve-out (tag names / attribute names / enum values + // stay English) prevents the calling agent's LLM from translating + // `` for non-English input — which would fail Zod + // validation at the writer boundary. + const {getDescription} = setupHandler({ + getClient: () => createMockClient().client, + getWorkingDirectory: () => '/project/root', + }) + + const description = getDescription() + expect(description).to.include('# Language') + expect(description).to.include("Match the user's input language") + expect(description).to.include('tag names') + expect(description).to.include('enum values') + }) }) describe('dispatch — task type + payload', () => { diff --git a/test/unit/infra/storage/file-settings-store.test.ts b/test/unit/infra/storage/file-settings-store.test.ts index 6c3a593ac..dcfb4cb6e 100644 --- a/test/unit/infra/storage/file-settings-store.test.ts +++ b/test/unit/infra/storage/file-settings-store.test.ts @@ -49,6 +49,8 @@ describe('FileSettingsStore', () => { expect(keys).to.deep.equal([ 'agentPool.maxConcurrentTasksPerProject', 'agentPool.maxSize', + 'language.code', + 'language.mode', 'llm.iterationBudgetMs', 'llm.requestTimeoutMs', 'taskHistory.maxEntries', diff --git 
a/test/unit/infra/transport/handlers/settings-handler.test.ts b/test/unit/infra/transport/handlers/settings-handler.test.ts index 8b4defc87..b7f2f03df 100644 --- a/test/unit/infra/transport/handlers/settings-handler.test.ts +++ b/test/unit/infra/transport/handlers/settings-handler.test.ts @@ -86,6 +86,8 @@ describe('SettingsHandler', () => { expect(result.items.map((i) => i.key).sort()).to.deep.equal([ 'agentPool.maxConcurrentTasksPerProject', 'agentPool.maxSize', + 'language.code', + 'language.mode', 'llm.iterationBudgetMs', 'llm.requestTimeoutMs', 'taskHistory.maxEntries', @@ -136,6 +138,23 @@ describe('SettingsHandler', () => { expect(item.scope).to.equal(undefined) } }) + + it('exposes options on enum-typed items and omits options on non-enum items', async () => { + store.listResult = [] + const result = await invokeList() + const byKey = new Map(result.items.map((i) => [i.key, i])) + + const mode = byKey.get('language.mode') + expect(mode?.type).to.equal('enum') + expect(mode?.options).to.deep.equal(['auto', 'fixed']) + + const code = byKey.get('language.code') + expect(code?.type).to.equal('enum') + expect(code?.options).to.include('ko') + + expect(byKey.get('agentPool.maxSize')?.options).to.equal(undefined) + expect(byKey.get('update.checkForUpdates')?.options).to.equal(undefined) + }) }) describe('GET', () => { @@ -269,6 +288,29 @@ describe('SettingsHandler', () => { if (!result.ok) expect(result.error.code).to.equal('unknown_key') }) + it('rejects a numeric value sent to an enum key', async () => { + const result = await invokeSet({key: 'language.mode', value: 5}) + + expect(result.ok).to.be.false + if (!result.ok) { + expect(result.error.code).to.equal('invalid_value_type') + expect(result.error.key).to.equal('language.mode') + expect(result.error.expected).to.equal('enum') + expect(result.error.got).to.equal('number') + } + + expect(store.calls.filter((c) => c.method === 'set')).to.have.lengthOf(0) + }) + + it('accepts a string value sent to an enum key 
and forwards to the store', async () => { + const result = await invokeSet({key: 'language.mode', value: 'fixed'}) + + expect(result.ok).to.be.true + const setCalls = store.calls.filter((c) => c.method === 'set') + expect(setCalls).to.have.lengthOf(1) + expect(setCalls[0].args).to.deep.equal(['language.mode', 'fixed']) + }) + it('still surfaces a range violation as invalid_value (not invalid_value_type)', async () => { store.setBehavior = async (key, value) => { throw new InvalidSettingValueError(key, value, 'value 0 is outside allowed range [1, 100]') diff --git a/test/unit/oclif/lib/curate-session.test.ts b/test/unit/oclif/lib/curate-session.test.ts index d04ef3601..eb6d41c4e 100644 --- a/test/unit/oclif/lib/curate-session.test.ts +++ b/test/unit/oclif/lib/curate-session.test.ts @@ -225,6 +225,25 @@ describe('curate-session', () => { const b = await kickoffSession({content: 'b', projectRoot}) expect(a.sessionId).to.not.equal(b.sessionId) }) + + it('threads `language` into the kickoff prompt (auto when omitted)', async () => { + // End-to-end threading proof: orchestrator → buildGeneratePrompt → + // buildLanguageClause. Auto when no language preference is set. + const env = await kickoffSession({content: 'x', projectRoot}) + expect(env.prompt).to.include("Match the user's input language") + }) + + it('threads `language` into the kickoff prompt (fixed-mode emits the mapped name)', async () => { + // A regression dropping the param in the orchestrator would surface + // here as the auto clause leaking into a fixed-mode kickoff. 
+ const env = await kickoffSession({ + content: 'x', + language: {code: 'ru', mode: 'fixed'}, + projectRoot, + }) + expect(env.prompt).to.include('in Russian') + expect(env.prompt).to.not.include("Match the user's input language") + }) }) // ─── continueSession — dispatch to daemon ──────────────────────────────────── diff --git a/test/unit/server/core/domain/render/curate-prompt-builder.test.ts b/test/unit/server/core/domain/render/curate-prompt-builder.test.ts index ff6216698..d51cdba6f 100644 --- a/test/unit/server/core/domain/render/curate-prompt-builder.test.ts +++ b/test/unit/server/core/domain/render/curate-prompt-builder.test.ts @@ -249,12 +249,49 @@ describe('curate-prompt-builder', () => { // Schema slice is ~2-3 KB; the surrounding prose adds ~1.5 KB // for explicit contract rules covering `
  • ` bullet prefixes, // `` CDATA, and `related` file-vs-folder routing; - // the user intent is bounded by the caller. Each rule prevents - // a distinct FE-breaking output class. Bumping the budget should - // be a deliberate decision, not a silent drift. + // the language clause adds ~340 chars; the user intent is bounded + // by the caller. Each rule prevents a distinct FE-breaking output + // class. Bumping the budget should be a deliberate decision, not a + // silent drift. const prompt = buildGeneratePrompt({userIntent: 'remember we use RS256'}) expect(prompt.length).to.be.lessThan(6144) }) + + it('emits a `# Language` section between path format and element vocabulary', () => { + // Section ordering matters: the language clause is part of the + // byterover-controlled framing that must commit BEFORE the + // element vocabulary (so the LLM authors `` body text in + // the configured language) and BEFORE the user-intent block (so + // a malicious intent can't shadow it). + const prompt = buildGeneratePrompt({userIntent: 'x'}) + const pathIdx = prompt.indexOf('# Path format') + const languageIdx = prompt.indexOf('# Language') + const schemaIdx = prompt.indexOf('# Element vocabulary') + + expect(languageIdx, 'language section present').to.be.greaterThan(-1) + expect(languageIdx, 'language section after path format').to.be.greaterThan(pathIdx) + expect(languageIdx, 'language section before element vocabulary').to.be.lessThan(schemaIdx) + }) + + it('emits the auto-mode clause when language is not provided', () => { + const prompt = buildGeneratePrompt({userIntent: 'x'}) + expect(prompt).to.include("Match the user's input language") + }) + + it('emits the auto-mode clause when language.mode is auto', () => { + const prompt = buildGeneratePrompt({language: {mode: 'auto'}, userIntent: 'x'}) + expect(prompt).to.include("Match the user's input language") + }) + + it('emits the fixed-mode clause with the mapped language name', () => { + // Threading proof — confirms 
`language` from options reaches + // buildLanguageClause. A regression dropping the param (e.g. a + // future destructuring miss in the orchestrator) would surface + // here as the auto clause leaking into fixed-mode prompts. + const prompt = buildGeneratePrompt({language: {code: 'ru', mode: 'fixed'}, userIntent: 'x'}) + expect(prompt).to.include('in Russian') + expect(prompt).to.not.include("Match the user's input language") + }) }) describe('buildCorrectionPrompt', () => { @@ -425,5 +462,44 @@ describe('curate-prompt-builder', () => { }) expect(prompt).to.not.include(CURATE_SCHEMA_PROMPT) }) + + it('emits a `# Language` section between output contract and errors', () => { + // Correction prompts can't drop the language clause — if the + // first attempt failed validation, the LLM may also have drifted + // off language. The clause reasserts the contract on every + // retry. + const prompt = buildCorrectionPrompt({ + errors: [{kind: 'missing-path-attribute', message: 'm'}], + previousHtml, + userIntent, + }) + const contractIdx = prompt.indexOf('# Output contract') + const languageIdx = prompt.indexOf('# Language') + const errorsIdx = prompt.indexOf('# Errors to fix') + + expect(languageIdx, 'language section present').to.be.greaterThan(-1) + expect(languageIdx, 'language after output contract').to.be.greaterThan(contractIdx) + expect(languageIdx, 'language before errors block').to.be.lessThan(errorsIdx) + }) + + it('emits the auto-mode clause when language is not provided', () => { + const prompt = buildCorrectionPrompt({ + errors: [{kind: 'missing-path-attribute', message: 'm'}], + previousHtml, + userIntent, + }) + expect(prompt).to.include("Match the user's input language") + }) + + it('emits the fixed-mode clause with the mapped language name', () => { + const prompt = buildCorrectionPrompt({ + errors: [{kind: 'missing-path-attribute', message: 'm'}], + language: {code: 'ru', mode: 'fixed'}, + previousHtml, + userIntent, + }) + expect(prompt).to.include('in 
Russian') + expect(prompt).to.not.include("Match the user's input language") + }) }) }) diff --git a/test/unit/server/core/domain/render/language-clause.test.ts b/test/unit/server/core/domain/render/language-clause.test.ts new file mode 100644 index 000000000..f7f64d34b --- /dev/null +++ b/test/unit/server/core/domain/render/language-clause.test.ts @@ -0,0 +1,103 @@ +/** + * Tests for buildLanguageClause and the LANGUAGE_NAMES map. + * + * The clause is load-bearing for the language-selection feature — every + * downstream injection surface (kickoff prompt, correction prompt, MCP + * tool description) emits this exact string. The "schema-key invariant" + * test below is the contract the clause must hold so that LLM authoring + * doesn't drift into translating tag names / enum values, which would + * fail Zod validation at the writer boundary. + */ + +import {expect} from 'chai' + +import {buildLanguageClause} from '../../../../../../src/server/core/domain/render/language-clause.js' +import {LANGUAGE_NAMES} from '../../../../../../src/shared/language/language-names.js' + +describe('language-clause', () => { + describe('LANGUAGE_NAMES', () => { + it('includes Russian (the #616 reporter language)', () => { + expect(LANGUAGE_NAMES.ru).to.equal('Russian') + }) + + it('includes the four scripts covered by the validation matrix', () => { + expect(LANGUAGE_NAMES.vi).to.equal('Vietnamese') + expect(LANGUAGE_NAMES.zh).to.equal('Chinese') + expect(LANGUAGE_NAMES.ja).to.equal('Japanese') + }) + + it('includes English so the CLI accepts the restoration code', () => { + // Release notes recommend `language: { mode: 'fixed', code: 'en' }` + // as the opt-out path for users who want forced English. The CLI + // (commit 05) rejects codes not in this map, so `en` must be here. 
+ expect(LANGUAGE_NAMES.en).to.equal('English') + }) + }) + + describe('buildLanguageClause', () => { + it('returns the auto clause when language is undefined', () => { + const clause = buildLanguageClause() + expect(clause).to.include("Match the user's input language") + }) + + it('returns the auto clause when mode is auto', () => { + const clause = buildLanguageClause({mode: 'auto'}) + expect(clause).to.include("Match the user's input language") + }) + + it('returns the fixed clause with mapped English name for a known code', () => { + const clause = buildLanguageClause({code: 'ru', mode: 'fixed'}) + expect(clause).to.include('in Russian') + }) + + it('returns the fixed clause for Chinese (CJK)', () => { + const clause = buildLanguageClause({code: 'zh', mode: 'fixed'}) + expect(clause).to.include('in Chinese') + }) + + it('returns the fixed clause for Vietnamese (Latin-non-English)', () => { + const clause = buildLanguageClause({code: 'vi', mode: 'fixed'}) + expect(clause).to.include('in Vietnamese') + }) + + it('falls back to the raw code in quotes for an unknown ISO code', () => { + // Forward-compat: a future code we haven't mapped yet still + // produces a usable clause. Degrades to `in "xx"` rather than + // failing the entire prompt build. + const clause = buildLanguageClause({code: 'xx', mode: 'fixed'}) + expect(clause).to.include('in "xx"') + }) + + it('degrades to auto when fixed-mode arrives without a code', () => { + // `isBrvConfigJson` rejects this shape at load time; the function + // defends against the case anyway so a malformed config degrades + // rather than crashing a write path. 
+ const clause = buildLanguageClause({mode: 'fixed'}) + expect(clause).to.include("Match the user's input language") + }) + + it('every clause variant mentions the schema-key invariant', () => { + // Load-bearing — if the clause is loose enough that this assertion + // fails, the LLM may translate tag names like `` to a + // localized form, which fails Zod validation downstream. + const auto = buildLanguageClause() + const fixedKnown = buildLanguageClause({code: 'ru', mode: 'fixed'}) + const fixedUnknown = buildLanguageClause({code: 'xx', mode: 'fixed'}) + + for (const clause of [auto, fixedKnown, fixedUnknown]) { + expect(clause).to.include('tag names') + expect(clause).to.include('attribute names') + expect(clause).to.include('enum values') + expect(clause).to.include('`path`') + } + }) + + it('all clauses preserve code snippets verbatim', () => { + const auto = buildLanguageClause() + const fixed = buildLanguageClause({code: 'ru', mode: 'fixed'}) + + expect(auto).to.include('Code snippets and identifiers stay verbatim') + expect(fixed).to.include('Code snippets and identifiers stay verbatim') + }) + }) +}) diff --git a/test/unit/shared/utils/format-settings.test.ts b/test/unit/shared/utils/format-settings.test.ts index 8c9ea961e..cab597be9 100644 --- a/test/unit/shared/utils/format-settings.test.ts +++ b/test/unit/shared/utils/format-settings.test.ts @@ -32,6 +32,20 @@ function makeBooleanItem(current: boolean): SettingsItemDTO { } } +function makeEnumItem(overrides: Partial<SettingsItemDTO> = {}): SettingsItemDTO { + return { + category: 'language', + current: 'auto', + default: 'auto', + description: 'desc', + key: 'language.mode', + options: ['auto', 'fixed'], + restartRequired: false, + type: 'enum', + ...overrides, + } +} + function makeRow(overrides: Partial<SettingsRow> = {}): SettingsRow { return { category: 'concurrency', @@ -253,4 +267,59 @@ describe('format-settings (shared)', () => { expect(rows.map((r) => r.category)).to.deep.equal(['concurrency', 'task-history', 'updates']) })
}) + + describe('enum rows', () => { + it('includes enum items in the output with options propagated', () => { + const rows = buildSettingsRows([makeEnumItem()]) + expect(rows).to.have.lengthOf(1) + expect(rows[0].type).to.equal('enum') + expect(rows[0].options).to.deep.equal(['auto', 'fixed']) + }) + + it('formats current=auto as "[ auto ]" and default verbatim', () => { + const row = buildSettingsRows([makeEnumItem({current: 'auto'})])[0] + expect(row.displayCurrent).to.equal('[ auto ]') + expect(row.displayDefault).to.equal('auto') + }) + + it('marks the row as modified when current differs from default', () => { + const row = buildSettingsRows([makeEnumItem({current: 'fixed'})])[0] + expect(row.modified).to.equal(true) + }) + + it('groups language enum rows under category=language', () => { + const row = buildSettingsRows([makeEnumItem()])[0] + expect(row.category).to.equal('language') + }) + + it('orders the language category after updates', () => { + const rows = buildSettingsRows([ + makeEnumItem(), + makeItem({category: 'concurrency', key: 'agentPool.maxSize'}), + makeBooleanItem(true), + ]) + expect(rows.map((r) => r.category)).to.deep.equal(['concurrency', 'updates', 'language']) + }) + + it('skips enum items with missing or wrong-typed fields (defensive narrowing)', () => { + const wonky = {...makeEnumItem(), current: 5} as unknown as SettingsItemDTO + expect(buildSettingsRows([wonky])).to.have.lengthOf(0) + }) + + it('parseRowInput: accepts a valid option as ok and rejects an unknown option', () => { + const row = buildSettingsRows([makeEnumItem()])[0] + const ok = parseRowInput(row, 'fixed') + expect(ok.kind).to.equal('ok') + if (ok.kind === 'ok') { + expect(ok.value).to.equal('fixed') + expect(ok.displayValue).to.equal('fixed') + } + + const bad = parseRowInput(row, 'pidgin') + expect(bad.kind).to.equal('error') + if (bad.kind === 'error') { + expect(bad.message).to.match(/Expected one of \[auto, fixed\]/) + } + }) + }) })