diff --git a/src/server/handlers.ts b/src/server/handlers.ts
index bff37fb..55c9c80 100644
--- a/src/server/handlers.ts
+++ b/src/server/handlers.ts
@@ -18,11 +18,11 @@ import { loadPathPenalties, mergePathPenaltyEntries } from "./config-loader.js";
 import { loadServerConfig } from "./config.js";
 import { FtsStatusCache, ServerContext, TableAvailability } from "./context.js";
 import type { DomainFileHint } from "./domain-terms.js";
+import type { SnippetRangeSource } from "./handlers/snippets-get.js";
 import { createIdfProvider } from "./idf-provider.js";
 import { coerceProfileName, loadScoringProfile, type ScoringWeights } from "./scoring.js";
 import { createServerServices, ServerServices } from "./services/index.js";
 import { loadStopWords, type StopWordsService } from "./stop-words.js";
-import type { SnippetRangeSource } from "./handlers/snippets-get.js";
 
 // Re-export extracted handlers for backward compatibility
 export {
@@ -5465,3 +5465,176 @@ export async function contextBundle(
     throw error;
   }
 }
+
+/** Parameters accepted by hybridSearch. */
+export interface HybridSearchParams {
+  goal: string;
+  limit?: number; // default: 7, max: 20
+  // default: ['sql'] - searched supplementally when not covered (max 5, lowercase, no leading dot)
+  required_types?: string[];
+  compact?: boolean; // default: true
+  boost_profile?: BoostProfileName;
+}
+
+/** Coverage metadata describing whether, and for which types, supplementation was triggered. */
+export interface HybridSearchCoverage {
+  semantic_count: number;
+  supplemental_count: number;
+  triggered: boolean;
+  missing_types: string[];
+}
+
+/** Semantic results plus any supplemental files_search hits and the coverage metadata. */
+export interface HybridSearchResult {
+  context: ContextBundleItem[];
+  supplemental: FilesSearchResult[];
+  coverage: HybridSearchCoverage;
+}
+
+// Built once at module load instead of on every extractSearchKeywords call.
+const HYBRID_SEARCH_STOP_WORDS: ReadonlySet<string> = new Set([
+  "how",
+  "does",
+  "the",
+  "a",
+  "an",
+  "and",
+  "or",
+  "for",
+  "to",
+  "in",
+  "of",
+  "is",
+  "are",
+  "was",
+  "were",
+  "be",
+  "been",
+  "do",
+  "did",
+  "what",
+  "where",
+  "when",
+  "why",
+  "which",
+  "who",
+  "will",
+  "would",
+  "should",
+  "could",
+]);
+
+/**
+ * Reduces a free-form goal to at most five distinct lowercase keywords for files_search.
+ *
+ * Tokens are split on every non-word character and on camelCase/PascalCase boundaries;
+ * stop words and tokens of two characters or fewer are dropped.
+ * NOTE: only ASCII letters, digits and "_" are kept, so a non-ASCII goal yields "".
+ *
+ * @param goal - Free-form search goal.
+ * @returns Space-separated keywords, or an empty string when none qualify.
+ */
+function extractSearchKeywords(goal: string): string {
+  // Split camelCase/PascalCase before lowercasing to preserve word boundaries
+  // e.g. "createUser" → ["create", "User"] → "create user"
+  const splitCamel = (token: string): string =>
+    token.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
+  const keywords = new Set<string>();
+  // Split on non-word characters, not just whitespace, so "blob-tree" or "db/schema.sql"
+  // yield separate keywords instead of one fused token such as "blobtree".
+  for (const token of goal.split(/[^A-Za-z0-9_]+/)) {
+    for (const part of splitCamel(token).split(" ")) {
+      const word = part.toLowerCase();
+      if (word.length > 2 && !HYBRID_SEARCH_STOP_WORDS.has(word)) {
+        keywords.add(word);
+      }
+    }
+  }
+  return Array.from(keywords).slice(0, 5).join(" ");
+}
+
+/**
+ * Runs contextBundle, then calls filesSearch once per required file type that the semantic
+ * results do not cover. When the semantic results are empty and no type is missing, a single
+ * unfiltered filesSearch is issued instead.
+ * coverage.triggered reports that supplementation was warranted; no search is issued when
+ * the goal yields no usable keywords, so supplemental may still be empty.
+ *
+ * @param context - Server context passed through to contextBundle and filesSearch.
+ * @param params - Goal plus optional limit, required_types, compact and boost_profile.
+ * @returns Semantic items, supplemental hits and coverage metadata.
+ */
+export async function hybridSearch(
+  context: ServerContext,
+  params: HybridSearchParams
+): Promise<HybridSearchResult> {
+  const { goal, compact = true, boost_profile } = params;
+  // Math.max(1, NaN) is NaN, so use the default for NaN; fractional limits are truncated.
+  const requestedLimit = params.limit ?? 7;
+  const limit = Number.isNaN(requestedLimit)
+    ? 7
+    : Math.min(Math.max(1, Math.trunc(requestedLimit)), 20);
+  // Direct callers bypass the RPC parser, so enforce the documented contract here as well:
+  // no leading dots, lowercase, unique, at most five entries.
+  const required_types = Array.from(
+    new Set(
+      (params.required_types ?? ["sql"])
+        .map((ext) => ext.trim().replace(/^\.+/, "").toLowerCase())
+        .filter((ext) => ext.length > 0)
+    )
+  ).slice(0, 5);
+
+  // Step 1: Semantic search
+  const bundleParams: ContextBundleParams = { goal, limit, compact };
+  if (boost_profile !== undefined) {
+    bundleParams.boost_profile = boost_profile;
+  }
+  const bundleResult = await contextBundle(context, bundleParams);
+  const semanticItems = bundleResult.context;
+
+  // Step 2: Coverage analysis
+  // Use path.extname for reliable extension extraction (handles dotfiles, multi-dot paths)
+  const coveredExts = new Set(
+    semanticItems.map((item) => {
+      const ext = path.extname(item.path);
+      return ext.startsWith(".") ? ext.slice(1).toLowerCase() : "";
+    })
+  );
+  const missingTypes = required_types.filter((ext) => !coveredExts.has(ext));
+  // lowConfidence: score-based threshold is unreliable when scores are normalized;
+  // trigger only on completely empty semantic results instead.
+  const lowConfidence = semanticItems.length === 0;
+  const triggered = missingTypes.length > 0 || lowConfidence;
+
+  // Step 3: Supplemental search
+  const supplemental: FilesSearchResult[] = [];
+  if (triggered) {
+    const keywords = extractSearchKeywords(goal);
+    if (keywords.length > 0) {
+      for (const ext of missingTypes) {
+        const results = await filesSearch(context, {
+          query: keywords,
+          ext: `.${ext}`,
+          limit: 3,
+          compact,
+        });
+        supplemental.push(...results);
+      }
+      if (lowConfidence && missingTypes.length === 0) {
+        const results = await filesSearch(context, { query: keywords, limit: 3, compact });
+        supplemental.push(...results);
+      }
+    }
+  }
+
+  return {
+    context: semanticItems,
+    supplemental,
+    coverage: {
+      semantic_count: semanticItems.length,
+      supplemental_count: supplemental.length,
+      triggered,
+      missing_types: missingTypes,
+    },
+  };
+}
diff --git a/src/server/output-schemas.ts b/src/server/output-schemas.ts
index 312cd12..4081fc7 100644
--- a/src/server/output-schemas.ts
+++ b/src/server/output-schemas.ts
@@ -136,6 +136,23 @@ export const SemanticRerankResultSchema = z.object({
   candidates: z.array(SemanticRerankItemSchema).describe("リランキングされた候補配列"),
 });
 
+// =============================================================================
+// hybrid_search
+// =============================================================================
+
+export const HybridSearchCoverageSchema = z.object({
+  semantic_count: z.number().describe("KIRI semantic search の結果件数"),
+  supplemental_count: z.number().describe("Grep補完の結果件数"),
+  triggered: z.boolean().describe("補完が実行されたか"),
+  missing_types: z.array(z.string()).describe("required_types のうち未カバーの拡張子"),
+});
+
+export const HybridSearchResultSchema = z.object({
+  context: z.array(ContextBundleItemSchema).describe("KIRI semantic search の結果"),
+  supplemental: z.array(FilesSearchResultItemSchema).describe("Grep補完の結果"),
+  coverage: HybridSearchCoverageSchema.describe("カバレッジメタデータ"),
+});
+
 // 
=============================================================================
 // JSON Schema生成
 // =============================================================================
@@ -151,4 +168,5 @@ export const OUTPUT_SCHEMAS = {
   snippets_get: z.toJSONSchema(SnippetResultSchema),
   deps_closure: z.toJSONSchema(DepsClosureResultSchema),
   semantic_rerank: z.toJSONSchema(SemanticRerankResultSchema),
+  hybrid_search: z.toJSONSchema(HybridSearchResultSchema),
 } as const;
diff --git a/src/server/rpc.ts b/src/server/rpc.ts
index 04b2d8e..d8bf8a7 100644
--- a/src/server/rpc.ts
+++ b/src/server/rpc.ts
@@ -8,20 +8,22 @@ import {
 } from "../shared/adaptive-k-categories.js";
 import { maskValue } from "../shared/security/masker.js";
 
-import { resolveCompactFlag } from "./compact-mode.js";
 import { isValidBoostProfile, BOOST_PROFILES } from "./boost-profiles.js";
+import { resolveCompactFlag } from "./compact-mode.js";
 import { ServerContext } from "./context.js";
 import { DegradeController } from "./fallbacks/degradeController.js";
 import {
   ContextBundleParams,
   DepsClosureParams,
   FilesSearchParams,
+  HybridSearchParams,
   SemanticRerankParams,
   SnippetsGetParams,
   SnippetsGetView,
   contextBundle,
   depsClosure,
   filesSearch,
+  hybridSearch,
   semanticRerank,
   snippetsGet,
 } from "./handlers.js";
@@ -391,6 +393,45 @@ const TOOL_DESCRIPTORS: ToolDescriptor[] = [
     },
     outputSchema: OUTPUT_SCHEMAS.deps_closure,
   },
+  {
+    name: "hybrid_search",
+    description:
+      "Semantic code search with automatic file-type supplementation.\n" +
+      "Runs context_bundle first; if required file types (e.g. SQL, YAML) are missing or\n" +
+      "confidence is low, automatically supplements with files_search.\n" +
+      "Returns coverage metadata so callers know what triggered supplementation.\n" +
+      "Example: hybrid_search({goal: 'DuckDB schema blob tree definition'})",
+    inputSchema: {
+      type: "object",
+      required: ["goal"],
+      additionalProperties: true,
+      properties: {
+        goal: { type: "string", description: "Concrete keywords describing what to find." },
+        limit: {
+          type: "number",
+          minimum: 1,
+          maximum: 20,
+          description: "Max results (default: 7).",
+        },
+        required_types: {
+          type: "array",
+          items: { type: "string" },
+          description:
+            "File extensions to ensure coverage for (default: ['sql']). Triggers supplemental search if missing.",
+        },
+        compact: {
+          type: "boolean",
+          description: "Omit previews for token savings (default: true).",
+        },
+        boost_profile: {
+          type: "string",
+          enum: ["default", "docs", "balanced", "none", "code"],
+          description: "File type priority.",
+        },
+      },
+    },
+    outputSchema: OUTPUT_SCHEMAS.hybrid_search,
+  },
 ];
 
 const INITIALIZE_PAYLOAD = {
@@ -473,6 +514,70 @@ function parseFilesSearchParams(input: unknown): FilesSearchParams {
   return params;
 }
 
+/**
+ * Validates and normalizes raw tool arguments for hybrid_search.
+ *
+ * @param input - Untrusted arguments value from the tool call.
+ * @returns Params with a trimmed goal, an integer limit (when given) and normalized required_types.
+ * @throws Error on a non-object input, blank goal, out-of-range limit or invalid boost_profile.
+ */
+function parseHybridSearchParams(input: unknown): HybridSearchParams {
+  if (!input || typeof input !== "object") {
+    throw new Error(
+      "hybrid_search requires an object with a goal parameter. Provide keywords describing what to find."
+    );
+  }
+  const record = input as Record<string, unknown>;
+  const goal = record.goal;
+  if (typeof goal !== "string" || goal.trim().length === 0) {
+    throw new Error(
+      "hybrid_search requires a non-empty goal string. Provide keywords describing what to find."
+    );
+  }
+
+  const params: HybridSearchParams = { goal: goal.trim() };
+
+  if (typeof record.limit === "number") {
+    // Every comparison with NaN is false, so the range check alone would let NaN through.
+    if (Number.isNaN(record.limit) || record.limit < 1 || record.limit > 20) {
+      throw new Error("limit must be between 1 and 20");
+    }
+    // Fractional values inside the range are truncated so the handler receives an integer.
+    params.limit = Math.trunc(record.limit);
+  }
+
+  if (Array.isArray(record.required_types)) {
+    // Normalize: trim whitespace, strip leading dots, lowercase, deduplicate via Set, cap at 5
+    const seen = new Set<string>();
+    const normalized: string[] = [];
+    for (const t of record.required_types as unknown[]) {
+      if (typeof t !== "string") continue;
+      const clean = t.trim().replace(/^\.+/, "").toLowerCase();
+      if (clean.length === 0 || seen.has(clean)) continue;
+      seen.add(clean);
+      normalized.push(clean);
+      if (normalized.length >= 5) break;
+    }
+    params.required_types = normalized;
+  }
+
+  params.compact = resolveCompactFlag(
+    typeof record.compact === "boolean" ? record.compact : undefined
+  );
+
+  if (typeof record.boost_profile === "string") {
+    if (isValidBoostProfile(record.boost_profile)) {
+      params.boost_profile = record.boost_profile;
+    } else {
+      throw new Error(
+        `Invalid boost_profile: "${record.boost_profile}". Valid profiles are: ${Object.keys(BOOST_PROFILES).join(", ")}`
+      );
+    }
+  }
+
+  return params;
+}
+
 function parseSnippetsGetParams(input: unknown): SnippetsGetParams {
   if (!input || typeof input !== "object") {
     return { path: "" };
@@ -844,6 +949,12 @@ async function executeToolByName(
         await withSpan("deps_closure", async () => await depsClosure(context, params));
       return await degrade.withResource(handler, "duckdb:deps_closure");
     }
+    case "hybrid_search": {
+      const params = parseHybridSearchParams(toolParams);
+      const handler = async () =>
+        await withSpan("hybrid_search", async () => await hybridSearch(context, params));
+      return await degrade.withResource(handler, "duckdb:hybrid_search");
+    }
     default:
       throw new Error(`Unknown tool: ${toolName}`);
   }
diff --git a/tests/server/hybrid.search.spec.ts b/tests/server/hybrid.search.spec.ts
new file mode 100644
index 0000000..e69b445
--- /dev/null
+++ b/tests/server/hybrid.search.spec.ts
@@ -0,0 +1,246 @@
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+import { runIndexer } from "../../src/indexer/cli.js";
+import type { ServerContext } from "../../src/server/context.js";
+import { DegradeController } from "../../src/server/fallbacks/degradeController.js";
+import * as handlers from "../../src/server/handlers.js";
+import { checkTableAvailability, hybridSearch, resolveRepoId } from "../../src/server/handlers.js";
+import type { HybridSearchParams } from "../../src/server/handlers.js";
+import { MetricsRegistry } from "../../src/server/observability/metrics.js";
+import { createRpcHandler, WarningManager } from "../../src/server/rpc.js";
+import type { ServerServices } from "../../src/server/services/index.js";
+import { createServerServices } from "../../src/server/services/index.js";
+import { DuckDBClient } from "../../src/shared/duckdb.js";
+import { createTempRepo } from "../helpers/test-repo.js";
+
+interface CleanupTarget {
+  dispose: () => Promise<void>;
+}
+
+describe("hybrid_search", () => {
+  const cleanupTargets: CleanupTarget[] = [];
+
+  afterEach(async () => {
+    for (const target of cleanupTargets.splice(0, cleanupTargets.length)) {
+      await target.dispose();
+    }
+  });
+
+  async function setupRepo(files: Record<string, string>): Promise<ServerContext> {
+    const repo = await createTempRepo(files);
+    cleanupTargets.push({ dispose: repo.cleanup });
+
+    const dbDir = await mkdtemp(join(tmpdir(), "kiri-hybrid-"));
+    const dbPath = join(dbDir, "index.duckdb");
+    cleanupTargets.push({ dispose: async () => await rm(dbDir, { recursive: true, force: true }) });
+
+    await runIndexer({ repoRoot: repo.path, databasePath: dbPath, full: true });
+
+    const db = await DuckDBClient.connect({ databasePath: dbPath });
+    cleanupTargets.push({ dispose: async () => await db.close() });
+
+    const repoId = await resolveRepoId(db, repo.path);
+    const tableAvailability = await checkTableAvailability(db);
+    return {
+      db,
+      repoId,
+      services: createServerServices(db),
+      tableAvailability,
+      warningManager: new WarningManager(),
+    };
+  }
+
+  it("returns semantic results with no supplemental when required types are covered", async () => {
+    const context = await setupRepo({
+      "src/main.ts": "export function meaning() {\n  return 42;\n}\n",
+      "docs/readme.md": "The meaning of life.\n",
+    });
+
+    const result = await hybridSearch(context, {
+      goal: "meaning function definition",
+      required_types: ["ts"],
+      compact: true,
+    });
+
+    expect(result.context.length).toBeGreaterThan(0);
+    expect(result.coverage.semantic_count).toBeGreaterThan(0);
+    expect(result.coverage.missing_types).not.toContain("ts");
+  }, 15000);
+
+  it("runs supplemental search when SQL not in semantic results and returns valid coverage", async () => {
+    // Use a goal unrelated to SQL to ensure semantic search won't find the .sql file
+    const context = await setupRepo({
+      "src/main.ts": "export function createUser() {}\n",
+      "sql/schema.sql": "CREATE TABLE user (id INTEGER, name TEXT);\n",
+    });
+
+    const result = await hybridSearch(context, {
+      goal: "createUser typescript function",
+      required_types: ["sql"],
+      compact: true,
+    });
+
+    expect(result.coverage).toMatchObject({
+      semantic_count: expect.any(Number),
+      supplemental_count: expect.any(Number),
+      triggered: expect.any(Boolean),
+      missing_types: expect.any(Array),
+    });
+
+    // If SQL was not in semantic results, supplemental search should have been triggered
+    if (result.coverage.missing_types.includes("sql")) {
+      expect(result.coverage.triggered).toBe(true);
+      // supplemental search should have found the SQL file
+      expect(result.supplemental.length).toBeGreaterThan(0);
+      expect(result.supplemental.some((item) => item.path.endsWith(".sql"))).toBe(true);
+    }
+  }, 15000);
+
+  it("returns results in compact mode without previews", async () => {
+    const context = await setupRepo({
+      "src/app.ts": "export const config = { port: 8080 };\n",
+    });
+
+    const result = await hybridSearch(context, {
+      goal: "config port",
+      compact: true,
+      required_types: [],
+    });
+
+    expect(result.context.every((item) => item.preview === undefined)).toBe(true);
+    expect(result.coverage).toBeDefined();
+  }, 15000);
+
+  it("throws error when goal is empty", async () => {
+    const context = await setupRepo({
+      "src/main.ts": "export function foo() {}\n",
+    });
+
+    await expect(hybridSearch(context, { goal: "" })).rejects.toThrow();
+  }, 15000);
+
+  it("supplements with YAML files when required_types includes yaml", async () => {
+    const context = await setupRepo({
+      "src/main.ts": "export function deploy() {}\n",
+      "config/deploy.yaml": "service: web\nreplicas: 2\n",
+    });
+
+    const result = await hybridSearch(context, {
+      goal: "deploy service config",
+      required_types: ["yaml"],
+      compact: true,
+    });
+
+    expect(result.coverage).toMatchObject({
+      semantic_count: expect.any(Number),
+      supplemental_count: expect.any(Number),
+      triggered: expect.any(Boolean),
+      missing_types: expect.any(Array),
+    });
+    // Whether triggered or not depends on semantic search results
+    // but the structure should always be valid
+    expect(Array.isArray(result.context)).toBe(true);
+    expect(Array.isArray(result.supplemental)).toBe(true);
+  }, 15000);
+});
+
+// =============================================================================
+// parseHybridSearchParams (via RPC handler + spy)
+// Tests parser normalization without running a real DB
+// =============================================================================
+
+describe("parseHybridSearchParams (rpc parser)", () => {
+  const createMockHandler = () => {
+    const warningManager = new WarningManager();
+    const context: ServerContext = {
+      db: {} as DuckDBClient,
+      repoId: 1,
+      services: {} as ServerServices,
+      tableAvailability: {
+        hasMetadataTables: true,
+        hasLinkTable: true,
+        hasHintLog: true,
+        hasHintDictionary: true,
+        hasGraphMetrics: true,
+        hasCochange: true,
+      },
+      warningManager,
+    };
+    const degrade = new DegradeController(process.cwd());
+    const metrics = new MetricsRegistry();
+    return createRpcHandler({ context, degrade, metrics, tokens: [], allowDegrade: false });
+  };
+
+  const buildCall = (args: Record<string, unknown>) => ({
+    jsonrpc: "2.0" as const,
+    id: 1,
+    method: "tools/call",
+    params: { name: "hybrid_search", arguments: args },
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it("normalizes required_types: strips dots, lowercases, deduplicates, caps at 5", async () => {
+    let captured: HybridSearchParams | undefined;
+    vi.spyOn(handlers, "hybridSearch").mockImplementation(async (_ctx, params) => {
+      captured = params;
+      return {
+        context: [],
+        supplemental: [],
+        coverage: { semantic_count: 0, supplemental_count: 0, triggered: false, missing_types: [] },
+      };
+    });
+
+    const handler = createMockHandler();
+    await handler(
+      buildCall({
+        goal: "test",
+        required_types: [".SQL", " .yaml ", "SQL", "ts", "md", "rs", "go"],
+      })
+    );
+
+    // .SQL → sql, .yaml (trimmed) → yaml, SQL (dup) → removed, ts, md, rs capped at 5
+    expect(captured?.required_types).toEqual(["sql", "yaml", "ts", "md", "rs"]);
+  });
+
+  it("returns isError true when goal is missing", async () => {
+    const handler = createMockHandler();
+    const response = await handler(buildCall({}));
+    expect(response?.response).toMatchObject({ result: { isError: true } });
+  });
+
+  it("returns isError true when goal is empty string", async () => {
+    const handler = createMockHandler();
+    const response = await handler(buildCall({ goal: " " }));
+    expect(response?.response).toMatchObject({ result: { isError: true } });
+  });
+
+  it("returns isError true when limit > 20", async () => {
+    const handler = createMockHandler();
+    const response = await handler(buildCall({ goal: "test", limit: 99 }));
+    expect(response?.response).toMatchObject({ result: { isError: true } });
+  });
+
+  it("truncates a fractional limit to an integer", async () => {
+    let captured: HybridSearchParams | undefined;
+    vi.spyOn(handlers, "hybridSearch").mockImplementation(async (_ctx, params) => {
+      captured = params;
+      return {
+        context: [],
+        supplemental: [],
+        coverage: { semantic_count: 0, supplemental_count: 0, triggered: false, missing_types: [] },
+      };
+    });
+
+    const handler = createMockHandler();
+    await handler(buildCall({ goal: "test", limit: 2.5 }));
+
+    expect(captured?.limit).toBe(2);
+  });
+});