From f7f6d6d0f3ab780bc257355a96c55a5923d826f5 Mon Sep 17 00:00:00 2001 From: tnm Date: Sun, 18 Jan 2026 13:35:47 -0800 Subject: [PATCH] fix: get_symbol_code returns full source code instead of just name (#187) The tree-sitter query files for TypeScript and JavaScript had @definition.X captures placed on the identifier node instead of the declaration node, and the Rust queries had no @definition.X capture at all (only @name with a #set! type predicate), causing extract_symbols to return only the symbol name in the code field. - Fix TypeScript/JavaScript/Rust query patterns to capture full body - Add regression test for issue #187 - Document include_code param in MCP tool description and docs --- docs/src/content/docs/mcp/kit-dev-mcp.mdx | 2 +- src/kit/mcp/dev_server.py | 2 +- src/kit/queries/javascript/tags.scm | 14 ++-- src/kit/queries/rust/tags.scm | 15 ++--- src/kit/queries/typescript/tags.scm | 28 ++++---- tests/test_symbol_extraction_multilang.py | 78 +++++++++++++++++++++++ 6 files changed, 106 insertions(+), 33 deletions(-) diff --git a/docs/src/content/docs/mcp/kit-dev-mcp.mdx b/docs/src/content/docs/mcp/kit-dev-mcp.mdx index 8fd8cf61..d04ab94c 100644 --- a/docs/src/content/docs/mcp/kit-dev-mcp.mdx +++ b/docs/src/content/docs/mcp/kit-dev-mcp.mdx @@ -84,7 +84,7 @@ The server provides many tools including: - **grep_code** - Fast literal string search (120s default timeout, configurable via `KIT_GREP_TIMEOUT`) - **grep_ast** - Search code using AST patterns (semantic search) - **get_file_tree** - Repository file structure with pagination support (`limit`/`offset` params) -- **extract_symbols** - Extract functions, classes, and symbols +- **extract_symbols** - Extract functions, classes, and symbols (excludes code by default for ~90% token savings; use `include_code=true` to get full source) - **get_symbol_code** - Get source code of a specific symbol (lazy loading for context efficiency) - **find_symbol_usages** - Find where symbols are used - **warm_cache** - Pre-warm caches for faster operations on large codebases (100K+ files) diff --git 
a/src/kit/mcp/dev_server.py b/src/kit/mcp/dev_server.py index 4add470f..248a65e8 100644 --- a/src/kit/mcp/dev_server.py +++ b/src/kit/mcp/dev_server.py @@ -576,7 +576,7 @@ def list_tools(self) -> List[Tool]: ), Tool( name="extract_symbols", - description="Extract symbols from a file", + description="Extract symbols (functions, classes, etc.) from a file. Returns name, type, start_line, end_line, file. By default excludes source code to save tokens (~90% reduction). Use include_code=true to get full source, or use get_symbol_code for lazy loading specific symbols.", inputSchema=ExtractSymbolsParams.model_json_schema(), ), Tool( diff --git a/src/kit/queries/javascript/tags.scm b/src/kit/queries/javascript/tags.scm index 673accf6..b604647e 100644 --- a/src/kit/queries/javascript/tags.scm +++ b/src/kit/queries/javascript/tags.scm @@ -3,11 +3,11 @@ ; Function declarations (function_declaration - name: (identifier) @name @definition.function) + name: (identifier) @name) @definition.function ; Class declarations (class_declaration - name: (identifier) @name @definition.class) + name: (identifier) @name) @definition.class ; Arrow functions assigned to const/let (lexical_declaration) (lexical_declaration @@ -57,23 +57,23 @@ ; Exported function declarations (export_statement declaration: (function_declaration - name: (identifier) @name @definition.function)) + name: (identifier) @name) @definition.function) ; Exported class declarations (export_statement declaration: (class_declaration - name: (identifier) @name @definition.class)) + name: (identifier) @name) @definition.class) ; Class methods (class_body (method_definition - name: (property_identifier) @name @definition.method)) + name: (property_identifier) @name) @definition.method) ; Generator functions (generator_function_declaration - name: (identifier) @name @definition.function) + name: (identifier) @name) @definition.function ; Exported generator functions (export_statement declaration: 
(generator_function_declaration - name: (identifier) @name @definition.function)) + name: (identifier) @name) @definition.function) diff --git a/src/kit/queries/rust/tags.scm b/src/kit/queries/rust/tags.scm index f3d8c5ff..b4a4aca8 100644 --- a/src/kit/queries/rust/tags.scm +++ b/src/kit/queries/rust/tags.scm @@ -1,21 +1,16 @@ ;; tags.scm for Rust symbol extraction (function_item - name: (identifier) @name - (#set! type "function")) + name: (identifier) @name) @definition.function (struct_item - name: (type_identifier) @name - (#set! type "struct")) + name: (type_identifier) @name) @definition.struct (enum_item - name: (type_identifier) @name - (#set! type "enum")) + name: (type_identifier) @name) @definition.enum (trait_item - name: (type_identifier) @name - (#set! type "trait")) + name: (type_identifier) @name) @definition.trait (impl_item - type: (type_identifier) @name - (#set! type "impl")) + type: (type_identifier) @name) @definition.impl diff --git a/src/kit/queries/typescript/tags.scm b/src/kit/queries/typescript/tags.scm index 8c7bbde4..46927853 100644 --- a/src/kit/queries/typescript/tags.scm +++ b/src/kit/queries/typescript/tags.scm @@ -2,24 +2,24 @@ ; Function declarations (function_declaration - name: (identifier) @name @definition.function) + name: (identifier) @name) @definition.function ; Class declarations (with optional modifiers like export) (class_declaration - name: (type_identifier) @name @definition.class) + name: (type_identifier) @name) @definition.class ; Interface declarations (interface_declaration - name: (type_identifier) @name @definition.interface) + name: (type_identifier) @name) @definition.interface ; Enum declarations (enum_declaration - name: (identifier) @name @definition.enum) + name: (identifier) @name) @definition.enum ; Class methods (class_body (method_definition - name: (property_identifier) @name @definition.method)) + name: (property_identifier) @name) @definition.method) ; Arrow functions assigned to const/let 
(lexical_declaration) (lexical_declaration @@ -76,41 +76,41 @@ ; Exported function declarations (export_statement declaration: (function_declaration - name: (identifier) @name @definition.function)) + name: (identifier) @name) @definition.function) ; Exported class declarations (export_statement declaration: (class_declaration - name: (type_identifier) @name @definition.class)) + name: (type_identifier) @name) @definition.class) ; Exported interface declarations (export_statement declaration: (interface_declaration - name: (type_identifier) @name @definition.interface)) + name: (type_identifier) @name) @definition.interface) ; Exported enum declarations (export_statement declaration: (enum_declaration - name: (identifier) @name @definition.enum)) + name: (identifier) @name) @definition.enum) ; Type alias declarations (type_alias_declaration - name: (type_identifier) @name @definition.type) + name: (type_identifier) @name) @definition.type ; Exported type alias declarations (export_statement declaration: (type_alias_declaration - name: (type_identifier) @name @definition.type)) + name: (type_identifier) @name) @definition.type) ; Namespace (internal_module) (internal_module - name: (identifier) @name @definition.namespace) + name: (identifier) @name) @definition.namespace ; Generator functions (generator_function_declaration - name: (identifier) @name @definition.function) + name: (identifier) @name) @definition.function ; Exported generator functions (export_statement declaration: (generator_function_declaration - name: (identifier) @name @definition.function)) + name: (identifier) @name) @definition.function) diff --git a/tests/test_symbol_extraction_multilang.py b/tests/test_symbol_extraction_multilang.py index b69c1551..77767289 100644 --- a/tests/test_symbol_extraction_multilang.py +++ b/tests/test_symbol_extraction_multilang.py @@ -26,3 +26,81 @@ def test_symbol_extraction(ext: str, code: str): # Simple sanity: expect 'foo' OR 'Bar' present names = 
{s.get("name") for s in symbols} assert any(name in names for name in {"foo", "Bar", "main"}), f"Expected symbols missing for {ext}: {names}" + + +# Test for issue #187: get_symbol_code returns symbol name instead of actual source code +# https://github.com/cased/kit/issues/187 +MULTILINE_SAMPLES = { + ".ts": """ +function myFunction(x: number, y: number): number { + const result = x + y; + return result; +} +""", + ".js": """ +function myFunction(x, y) { + const result = x + y; + return result; +} +""", + ".rs": """ +fn my_function(x: i32, y: i32) -> i32 { + let result = x + y; + result +} +""", + ".py": """ +def my_function(x, y): + result = x + y + return result +""", + ".go": """ +package main + +func myFunction(x int, y int) int { + result := x + y + return result +} +""", +} + + +@pytest.mark.parametrize("ext,code", list(MULTILINE_SAMPLES.items())) +def test_symbol_code_contains_full_body(ext: str, code: str): + """Test that extract_symbols returns full function body in 'code' field, not just the name. + + This is a regression test for issue #187 where the code field only contained + the symbol name (e.g., 'myFunction') instead of the actual source code. + """ + parser = TreeSitterSymbolExtractor.get_parser(ext) + query = TreeSitterSymbolExtractor.get_query(ext) + if not parser or not query: + pytest.skip(f"Language for {ext} not supported in this environment") + + symbols = TreeSitterSymbolExtractor.extract_symbols(ext, code) + assert symbols, f"No symbols extracted for {ext}" + + # Use the first symbol typed as a function or method + func_symbols = [s for s in symbols if s.get("type") in ("function", "method")] + assert func_symbols, f"No function symbols found for {ext}" + + func = func_symbols[0] + func_name = func.get("name") + func_code = func.get("code", "") + + # The code field should contain more than just the function name + assert len(func_code) > len(func_name), ( + f"Code field for {ext} only contains name '{func_name}', expected full function body. 
" + f"Got: '{func_code}'" + ) + + # The code should at least contain the function's own name + assert func_name in func_code, f"Function name '{func_name}' not found in code for {ext}" + + # For multi-line functions, end_line should be greater than start_line + start_line = func.get("start_line", 0) + end_line = func.get("end_line", 0) + assert end_line > start_line, ( + f"For multi-line function in {ext}, expected end_line > start_line. " + f"Got start_line={start_line}, end_line={end_line}" + )