From 16895cc4d189f8d46ff80f0d202a16de35afe199 Mon Sep 17 00:00:00 2001 From: Florian Roks Date: Mon, 1 Jun 2026 11:29:56 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20JSONC=20language=20support=20?= =?UTF-8?q?for=20comment=20parsing=20and=20traceability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds jsonc language support, also checks .json files if they start with a comment (see jsonc.org). Signed-off-by: Florian Roks --- docs/source/components/analyse.rst | 2 +- docs/source/components/configuration.rst | 8 ++- docs/source/components/features.rst | 27 ++++++++ pyproject.toml | 1 + src/sphinx_codelinks/analyse/utils.py | 64 +++++++++++++++++-- .../source_discover/config.py | 3 + .../source_discover/source_discover.py | 27 ++++++++ tests/data/jsonc/demo.jsonc | 15 +++++ tests/data/jsonc/plain.json | 3 + tests/data/jsonc/with_modeline.json | 5 ++ tests/test_analyse.py | 17 +++++ tests/test_analyse_utils.py | 51 +++++++++++++++ tests/test_source_discover.py | 14 +++- tests/test_src_trace.py | 2 +- 14 files changed, 231 insertions(+), 8 deletions(-) create mode 100644 tests/data/jsonc/demo.jsonc create mode 100644 tests/data/jsonc/plain.json create mode 100644 tests/data/jsonc/with_modeline.json diff --git a/docs/source/components/analyse.rst b/docs/source/components/analyse.rst index 2b47c5a..df0a40b 100644 --- a/docs/source/components/analyse.rst +++ b/docs/source/components/analyse.rst @@ -47,7 +47,7 @@ Limitations **Current Limitations:** -- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``) and Rust (``//``, ``/* */``, ``///``) comment styles are supported +- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``), Rust (``//``, ``/* */``, ``///``) and JSONC (``//``, ``/* */``) comment styles are supported - **Single Comment Style**: Each analysis run processes only one comment style at a time 
Extraction Examples diff --git a/docs/source/components/configuration.rst b/docs/source/components/configuration.rst index 0d354dd..1505e38 100644 --- a/docs/source/components/configuration.rst +++ b/docs/source/components/configuration.rst @@ -271,7 +271,7 @@ Specifies the comment syntax style used in the source code files. This determine **Type:** ``str`` **Default:** ``"cpp"`` -**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"`` +**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"``, ``"jsonc"`` .. code-block:: toml @@ -315,6 +315,12 @@ Specifies the comment syntax style used in the source code files. This determine ``///`` (doc comments), ``//!`` (inner doc comments) - ``.rs`` + * - JSON with Comments (JSONC) + - ``"jsonc"`` + - ``//`` (single-line), + ``/* */`` (multi-line) + - ``.jsonc`` (always); ``.json`` only when the file opens with a comment + (e.g. the mode line ``// -*- mode: jsonc -*-``) .. note:: Future versions may support additional programming languages. diff --git a/docs/source/components/features.rst b/docs/source/components/features.rst index 9fc9ca3..695eead 100644 --- a/docs/source/components/features.rst +++ b/docs/source/components/features.rst @@ -158,6 +158,33 @@ Features .. fault:: Sphinx-codelinks halucinates traceability objects in Rust :id: FAULT_RUST_2 +.. feature:: JSONC Language Support + :id: FE_JSONC + + Support for defining traceability objects in JSON with Comments (JSONC) files. + + The JSONC parser leverages tree-sitter to identify and extract single-line (``//``) + and multi-line (``/* */``) comments from JSON data, associating each marker with the + surrounding data structure such as the key/value pair, array item, or object it + annotates. + + ``.jsonc`` files are always parsed as JSONC. A ``.json`` file is only treated as JSONC + when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``), following + the `JSONC filename convention <https://jsonc.org/#filename-extension>`_.
+ + Key capabilities: + + * Detection of inline and leading comments + * Association of comments with key/value pairs and array items + * Support for both ``//`` and ``/* */`` comment styles + * Opt-in handling of ``.json`` files via a leading comment + + .. fault:: Traceability objects are not detected in JSONC + :id: FAULT_JSONC_1 + + .. fault:: Sphinx-codelinks hallucinates traceability objects in JSONC + :id: FAULT_JSONC_2 + .. feature:: Customized comment styles :id: FE_CMT diff --git a/pyproject.toml b/pyproject.toml index 93cb917..4085621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "tree-sitter-c-sharp>=0.23.1", "tree-sitter-yaml>=0.7.1", "tree-sitter-rust>=0.23.0", + "tree-sitter-json>=0.24.8", ] [build-system] diff --git a/src/sphinx_codelinks/analyse/utils.py b/src/sphinx_codelinks/analyse/utils.py index 5a11fdd..d256e0c 100644 --- a/src/sphinx_codelinks/analyse/utils.py +++ b/src/sphinx_codelinks/analyse/utils.py @@ -29,6 +29,8 @@ "trait_item", "mod_item", }, + # @JSONC Scope Node Types, IMPL_JSONC_2, impl, [FE_JSONC] + CommentType.jsonc: {"pair", "object", "array", "document"}, } # initialize logger @@ -60,6 +62,19 @@ (line_comment) @comment (block_comment) @comment """ +JSONC_QUERY = """(comment) @comment""" + +# JSON value node types that can be associated with a comment. 
+JSON_STRUCTURE_TYPES = { + "pair", + "object", + "array", + "string", + "number", + "true", + "false", + "null", +} def is_text_file(filepath: Path, sample_size: int = 2048) -> bool: @@ -77,7 +92,7 @@ def is_text_file(filepath: Path, sample_size: int = 2048) -> bool: return False -# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST] +# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST, FE_JSONC] def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]: if comment_type == CommentType.cpp: import tree_sitter_cpp # noqa: PLC0415 @@ -104,6 +119,11 @@ def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]: parsed_language = Language(tree_sitter_rust.language()) query = Query(parsed_language, RUST_QUERY) + elif comment_type == CommentType.jsonc: + import tree_sitter_json # noqa: PLC0415 + + parsed_language = Language(tree_sitter_json.language()) + query = Query(parsed_language, JSONC_QUERY) else: raise ValueError(f"Unsupported comment style: {comment_type}") parser = Parser(parsed_language) @@ -203,8 +223,11 @@ def find_yaml_next_structure(node: TreeSitterNode) -> TreeSitterNode | None: return None -def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None: - """Find a previous named sibling that is on the same row as the comment.""" +def find_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None: + """Find a previous named sibling that is on the same row as the comment. + + Grammar-agnostic: used to detect inline comments in both YAML and JSONC. 
+ """ comment_row = node.start_point.row current = node.prev_named_sibling @@ -225,7 +248,7 @@ def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None: """Find the YAML structure (key-value pair, list item, etc.) associated with a comment.""" # First, check if this is an inline comment by looking for a previous sibling on the same row - prev_sibling_same_row = find_yaml_prev_sibling_on_same_row(node) + prev_sibling_same_row = find_prev_sibling_on_same_row(node) if prev_sibling_same_row: return prev_sibling_same_row @@ -244,6 +267,35 @@ def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | Non return None +def find_jsonc_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None: + """Find the JSON structure (key/value pair, value, list item) for a comment. + + JSON is data rather than code, so association follows the same intent as YAML: + an inline comment belongs to the value on its row, a leading comment belongs to + the following structure, otherwise it belongs to the enclosing structure. 
+ """ + # Inline comment: a value/pair on the same row, before the comment + prev_sibling_same_row = find_prev_sibling_on_same_row(node) + if prev_sibling_same_row: + return prev_sibling_same_row + + # Leading comment: the next structure following the comment + current = node.next_named_sibling + while current: + if current.type in JSON_STRUCTURE_TYPES: + return current + current = current.next_named_sibling + + # Otherwise: the enclosing structure + parent = node.parent + while parent: + if parent.type in {"pair", "object", "array"}: + return parent + parent = parent.parent + + return None + + def find_associated_scope( node: TreeSitterNode, comment_type: CommentType = CommentType.cpp ) -> TreeSitterNode | None: @@ -252,6 +304,10 @@ def find_associated_scope( # YAML uses different structure association logic return find_yaml_associated_structure(node) + if comment_type == CommentType.jsonc: + # JSONC uses data-aware structure association logic + return find_jsonc_associated_structure(node) + if node.type == CommentCategory.docstring: # Only for python's docstring return find_enclosing_scope(node, comment_type) diff --git a/src/sphinx_codelinks/source_discover/config.py b/src/sphinx_codelinks/source_discover/config.py index 51ae3c0..b4f6382 100644 --- a/src/sphinx_codelinks/source_discover/config.py +++ b/src/sphinx_codelinks/source_discover/config.py @@ -11,6 +11,7 @@ "cs": ["cs"], "yaml": ["yml", "yaml"], "rust": ["rs"], + "jsonc": ["jsonc", "json"], } @@ -21,6 +22,8 @@ class CommentType(str, Enum): yaml = "yaml" # @Support Rust style comments, IMPL_RUST_1, impl, [FE_RUST]; rust = "rust" + # @Support JSONC style comments, IMPL_JSONC_1, impl, [FE_JSONC]; + jsonc = "jsonc" class SourceDiscoverSectionConfigType(TypedDict, total=False): diff --git a/src/sphinx_codelinks/source_discover/source_discover.py b/src/sphinx_codelinks/source_discover/source_discover.py index 7faf154..f229464 100644 --- a/src/sphinx_codelinks/source_discover/source_discover.py +++ 
b/src/sphinx_codelinks/source_discover/source_discover.py @@ -6,10 +6,28 @@ from sphinx_codelinks.source_discover.config import ( COMMENT_FILETYPE, + CommentType, SourceDiscoverConfig, ) +def _json_starts_with_comment(filepath: Path, sample_size: int = 256) -> bool: + """Return True if a ``.json`` file's first non-whitespace content is a comment. + + Used to decide whether a ``.json`` file should be treated as JSONC. Per + https://jsonc.org/#filename-extension a ``.json`` file should only be treated as + JSONC when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``). + """ + try: + with filepath.open("rb") as f: + chunk = f.read(sample_size) + except OSError: + return False + # strip a leading UTF-8 BOM, then leading whitespace + text = chunk.removeprefix(b"\xef\xbb\xbf").lstrip() + return text.startswith((b"//", b"/*")) + + # @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER] class SourceDiscover: def __init__(self, src_discover_config: SourceDiscoverConfig): @@ -75,6 +93,15 @@ def _discover(self) -> list[Path]: continue if self.file_types and filepath.suffix.lower() not in self.file_types: continue + # @JSONC .json files require a leading comment, IMPL_JSONC_3, impl, [FE_JSONC] + # A plain ``.json`` file is only treated as JSONC when it opens with a + # comment; otherwise it is skipped under the ``jsonc`` comment type. 
+ if ( + self.src_discover_config.comment_type == CommentType.jsonc + and filepath.suffix.lower() == ".json" + and not _json_starts_with_comment(filepath) + ): + continue # resolve() produces canonical absolute paths; follow_links only # controls whether the walker descends into symlinked directories discovered_files.append(filepath.resolve()) diff --git a/tests/data/jsonc/demo.jsonc b/tests/data/jsonc/demo.jsonc new file mode 100644 index 0000000..5fcab37 --- /dev/null +++ b/tests/data/jsonc/demo.jsonc @@ -0,0 +1,15 @@ +// -*- mode: jsonc -*- +{ + // @JSONC alpha implementation, IMPL_JSONC_A, impl, [REQ_JSONC_1] + "alpha": 1, + "items": [ + "first", // @JSONC inline item, IMPL_JSONC_B, impl, [REQ_JSONC_2] + "second" + ], + /* Block comment with marker + @JSONC beta implementation, IMPL_JSONC_C, impl, [REQ_JSONC_3] + */ + "beta": { + "nested": true + } +} diff --git a/tests/data/jsonc/plain.json b/tests/data/jsonc/plain.json new file mode 100644 index 0000000..d086709 --- /dev/null +++ b/tests/data/jsonc/plain.json @@ -0,0 +1,3 @@ +{ + "value": 42 +} diff --git a/tests/data/jsonc/with_modeline.json b/tests/data/jsonc/with_modeline.json new file mode 100644 index 0000000..e0dc106 --- /dev/null +++ b/tests/data/jsonc/with_modeline.json @@ -0,0 +1,5 @@ +// -*- mode: jsonc -*- +{ + // @JSONC modeline file, IMPL_JSONC_D, impl, [REQ_JSONC_4] + "value": 42 +} diff --git a/tests/test_analyse.py b/tests/test_analyse.py index 6e6c2a7..5e54ded 100644 --- a/tests/test_analyse.py +++ b/tests/test_analyse.py @@ -6,6 +6,7 @@ from sphinx_codelinks.analyse.analyse import SourceAnalyse from sphinx_codelinks.config import SourceAnalyseConfig +from sphinx_codelinks.source_discover.config import CommentType from tests.conftest import ( ONELINE_COMMENT_STYLE, ONELINE_COMMENT_STYLE_DEFAULT, @@ -118,6 +119,21 @@ def test_analyse(src_dir, src_paths, tmp_path, snapshot_marks): "num_oneline_warnings": 0, }, ), + ( + TEST_DIR / "data" / "jsonc", + [ + TEST_DIR / "data" / "jsonc" / 
"demo.jsonc", + ], + ONELINE_COMMENT_STYLE_DEFAULT, + { + "num_src_files": 1, + "num_uncached_files": 1, + "num_cached_files": 0, + "num_comments": 4, + "num_oneline_warnings": 0, + "comment_type": CommentType.jsonc, + }, + ), ], ) def test_analyse_oneline_needs( @@ -130,6 +146,7 @@ def test_analyse_oneline_needs( get_oneline_needs=True, get_rst=False, oneline_comment_style=oneline_comment_style, + comment_type=result.get("comment_type", CommentType.cpp), ) src_analyse = SourceAnalyse(src_analyse_config) src_analyse.run() diff --git a/tests/test_analyse_utils.py b/tests/test_analyse_utils.py index 207b1f9..1a1a937 100644 --- a/tests/test_analyse_utils.py +++ b/tests/test_analyse_utils.py @@ -8,6 +8,7 @@ from tree_sitter import Node as TreeSitterNode import tree_sitter_c_sharp import tree_sitter_cpp +import tree_sitter_json import tree_sitter_python import tree_sitter_rust import tree_sitter_yaml @@ -57,6 +58,14 @@ def init_rust_tree_sitter() -> tuple[Parser, Query]: return parser, query +@pytest.fixture(scope="session") +def init_jsonc_tree_sitter() -> tuple[Parser, Query]: + parsed_language = Language(tree_sitter_json.language()) + query = Query(parsed_language, utils.JSONC_QUERY) + parser = Parser(parsed_language) + return parser, query + + @pytest.mark.parametrize( ("code", "result"), [ @@ -365,6 +374,48 @@ def test_find_associated_scope_rust(code, result, init_rust_tree_sitter): assert result in rust_def +@pytest.mark.parametrize( + ("code", "result"), + [ + # leading comment is associated with the following key/value pair + ( + b'{\n // @req-id: need_001\n "alpha": 1\n}\n', + '"alpha": 1', + ), + # inline comment is associated with the array item on the same row + ( + b'{\n "items": [\n "first", // @req-id: need_001\n "second"\n ]\n}\n', + '"first"', + ), + # inline comment is associated with the pair on the same row + ( + b'{\n "alpha": 1, // @req-id: need_001\n "beta": 2\n}\n', + '"alpha": 1', + ), + # block comment is associated with the following pair + ( 
+ b'{\n /* @req-id: need_001 */\n "beta": 2\n}\n', + '"beta": 2', + ), + # trailing comment falls back to the enclosing object + ( + b'{\n "alpha": 1\n // @req-id: need_001\n}\n', + '"alpha"', + ), + ], +) +def test_find_associated_scope_jsonc(code, result, init_jsonc_tree_sitter): + parser, query = init_jsonc_tree_sitter + comments = utils.extract_comments(code, parser, query) + node: TreeSitterNode | None = utils.find_associated_scope( + comments[0], CommentType.jsonc + ) + assert node + assert node.text + jsonc_structure = node.text.decode("utf-8") + assert result in jsonc_structure + + @pytest.mark.parametrize( ("code", "result"), [ diff --git a/tests/test_source_discover.py b/tests/test_source_discover.py index 5a6f5c1..5572033 100644 --- a/tests/test_source_discover.py +++ b/tests/test_source_discover.py @@ -49,7 +49,7 @@ "comment_type": "java", }, [ - "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'python', 'rust', 'yaml']" + "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'jsonc', 'python', 'rust', 'yaml']" ], ), ( @@ -196,6 +196,18 @@ def test_comment_filetype( assert len(source_discover.source_paths) == nums_files +def test_jsonc_discover_gate() -> None: + """`.jsonc` is always discovered; `.json` only when it opens with a comment.""" + jsonc_dir = Path(__file__).parent / "data" / "jsonc" + config = SourceDiscoverConfig( + src_dir=jsonc_dir, comment_type="jsonc", gitignore=False + ) + discovered = {p.name for p in SourceDiscover(config).source_paths} + assert "demo.jsonc" in discovered + assert "with_modeline.json" in discovered + assert "plain.json" not in discovered + + def test_follow_links(tmp_path: Path) -> None: """Test that follow_links controls whether symbolic links are followed.""" # Create a real directory with a source file diff --git a/tests/test_src_trace.py b/tests/test_src_trace.py index 8e87a71..f8be8cb 100644 --- a/tests/test_src_trace.py +++ 
b/tests/test_src_trace.py @@ -58,7 +58,7 @@ [ "Project 'dcdc' has the following errors:", "Schema validation error in field 'exclude': 123 is not of type 'string'", - "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'python', 'rust', 'yaml']", + "Schema validation error in field 'comment_type': 'java' is not one of ['cpp', 'cs', 'jsonc', 'python', 'rust', 'yaml']", "Schema validation error in field 'gitignore': '_true' is not of type 'boolean'", "Schema validation error in field 'include': 345 is not of type 'string'", "Schema validation error in field 'src_dir': ['../dcdc'] is not of type 'string'",