Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/components/analyse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ Limitations

**Current Limitations:**

- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``) and Rust (``//``, ``/* */``, ``///``) comment styles are supported
- **Language Support**: C/C++ (``//``, ``/* */``), C# (``//``, ``/* */``, ``///``), Python (``#``), YAML (``#``), Rust (``//``, ``/* */``, ``///``) and JSONC (``//``, ``/* */``) comment styles are supported
- **Single Comment Style**: Each analysis run processes only one comment style at a time

Extraction Examples
Expand Down
8 changes: 7 additions & 1 deletion docs/source/components/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ Specifies the comment syntax style used in the source code files. This determine

**Type:** ``str``
**Default:** ``"cpp"``
**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"``
**Supported values:** ``"cpp"``, ``"python"``, ``"cs"``, ``"yaml"``, ``"rust"``, ``"jsonc"``

.. code-block:: toml

Expand Down Expand Up @@ -315,6 +315,12 @@ Specifies the comment syntax style used in the source code files. This determine
``///`` (doc comments),
``//!`` (inner doc comments)
- ``.rs``
* - JSON with Comments (JSONC)
- ``"jsonc"``
- ``//`` (single-line),
``/* */`` (multi-line)
- ``.jsonc`` (always); ``.json`` only when the file opens with a comment
(e.g. the mode line ``// -*- mode: jsonc -*-``)

.. note:: Future versions may support additional programming languages.

Expand Down
27 changes: 27 additions & 0 deletions docs/source/components/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,33 @@ Features
.. fault:: Sphinx-codelinks hallucinates traceability objects in Rust
:id: FAULT_RUST_2

.. feature:: JSONC Language Support
:id: FE_JSONC

Support for defining traceability objects in JSON with Comments (JSONC) files.

The JSONC parser leverages tree-sitter to identify and extract single-line (``//``)
and multi-line (``/* */``) comments from JSON data, associating each marker with the
surrounding data structure such as the key/value pair, array item, or object it
annotates.

``.jsonc`` files are always parsed as JSONC. A ``.json`` file is only treated as JSONC
when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``), following
the `JSONC filename convention <https://jsonc.org/#filename-extension>`_.

Key capabilities:

* Detection of inline and leading comments
* Association of comments with key/value pairs and array items
* Support for both ``//`` and ``/* */`` comment styles
* Opt-in handling of ``.json`` files via a leading comment

.. fault:: Traceability objects are not detected in JSONC
:id: FAULT_JSONC_1

.. fault:: Sphinx-codelinks hallucinates traceability objects in JSONC
:id: FAULT_JSONC_2

.. feature:: Customized comment styles
:id: FE_CMT

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies = [
"tree-sitter-c-sharp>=0.23.1",
"tree-sitter-yaml>=0.7.1",
"tree-sitter-rust>=0.23.0",
"tree-sitter-json>=0.24.8",
]

[build-system]
Expand Down
64 changes: 60 additions & 4 deletions src/sphinx_codelinks/analyse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
"trait_item",
"mod_item",
},
# @JSONC Scope Node Types, IMPL_JSONC_2, impl, [FE_JSONC]
CommentType.jsonc: {"pair", "object", "array", "document"},
}

# initialize logger
Expand Down Expand Up @@ -60,6 +62,19 @@
(line_comment) @comment
(block_comment) @comment
"""
# Tree-sitter query that captures every ``comment`` node of a JSONC document.
# It is compiled into a ``Query`` when the ``jsonc`` comment type is initialised.
JSONC_QUERY = """(comment) @comment"""

# Node types a JSONC comment may be associated with: key/value pairs (``pair``),
# containers (``object``, ``array``) and scalar values (``string``, ``number``,
# ``true``, ``false``, ``null``). A sibling's ``type`` is checked against this
# set when looking for the structure that follows a leading comment.
JSON_STRUCTURE_TYPES = {
"pair",
"object",
"array",
"string",
"number",
"true",
"false",
"null",
}


def is_text_file(filepath: Path, sample_size: int = 2048) -> bool:
Expand All @@ -77,7 +92,7 @@ def is_text_file(filepath: Path, sample_size: int = 2048) -> bool:
return False


# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST]
# @Tree-sitter parser initialization for multiple languages, IMPL_LANG_1, impl, [FE_C_SUPPORT, FE_CPP, FE_PY, FE_YAML, FE_RUST, FE_JSONC]
def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]:
if comment_type == CommentType.cpp:
import tree_sitter_cpp # noqa: PLC0415
Expand All @@ -104,6 +119,11 @@ def init_tree_sitter(comment_type: CommentType) -> tuple[Parser, Query]:

parsed_language = Language(tree_sitter_rust.language())
query = Query(parsed_language, RUST_QUERY)
elif comment_type == CommentType.jsonc:
import tree_sitter_json # noqa: PLC0415

parsed_language = Language(tree_sitter_json.language())
query = Query(parsed_language, JSONC_QUERY)
else:
raise ValueError(f"Unsupported comment style: {comment_type}")
parser = Parser(parsed_language)
Expand Down Expand Up @@ -203,8 +223,11 @@ def find_yaml_next_structure(node: TreeSitterNode) -> TreeSitterNode | None:
return None


def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None:
"""Find a previous named sibling that is on the same row as the comment."""
def find_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode | None:
"""Find a previous named sibling that is on the same row as the comment.

Grammar-agnostic: used to detect inline comments in both YAML and JSONC.
"""
comment_row = node.start_point.row
current = node.prev_named_sibling

Expand All @@ -225,7 +248,7 @@ def find_yaml_prev_sibling_on_same_row(node: TreeSitterNode) -> TreeSitterNode |
def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None:
"""Find the YAML structure (key-value pair, list item, etc.) associated with a comment."""
# First, check if this is an inline comment by looking for a previous sibling on the same row
prev_sibling_same_row = find_yaml_prev_sibling_on_same_row(node)
prev_sibling_same_row = find_prev_sibling_on_same_row(node)
if prev_sibling_same_row:
return prev_sibling_same_row

Expand All @@ -244,6 +267,35 @@ def find_yaml_associated_structure(node: TreeSitterNode) -> TreeSitterNode | Non
return None


def find_jsonc_associated_structure(node: TreeSitterNode) -> TreeSitterNode | None:
    """Resolve the JSON structure that a JSONC comment node annotates.

    The lookup tries three strategies in order and returns the first hit:

    1. Inline comment: a previous named sibling on the comment's own row,
       as reported by ``find_prev_sibling_on_same_row``.
    2. Leading comment: the first following named sibling whose type is
       listed in ``JSON_STRUCTURE_TYPES``.
    3. Fallback: the nearest ancestor that is a ``pair``, ``object`` or
       ``array``.

    Returns ``None`` when none of the strategies finds a node.
    """
    # 1) inline: something sits before the comment on the same row
    inline_target = find_prev_sibling_on_same_row(node)
    if inline_target:
        return inline_target

    # 2) leading: skip forward over siblings that are not JSON structures
    follower = node.next_named_sibling
    while follower and follower.type not in JSON_STRUCTURE_TYPES:
        follower = follower.next_named_sibling
    if follower:
        return follower

    # 3) fallback: climb to the closest enclosing pair/object/array
    enclosing_types = {"pair", "object", "array"}
    ancestor = node.parent
    while ancestor and ancestor.type not in enclosing_types:
        ancestor = ancestor.parent
    return ancestor if ancestor else None


def find_associated_scope(
node: TreeSitterNode, comment_type: CommentType = CommentType.cpp
) -> TreeSitterNode | None:
Expand All @@ -252,6 +304,10 @@ def find_associated_scope(
# YAML uses different structure association logic
return find_yaml_associated_structure(node)

if comment_type == CommentType.jsonc:
# JSONC uses data-aware structure association logic
return find_jsonc_associated_structure(node)

if node.type == CommentCategory.docstring:
# Only for python's docstring
return find_enclosing_scope(node, comment_type)
Expand Down
3 changes: 3 additions & 0 deletions src/sphinx_codelinks/source_discover/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"cs": ["cs"],
"yaml": ["yml", "yaml"],
"rust": ["rs"],
"jsonc": ["jsonc", "json"],
}


Expand All @@ -21,6 +22,8 @@ class CommentType(str, Enum):
yaml = "yaml"
# @Support Rust style comments, IMPL_RUST_1, impl, [FE_RUST];
rust = "rust"
# @Support JSONC style comments, IMPL_JSONC_1, impl, [FE_JSONC];
jsonc = "jsonc"


class SourceDiscoverSectionConfigType(TypedDict, total=False):
Expand Down
27 changes: 27 additions & 0 deletions src/sphinx_codelinks/source_discover/source_discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,28 @@

from sphinx_codelinks.source_discover.config import (
COMMENT_FILETYPE,
CommentType,
SourceDiscoverConfig,
)


def _json_starts_with_comment(filepath: Path, sample_size: int = 256) -> bool:
    """Return True if a ``.json`` file's first non-whitespace content is a comment.

    Used to decide whether a ``.json`` file should be treated as JSONC. Per
    https://jsonc.org/#filename-extension a ``.json`` file should only be treated as
    JSONC when it opens with a comment (e.g. the mode line ``// -*- mode: jsonc -*-``).

    The file is read in chunks of ``sample_size`` bytes until at least two
    significant bytes (after an optional UTF-8 BOM and leading whitespace) are
    available or the end of the file is reached. A comment is therefore still
    detected when the leading whitespace is longer than one chunk, or when the
    chunk boundary falls between the two bytes of the comment opener.

    Args:
        filepath: Path of the file to inspect.
        sample_size: Number of bytes requested per read.

    Returns:
        ``True`` if the first significant bytes are ``//`` or ``/*``; ``False``
        otherwise, including when the file is empty, contains only whitespace,
        or cannot be opened or read (``OSError``).
    """
    bom = b"\xef\xbb\xbf"
    # The first read must be able to hold a complete BOM so it can be stripped
    # in one go; this also guards against a non-positive ``sample_size``.
    read_size = max(sample_size, len(bom))
    try:
        with filepath.open("rb") as f:
            # strip a leading UTF-8 BOM, then leading whitespace
            head = f.read(read_size).removeprefix(bom).lstrip()
            # Two bytes are needed to recognise ``//`` or ``/*``; keep reading
            # while everything seen so far was whitespace (or a lone byte).
            while len(head) < 2:
                more = f.read(read_size)
                if not more:
                    break
                head = (head + more).lstrip()
    except OSError:
        return False
    return head.startswith((b"//", b"/*"))


# @Source code file discovery with gitignore support, IMPL_DISC_1, impl, [FE_DISCOVERY, FE_CLI_DISCOVER]
class SourceDiscover:
def __init__(self, src_discover_config: SourceDiscoverConfig):
Expand Down Expand Up @@ -75,6 +93,15 @@ def _discover(self) -> list[Path]:
continue
if self.file_types and filepath.suffix.lower() not in self.file_types:
continue
# @JSONC .json files require a leading comment, IMPL_JSONC_3, impl, [FE_JSONC]
# A plain ``.json`` file is only treated as JSONC when it opens with a
# comment; otherwise it is skipped under the ``jsonc`` comment type.
if (
self.src_discover_config.comment_type == CommentType.jsonc
and filepath.suffix.lower() == ".json"
and not _json_starts_with_comment(filepath)
):
continue
# resolve() produces canonical absolute paths; follow_links only
# controls whether the walker descends into symlinked directories
discovered_files.append(filepath.resolve())
Expand Down
15 changes: 15 additions & 0 deletions tests/data/jsonc/demo.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// -*- mode: jsonc -*-
{
// @JSONC alpha implementation, IMPL_JSONC_A, impl, [REQ_JSONC_1]
"alpha": 1,
"items": [
"first", // @JSONC inline item, IMPL_JSONC_B, impl, [REQ_JSONC_2]
"second"
],
/* Block comment with marker
@JSONC beta implementation, IMPL_JSONC_C, impl, [REQ_JSONC_3]
*/
"beta": {
"nested": true
}
}
3 changes: 3 additions & 0 deletions tests/data/jsonc/plain.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"value": 42
}
5 changes: 5 additions & 0 deletions tests/data/jsonc/with_modeline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// -*- mode: jsonc -*-
{
// @JSONC modeline file, IMPL_JSONC_D, impl, [REQ_JSONC_4]
"value": 42
}
17 changes: 17 additions & 0 deletions tests/test_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from sphinx_codelinks.analyse.analyse import SourceAnalyse
from sphinx_codelinks.config import SourceAnalyseConfig
from sphinx_codelinks.source_discover.config import CommentType
from tests.conftest import (
ONELINE_COMMENT_STYLE,
ONELINE_COMMENT_STYLE_DEFAULT,
Expand Down Expand Up @@ -118,6 +119,21 @@ def test_analyse(src_dir, src_paths, tmp_path, snapshot_marks):
"num_oneline_warnings": 0,
},
),
(
TEST_DIR / "data" / "jsonc",
[
TEST_DIR / "data" / "jsonc" / "demo.jsonc",
],
ONELINE_COMMENT_STYLE_DEFAULT,
{
"num_src_files": 1,
"num_uncached_files": 1,
"num_cached_files": 0,
"num_comments": 4,
"num_oneline_warnings": 0,
"comment_type": CommentType.jsonc,
},
),
],
)
def test_analyse_oneline_needs(
Expand All @@ -130,6 +146,7 @@ def test_analyse_oneline_needs(
get_oneline_needs=True,
get_rst=False,
oneline_comment_style=oneline_comment_style,
comment_type=result.get("comment_type", CommentType.cpp),
)
src_analyse = SourceAnalyse(src_analyse_config)
src_analyse.run()
Expand Down
51 changes: 51 additions & 0 deletions tests/test_analyse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tree_sitter import Node as TreeSitterNode
import tree_sitter_c_sharp
import tree_sitter_cpp
import tree_sitter_json
import tree_sitter_python
import tree_sitter_rust
import tree_sitter_yaml
Expand Down Expand Up @@ -57,6 +58,14 @@ def init_rust_tree_sitter() -> tuple[Parser, Query]:
return parser, query


@pytest.fixture(scope="session")
def init_jsonc_tree_sitter() -> tuple[Parser, Query]:
    """Provide a session-wide parser and comment query for the JSONC tests.

    Both objects are built from the ``tree_sitter_json`` grammar; the query is
    compiled from ``utils.JSONC_QUERY``.
    """
    json_language = Language(tree_sitter_json.language())
    jsonc_query = Query(json_language, utils.JSONC_QUERY)
    return Parser(json_language), jsonc_query


@pytest.mark.parametrize(
("code", "result"),
[
Expand Down Expand Up @@ -365,6 +374,48 @@ def test_find_associated_scope_rust(code, result, init_rust_tree_sitter):
assert result in rust_def


@pytest.mark.parametrize(
    ("code", "result"),
    [
        # a leading line comment annotates the key/value pair that follows it
        (
            b'{\n // @req-id: need_001\n "alpha": 1\n}\n',
            '"alpha": 1',
        ),
        # an inline comment annotates the array item sharing its row
        (
            b'{\n "items": [\n "first", // @req-id: need_001\n "second"\n ]\n}\n',
            '"first"',
        ),
        # an inline comment annotates the pair sharing its row
        (
            b'{\n "alpha": 1, // @req-id: need_001\n "beta": 2\n}\n',
            '"alpha": 1',
        ),
        # a leading block comment annotates the pair that follows it
        (
            b'{\n /* @req-id: need_001 */\n "beta": 2\n}\n',
            '"beta": 2',
        ),
        # a trailing comment has no follower and resolves to the enclosing object
        (
            b'{\n "alpha": 1\n // @req-id: need_001\n}\n',
            '"alpha"',
        ),
    ],
)
def test_find_associated_scope_jsonc(code, result, init_jsonc_tree_sitter):
    """Check which JSON structure the first comment in ``code`` is attached to.

    The test passes when ``result`` occurs in the decoded text of the node
    returned by ``utils.find_associated_scope`` for the ``jsonc`` comment type.
    """
    jsonc_parser, comment_query = init_jsonc_tree_sitter
    first_comment = utils.extract_comments(code, jsonc_parser, comment_query)[0]
    associated: TreeSitterNode | None = utils.find_associated_scope(
        first_comment, CommentType.jsonc
    )
    assert associated
    raw_text = associated.text
    assert raw_text
    assert result in raw_text.decode("utf-8")


@pytest.mark.parametrize(
("code", "result"),
[
Expand Down
Loading