From 99b847f032ce26864222d2f4492e5b04c87c2408 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 13 May 2026 14:34:14 +0000 Subject: [PATCH 1/2] Deduplicate shared language extractor logic Agent-Logs-Url: https://github.com/daedalus/ImpactGuard/sessions/af5e762a-a4a4-4b4c-a52f-5ed7256b49ef Co-authored-by: daedalus <115175+daedalus@users.noreply.github.com> --- src/impactguard/languages/go.py | 17 ++++------------- src/impactguard/languages/java.py | 17 ++++------------- src/impactguard/languages/javascript.py | 17 ++++------------- src/impactguard/languages/kotlin.py | 17 ++++------------- src/impactguard/languages/lib/shared.py | 24 ++++++++++++++++++++++++ src/impactguard/languages/ruby.py | 17 ++++------------- src/impactguard/languages/rust.py | 17 ++++------------- src/impactguard/languages/swift.py | 17 ++++------------- src/impactguard/languages/typescript.py | 17 ++++------------- src/impactguard/languages/zig.py | 17 ++++------------- 10 files changed, 60 insertions(+), 117 deletions(-) diff --git a/src/impactguard/languages/go.py b/src/impactguard/languages/go.py index 5947806..822c791 100644 --- a/src/impactguard/languages/go.py +++ b/src/impactguard/languages/go.py @@ -26,12 +26,14 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -353,15 +355,7 @@ def _extract_with_regex( } ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -435,10 +429,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Interface unions (``A | B``) introduced in Go 1.18 generics are handled by splitting on ``|``. """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────────────── diff --git a/src/impactguard/languages/java.py b/src/impactguard/languages/java.py index f74f249..519ce55 100644 --- a/src/impactguard/languages/java.py +++ b/src/impactguard/languages/java.py @@ -27,6 +27,7 @@ _TREE_SITTER_AVAILABLE, call_re, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, @@ -35,6 +36,7 @@ make_signature_dict, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -355,15 +357,7 @@ def _extract_with_regex( ) ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -432,10 +426,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Java does not have union types natively; returns a singleton frozenset unless the type contains ``|`` (as used in multi-catch clauses). """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────────────────────────────── diff --git a/src/impactguard/languages/javascript.py b/src/impactguard/languages/javascript.py index 5b7e184..1308697 100644 --- a/src/impactguard/languages/javascript.py +++ b/src/impactguard/languages/javascript.py @@ -27,12 +27,14 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -344,15 +346,7 @@ def _extract_with_regex( } ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -424,10 +418,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Splits on ``|`` for union types. """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────── diff --git a/src/impactguard/languages/kotlin.py b/src/impactguard/languages/kotlin.py index 54f81d5..213962b 100644 --- a/src/impactguard/languages/kotlin.py +++ b/src/impactguard/languages/kotlin.py @@ -27,12 +27,14 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -312,15 +314,7 @@ def _extract_with_regex( } ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -392,10 +386,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Splits on ``|`` (nullable ``T?`` becomes ``T | null``). """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────────────────────── diff --git a/src/impactguard/languages/lib/shared.py b/src/impactguard/languages/lib/shared.py index aab7ac8..e1047b4 100644 --- a/src/impactguard/languages/lib/shared.py +++ b/src/impactguard/languages/lib/shared.py @@ -191,6 +191,30 @@ def make_call_dict( } +def dedupe_signatures_by_fqname( + signatures: list[dict[str, Any]], +) -> list[dict[str, Any]]: + """Deduplicate signature dicts by ``fqname`` and return sorted output.""" + seen: set[str] = set() + unique: list[dict[str, Any]] = [] + for sig in signatures: + fqname = sig.get("fqname") + if isinstance(fqname, str) and fqname not in seen: + seen.add(fqname) + unique.append(sig) + + unique.sort(key=lambda x: x["fqname"]) + return unique + + +def split_pipe_union_members(type_str: str) -> frozenset[str]: + """Split ``A | B`` style unions, or return a singleton member set.""" + s = type_str.strip() + if "|" in s: + return frozenset(p.strip() for p in s.split("|")) + return frozenset({s}) + + def _extract_call_name( node: Any, source: bytes, diff --git a/src/impactguard/languages/ruby.py b/src/impactguard/languages/ruby.py index 5e35b04..f0f867e 100644 --- a/src/impactguard/languages/ruby.py +++ b/src/impactguard/languages/ruby.py @@ -26,12 +26,14 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -343,15 +345,7 @@ def _extract_with_regex( } ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -425,10 +419,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Type unions written with ``|`` (e.g. in Sorbet/RBS annotations) are split by ``|``. """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────────────── diff --git a/src/impactguard/languages/rust.py b/src/impactguard/languages/rust.py index 07d34a7..3f9908a 100644 --- a/src/impactguard/languages/rust.py +++ b/src/impactguard/languages/rust.py @@ -26,12 +26,14 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -301,15 +303,7 @@ def _extract_with_regex( } ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -383,10 +377,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: singleton frozenset. If ``|`` appears (e.g. in pattern matching contexts), each branch is returned as a separate member. """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────── diff --git a/src/impactguard/languages/swift.py b/src/impactguard/languages/swift.py index e65a589..b7857aa 100644 --- a/src/impactguard/languages/swift.py +++ b/src/impactguard/languages/swift.py @@ -27,12 +27,14 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -340,15 +342,7 @@ def _extract_with_regex( } ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -420,10 +414,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Splits on ``|`` for union/enum types. """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────────────── diff --git a/src/impactguard/languages/typescript.py b/src/impactguard/languages/typescript.py index 4c7b3b4..8abd001 100644 --- a/src/impactguard/languages/typescript.py +++ b/src/impactguard/languages/typescript.py @@ -27,11 +27,13 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -700,15 +702,7 @@ def _extract_with_regex( ) # De-duplicate by fqname (regex patterns may overlap for some forms) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -781,10 +775,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Handles ``X | Y | null | undefined`` syntax. Each member is returned as-is (whitespace-stripped). """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────── diff --git a/src/impactguard/languages/zig.py b/src/impactguard/languages/zig.py index a5307d8..78bb10b 100644 --- a/src/impactguard/languages/zig.py +++ b/src/impactguard/languages/zig.py @@ -26,12 +26,14 @@ from .lib.shared import ( _TREE_SITTER_AVAILABLE, child_of_type, + dedupe_signatures_by_fqname, extract_calls_with_tree_sitter, has_ignore_comment, has_ignore_comment_fallback, make_parser, node_text, register_extractor, + split_pipe_union_members, warn_if_no_tree_sitter, ) @@ -285,15 +287,7 @@ def _extract_with_regex( } ) - seen: set[str] = set() - unique: list[dict[str, Any]] = [] - for sig in all_funcs: - if sig["fqname"] not in seen: - seen.add(sig["fqname"]) - unique.append(sig) - - unique.sort(key=lambda x: x["fqname"]) - return unique + return dedupe_signatures_by_fqname(all_funcs) def _extract_calls_with_regex(path: Path) -> list[dict[str, Any]]: @@ -365,10 +359,7 @@ def parse_union_members(self, type_str: str) -> frozenset[str]: Splits on `` | `` for tagged union types. """ - s = type_str.strip() - if "|" in s: - return frozenset(p.strip() for p in s.split("|")) - return frozenset({s}) + return split_pipe_union_members(type_str) # ── Self-registration ───────────────────────────────── From 5fdbb4c28147ea95cc970059b3c108ee08b6b76a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 13 May 2026 14:35:50 +0000 Subject: [PATCH 2/2] Align dedupe helper behavior with existing semantics Agent-Logs-Url: https://github.com/daedalus/ImpactGuard/sessions/af5e762a-a4a4-4b4c-a52f-5ed7256b49ef Co-authored-by: daedalus <115175+daedalus@users.noreply.github.com> --- src/impactguard/languages/lib/shared.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/impactguard/languages/lib/shared.py b/src/impactguard/languages/lib/shared.py index e1047b4..cc84899 100644 --- a/src/impactguard/languages/lib/shared.py +++ b/src/impactguard/languages/lib/shared.py @@ -198,8 +198,8 @@ def dedupe_signatures_by_fqname( seen: set[str] = set() unique: list[dict[str, Any]] = [] for sig in signatures: - fqname = sig.get("fqname") - if isinstance(fqname, str) and fqname not in seen: + fqname = sig["fqname"] + if fqname not in seen: seen.add(fqname) unique.append(sig)