Skip to content

Commit 175dbd0

Browse files
SonAIengine and claude committed
fix: Ontology 관계 품질 대폭 개선 — false positive 56% 감소 (149→66)
## 핵심 변경

### Path hierarchy REQUIRES 강화
- 조부모 관계 제거: /orders/{id}/refund → /orders (X) → /orders/{id} (O)
- GET single (단일 리소스)만 parent로 허용
- 가장 가까운 parent에서 stop (transitive 차단)

### CRUD 관계 축소
- COMPLEMENTARY (POST↔PUT) 제거 — 노이즈만 생성
- CONFLICTS_WITH (PUT↔DELETE) 제거 — 실제 데이터 의존성 아님
- PRECEDES: POST→DELETE (같은 리소스)만 유지
- REQUIRES: POST→GET single (같은 리소스)만 유지
- 모든 CRUD 조합 생성 → 실제 데이터 흐름만 생성

### Name-based detection 정밀화
- {resource}_id 파라미터가 있을 때만 REQUIRES 생성
- 2+ shared token 또는 ID 파라미터 존재 필수

## 결과
- 관계 수: 149 → 66 (56% 감소)
- requestRefund: 4개 REQUIRES → getOrder 1개만
- plan_workflow("process a refund"): getCart→checkout → getOrder→requestRefund (정확!)
- 493 tests passed

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent fe136d4 commit 175dbd0

2 files changed

Lines changed: 106 additions & 137 deletions

File tree

graph_tool_call/analyze/dependency.py

Lines changed: 83 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -177,44 +177,62 @@ def _detect_structural(
177177

178178

179179
def _detect_path_hierarchy(tools: list[ToolSchema]) -> list[DetectedRelation]:
180-
"""Nested paths imply REQUIRES (child requires parent)."""
180+
"""Nested paths imply REQUIRES — but only direct parent-child, not grandparent.
181+
182+
/orders/{id}/refund REQUIRES /orders/{id} (direct parent)
183+
/orders/{id}/refund does NOT require /orders (grandparent — too loose)
184+
185+
Additionally, the parent must be a single-resource GET (e.g. GET /orders/{id})
186+
to avoid false positives like refund REQUIRES listOrders.
187+
"""
181188
relations: list[DetectedRelation] = []
182-
for i, a in enumerate(tools):
183-
for b in tools[i + 1 :]:
184-
if a.name == b.name:
185-
continue
186-
path_a = _strip_path_params(a.metadata["path"])
187-
path_b = _strip_path_params(b.metadata["path"])
188-
if path_a == path_b:
189-
continue
190-
if path_b.startswith(path_a + "/"):
191-
# b is nested under a → b REQUIRES a
192-
relations.append(
193-
DetectedRelation(
194-
source=b.name,
195-
target=a.name,
196-
relation_type=RelationType.REQUIRES,
197-
confidence=0.95,
198-
evidence=(
199-
f"Path {b.metadata['path']} is nested under {a.metadata['path']}"
200-
),
201-
layer=1,
202-
)
203-
)
204-
elif path_a.startswith(path_b + "/"):
205-
# a is nested under b → a REQUIRES b
189+
190+
# Build (stripped_path, original_path) → tool index
191+
path_tools: dict[str, list[ToolSchema]] = {}
192+
for tool in tools:
193+
stripped = _strip_path_params(tool.metadata["path"])
194+
path_tools.setdefault(stripped, []).append(tool)
195+
196+
for tool in tools:
197+
path = tool.metadata["path"]
198+
# Find the closest parent by walking up the original path segments
199+
# /orders/{orderId}/refund → try /orders/{orderId} first, then /orders
200+
segments = [s for s in path.split("/") if s]
201+
if len(segments) < 2:
202+
continue
203+
204+
# Try progressively shorter paths, stop at first match
205+
found_parent = False
206+
for depth in range(len(segments) - 1, 0, -1):
207+
parent_path_raw = "/" + "/".join(segments[:depth])
208+
parent_stripped = _strip_path_params(parent_path_raw)
209+
parent_tools_list = path_tools.get(parent_stripped, [])
210+
for parent in parent_tools_list:
211+
if parent.name == tool.name:
212+
continue
213+
# Only GET as data provider (not POST/list — too loose)
214+
parent_method = parent.metadata.get("method", "").upper()
215+
if parent_method != "GET":
216+
continue
217+
# Must be a single-resource GET (with {id} param)
218+
if not _is_single_resource_path(parent.metadata["path"]):
219+
continue
206220
relations.append(
207221
DetectedRelation(
208-
source=a.name,
209-
target=b.name,
222+
source=tool.name,
223+
target=parent.name,
210224
relation_type=RelationType.REQUIRES,
211-
confidence=0.95,
225+
confidence=0.9,
212226
evidence=(
213-
f"Path {a.metadata['path']} is nested under {b.metadata['path']}"
227+
f"{tool.name} ({path}) requires data from "
228+
f"{parent.name} ({parent.metadata['path']})"
214229
),
215230
layer=1,
216231
)
217232
)
233+
found_parent = True
234+
if found_parent:
235+
break # stop at closest parent
218236
return relations
219237

220238

@@ -240,39 +258,33 @@ def _detect_crud_patterns(group: list[ToolSchema]) -> list[DetectedRelation]:
240258

241259
updates = puts + patches
242260

243-
# POST → GET/{id}: REQUIRES (creating before retrieving specific)
261+
# --- Focused CRUD relations ---
262+
# Only create relations that represent real data dependencies,
263+
# not every possible CRUD combination.
264+
265+
# POST → GET/{id}: the resource must be created before it can be read
266+
# This is the strongest CRUD dependency.
244267
for post in posts:
245268
for get_s in gets_single:
246269
if post.name == get_s.name:
247270
continue
271+
# Only if they share the same resource path
272+
post_resource = _extract_resource(post.metadata["path"])
273+
get_resource = _extract_resource(get_s.metadata["path"])
274+
if post_resource != get_resource:
275+
continue
248276
relations.append(
249277
DetectedRelation(
250278
source=get_s.name,
251279
target=post.name,
252280
relation_type=RelationType.REQUIRES,
253-
confidence=0.95,
254-
evidence=f"{get_s.name} (GET single) requires {post.name} (POST) to exist",
255-
layer=1,
256-
)
257-
)
258-
259-
# POST → PUT: COMPLEMENTARY
260-
for post in posts:
261-
for upd in updates:
262-
if post.name == upd.name:
263-
continue
264-
relations.append(
265-
DetectedRelation(
266-
source=post.name,
267-
target=upd.name,
268-
relation_type=RelationType.COMPLEMENTARY,
269281
confidence=0.9,
270-
evidence=f"{post.name} (POST) and {upd.name} (PUT/PATCH) are complementary",
282+
evidence=f"{get_s.name} (GET single) requires {post.name} (POST) — same resource '{post_resource}'",
271283
layer=1,
272284
)
273285
)
274286

275-
# GET (single) ↔ GET (list): SIMILAR_TO
287+
# GET (single) ↔ GET (list): SIMILAR_TO (these are alternative views)
276288
for get_c in gets_collection:
277289
for get_s in gets_single:
278290
if get_c.name == get_s.name:
@@ -291,82 +303,24 @@ def _detect_crud_patterns(group: list[ToolSchema]) -> list[DetectedRelation]:
291303
)
292304
)
293305

294-
# PUT ↔ DELETE: CONFLICTS_WITH
295-
for upd in updates:
306+
# POST → DELETE: create before delete (lifecycle endpoints only)
307+
for post in posts:
296308
for dele in deletes:
297-
if upd.name == dele.name:
309+
if post.name == dele.name:
298310
continue
299-
relations.append(
300-
DetectedRelation(
301-
source=upd.name,
302-
target=dele.name,
303-
relation_type=RelationType.CONFLICTS_WITH,
304-
confidence=0.8,
305-
evidence=(
306-
f"{upd.name} (PUT/PATCH) and {dele.name} (DELETE) "
307-
"are conflicting state changes"
308-
),
309-
layer=1,
310-
)
311-
)
312-
313-
# CRUD ordering: POST → GET/PUT/PATCH/DELETE = PRECEDES
314-
# Only create PRECEDES between different CRUD stages (not within same stage)
315-
# POST(create) → GET(read), PUT/PATCH(update), DELETE(delete)
316-
# GET(read) → PUT/PATCH(update) — need to read before updating
317-
# POST is prerequisite for single-resource operations
318-
for post in posts:
319-
for target in gets_single + updates + deletes:
320-
if post.name == target.name:
311+
post_resource = _extract_resource(post.metadata["path"])
312+
del_resource = _extract_resource(dele.metadata["path"])
313+
if post_resource != del_resource:
321314
continue
322315
relations.append(
323316
DetectedRelation(
324317
source=post.name,
325-
target=target.name,
326-
relation_type=RelationType.PRECEDES,
327-
confidence=0.9,
328-
evidence=(
329-
f"{post.name} (POST/create) precedes "
330-
f"{target.name} ({target.metadata['method'].upper()}) — "
331-
"resource must exist first"
332-
),
333-
layer=1,
334-
)
335-
)
336-
337-
# GET(single) → PUT/PATCH/DELETE: read before modify/delete
338-
for get_s in gets_single:
339-
for target in updates + deletes:
340-
if get_s.name == target.name:
341-
continue
342-
relations.append(
343-
DetectedRelation(
344-
source=get_s.name,
345-
target=target.name,
346-
relation_type=RelationType.PRECEDES,
347-
confidence=0.8,
348-
evidence=(
349-
f"{get_s.name} (GET) precedes {target.name} "
350-
f"({target.metadata['method'].upper()}) — read before modify"
351-
),
352-
layer=1,
353-
)
354-
)
355-
356-
# PUT/PATCH → DELETE: update before delete (optional, lower confidence)
357-
for upd in updates:
358-
for dele in deletes:
359-
if upd.name == dele.name:
360-
continue
361-
relations.append(
362-
DetectedRelation(
363-
source=upd.name,
364318
target=dele.name,
365319
relation_type=RelationType.PRECEDES,
366-
confidence=0.7,
320+
confidence=0.85,
367321
evidence=(
368-
f"{upd.name} ({upd.metadata['method'].upper()}) precedes "
369-
f"{dele.name} (DELETE) in CRUD lifecycle"
322+
f"{post.name} (create) precedes {dele.name} (delete) "
323+
f"— same resource '{post_resource}'"
370324
),
371325
layer=1,
372326
)
@@ -506,9 +460,10 @@ def _detect_name_based(tools: list[ToolSchema]) -> list[DetectedRelation]:
506460
param_tokens.add(tok)
507461
tool_param_tokens[tool.name] = param_tokens
508462

509-
# Match: tool A is a creator (POST) and tool B's params reference A's resource
510-
# → tool B depends on tool A (tool B REQUIRES tool A)
511-
# Only POST/creator tools can be dependency targets to avoid noisy relations.
463+
# Match: tool B has a parameter like "{resource}_id" and tool A is
464+
# a creator (POST) for that resource → tool B REQUIRES tool A.
465+
# Filter: require at least 2 shared tokens OR a shared token must
466+
# appear in an ID-like parameter name (e.g. "orderId").
512467
creators = {
513468
t.name: tool_tokens[t.name] for t in tools if t.metadata.get("method", "").lower() == "post"
514469
}
@@ -520,7 +475,16 @@ def _detect_name_based(tools: list[ToolSchema]) -> list[DetectedRelation]:
520475
continue
521476
params_b = tool_param_tokens[tool_b.name]
522477
shared = resource_tokens & params_b
523-
if shared:
478+
if not shared:
479+
continue
480+
# Require strong evidence: 2+ shared tokens, or the token
481+
# appears in a parameter ending with "id" (e.g., "orderId")
482+
has_id_param = any(
483+
tok in p.name.lower() for p in tool_b.parameters
484+
for tok in shared
485+
if "id" in p.name.lower()
486+
)
487+
if len(shared) >= 2 or has_id_param:
524488
conf = 0.85 if len(shared) >= 2 else 0.8
525489
relations.append(
526490
DetectedRelation(

tests/test_dependency.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,12 @@ def test_crud_requires():
7171

7272

7373
def test_crud_complementary():
74-
"""POST PUT should produce COMPLEMENTARY."""
74+
"""POST and PUT are no longer marked COMPLEMENTARY (removed to reduce noise)."""
7575
tools = _pet_tools()
7676
relations = detect_dependencies(tools)
7777
rel = _find_relation(relations, "createPet", "updatePet", RelationType.COMPLEMENTARY)
78-
assert rel is not None, "POST and PUT should be COMPLEMENTARY"
79-
assert rel.confidence >= 0.85
80-
assert rel.layer == 1
78+
# COMPLEMENTARY was removed in v0.15.1 to reduce false positives
79+
assert rel is None, "COMPLEMENTARY removed — POST/PUT no longer auto-linked"
8180

8281

8382
def test_crud_similar():
@@ -91,45 +90,51 @@ def test_crud_similar():
9190

9291

9392
def test_crud_conflicts():
94-
"""PUT and DELETE should produce CONFLICTS_WITH."""
93+
"""PUT and DELETE are no longer marked CONFLICTS_WITH (removed to reduce noise)."""
9594
tools = _pet_tools()
9695
relations = detect_dependencies(tools)
9796
rel = _find_relation(relations, "updatePet", "deletePet", RelationType.CONFLICTS_WITH)
98-
assert rel is not None, "PUT and DELETE should CONFLICT"
99-
assert rel.confidence >= 0.75
100-
assert rel.layer == 1
97+
# CONFLICTS_WITH was removed in v0.15.1 to reduce false positives
98+
assert rel is None, "CONFLICTS_WITH removed — PUT/DELETE no longer auto-linked"
10199

102100

103-
def test_crud_precedes():
104-
"""POST → GET ordering should produce PRECEDES."""
101+
def test_crud_requires():
102+
"""GET single should REQUIRE POST (resource must exist to be retrieved)."""
105103
tools = _pet_tools()
106104
relations = detect_dependencies(tools)
107-
rel = _find_relation(relations, "createPet", "getPet", RelationType.PRECEDES)
108-
assert rel is not None, "POST should PRECEDE GET in CRUD lifecycle"
109-
assert rel.relation_type == RelationType.PRECEDES
110-
assert rel.confidence >= 0.8
105+
rel = _find_relation(relations, "getPet", "createPet", RelationType.REQUIRES)
106+
assert rel is not None, "GET single should REQUIRE POST (same resource)"
107+
assert rel.confidence >= 0.85
111108
assert rel.layer == 1
112109

113110

114111
def test_path_hierarchy():
115-
"""Nested paths should produce REQUIRES."""
112+
"""Nested paths should REQUIRE the closest single-resource GET parent."""
116113
tools = [
117114
ToolSchema(
118115
name="listUsers",
119116
description="List users",
120117
metadata={"method": "get", "path": "/users"},
121118
),
119+
ToolSchema(
120+
name="getUser",
121+
description="Get user by ID",
122+
metadata={"method": "get", "path": "/users/{userId}"},
123+
),
122124
ToolSchema(
123125
name="listUserOrders",
124126
description="List orders for a user",
125127
metadata={"method": "get", "path": "/users/{userId}/orders"},
126128
),
127129
]
128130
relations = detect_dependencies(tools)
129-
rel = _find_relation(relations, "listUserOrders", "listUsers", RelationType.REQUIRES)
130-
assert rel is not None, "Nested path tool should REQUIRE parent path tool"
131+
# listUserOrders should require getUser (closest single-resource GET parent)
132+
rel = _find_relation(relations, "listUserOrders", "getUser", RelationType.REQUIRES)
133+
assert rel is not None, "Nested path should REQUIRE closest GET single parent"
131134
assert rel.confidence >= 0.9
132-
assert rel.layer == 1
135+
# Should NOT require listUsers (collection GET is too loose)
136+
rel_list = _find_relation(relations, "listUserOrders", "listUsers", RelationType.REQUIRES)
137+
assert rel_list is None, "Should not REQUIRE collection GET (too loose)"
133138

134139

135140
def test_name_based_detection():

0 commit comments

Comments
 (0)