Skip to content

Commit 11a0e25

Browse files
Mjboothaus and oz-agent committed
feat: add run-aware corpus schema and named queries
Co-Authored-By: Oz <oz-agent@warp.dev>
1 parent b976534 commit 11a0e25

4 files changed

Lines changed: 519 additions & 41 deletions

File tree

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""Named query packs for justfile corpus analytics."""
2+
3+
from __future__ import annotations
4+
5+
from pathlib import Path
6+
from typing import Any
7+
8+
import duckdb
9+
10+
# Registry of canned analytics queries, keyed by their public identifier.
#
# Every statement uses exactly three positional ``?`` placeholders, in this
# order: run_id, run_id, limit. The ``(? IS NULL OR run_id = ?)`` predicate
# turns the run filter into a no-op when the bound run_id is NULL, so the
# same SQL serves both "all runs" and "one run" lookups.
NAMED_QUERIES: dict[str, str] = {
    # Recipe names ranked by how many distinct repos use them.
    "top_recipe_names": """
        SELECT
            recipe_name,
            COUNT(DISTINCT repo_name) AS repo_count,
            COUNT(*) AS occurrence_count
        FROM recipe_occurrences
        WHERE (? IS NULL OR run_id = ?)
        GROUP BY recipe_name
        ORDER BY repo_count DESC, occurrence_count DESC, recipe_name ASC
        LIMIT ?
    """,
    # Recipe signatures that appear in more than one repo.
    "top_reused_signatures": """
        SELECT
            recipe_name,
            repo_count,
            occurrence_count,
            signature
        FROM unique_recipes
        WHERE (? IS NULL OR run_id = ?) AND repo_count > 1
        ORDER BY repo_count DESC, occurrence_count DESC, recipe_name ASC
        LIMIT ?
    """,
    # Sources flagged as not parsed successfully, with the recorded error.
    "parse_failures": """
        SELECT
            repo_name,
            parse_error
        FROM repo_sources
        WHERE (? IS NULL OR run_id = ?) AND parsed_success = FALSE
        ORDER BY repo_name ASC
        LIMIT ?
    """,
    # Dependency names ranked by the number of dependency rows naming them.
    "dependency_hotspots": """
        SELECT
            dependency_name,
            COUNT(*) AS dependent_recipe_count,
            COUNT(DISTINCT repo_name) AS repo_count
        FROM recipe_dependencies
        WHERE (? IS NULL OR run_id = ?)
        GROUP BY dependency_name
        ORDER BY dependent_recipe_count DESC, repo_count DESC, dependency_name ASC
        LIMIT ?
    """,
}
54+
55+
56+
def list_named_queries() -> list[str]:
    """List every registered named-query identifier in ascending order."""
    names = list(NAMED_QUERIES)
    names.sort()
    return names
59+
60+
61+
def run_named_query(
    db_path: Path,
    query_name: str,
    *,
    limit: int = 25,
    run_id: str | None = None,
) -> tuple[list[str], list[tuple[Any, ...]]]:
    """Execute a named query against a corpus database.

    Args:
        db_path: Location of the DuckDB database file to query.
        query_name: Key into ``NAMED_QUERIES`` selecting the SQL to run.
        limit: Maximum number of rows to return; must be at least 1.
        run_id: When given, only rows whose ``run_id`` equals this value are
            considered. ``None`` disables the run filter.

    Returns:
        A ``(columns, rows)`` pair: the result column names in order, and
        the fetched rows as tuples.

    Raises:
        ValueError: If ``query_name`` is not registered or ``limit`` < 1.
        FileNotFoundError: If ``db_path`` does not exist.
    """
    if query_name not in NAMED_QUERIES:
        known = ", ".join(list_named_queries())
        raise ValueError(f"Unknown query `{query_name}`. Known queries: {known}")
    if limit < 1:
        raise ValueError("limit must be >= 1")
    # duckdb.connect() creates a brand-new empty database when the file is
    # missing; fail early instead of leaving a stray file behind and then
    # erroring on a missing table.
    if not Path(db_path).exists():
        raise FileNotFoundError(f"Corpus database not found: {db_path}")

    con = duckdb.connect(str(db_path))
    try:
        # Parameter order mirrors the placeholders in every named query:
        # run_id (NULL check), run_id (equality), limit.
        cursor = con.execute(NAMED_QUERIES[query_name], [run_id, run_id, limit])
        rows = cursor.fetchall()
        columns = [item[0] for item in (cursor.description or [])]
    finally:
        con.close()
    return columns, rows
83+

0 commit comments

Comments
 (0)