|
| 1 | +"""Named query packs for justfile corpus analytics.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +from pathlib import Path |
| 6 | +from typing import Any |
| 7 | + |
| 8 | +import duckdb |
| 9 | + |
# Catalog of canned analytics queries, keyed by the identifier callers pass as
# ``query_name``. Every statement takes exactly three positional ``?``
# parameters, in this order: run_id, run_id, limit.
#   * ``(? IS NULL OR run_id = ?)`` makes the run filter optional: when the
#     first parameter is NULL the predicate is true for every row, otherwise
#     only rows whose ``run_id`` equals the second parameter pass.
#   * the trailing ``LIMIT ?`` caps the number of rows returned.
NAMED_QUERIES: dict[str, str] = {
    # Recipe names ranked by the number of distinct repos they occur in, then
    # by total occurrences; ties are broken by name.
    "top_recipe_names": """
        SELECT
            recipe_name,
            COUNT(DISTINCT repo_name) AS repo_count,
            COUNT(*) AS occurrence_count
        FROM recipe_occurrences
        WHERE (? IS NULL OR run_id = ?)
        GROUP BY recipe_name
        ORDER BY repo_count DESC, occurrence_count DESC, recipe_name ASC
        LIMIT ?
    """,
    # Rows of unique_recipes whose repo_count exceeds 1, ordered by repo_count
    # and occurrence_count (both descending), then recipe name.
    "top_reused_signatures": """
        SELECT
            recipe_name,
            repo_count,
            occurrence_count,
            signature
        FROM unique_recipes
        WHERE (? IS NULL OR run_id = ?) AND repo_count > 1
        ORDER BY repo_count DESC, occurrence_count DESC, recipe_name ASC
        LIMIT ?
    """,
    # Repos whose parsed_success flag is FALSE, with the recorded parse_error,
    # ordered by repo name.
    "parse_failures": """
        SELECT
            repo_name,
            parse_error
        FROM repo_sources
        WHERE (? IS NULL OR run_id = ?) AND parsed_success = FALSE
        ORDER BY repo_name ASC
        LIMIT ?
    """,
    # Dependency names ranked by the number of recipe_dependencies rows that
    # reference them, then by the number of distinct repos involved.
    "dependency_hotspots": """
        SELECT
            dependency_name,
            COUNT(*) AS dependent_recipe_count,
            COUNT(DISTINCT repo_name) AS repo_count
        FROM recipe_dependencies
        WHERE (? IS NULL OR run_id = ?)
        GROUP BY dependency_name
        ORDER BY dependent_recipe_count DESC, repo_count DESC, dependency_name ASC
        LIMIT ?
    """,
}
| 54 | + |
| 55 | + |
def list_named_queries() -> list[str]:
    """List the identifiers of all registered named queries in ascending order."""
    # Iterating the dict yields its keys; sort the copy so the registry itself
    # is never touched.
    identifiers = list(NAMED_QUERIES)
    identifiers.sort()
    return identifiers
| 59 | + |
| 60 | + |
def run_named_query(
    db_path: Path,
    query_name: str,
    *,
    limit: int = 25,
    run_id: str | None = None,
) -> tuple[list[str], list[tuple[Any, ...]]]:
    """Execute a named query against a corpus database.

    Args:
        db_path: Location of the DuckDB database file to query.
        query_name: Key into ``NAMED_QUERIES`` selecting the statement to run.
        limit: Maximum number of rows to return; must be at least 1.
        run_id: When given, only rows whose ``run_id`` equals this value are
            considered; ``None`` disables the run filter.

    Returns:
        A ``(columns, rows)`` pair: the result column names in select order
        and the fetched rows as tuples.

    Raises:
        ValueError: If ``query_name`` is not a registered query or ``limit``
            is smaller than 1.
        FileNotFoundError: If ``db_path`` does not exist.
    """
    if query_name not in NAMED_QUERIES:
        known = ", ".join(list_named_queries())
        raise ValueError(f"Unknown query `{query_name}`. Known queries: {known}")
    if limit < 1:
        raise ValueError("limit must be >= 1")
    # duckdb.connect() in its default read-write mode creates a brand-new,
    # empty database when the path is missing. Fail early instead, so a
    # mistyped path neither leaves a stray file behind nor surfaces later as
    # a confusing missing-table error.
    if not Path(db_path).exists():
        raise FileNotFoundError(f"Corpus database not found: {db_path}")

    con = duckdb.connect(str(db_path))
    try:
        # Bind order mirrors the placeholders in every named query: run_id
        # twice for `(? IS NULL OR run_id = ?)`, then the LIMIT value.
        result = con.execute(NAMED_QUERIES[query_name], [run_id, run_id, limit])
        rows = result.fetchall()
        # `description` may be None; treat that as "no columns".
        columns = [item[0] for item in (result.description or [])]
    finally:
        con.close()
    return columns, rows
| 83 | + |
0 commit comments