From 57a6f62856e880e4c9c4fa1b2ecf9541df757e10 Mon Sep 17 00:00:00 2001 From: aarontuor Date: Fri, 12 Jun 2026 10:44:36 -0700 Subject: [PATCH 1/6] feat(skills): external skill catalogs, native discovery, skill-creator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bridge dsagt's skill system into agent-native discovery and add a searchable catalog of installable skills from external GitHub repos. Two tiers: - Catalog — external Agent-Skills repos (default: K-Dense scientific, 140+) cloned + indexed into per-source `skills_catalog__<slug>` KB collections. Searchable via search_skills, never loaded into the agent's context. - Installed — a chosen skill copied into <project>/skills/<name> and mirrored into .claude/skills/ for Claude Code's native discovery. Changes: - commands/skills_catalog.py: shallow-clone cache, recursive SKILL.md discovery, per-source indexing (idempotent re-sync via drop+rebuild), find/install. Known sources: scientific, anthropic, antigravity, composio. - MCP tools: install_skill + catalog-spanning search_skills (registry); add_skill_source / list_skill_sources (knowledge). Added to auto-allow. - agents/base._mirror_skills_to + ClaudeSetup hook: manifest-gated mirror into .claude/skills/ (never clobbers user skills; reaps stale entries; trims >1536-char descriptions in the copy only). - Bundled skill-creator meta-skill (Anthropic template + condensed spec). - CLI: dsagt skills sync/add/list/search. - Config: skills block (sources/populate_native/populate_catalog), backfilled for old configs; setup-kb syncs the default catalog (--no-skill-catalog to skip); reserve .skill_sources; kb_from_config. - dsagt_instructions.md: two-tier guidance (native vs catalog/install). - use_cases/isaac_skills_demo: runnable mock of the isaac_vasp workflow exercising the full flow with tiny mock VASP data. Tests: test_skills_catalog.py + config/server-routing additions (201 passed, 13 skipped); black + ruff clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/dsagt/agents/base.py | 81 +++++ src/dsagt/agents/claude.py | 13 + src/dsagt/commands/cli.py | 158 ++++++++++ src/dsagt/commands/knowledge_server.py | 288 ++++++++++++++--- src/dsagt/commands/registry_server.py | 298 ++++++++++++++---- src/dsagt/commands/setup_core_kb.py | 128 ++++++-- src/dsagt/commands/skills_catalog.py | 277 ++++++++++++++++ src/dsagt/dsagt_instructions.md | 2 + src/dsagt/registry.py | 51 ++- src/dsagt/session.py | 190 ++++++++--- src/dsagt/skills/skill-creator/SKILL.md | 65 ++++ .../references/SKILL_template.md | 53 ++++ .../references/agent_skills_spec.md | 48 +++ tests/test_config.py | 26 ++ tests/test_knowledge_server.py | 298 +++++++++++++----- tests/test_registry_server.py | 239 ++++++++++---- tests/test_skills_catalog.py | 196 ++++++++++++ use_cases/isaac_skills_demo/README.md | 159 ++++++++++ .../mock_data/expected_isaac_record.json | 63 ++++ .../mock_data/mock_slab/INCAR | 18 ++ .../mock_data/mock_slab/OUTCAR | 42 +++ .../mock_data/mock_slab/POSCAR | 21 ++ 22 files changed, 2379 insertions(+), 335 deletions(-) create mode 100644 src/dsagt/commands/skills_catalog.py create mode 100644 src/dsagt/skills/skill-creator/SKILL.md create mode 100644 src/dsagt/skills/skill-creator/references/SKILL_template.md create mode 100644 src/dsagt/skills/skill-creator/references/agent_skills_spec.md create mode 100644 tests/test_skills_catalog.py create mode 100644 use_cases/isaac_skills_demo/README.md create mode 100644 use_cases/isaac_skills_demo/mock_data/expected_isaac_record.json create mode 100644 use_cases/isaac_skills_demo/mock_data/mock_slab/INCAR create mode 100644 use_cases/isaac_skills_demo/mock_data/mock_slab/OUTCAR create mode 100644 use_cases/isaac_skills_demo/mock_data/mock_slab/POSCAR diff --git a/src/dsagt/agents/base.py b/src/dsagt/agents/base.py index 2835d80..454ac46 100644 --- a/src/dsagt/agents/base.py +++ b/src/dsagt/agents/base.py @@ -13,6 +13,7 @@ import json import logging 
import shlex +import shutil import subprocess from abc import ABC, abstractmethod from pathlib import Path @@ -41,6 +42,7 @@ "get_registry", "http_request", "install_dependencies", + "install_skill", "read_file", "reconstruct_pipeline", "run_command", @@ -50,6 +52,7 @@ "search_skills", ], "knowledge": [ + "add_skill_source", "kb_add_vector_db", "kb_append", "kb_dismiss_suggestion", @@ -60,6 +63,7 @@ "kb_list_collections", "kb_remember", "kb_search", + "list_skill_sources", ], } @@ -212,6 +216,83 @@ def _append_or_write(path: Path, content: str, marker: str) -> str | None: return f"Wrote {path}" +#: Claude Code caps a skill's frontmatter description (combined with +#: when_to_use) at this many characters; longer ones are rejected. We +#: truncate the *mirrored* copy only, never the project source. +_NATIVE_DESCRIPTION_CAP = 1536 + +#: Manifest filename inside a native skills dir listing the skill names +#: dsagt placed there, so the mirror can reap its own stale entries on +#: re-run without ever touching user-authored skills. +_SKILL_MANIFEST = ".dsagt-managed.json" + + +def _truncate_native_description(skill_md: Path) -> None: + """If the mirrored SKILL.md's description exceeds the native cap, trim it.""" + import yaml + + text = skill_md.read_text() + if not text.startswith("---"): + return + parts = text.split("---", 2) + if len(parts) < 3: + return + try: + front = yaml.safe_load(parts[1]) or {} + except yaml.YAMLError: + return + desc = front.get("description") + if isinstance(desc, str) and len(desc) > _NATIVE_DESCRIPTION_CAP: + front["description"] = desc[: _NATIVE_DESCRIPTION_CAP - 1].rstrip() + "…" + new_front = yaml.dump(front, default_flow_style=False, sort_keys=False) + skill_md.write_text(f"---\n{new_front}---{parts[2]}") + + +def _mirror_skills_to(target_dir: Path, skill_dirs: list[Path]) -> list[str]: + """Idempotently mirror *skill_dirs* into *target_dir* (e.g. .claude/skills). 
+ + Copies each skill directory (SKILL.md + scripts/ + references/) under + ``target_dir//``. A manifest tracks the names dsagt owns so a + later run reaps skills that were removed upstream **without ever + touching user-authored skills** that dsagt didn't place. ``skill_dirs`` + should list bundled dirs before project dirs so a project skill wins a + name collision (copied last). + """ + actions: list[str] = [] + manifest_path = target_dir / _SKILL_MANIFEST + previously: list[str] = [] + if manifest_path.exists(): + try: + previously = json.loads(manifest_path.read_text()) + except (json.JSONDecodeError, OSError): + previously = [] + + target_dir.mkdir(parents=True, exist_ok=True) + managed: list[str] = [] + for src in skill_dirs: + if not (src / "SKILL.md").exists(): + continue + name = src.name + dest = target_dir / name + if dest.exists(): + shutil.rmtree(dest) + shutil.copytree(src, dest) + _truncate_native_description(dest / "SKILL.md") + if name not in managed: + managed.append(name) + + # Reap skills dsagt placed previously that are gone from the source set. + for stale in set(previously) - set(managed): + stale_dir = target_dir / stale + if stale_dir.is_dir(): + shutil.rmtree(stale_dir, ignore_errors=True) + + manifest_path.write_text(json.dumps(sorted(managed), indent=2) + "\n") + if managed: + actions.append(f"Mirrored {len(managed)} skill(s) into {target_dir}") + return actions + + def _build_mcp_servers_dict(env_block: dict | None) -> dict: """Build the standard ``{"mcpServers": {...}}`` dict for dsagt servers. 
diff --git a/src/dsagt/agents/claude.py b/src/dsagt/agents/claude.py index f2a4094..87ad480 100644 --- a/src/dsagt/agents/claude.py +++ b/src/dsagt/agents/claude.py @@ -52,6 +52,7 @@ _load_master_instructions, _mcp_env_block, _mcp_server_args, + _mirror_skills_to, _run_simple_script, ) @@ -168,6 +169,18 @@ def write_dynamic( mcp_path.write_text(json.dumps(mcp_config, indent=2) + "\n") actions.append(f"Wrote {mcp_path}") + # Mirror installed (project) + bundled skills into Claude Code's + # native skill dir so it discovers/auto-invokes them without an MCP + # round-trip. Bundled first, project last → project wins collisions. + # A newly-created .claude/skills/ is only picked up on Claude restart, + # which is fine: this runs at init/start, before the agent launches. + if (config.get("skills") or {}).get("populate_native", True): + from dsagt.registry import SkillRegistry + + reg = SkillRegistry(runtime_dir=working_dir, kb=None) + src_dirs = reg._bundled_skill_dirs() + reg._project_skill_dirs() + actions += _mirror_skills_to(working_dir / ".claude" / "skills", src_dirs) + # Configure mlflow autolog claude — writes .claude/settings.json # with the MLflow Stop hook + tracking env vars. 
Idempotent and # preserves any existing keys in settings.json (mlflow's setup diff --git a/src/dsagt/commands/cli.py b/src/dsagt/commands/cli.py index 40d30d4..f935846 100644 --- a/src/dsagt/commands/cli.py +++ b/src/dsagt/commands/cli.py @@ -382,6 +382,129 @@ def _cmd_setup_kb(args): run_setup_kb(args) +def _cmd_skills(args): + """Manage external skill catalogs and project skill installs.""" + from dsagt.commands.skills_catalog import ( + KNOWN_SOURCES, + install_into_project, + sync_source, + ) + from dsagt.registry import ( + CATALOG_COLLECTION_PREFIX, + SKILLS_COLLECTION, + SkillRegistry, + ) + from dsagt.session import kb_from_config, load_config + + action = getattr(args, "skills_action", None) + if not action: + print( + "Usage: dsagt skills ...", file=sys.stderr + ) + return 1 + + config = load_config(args.project) + pdir = Path(config["project_dir"]) + + if action == "sync": + kb = kb_from_config(config) + try: + sources = ( + [args.source] + if args.source + else config.get("skills", {}).get("sources", []) + ) + if not sources: + print("No skill sources configured.") + return 0 + for src in sources: + stats = sync_source(src, kb=kb, force=args.force) + print( + f" {stats['url']}: {stats['indexed']} skill(s) indexed (slug {stats['slug']})" + ) + finally: + kb.close() + return 0 + + if action == "add": + target = args.target + is_source = ( + target in KNOWN_SOURCES + or target.startswith(("http://", "https://", "git@")) + or target.count("/") == 1 + ) + if is_source: + kb = kb_from_config(config) + try: + stats = sync_source(target, kb=kb) + finally: + kb.close() + print(f"Added source {stats['url']}: {stats['indexed']} skill(s) indexed.") + print( + "Run 'dsagt start' to mirror an installed skill natively, or " + f"'dsagt skills add {args.project} ' to install one." + ) + else: + info = install_into_project(target, pdir) + print( + f"{info['action'].capitalize()} skill '{info['name']}' at {info['dest_dir']}." 
+ ) + print("It becomes natively discoverable on the next 'dsagt start'.") + return 0 + + if action == "list": + if args.catalog: + kb = kb_from_config(config) + try: + cats = [ + c for c in kb.collections if c.startswith(CATALOG_COLLECTION_PREFIX) + ] + finally: + kb.close() + print( + "Catalog collections:" + if cats + else "No catalog synced. Run 'dsagt skills sync'." + ) + for c in sorted(cats): + print(f" {c}") + else: + reg = SkillRegistry(runtime_dir=pdir, kb=None) + skills = reg.list_skills() + print(f"Installed/bundled skills ({len(skills)}):") + for s in skills: + print(f" {s.get('name')} — {(s.get('description') or '')[:80]}") + return 0 + + if action == "search": + kb = kb_from_config(config) + try: + collections = [SKILLS_COLLECTION] + [ + c for c in kb.collections if c.startswith(CATALOG_COLLECTION_PREFIX) + ] + hits = [] + for coll in collections: + try: + hits.extend(kb.search(query=args.query, collection=coll, top_k=10)) + except (FileNotFoundError, KeyError, ValueError): + continue + hits.sort(key=lambda r: r.get("score", 0), reverse=True) + for r in hits[:10]: + meta = r.get("chunk", {}).get("metadata", {}) + print( + f" {meta.get('skill_name', '?')} ({r.get('score', 0):.2f}) " + f"[{meta.get('source', '')}]" + ) + if not hits: + print("No skills found.") + finally: + kb.close() + return 0 + + print(f"Unknown skills action: {action}", file=sys.stderr) + return 1 + + def _cmd_mlflow(args): """Run MLflow in the foreground. 
@@ -1032,6 +1155,40 @@ def main(argv=None): add_setup_kb_args(p_setup_kb) + p_skills = sub.add_parser( + "skills", help="Manage external skill catalogs and project installs" + ) + skills_sub = p_skills.add_subparsers(dest="skills_action") + sp_sync = skills_sub.add_parser( + "sync", help="Clone + index skill source(s) into the catalog" + ) + sp_sync.add_argument("project", help="Project name") + sp_sync.add_argument( + "--source", help="Known source name or GitHub URL (default: all configured)" + ) + sp_sync.add_argument( + "--force", action="store_true", help="Re-clone sources from scratch" + ) + sp_add = skills_sub.add_parser( + "add", help="Install a catalog skill, or add+sync a new source" + ) + sp_add.add_argument("project", help="Project name") + sp_add.add_argument( + "target", help="Skill name to install, or source name/URL to add" + ) + sp_list = skills_sub.add_parser( + "list", help="List installed skills (or --catalog collections)" + ) + sp_list.add_argument("project", help="Project name") + sp_list.add_argument( + "--catalog", action="store_true", help="List synced catalog collections" + ) + sp_search = skills_sub.add_parser( + "search", help="Search installed + catalog skills" + ) + sp_search.add_argument("project", help="Project name") + sp_search.add_argument("query", help="Search query") + sub.add_parser("list", help="List all registered projects and their status") p_mv = sub.add_parser("mv", help="Move a project to a new location") @@ -1077,6 +1234,7 @@ def main(argv=None): "stop": _cmd_stop, "smoke-test": _cmd_smoke_test, "setup-kb": _cmd_setup_kb, + "skills": _cmd_skills, "list": _cmd_list, "mv": _cmd_mv, "rm": _cmd_rm, diff --git a/src/dsagt/commands/knowledge_server.py b/src/dsagt/commands/knowledge_server.py index dd970cd..a06716c 100644 --- a/src/dsagt/commands/knowledge_server.py +++ b/src/dsagt/commands/knowledge_server.py @@ -38,10 +38,19 @@ from mcp.server.lowlevel import Server, NotificationOptions from mcp.server.models import 
InitializationOptions -from dsagt.knowledge import EMBEDDER_REGISTRY, VECTORINDEX_REGISTRY, CollectionRoute, KnowledgeBase +from dsagt.knowledge import ( + EMBEDDER_REGISTRY, + VECTORINDEX_REGISTRY, + CollectionRoute, + KnowledgeBase, +) from dsagt.memory import SuggestionQueue from dsagt.memory import ExplicitMemory -from dsagt.session import REGISTRY_DIR, _collection_exists, setup_runtime_kb # noqa: F401 +from dsagt.session import ( + REGISTRY_DIR, + _collection_exists, + setup_runtime_kb, +) # noqa: F401 logger = logging.getLogger(__name__) @@ -50,6 +59,7 @@ # MCP server helpers # --------------------------------------------------------------------------- + async def _run_stdio(server: Server, name: str) -> None: async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): await server.run( @@ -66,8 +76,6 @@ async def _run_stdio(server: Server, name: str) -> None: ) - - # _collection_exists and setup_runtime_kb live in dsagt.session (imported above). @@ -120,12 +128,11 @@ def _register_external_collection( kb.register_route(collection_name, route) - - # --------------------------------------------------------------------------- # Background job tracker # --------------------------------------------------------------------------- + @dataclass class _JobTracker: """Tracks background ingest/append jobs and their completion state.""" @@ -157,6 +164,7 @@ async def _run(): tracker.jobs[job_id]["message"] = "Done." except Exception as e: import traceback + tb = traceback.format_exc() tracker.jobs[job_id]["status"] = "error" tracker.jobs[job_id]["error"] = f"{type(e).__name__}: {e}" @@ -179,13 +187,16 @@ async def _run(): # return a result dict; the outer call_tool wrapper JSON-serializes it. 
# --------------------------------------------------------------------------- + async def _handle_kb_list_collections(arguments: dict, *, kb: KnowledgeBase) -> dict: collections = await asyncio.to_thread(kb.list_collections) return {"status": "ok", "collections": collections, "count": len(collections)} async def _handle_kb_search( - arguments: dict, *, kb: KnowledgeBase, + arguments: dict, + *, + kb: KnowledgeBase, ) -> dict: query = arguments["query"] top_k = arguments.get("top_k", 5) @@ -217,7 +228,9 @@ async def _handle_kb_search( for coll_name in target_collections: try: - search_kwargs = dict(query=query, collection=coll_name, top_k=top_k, rerank=rerank) + search_kwargs = dict( + query=query, collection=coll_name, top_k=top_k, rerank=rerank + ) if where: search_kwargs["where"] = where coll_results = await asyncio.to_thread(kb.search, **search_kwargs) @@ -229,7 +242,10 @@ async def _handle_kb_search( if search_errors and not all_results: if len(target_collections) == 1: return {"status": "error", "error": search_errors[0]} - return {"status": "error", "error": f"All collections failed: {'; '.join(search_errors)}"} + return { + "status": "error", + "error": f"All collections failed: {'; '.join(search_errors)}", + } score_key = "rerank_score" if rerank else "score" all_results.sort(key=lambda r: r.get(score_key, r["score"]), reverse=True) @@ -248,8 +264,10 @@ async def _handle_kb_search( "source_file": r["chunk"]["metadata"].get("source_file", ""), "chunk_index": r["chunk"]["metadata"].get("chunk_index", 0), "metadata": { - k: v for k, v in r["chunk"]["metadata"].items() - if k not in ("source_file", "chunk_index", "collection", "file_type") + k: v + for k, v in r["chunk"]["metadata"].items() + if k + not in ("source_file", "chunk_index", "collection", "file_type") }, } for r in all_results @@ -261,7 +279,10 @@ async def _handle_kb_search( async def _handle_kb_ingest( - arguments: dict, *, kb: KnowledgeBase, job_tracker: _JobTracker, + arguments: dict, + *, + kb: 
KnowledgeBase, + job_tracker: _JobTracker, ) -> dict: folder_path = Path(arguments["folder_path"]) collection_name = arguments.get("collection_name") @@ -289,7 +310,9 @@ async def _handle_kb_ingest( if _collection_exists(kb.index_dir / target_name): source_path = kb.index_dir / target_name / "source.txt" - existing_source = source_path.read_text().strip() if source_path.exists() else None + existing_source = ( + source_path.read_text().strip() if source_path.exists() else None + ) same_source = ( existing_source is None or Path(existing_source).resolve() == folder_path.resolve() @@ -326,8 +349,13 @@ async def _handle_kb_ingest( async def _ingest_with_logging(): import traceback as _tb - logger.info("Ingest starting: collection=%s folder=%s kwargs=%s", - target_name, folder_path, ingest_kwargs) + + logger.info( + "Ingest starting: collection=%s folder=%s kwargs=%s", + target_name, + folder_path, + ingest_kwargs, + ) try: result = await asyncio.to_thread(kb.ingest, folder_path, **ingest_kwargs) logger.info("Ingest complete: %s", result) @@ -354,7 +382,10 @@ async def _ingest_with_logging(): async def _handle_kb_append( - arguments: dict, *, kb: KnowledgeBase, job_tracker: _JobTracker, + arguments: dict, + *, + kb: KnowledgeBase, + job_tracker: _JobTracker, ) -> dict: collection = arguments["collection"] paths = arguments["paths"] @@ -399,8 +430,12 @@ async def _handle_kb_add_vector_db(arguments: dict, *, kb: KnowledgeBase) -> dic await asyncio.to_thread( _register_external_collection, - kb, collection_name, vector_db, - connection_params, embedding_model, description, + kb, + collection_name, + vector_db, + connection_params, + embedding_model, + description, ) return { "status": "ok", @@ -472,11 +507,13 @@ async def _handle_kb_remember( kb.add_entries, texts=[text], collection="session_memory", - metadatas=[{ - "source_type": "explicit_memory", - "category": category, - "session_id": session_id, - }], + metadatas=[ + { + "source_type": "explicit_memory", + 
"category": category, + "session_id": session_id, + } + ], ) if promoted_from: @@ -492,7 +529,10 @@ async def _handle_kb_remember( async def _handle_kb_get_memories( - arguments: dict, *, memory: ExplicitMemory, suggestions: SuggestionQueue, + arguments: dict, + *, + memory: ExplicitMemory, + suggestions: SuggestionQueue, ) -> dict: entries = await asyncio.to_thread(memory.get_all) pending = suggestions.get_all() @@ -504,14 +544,18 @@ async def _handle_kb_get_memories( async def _handle_kb_get_suggestions( - arguments: dict, *, suggestions: SuggestionQueue, + arguments: dict, + *, + suggestions: SuggestionQueue, ) -> dict: pending = suggestions.get_all() return {"status": "ok", "count": len(pending), "suggestions": pending} async def _handle_kb_dismiss_suggestion( - arguments: dict, *, suggestions: SuggestionQueue, + arguments: dict, + *, + suggestions: SuggestionQueue, ) -> dict: suggestion_id = arguments["suggestion_id"] dismissed = suggestions.dismiss(suggestion_id) @@ -524,6 +568,74 @@ async def _handle_kb_dismiss_suggestion( # Server factory (thin wiring — used by main() and tests) # --------------------------------------------------------------------------- + +def _persist_skill_source(runtime_dir: Path, spec: dict) -> None: + """Append a resolved source to ``skills.sources`` in the project config. + + Dedupes by URL. No-op if the config file is missing (e.g. tests with a + bare runtime dir) — the catalog is still indexed either way. 
+ """ + cfg_path = runtime_dir / "dsagt_config.yaml" + if not cfg_path.exists(): + return + cfg = yaml.safe_load(cfg_path.read_text()) or {} + skills = cfg.setdefault("skills", {}) + sources = skills.setdefault("sources", []) + if not any(s.get("url") == spec.get("url") for s in sources): + sources.append( + {k: spec[k] for k in ("name", "url", "branch", "subdir") if k in spec} + ) + cfg_path.write_text(yaml.dump(cfg, default_flow_style=False, sort_keys=False)) + + +async def _handle_add_skill_source( + arguments: dict, + *, + kb: KnowledgeBase, + runtime_dir: Path, +) -> dict: + """Enable a skill source (known name or GitHub URL): clone + index the catalog.""" + from dsagt.commands.skills_catalog import KNOWN_SOURCES, resolve_source, sync_source + + source = arguments.get("source") + if not source: + return { + "error": "add_skill_source requires 'source' (known name or GitHub URL)." + } + try: + spec = resolve_source(source) + if isinstance(source, str) and source in KNOWN_SOURCES: + spec.setdefault("name", source) + stats = await asyncio.to_thread(sync_source, source, kb=kb) + except (ValueError, RuntimeError) as e: + return {"error": str(e)} + _persist_skill_source( + runtime_dir, {"name": spec.get("name", stats["slug"]), **spec} + ) + return { + "source": spec["url"], + "slug": stats["slug"], + "skills_indexed": stats["indexed"], + "note": "Searchable via search_skills; install one with install_skill.", + } + + +async def _handle_list_skill_sources(arguments: dict, *, kb: KnowledgeBase) -> dict: + """List known + synced skill sources and their indexed counts.""" + from dsagt.commands.skills_catalog import KNOWN_SOURCES + from dsagt.registry import CATALOG_COLLECTION_PREFIX + + synced = {c for c in kb.collections if c.startswith(CATALOG_COLLECTION_PREFIX)} + return { + "known_sources": { + name: {"url": s["url"], "description": s.get("description", "")} + for name, s in KNOWN_SOURCES.items() + }, + "synced_collections": sorted(synced), + "note": 
"add_skill_source to enable; search_skills to browse.", + } + + def create_knowledge_server( kb: KnowledgeBase, runtime_dir: str | Path | None = None, @@ -545,21 +657,57 @@ def create_knowledge_server( job_tracker = _JobTracker() handlers = { + "add_skill_source": partial( + _handle_add_skill_source, kb=kb, runtime_dir=mem_dir + ), + "list_skill_sources": partial(_handle_list_skill_sources, kb=kb), "kb_list_collections": partial(_handle_kb_list_collections, kb=kb), "kb_search": partial(_handle_kb_search, kb=kb), "kb_ingest": partial(_handle_kb_ingest, kb=kb, job_tracker=job_tracker), "kb_append": partial(_handle_kb_append, kb=kb, job_tracker=job_tracker), "kb_add_vector_db": partial(_handle_kb_add_vector_db, kb=kb), "kb_job_status": partial(_handle_kb_job_status, job_tracker=job_tracker), - "kb_remember": partial(_handle_kb_remember, kb=kb, memory=memory, suggestions=suggestions), - "kb_get_memories": partial(_handle_kb_get_memories, memory=memory, suggestions=suggestions), - "kb_get_suggestions": partial(_handle_kb_get_suggestions, suggestions=suggestions), - "kb_dismiss_suggestion": partial(_handle_kb_dismiss_suggestion, suggestions=suggestions), + "kb_remember": partial( + _handle_kb_remember, kb=kb, memory=memory, suggestions=suggestions + ), + "kb_get_memories": partial( + _handle_kb_get_memories, memory=memory, suggestions=suggestions + ), + "kb_get_suggestions": partial( + _handle_kb_get_suggestions, suggestions=suggestions + ), + "kb_dismiss_suggestion": partial( + _handle_kb_dismiss_suggestion, suggestions=suggestions + ), } @server.list_tools() async def list_tools() -> list[types.Tool]: return [ + types.Tool( + name="add_skill_source", + description=( + "Enable an external agent-skill source (a known name like " + "'scientific'/'anthropic'/'antigravity'/'composio', or a GitHub URL). " + "Clones it and indexes its skills into the searchable catalog " + "(search_skills). Does NOT load them into context." 
+ ), + inputSchema={ + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "Known source name or GitHub repo URL / owner/repo", + }, + }, + "required": ["source"], + }, + ), + types.Tool( + name="list_skill_sources", + description="List known + synced external skill sources and their indexed catalogs.", + inputSchema={"type": "object", "properties": {}}, + ), types.Tool( name="kb_list_collections", description=( @@ -706,13 +854,34 @@ async def list_tools() -> list[types.Tool]: inputSchema={ "type": "object", "properties": { - "collection_name": {"type": "string", "description": "Unique name for this collection"}, - "vector_db": {"type": "string", "enum": ["chroma", "lancedb", "qdrant"], "description": "Vector store backend type"}, - "connection_params": {"type": "object", "description": "Backend-specific connection parameters."}, - "embedding_model": {"type": "string", "description": "The API model used to build this index"}, - "description": {"type": "string", "description": "Human-readable description for agent discovery"}, + "collection_name": { + "type": "string", + "description": "Unique name for this collection", + }, + "vector_db": { + "type": "string", + "enum": ["chroma", "lancedb", "qdrant"], + "description": "Vector store backend type", + }, + "connection_params": { + "type": "object", + "description": "Backend-specific connection parameters.", + }, + "embedding_model": { + "type": "string", + "description": "The API model used to build this index", + }, + "description": { + "type": "string", + "description": "Human-readable description for agent discovery", + }, }, - "required": ["collection_name", "vector_db", "connection_params", "embedding_model"], + "required": [ + "collection_name", + "vector_db", + "connection_params", + "embedding_model", + ], }, ), types.Tool( @@ -721,7 +890,10 @@ async def list_tools() -> list[types.Tool]: inputSchema={ "type": "object", "properties": { - "job_id": {"type": "string", 
"description": "Job ID returned by kb_ingest or kb_append"}, + "job_id": { + "type": "string", + "description": "Job ID returned by kb_ingest or kb_append", + }, }, "required": ["job_id"], }, @@ -735,11 +907,26 @@ async def list_tools() -> list[types.Tool]: inputSchema={ "type": "object", "properties": { - "text": {"type": "string", "description": "The fact to remember"}, - "category": {"type": "string", "description": "Classification tag"}, - "session_id": {"type": "string", "description": "Current session identifier"}, - "supersedes": {"type": "string", "description": "entry_id of an existing memory this replaces"}, - "promoted_from": {"type": "string", "description": "suggestion_id if promoted from outlier suggestion"}, + "text": { + "type": "string", + "description": "The fact to remember", + }, + "category": { + "type": "string", + "description": "Classification tag", + }, + "session_id": { + "type": "string", + "description": "Current session identifier", + }, + "supersedes": { + "type": "string", + "description": "entry_id of an existing memory this replaces", + }, + "promoted_from": { + "type": "string", + "description": "suggestion_id if promoted from outlier suggestion", + }, }, "required": ["text"], }, @@ -766,7 +953,10 @@ async def list_tools() -> list[types.Tool]: inputSchema={ "type": "object", "properties": { - "suggestion_id": {"type": "string", "description": "ID of the suggestion to dismiss"}, + "suggestion_id": { + "type": "string", + "description": "ID of the suggestion to dismiss", + }, }, "required": ["suggestion_id"], }, @@ -783,7 +973,9 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]: except Exception as e: logger.exception("Unexpected error in tool '%s'", name) result = {"status": "error", "error": f"Unexpected error: {e}"} - return [types.TextContent(type="text", text=json.dumps(result, ensure_ascii=False))] + return [ + types.TextContent(type="text", text=json.dumps(result, ensure_ascii=False)) + ] return 
server @@ -792,6 +984,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]: # Entry point # --------------------------------------------------------------------------- + def main(): """Entry point for dsagt-knowledge-server. @@ -805,6 +998,7 @@ def main(): so cwd is project_dir for the MCP children it spawns. """ from dsagt.observability import find_project_config + project_dir, _ = find_project_config() if project_dir is None: raise RuntimeError( @@ -835,6 +1029,7 @@ def main(): # the config is broken and the server fails fast. config_path = project_dir / "dsagt_config.yaml" from dsagt.session import resolve_env_vars + config = resolve_env_vars(yaml.safe_load(config_path.read_text())) kb_config = config["knowledge"] @@ -894,7 +1089,10 @@ def main(): embedder_kwargs.update({"base_url": base_url, "api_key": api_key}) from dsagt.observability import init_tracing, configure_litellm_retries - init_tracing("dsagt-knowledge-server") # session_id picked up from DSAGT_SESSION_ID env + + init_tracing( + "dsagt-knowledge-server" + ) # session_id picked up from DSAGT_SESSION_ID env configure_litellm_retries() runtime_kb_dir = setup_runtime_kb(REGISTRY_DIR / "kb_index", project_dir) diff --git a/src/dsagt/commands/registry_server.py b/src/dsagt/commands/registry_server.py index 4202811..3d7fd80 100644 --- a/src/dsagt/commands/registry_server.py +++ b/src/dsagt/commands/registry_server.py @@ -40,10 +40,12 @@ ) from dsagt.provenance import reconstruct_pipeline from dsagt.registry import ( + CATALOG_COLLECTION_PREFIX, SKILLS_COLLECTION, TOOLS_COLLECTION, SkillRegistry, ToolRegistry, + _parse_frontmatter, ) os.environ["PYTHONUNBUFFERED"] = "1" @@ -55,12 +57,15 @@ # MCP server helpers # --------------------------------------------------------------------------- + async def _run_stdio(server: Server, name: str): async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): await server.run( - read_stream, write_stream, + read_stream, + 
write_stream, InitializationOptions( - server_name=name, server_version="0.1.0", + server_name=name, + server_version="0.1.0", capabilities=server.get_capabilities( notification_options=NotificationOptions(), experimental_capabilities={}, @@ -85,19 +90,27 @@ def _install_dependencies(packages: list[str], timeout: int = 120) -> str: except subprocess.TimeoutExpired: return f"Installation timed out after {timeout}s for: {', '.join(packages)}" except FileNotFoundError: - return "Error: 'uv' command not found. Install uv: https://github.com/astral-sh/uv" + return ( + "Error: 'uv' command not found. Install uv: https://github.com/astral-sh/uv" + ) # --------------------------------------------------------------------------- # Per-tool handlers (module-level, explicit dependencies) # --------------------------------------------------------------------------- + async def _handle_read_file(arguments: dict) -> str: path = Path(arguments["path"]) try: return path.read_text() - except (FileNotFoundError, PermissionError, IsADirectoryError, - OSError, UnicodeDecodeError) as e: + except ( + FileNotFoundError, + PermissionError, + IsADirectoryError, + OSError, + UnicodeDecodeError, + ) as e: return f"Error reading file: {e}" @@ -108,7 +121,10 @@ async def _handle_http_request(arguments: dict) -> str: try: async with httpx.AsyncClient(follow_redirects=True) as client: response = await client.request( - method=method, url=url, headers=headers, timeout=30.0, + method=method, + url=url, + headers=headers, + timeout=30.0, ) return f"Status: {response.status_code}\n\n{response.text}" except (httpx.HTTPError, httpx.InvalidURL) as e: @@ -122,7 +138,9 @@ async def _handle_run_command(arguments: dict) -> str: try: result = subprocess.run( [command] + args, - capture_output=True, text=True, timeout=timeout, + capture_output=True, + text=True, + timeout=timeout, ) except subprocess.TimeoutExpired: return f"Command timed out after {timeout} seconds" @@ -139,7 +157,9 @@ async def 
_handle_run_command(arguments: dict) -> str: async def _handle_save_tool_spec( - arguments: dict, *, registry: ToolRegistry, + arguments: dict, + *, + registry: ToolRegistry, ) -> str: spec = arguments["spec"] # Some MCP clients (notably Claude Sonnet/Haiku 4.x) serialize nested @@ -180,7 +200,9 @@ async def _handle_save_tool_spec( async def _handle_save_skill( - arguments: dict, *, skill_registry: SkillRegistry, + arguments: dict, + *, + skill_registry: SkillRegistry, ) -> str: """Register a skill (workflow / agent instructions) for later reuse. @@ -202,7 +224,9 @@ async def _handle_save_skill( except json.JSONDecodeError as e: return f"Error: reference_files must be a JSON object: {e}" try: - action = skill_registry.save_skill(spec, body=body, reference_files=reference_files) + action = skill_registry.save_skill( + spec, body=body, reference_files=reference_files + ) except (KeyError, ValueError, OSError) as e: return f"Error saving skill: {e}" skill_count = len(skill_registry.list_skills()) @@ -213,7 +237,9 @@ async def _handle_save_skill( async def _handle_get_registry( - arguments: dict, *, registry: ToolRegistry, + arguments: dict, + *, + registry: ToolRegistry, ) -> str: tools = registry.list_tools_raw() if not tools: @@ -222,7 +248,10 @@ async def _handle_get_registry( async def _handle_search_registry( - arguments: dict, *, registry: ToolRegistry, kb: KnowledgeBase | None, + arguments: dict, + *, + registry: ToolRegistry, + kb: KnowledgeBase | None, ) -> str: tool_name = arguments.get("tool_name") query = arguments.get("query", "") @@ -232,9 +261,8 @@ async def _handle_search_registry( if tool_name: tool = registry.get_tool(tool_name) if tool: - return ( - f"Found tool '{tool_name}':\n\n" - + yaml.dump(tool, default_flow_style=False, sort_keys=False) + return f"Found tool '{tool_name}':\n\n" + yaml.dump( + tool, default_flow_style=False, sort_keys=False ) return f"No tool named '{tool_name}'." 
@@ -255,7 +283,8 @@ async def _handle_search_registry( ) if tag and results: results = [ - r for r in results + r + for r in results if tag in r.get("chunk", {}).get("metadata", {}).get("tags", "") ][:top_k] if not results: @@ -287,9 +316,8 @@ async def _handle_search_skills( if skill_name and skill_registry: skill = skill_registry.get_skill(skill_name) if skill: - return ( - f"Found skill '{skill_name}':\n\n" - + yaml.dump(skill, default_flow_style=False, sort_keys=False) + return f"Found skill '{skill_name}':\n\n" + yaml.dump( + skill, default_flow_style=False, sort_keys=False ) return f"No skill named '{skill_name}'." @@ -301,17 +329,30 @@ async def _handle_search_skills( "skill_name for KB-free lookups." ) - # Single ``skills`` collection — bundled and registered entries. - results = kb.search( - query=query or "skill", - collection=SKILLS_COLLECTION, - top_k=top_k * 3 if tag else top_k, - ) - if tag and results: + # Search the installed/bundled ``skills`` collection AND every external + # ``skills_catalog__`` collection, then merge by score. Installed + # skills are also natively discovered by the agent; the catalog is the + # part native discovery can't do (it isn't loaded into context). + collections = [SKILLS_COLLECTION] + [ + c for c in kb.collections if c.startswith(CATALOG_COLLECTION_PREFIX) + ] + fetch_k = top_k * 3 if tag else top_k + results: list[dict] = [] + for coll in collections: + try: + results.extend( + kb.search(query=query or "skill", collection=coll, top_k=fetch_k) + ) + except (FileNotFoundError, KeyError, ValueError): + continue # collection absent/empty on this KB — skip + if tag: results = [ - r for r in results + r + for r in results if tag in r.get("chunk", {}).get("metadata", {}).get("tags", "") - ][:top_k] + ] + results.sort(key=lambda r: r.get("score", 0), reverse=True) + results = results[:top_k] if not results: return "No skills found matching the query." 
@@ -319,16 +360,65 @@ async def _handle_search_skills( for r in results: chunk = r.get("chunk", {}) meta = chunk.get("metadata", {}) + src = meta.get("source", "") + where = ( + " [installed]" + if src in ("bundled", "registered") + else ( + " [catalog · install_skill to add]" + if src.startswith("catalog:") + else "" + ) + ) summaries.append( - f"- **{meta.get('skill_name', 'unknown')}** " + f"- **{meta.get('skill_name', 'unknown')}**{where} " f"(score: {r.get('score', 0):.2f})\n" f" {chunk.get('text', '')[:200]}" ) return f"Found {len(results)} skill(s):\n\n" + "\n\n".join(summaries) +async def _handle_install_skill( + arguments: dict, + *, + skill_registry: SkillRegistry | None, + runtime_dir: Path, +) -> str: + """Install a catalog skill into ``/skills//``. + + The skill becomes natively discoverable after the next ``dsagt start`` + (which mirrors installed skills into ``.claude/skills/`` before launch). + """ + from dsagt.commands.skills_catalog import install_into_project + + name = arguments.get("skill_name") + if not name: + return "install_skill requires 'skill_name'." + try: + info = install_into_project(name, runtime_dir) + except LookupError as e: + return f"Error: {e}" + + # Index the now-installed skill as a project ('registered') skill too, so + # non-native agents can still find it via search_skills after install. + if skill_registry is not None and skill_registry._kb is not None: + skill_md = Path(info["dest_dir"]) / "SKILL.md" + spec = _parse_frontmatter(skill_md) + if spec.get("name"): + skill_registry._index_skill(spec, skill_md) + + return ( + f"{info['action'].capitalize()} skill '{info['name']}' at " + f"{info['dest_dir']}.\n\nIt will be available to the agent natively " + f"(.claude/skills/) on the next `dsagt start`; restart the agent to " + f"pick it up." 
+ ) + + async def _handle_reconstruct_pipeline( - arguments: dict, *, runtime_dir: Path, + arguments: dict, + *, + runtime_dir: Path, ) -> str: fmt = arguments.get("format", "bash") trace_dir = runtime_dir / "trace_archive" @@ -343,7 +433,9 @@ async def _handle_reconstruct_pipeline( async def _handle_install_dependencies( - arguments: dict, *, registry: ToolRegistry, + arguments: dict, + *, + registry: ToolRegistry, ) -> str: tool_name = arguments.get("tool_name") tools = registry.list_tools_raw() @@ -383,6 +475,7 @@ async def _handle_install_dependencies( # Server factory (thin wiring — used by main() and tests) # --------------------------------------------------------------------------- + def create_registry_server( registry: ToolRegistry, kb: KnowledgeBase | None = None, @@ -407,9 +500,20 @@ def create_registry_server( "save_skill": partial(_handle_save_skill, skill_registry=skill_registry), "get_registry": partial(_handle_get_registry, registry=registry), "search_registry": partial(_handle_search_registry, registry=registry, kb=kb), - "search_skills": partial(_handle_search_skills, kb=kb, skill_registry=skill_registry), - "reconstruct_pipeline": partial(_handle_reconstruct_pipeline, runtime_dir=runtime_dir), - "install_dependencies": partial(_handle_install_dependencies, registry=registry), + "search_skills": partial( + _handle_search_skills, kb=kb, skill_registry=skill_registry + ), + "install_skill": partial( + _handle_install_skill, + skill_registry=skill_registry, + runtime_dir=runtime_dir, + ), + "reconstruct_pipeline": partial( + _handle_reconstruct_pipeline, runtime_dir=runtime_dir + ), + "install_dependencies": partial( + _handle_install_dependencies, registry=registry + ), } @server.list_tools() @@ -421,7 +525,10 @@ async def list_tools() -> list[types.Tool]: inputSchema={ "type": "object", "properties": { - "path": {"type": "string", "description": "Path to the file to read"}, + "path": { + "type": "string", + "description": "Path to the file to 
read", + }, }, "required": ["path"], }, @@ -433,8 +540,15 @@ async def list_tools() -> list[types.Tool]: "type": "object", "properties": { "url": {"type": "string", "description": "URL to request"}, - "method": {"type": "string", "description": "HTTP method", "default": "GET"}, - "headers": {"type": "object", "description": "Optional headers"}, + "method": { + "type": "string", + "description": "HTTP method", + "default": "GET", + }, + "headers": { + "type": "object", + "description": "Optional headers", + }, }, "required": ["url"], }, @@ -445,7 +559,10 @@ async def list_tools() -> list[types.Tool]: inputSchema={ "type": "object", "properties": { - "command": {"type": "string", "description": "Command to execute"}, + "command": { + "type": "string", + "description": "Command to execute", + }, "args": { "type": "array", "items": {"type": "string"}, @@ -474,19 +591,33 @@ async def list_tools() -> list[types.Tool]: { "type": "object", "properties": { - "name": {"type": "string", "description": "Unique tool identifier"}, - "description": {"type": "string", "description": "What the tool does"}, - "executable": {"type": "string", "description": "Command to execute"}, + "name": { + "type": "string", + "description": "Unique tool identifier", + }, + "description": { + "type": "string", + "description": "What the tool does", + }, + "executable": { + "type": "string", + "description": "Command to execute", + }, "parameters": { "type": "object", "description": "Parameter definitions", "additionalProperties": { "type": "object", "properties": { - "type": {"type": "string", "description": "Parameter type"}, + "type": { + "type": "string", + "description": "Parameter type", + }, "required": {"type": "boolean"}, "description": {"type": "string"}, - "default": {"description": "Default value"}, + "default": { + "description": "Default value" + }, "cli": { "type": "string", "description": ( @@ -512,7 +643,12 @@ async def list_tools() -> list[types.Tool]: "description": "Tags for 
categorizing the tool", }, }, - "required": ["name", "description", "executable", "parameters"], + "required": [ + "name", + "description", + "executable", + "parameters", + ], }, {"type": "string"}, ], @@ -543,8 +679,14 @@ async def list_tools() -> list[types.Tool]: { "type": "object", "properties": { - "name": {"type": "string", "description": "Unique skill identifier (becomes the directory name)"}, - "description": {"type": "string", "description": "What the skill does / when to use it"}, + "name": { + "type": "string", + "description": "Unique skill identifier (becomes the directory name)", + }, + "description": { + "type": "string", + "description": "What the skill does / when to use it", + }, "tags": { "type": "array", "items": {"type": "string"}, @@ -572,7 +714,10 @@ async def list_tools() -> list[types.Tool]: "path -> file contents, or JSON-encoded string." ), "anyOf": [ - {"type": "object", "additionalProperties": {"type": "string"}}, + { + "type": "object", + "additionalProperties": {"type": "string"}, + }, {"type": "string"}, ], }, @@ -593,24 +738,52 @@ async def list_tools() -> list[types.Tool]: "properties": { "query": {"type": "string", "description": "Search query"}, "tag": {"type": "string", "description": "Filter by tag"}, - "tool_name": {"type": "string", "description": "Exact tool name lookup"}, + "tool_name": { + "type": "string", + "description": "Exact tool name lookup", + }, "top_k": {"type": "integer", "default": 10}, }, }, ), types.Tool( name="search_skills", - description="Search for agent skills (workflows, templates) by name, tag, or description.", + description=( + "Search agent skills by name, tag, or description. Spans installed " + "skills and the external installable catalog. Catalog hits are marked " + "'[catalog]' — use install_skill to add one to this project." 
+ ), inputSchema={ "type": "object", "properties": { "query": {"type": "string", "description": "Search query"}, "tag": {"type": "string", "description": "Filter by tag"}, - "skill_name": {"type": "string", "description": "Exact skill name lookup"}, + "skill_name": { + "type": "string", + "description": "Exact skill name lookup", + }, "top_k": {"type": "integer", "default": 10}, }, }, ), + types.Tool( + name="install_skill", + description=( + "Install a skill from the external catalog (found via search_skills) " + "into this project so the agent can use it natively. Copies SKILL.md " + "+ scripts/references; available natively after the next restart." + ), + inputSchema={ + "type": "object", + "properties": { + "skill_name": { + "type": "string", + "description": "Catalog skill name to install", + }, + }, + "required": ["skill_name"], + }, + ), types.Tool( name="reconstruct_pipeline", description="Reconstruct a reproducible pipeline script from tool execution records.", @@ -631,7 +804,10 @@ async def list_tools() -> list[types.Tool]: inputSchema={ "type": "object", "properties": { - "tool_name": {"type": "string", "description": "Install deps for a specific tool (omit for all)"}, + "tool_name": { + "type": "string", + "description": "Install deps for a specific tool (omit for all)", + }, }, }, ), @@ -650,6 +826,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]: # Entry point # --------------------------------------------------------------------------- + def main(): """Entry point for dsagt-registry-server. 
@@ -693,12 +870,16 @@ def main(): config_path = project_dir / "dsagt_config.yaml" from dsagt.session import resolve_env_vars + config = resolve_env_vars(yaml.safe_load(config_path.read_text())) emb_config = config["embedding"] from dsagt.observability import init_tracing, configure_litellm_retries - init_tracing("dsagt-registry-server") # session_id picked up from DSAGT_SESSION_ID env + + init_tracing( + "dsagt-registry-server" + ) # session_id picked up from DSAGT_SESSION_ID env configure_litellm_retries() # The KB is optional for the registry server — most tools (save_tool_spec, @@ -729,13 +910,14 @@ def main(): else: # backend == "api" api_key = emb_config.get("api_key") or "" kb_available = ( - api_key and not api_key.startswith("${") - and emb_config.get("base_url") + api_key and not api_key.startswith("${") and emb_config.get("base_url") + ) + embedder_kwargs.update( + { + "base_url": emb_config.get("base_url") or "", + "api_key": api_key, + } ) - embedder_kwargs.update({ - "base_url": emb_config.get("base_url") or "", - "api_key": api_key, - }) kb = None if kb_available: diff --git a/src/dsagt/commands/setup_core_kb.py b/src/dsagt/commands/setup_core_kb.py index 9cf977c..0fa493b 100644 --- a/src/dsagt/commands/setup_core_kb.py +++ b/src/dsagt/commands/setup_core_kb.py @@ -22,7 +22,6 @@ import os import shutil import subprocess -import sys import tarfile import tempfile from pathlib import Path @@ -135,7 +134,9 @@ } -def clone_github(url: str, dest: Path, branch: str = "main", include: list[str] | None = None): +def clone_github( + url: str, dest: Path, branch: str = "main", include: list[str] | None = None +): """Clone a GitHub repo, optionally keeping only specific directories. 
When *include* is set, the named subdirectories are copied AND any @@ -175,7 +176,7 @@ def clone_github(url: str, dest: Path, branch: str = "main", include: list[str] def download_arxiv(paper_id: str, dest: Path): """Download arXiv paper (source if available, else PDF).""" client = httpx.Client(timeout=60.0, follow_redirects=True) - + try: # Try source tarball first response = client.get(f"https://arxiv.org/e-print/{paper_id}") @@ -190,7 +191,7 @@ def download_arxiv(paper_id: str, dest: Path): return except tarfile.ReadError: tar_path.unlink() - + # Fall back to PDF response = client.get(f"https://arxiv.org/pdf/{paper_id}.pdf") response.raise_for_status() @@ -248,6 +249,7 @@ def setup_collection( owned_kb = kb is None if owned_kb: from dsagt.knowledge import KnowledgeBase + kb = KnowledgeBase( index_dir=index_dir, default_embedder=embedding_backend, @@ -257,7 +259,8 @@ def setup_collection( try: result = kb.ingest( download_dir, - exclude_patterns=config.get("exclude_patterns") or DEFAULT_EXCLUDE_PATTERNS, + exclude_patterns=config.get("exclude_patterns") + or DEFAULT_EXCLUDE_PATTERNS, ) skipped = result.get("skipped_files", 0) miss_msg = f", {skipped} file misses" if skipped else "" @@ -275,6 +278,7 @@ def _current_dsagt_version() -> str: """Return the installed dsagt package version, or ``"unknown"`` if absent.""" try: from importlib.metadata import version + return version("dsagt") except Exception: return "unknown" @@ -310,15 +314,18 @@ def add_setup_kb_args(parser): ), ) parser.add_argument( - "--embedding-model", default=None, + "--embedding-model", + default=None, help="Embedding model name (falls back to EMBEDDING_MODEL env var)", ) parser.add_argument( - "--embedding-base-url", default=None, + "--embedding-base-url", + default=None, help="Embedding API base URL (falls back to OPENAI_BASE_URL env var)", ) parser.add_argument( - "--embedding-api-key", default=None, + "--embedding-api-key", + default=None, help="Embedding API key (falls back to LLM_API_KEY / 
OPENAI_API_KEY env var)", ) parser.add_argument( @@ -333,6 +340,12 @@ def add_setup_kb_args(parser): help="Re-ingest collections that already exist in the index directory " "(default: skip existing).", ) + parser.add_argument( + "--no-skill-catalog", + action="store_true", + help="Skip cloning + indexing the default external skill catalog " + "(the K-Dense scientific skills repo).", + ) def run_setup_kb(args): @@ -352,6 +365,7 @@ def run_setup_kb(args): # ``force``, the second basicConfig is a no-op because the root # logger already has handlers, and the INFO-level chatter survives. import logging as _logging + _logging.basicConfig( level=_logging.WARNING, format="%(levelname)s: %(message)s", @@ -368,10 +382,18 @@ def run_setup_kb(args): # clear error up front rather than 5 minutes into the first ingest. embedder_kwargs: dict = {} if args.embedding_backend == "api": - api_key = args.embedding_api_key or os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY") + api_key = ( + args.embedding_api_key + or os.getenv("LLM_API_KEY") + or os.getenv("OPENAI_API_KEY") + ) base_url = args.embedding_base_url or os.getenv("OPENAI_BASE_URL") model = args.embedding_model or os.getenv("EMBEDDING_MODEL") - missing = [n for n, v in [("api key", api_key), ("base URL", base_url), ("model", model)] if not v] + missing = [ + n + for n, v in [("api key", api_key), ("base URL", base_url), ("model", model)] + if not v + ] if missing: raise ValueError( "API embedding backend requires " @@ -388,6 +410,7 @@ def run_setup_kb(args): # to, so we skip init_tracing entirely. @traced decorators inside # KnowledgeBase see no backend and short-circuit cleanly. from dsagt.observability import configure_litellm_retries + configure_litellm_retries() # One KnowledgeBase per setup-kb invocation. 
The embedder cache @@ -399,9 +422,13 @@ def run_setup_kb(args): args.index_dir.mkdir(parents=True, exist_ok=True) from dsagt.knowledge import KnowledgeBase from dsagt.registry import ( - SKILLS_COLLECTION, TOOLS_COLLECTION, - ToolRegistry, SkillRegistry, _parse_frontmatter, + SKILLS_COLLECTION, + TOOLS_COLLECTION, + ToolRegistry, + SkillRegistry, + _parse_frontmatter, ) + shared_kb = KnowledgeBase( index_dir=args.index_dir, default_embedder=args.embedding_backend, @@ -415,11 +442,13 @@ def run_setup_kb(args): current_version = _current_dsagt_version() tool_paths = [ - p for p in sorted(ToolRegistry._PACKAGE_TOOLS_DIR.glob("*.md")) + p + for p in sorted(ToolRegistry._PACKAGE_TOOLS_DIR.glob("*.md")) if _parse_frontmatter(p).get("name") ] skill_dirs = [ - d for d in sorted(SkillRegistry._PACKAGE_SKILLS_DIR.iterdir()) + d + for d in sorted(SkillRegistry._PACKAGE_SKILLS_DIR.iterdir()) if d.is_dir() and (d / "SKILL.md").exists() and _parse_frontmatter(d / "SKILL.md").get("name") @@ -435,14 +464,17 @@ def run_setup_kb(args): shared_kb.add_entries( texts=[p.read_text() for p in tool_paths], collection=TOOLS_COLLECTION, - metadatas=[{ - "tool_name": s["name"], - "tags": ",".join(s.get("tags", [])), - "executable": s.get("executable", ""), - "has_dependencies": str(bool(s.get("dependencies"))), - "source": "bundled", - "dsagt_version": current_version, - } for s in tool_specs], + metadatas=[ + { + "tool_name": s["name"], + "tags": ",".join(s.get("tags", [])), + "executable": s.get("executable", ""), + "has_dependencies": str(bool(s.get("dependencies"))), + "source": "bundled", + "dsagt_version": current_version, + } + for s in tool_specs + ], ) if skill_dirs: @@ -450,28 +482,60 @@ def run_setup_kb(args): shared_kb.add_entries( texts=[(d / "SKILL.md").read_text() for d in skill_dirs], collection=SKILLS_COLLECTION, - metadatas=[{ - "skill_name": s["name"], - "tags": ",".join(s.get("tags", [])), - "source": "bundled", - "dsagt_version": current_version, - } for s in 
skill_specs], + metadatas=[ + { + "skill_name": s["name"], + "tags": ",".join(s.get("tags", [])), + "source": "bundled", + "dsagt_version": current_version, + } + for s in skill_specs + ], ) print(" bundled tools + skills: indexed", flush=True) - collections = {args.collection: COLLECTIONS[args.collection]} if args.collection else COLLECTIONS + # External skill catalog: clone + index the default source(s) so + # ``search_skills`` can browse installable skills out of the box. + # Best-effort — a clone failure (offline, repo moved) warns and + # continues rather than aborting the whole KB build. + if not getattr(args, "no_skill_catalog", False): + from dsagt.commands.skills_catalog import sync_source + from dsagt.session import DEFAULTS + + for src in DEFAULTS["skills"]["sources"]: + try: + stats = sync_source(src, kb=shared_kb, force=args.rebuild) + print( + f" skill catalog {stats['slug']}: {stats['indexed']} indexed", + flush=True, + ) + except Exception as e: # noqa: BLE001 — best-effort, keep going + print( + f" skill catalog {src.get('url', src)}: skipped ({e})", + flush=True, + ) + + collections = ( + {args.collection: COLLECTIONS[args.collection]} + if args.collection + else COLLECTIONS + ) for name, config in collections.items(): target_dir = args.index_dir / name if _collection_exists(target_dir): if not args.rebuild: - print(f" {name}: already indexed (use --rebuild to force)", - flush=True) + print( + f" {name}: already indexed (use --rebuild to force)", + flush=True, + ) continue shutil.rmtree(target_dir) setup_collection( - name, config, args.index_dir, + name, + config, + args.index_dir, embedder_kwargs=embedder_kwargs, embedding_backend=args.embedding_backend, vector_db=args.vector_db, diff --git a/src/dsagt/commands/skills_catalog.py b/src/dsagt/commands/skills_catalog.py new file mode 100644 index 0000000..c375a3b --- /dev/null +++ b/src/dsagt/commands/skills_catalog.py @@ -0,0 +1,277 @@ +""" +External skill catalog — fetch Agent-Skills repos, 
index, install. + +Two tiers (see the skill-management plan): + +* **Catalog** — every skill in a configured GitHub source repo, indexed + into a per-source ``skills_catalog__`` KB collection. Searchable + via ``search_skills``, but NOT copied locally and NOT loaded into the + agent's context. This is the one job native skill discovery can't do + (you can't hold thousands of skill descriptions in context). +* **Installed** — a chosen skill copied into ``/skills//``. + The agent setup then mirrors it into ``.claude/skills/`` for native + discovery (see ``agents.base._mirror_skills_to``). + +Re-sync is idempotent by dropping the per-source collection directory and +rebuilding it — no delete-by-metadata primitive required. + +``clone_github`` is imported lazily inside :func:`sync_source` to avoid an +import cycle with ``setup_core_kb`` (which calls back into ``sync_source``). +""" + +from __future__ import annotations + +import logging +import re +import shutil +from pathlib import Path + +from dsagt.registry import _parse_frontmatter, catalog_collection +from dsagt.session import REGISTRY_DIR + +logger = logging.getLogger(__name__) + +#: Default source enabled out of the box (matches dsagt_config.yaml default). +DEFAULT_SOURCE = "scientific" + +#: Curated, named skill sources. ``subdir`` scopes the recursive SKILL.md +#: walk when set (cheaper clone); when omitted the whole repo is cloned and +#: walked, which is robust to category-nested layouts. 
+KNOWN_SOURCES: dict[str, dict] = { + "scientific": { + "url": "https://github.com/K-Dense-AI/scientific-agent-skills", + "branch": "main", + "subdir": "skills", + "description": "K-Dense scientific agent skills — chem/bio/medicine/materials (140+).", + }, + "anthropic": { + "url": "https://github.com/anthropics/skills", + "branch": "main", + "subdir": "skills", + "description": "Official Anthropic skills + document-editing examples.", + }, + "antigravity": { + "url": "https://github.com/sickn33/antigravity-awesome-skills", + "branch": "main", + "subdir": None, + "description": "Antigravity Awesome Skills — 1,500+ cross-platform agentic skills.", + }, + "composio": { + "url": "https://github.com/ComposioHQ/awesome-claude-skills", + "branch": "master", + "subdir": None, + "description": "Composio awesome-claude-skills — workflow skills for many SaaS apps.", + }, +} + +#: Shared, machine-global cache of cloned source repos (sibling of kb_index/). +SKILL_SOURCES_DIR = REGISTRY_DIR / ".skill_sources" + + +# --------------------------------------------------------------------------- +# Source resolution + slugging +# --------------------------------------------------------------------------- + + +def resolve_source(source: str | dict) -> dict: + """Resolve a known-source name, a GitHub URL, or a full spec dict. + + Returns a dict with at least ``url``; optional ``branch`` / ``subdir``. + """ + if isinstance(source, dict): + if not source.get("url"): + raise ValueError("source dict must include a 'url'") + return source + if source in KNOWN_SOURCES: + return dict(KNOWN_SOURCES[source]) + if source.startswith(("http://", "https://", "git@")) or source.count("/") == 1: + # Full URL or ``owner/repo`` shorthand. + url = ( + source + if "://" in source or source.startswith("git@") + else f"https://github.com/{source}" + ) + return {"url": url, "branch": "main", "subdir": None} + raise ValueError( + f"Unknown skill source '{source}'. 
Use a known name " + f"({', '.join(sorted(KNOWN_SOURCES))}), a GitHub URL, or owner/repo." + ) + + +def _repo_slug(url: str) -> str: + """Stable, collection-name-safe slug from a GitHub URL (``owner-repo``).""" + s = url.rstrip("/") + s = re.sub(r"^https?://github\.com/", "", s) + s = re.sub(r"^git@github\.com:", "", s) + s = re.sub(r"\.git$", "", s).lower() + s = re.sub(r"[^a-z0-9]+", "-", s).strip("-") + return s[:40] + + +# --------------------------------------------------------------------------- +# Discovery +# --------------------------------------------------------------------------- + + +def _discover_skill_dirs(root: Path) -> list[Path]: + """Recursively find skill directories (any dir holding a parseable SKILL.md). + + Recursive so both flat (``skills//SKILL.md``) and category-nested + (``skills///SKILL.md``) repo layouts work. A directory + qualifies only if its SKILL.md has YAML frontmatter with a ``name``. + """ + out: list[Path] = [] + if not root.exists(): + return out + for skill_md in sorted(root.rglob("SKILL.md")): + try: + spec = _parse_frontmatter(skill_md) + except ValueError as e: # malformed frontmatter — skip, don't abort + logger.warning("skipping %s: %s", skill_md, e) + continue + if spec.get("name"): + out.append(skill_md.parent) + return out + + +# --------------------------------------------------------------------------- +# Sync (clone + index) +# --------------------------------------------------------------------------- + + +def sync_source( + source: str | dict, + *, + kb=None, + cache_dir: Path = SKILL_SOURCES_DIR, + force: bool = False, +) -> dict: + """Clone *source* into the cache and (re)index its skills into the catalog. + + ``force`` re-clones from scratch. Indexing wipes and rebuilds only this + source's ``skills_catalog__`` collection, so other catalogs and the + installed/bundled ``skills`` collection are untouched. When *kb* is None + the clone still happens (so ``install`` works offline-of-KB) but nothing + is indexed. 
+ """ + spec = resolve_source(source) + slug = _repo_slug(spec["url"]) + dest = cache_dir / slug + + if force and dest.exists(): + shutil.rmtree(dest) + if not dest.exists(): + from dsagt.commands.setup_core_kb import clone_github # lazy: break cycle + + dest.mkdir(parents=True, exist_ok=True) + subdir = spec.get("subdir") + include = [subdir] if subdir else None + clone_github( + spec["url"], dest, branch=spec.get("branch", "main"), include=include + ) + + walk_root = dest / spec["subdir"] if spec.get("subdir") else dest + skill_dirs = _discover_skill_dirs(walk_root) + indexed = index_catalog(skill_dirs, slug, spec["url"], kb) if kb is not None else 0 + if kb is not None and not skill_dirs: + logger.warning( + "source %s yielded no SKILL.md skills under %s", spec["url"], walk_root + ) + + return { + "slug": slug, + "url": spec["url"], + "discovered": len(skill_dirs), + "indexed": indexed, + "cache_dir": str(dest), + } + + +def index_catalog(skill_dirs: list[Path], slug: str, url: str, kb) -> int: + """Wipe + rebuild source *slug*'s catalog collection from *skill_dirs*.""" + collection = catalog_collection(slug) + coll_dir = Path(kb.index_dir) / collection + if coll_dir.exists(): + shutil.rmtree(coll_dir) + + texts: list[str] = [] + metas: list[dict] = [] + for d in skill_dirs: + skill_md = d / "SKILL.md" + spec = _parse_frontmatter(skill_md) + name = spec.get("name") or d.name + texts.append(skill_md.read_text()) + metas.append( + { + "skill_name": name, + "tags": ",".join(spec.get("tags") or []), + "source": f"catalog:{slug}", + "source_url": url, + "cache_path": str(d), + } + ) + if texts: + kb.add_entries(texts=texts, collection=collection, metadatas=metas) + return len(texts) + + +# --------------------------------------------------------------------------- +# Lookup + install +# --------------------------------------------------------------------------- + + +def find_catalog_skill(name: str, *, cache_dir: Path = SKILL_SOURCES_DIR) -> Path: + """Locate a 
cached catalog skill dir by name across all synced sources. + + Matches on frontmatter ``name`` first, then directory name. Raises on + no match, or on an ambiguous match spanning more than one source repo. + """ + matches: list[Path] = [] + if cache_dir.exists(): + for slug_dir in sorted(p for p in cache_dir.iterdir() if p.is_dir()): + for d in _discover_skill_dirs(slug_dir): + spec = _parse_frontmatter(d / "SKILL.md") + if spec.get("name") == name or d.name == name: + matches.append(d) + if not matches: + raise LookupError( + f"No catalog skill named '{name}'. Run 'dsagt skills sync' or " + f"add_skill_source first, then search_skills to find one." + ) + # Collapse matches that point at the same source repo (slug = first path + # part under cache_dir); ambiguity only matters across different sources. + by_source = {p.relative_to(cache_dir).parts[0]: p for p in matches} + if len(by_source) > 1: + raise LookupError( + f"Skill '{name}' exists in multiple sources " + f"({', '.join(sorted(by_source))}); install by source with " + f"'dsagt skills add /{name}'." + ) + return next(iter(by_source.values())) + + +def install_into_project( + name: str, project_dir: str | Path, *, cache_dir: Path = SKILL_SOURCES_DIR +) -> dict: + """Copy a catalog skill into ``/skills//`` (with scripts/refs). + + The destination directory is named after the skill's frontmatter ``name`` + (falling back to its source dir name) so it matches the invocable name in + native discovery. Returns ``{name, source_dir, dest_dir, action}``. 
+ """ + src = find_catalog_skill(name, cache_dir=cache_dir) + spec = _parse_frontmatter(src / "SKILL.md") + skill_name = spec.get("name") or src.name + + dest = Path(project_dir) / "skills" / skill_name + action = "updated" if dest.exists() else "added" + if dest.exists(): + shutil.rmtree(dest) + shutil.copytree(src, dest) + + return { + "name": skill_name, + "source_dir": str(src), + "dest_dir": str(dest), + "action": action, + } diff --git a/src/dsagt/dsagt_instructions.md b/src/dsagt/dsagt_instructions.md index d2bcfd6..385c762 100644 --- a/src/dsagt/dsagt_instructions.md +++ b/src/dsagt/dsagt_instructions.md @@ -25,6 +25,8 @@ Before implementing anything, search for existing capabilities: - `search_skills(query)` — find agent skills (workflows, templates, procedures) - `get_registry()` — list all registered tools +**Skills come in two tiers.** *Installed* skills (in this project) are discovered **natively** by your platform — their names/descriptions are already in your context and you auto-invoke them; you do NOT need `search_skills` to find those. Use `search_skills` to browse the much larger *external catalog* of installable skills (entries marked `[catalog]`), which are NOT loaded into context. To add a catalog skill to the project, call `install_skill(skill_name=...)`; it becomes natively available after the next session restart. To enable another catalog source (a known name like `scientific`/`anthropic`, or a GitHub URL), call `add_skill_source(source=...)`. To author a brand-new skill, use the bundled `skill-creator` skill. + **When the user indicates they want a specific tool used** — phrasings like "use tool `foo`", "use `foo` from the registry", "run `foo`", or similar — look it up first (`search_registry(tool_name=...)` for exact match, `get_registry()` to browse). Read the returned spec's `executable` field and each parameter's `cli` field, then invoke via your shell. Do not substitute your own file/shell tools for a task a registered tool can do. 
(See section 1b for the verbatim-`executable` rule.) **Rendering parameters**: each parameter's `cli` field pins exactly how its value goes on the command line. Emit positional args first (in position order), then named args. Skip optional parameters whose value is absent; use the `default` when present. diff --git a/src/dsagt/registry.py b/src/dsagt/registry.py index 7e91aed..a615672 100644 --- a/src/dsagt/registry.py +++ b/src/dsagt/registry.py @@ -34,6 +34,19 @@ TOOLS_COLLECTION = "tools" SKILLS_COLLECTION = "skills" +#: External skill catalogs (fetched from GitHub repos) live in their own +#: per-source collections named ``skills_catalog__``. Keeping each +#: source in its own collection lets a re-sync drop+rebuild one source's +#: directory without disturbing bundled/registered skills (the ``skills`` +#: collection) or other catalogs — no delete-by-metadata primitive needed. +CATALOG_COLLECTION_PREFIX = "skills_catalog__" + + +def catalog_collection(slug: str) -> str: + """KB collection name holding the indexed catalog for source *slug*.""" + return f"{CATALOG_COLLECTION_PREFIX}{slug}" + + #: Backwards-compat aliases — kept so external code that imported the #: previous names still resolves. New code should use the names above. 
TOOL_REGISTRY_COLLECTION = TOOLS_COLLECTION @@ -44,6 +57,7 @@ # Helpers (tools only) # --------------------------------------------------------------------------- + def _uv_run_prefix(deps: list[str]) -> str: """Build a 'uv run --with dep1,dep2 --' prefix for Python dependencies.""" if not deps: @@ -77,7 +91,9 @@ def _generate_tool_body(spec: dict) -> str: for name, p in params.items(): req = "yes" if p.get("required") else "no" default = p.get("default", "—") - lines.append(f"| `{name}` | {req} | {default} | {p.get('description', '')} |\n") + lines.append( + f"| `{name}` | {req} | {default} | {p.get('description', '')} |\n" + ) return "".join(lines) @@ -115,6 +131,7 @@ def _parse_frontmatter(path: Path) -> dict: # Parameters with `type: boolean` render as a bare flag when truthy and emit # nothing when falsy; positional booleans are not supported. + def _parse_cli(cli: str, param_name: str) -> dict: """Classify a cli string into a rendering descriptor. Fails fast on invalid input.""" if cli == "positional": @@ -187,6 +204,7 @@ def render_arguments(parameters: dict, values: dict) -> list[str]: # Tool Registry # --------------------------------------------------------------------------- + class ToolRegistry: """ Manages CLI tool spec files and optional KB indexing. 
@@ -283,15 +301,17 @@ def list_tools(self) -> list[dict]: properties[param_name]["default"] = param_def["default"] if param_def.get("required", False): required.append(param_name) - tools.append({ - "name": tool["name"], - "description": tool["description"], - "inputSchema": { - "type": "object", - "properties": properties, - "required": required, - }, - }) + tools.append( + { + "name": tool["name"], + "description": tool["description"], + "inputSchema": { + "type": "object", + "properties": properties, + "required": required, + }, + } + ) return tools def get_tool(self, name: str) -> dict | None: @@ -322,7 +342,9 @@ def save_tool(self, spec: dict) -> str: spec = dict(spec) spec["executable"] = _wrap_executable( - spec["name"], spec["executable"], spec.get("dependencies"), + spec["name"], + spec["executable"], + spec.get("dependencies"), ) # Preserve existing body when updating so hand-edited docs survive @@ -389,6 +411,7 @@ def reindex_all(self) -> int: # Skill Registry # --------------------------------------------------------------------------- + class SkillRegistry: """ Manages instruction-based agent skills and optional KB indexing. @@ -435,14 +458,16 @@ def _bundled_skill_dirs(self) -> list[Path]: if not self._bundled_dir.exists(): return [] return [ - d for d in sorted(self._bundled_dir.iterdir()) + d + for d in sorted(self._bundled_dir.iterdir()) if d.is_dir() and (d / "SKILL.md").exists() ] def _project_skill_dirs(self) -> list[Path]: """Return skill directories the agent has saved into this project.""" return [ - d for d in sorted(self.skills_dir.iterdir()) + d + for d in sorted(self.skills_dir.iterdir()) if d.is_dir() and (d / "SKILL.md").exists() ] diff --git a/src/dsagt/session.py b/src/dsagt/session.py index 6ae19b3..0679c2f 100644 --- a/src/dsagt/session.py +++ b/src/dsagt/session.py @@ -53,7 +53,7 @@ # ``dsagt setup-kb``). Migrated from ``~/.dsagt/`` on 2026-05-07. 
REGISTRY_DIR = DEFAULT_PROJECTS_BASE REGISTRY_FILE = REGISTRY_DIR / "projects.yaml" -RESERVED_PROJECT_NAMES = ("projects.yaml", "kb_index") +RESERVED_PROJECT_NAMES = ("projects.yaml", "kb_index", ".skill_sources") DEFAULTS = { # ``llm`` block uses ${VAR} placeholders so per-project config @@ -103,6 +103,24 @@ "vector_db": "chroma", "rerank": False, }, + # External agent-skill catalogs. ``sources`` are GitHub repos whose + # SKILL.md skills get indexed into per-source catalog collections for + # ``search_skills`` (searchable but NOT loaded into agent context). + # The agent installs a chosen one via the ``install_skill`` MCP tool; + # the agent setup then mirrors installed + bundled skills into the + # platform's native skill dir (e.g. ``.claude/skills/``). + "skills": { + "sources": [ + { + "name": "scientific", + "url": "https://github.com/K-Dense-AI/scientific-agent-skills", + "branch": "main", + "subdir": "skills", + }, + ], + "populate_catalog": True, # index sources into the catalog at setup-kb + "populate_native": True, # mirror installed+bundled into .claude/skills + }, } _ENV_VAR_RE = re.compile(r"\$\{(\w+)\}") @@ -112,6 +130,7 @@ # Config helpers # --------------------------------------------------------------------------- + def resolve_env_vars(value): """Replace ${VAR_NAME} references with environment variable values.""" if isinstance(value, str): @@ -157,6 +176,7 @@ def default_config_content( "knowledge": DEFAULTS["knowledge"], "categories": DEFAULTS["categories"], "extraction": DEFAULTS["extraction"], + "skills": DEFAULTS["skills"], } return yaml.dump(body, default_flow_style=False, sort_keys=False) @@ -165,6 +185,7 @@ def default_config_content( # Project registry # --------------------------------------------------------------------------- + def _load_registry() -> dict[str, str]: """Load the project registry. 
Returns empty dict if no registry exists.""" if not REGISTRY_FILE.exists(): @@ -190,6 +211,34 @@ def list_projects() -> dict[str, str]: return _load_registry() +def kb_from_config(config: dict, index_dir: Path | None = None) -> "KnowledgeBase": + """Build a KnowledgeBase from a resolved project config. + + Mirrors the embedding-backend resolution used by ``extract_session`` so + callers (CLI ``skills`` group, catalog sync) get a KB wired to the same + embedder the project uses. Defaults to ``<project_dir>/kb_index``. + """ + pdir = Path(config["project_dir"]) + emb = config.get("embedding", {}) + backend = emb.get("backend", "local") + if backend == "local": + model = emb.get("model") + if model and "/" not in str(model): + model = None + embedder_kwargs = {"model": model} + else: + embedder_kwargs = { + "model": emb.get("model"), + "base_url": emb.get("base_url"), + "api_key": os.environ.get("EMBEDDING_API_KEY", ""), + } + return KnowledgeBase( + index_dir=index_dir or (pdir / "kb_index"), + default_embedder=backend, + embedder_kwargs=embedder_kwargs, + ) + + def project_dir(name: str) -> Path: """Resolve a project name to its directory via the registry.""" registry = _load_registry() @@ -207,6 +256,7 @@ def project_dir(name: str) -> Path: # Config loading # --------------------------------------------------------------------------- + def load_config(project_name: str) -> dict: """Load and validate a project config by name. 
@@ -242,13 +292,16 @@ def _validate(config: dict) -> None: backend = config.get("mlflow", {}).get("backend") if backend and backend not in VALID_MLFLOW_BACKENDS: - raise ValueError(f"'mlflow.backend' must be one of {VALID_MLFLOW_BACKENDS}, got '{backend}'") + raise ValueError( + f"'mlflow.backend' must be one of {VALID_MLFLOW_BACKENDS}, got '{backend}'" + ) # --------------------------------------------------------------------------- # Project initialization # --------------------------------------------------------------------------- + def _collection_exists(path: Path) -> bool: """Return True if *path* looks like a persisted KB collection directory. @@ -256,14 +309,11 @@ def _collection_exists(path: Path) -> bool: collections, and bare ChromaDB sqlite files (as produced by ``dsagt setup-kb`` for description-only collections). """ - return ( - path.is_dir() - and ( - (path / "index.faiss").exists() - or (path / "chroma_ids.json").exists() - or (path / "route.json").exists() - or (path / "chroma.sqlite3").exists() - ) + return path.is_dir() and ( + (path / "index.faiss").exists() + or (path / "chroma_ids.json").exists() + or (path / "route.json").exists() + or (path / "chroma.sqlite3").exists() ) @@ -390,7 +440,9 @@ def persist_agent_choice(project_name: str, agent: str) -> None: "# ollama, mistral, groq, deepseek.\n" "# Full list: https://docs.litellm.ai/docs/providers\n" ) - yaml_path.write_text(header + yaml.dump(raw, default_flow_style=False, sort_keys=False)) + yaml_path.write_text( + header + yaml.dump(raw, default_flow_style=False, sort_keys=False) + ) def move_project(project_name: str, new_location: Path) -> Path: @@ -434,6 +486,7 @@ def remove_project(project_name: str, keep_files: bool = False) -> Path: # Service start / stop # --------------------------------------------------------------------------- + def _embedding_provider(config: dict) -> str: """Resolve embedding provider with a fallback for two cases: @@ -481,12 +534,20 @@ def 
mlflow_command(pdir: Path, mlflow_config: dict, port: int) -> list[str]: else str(mlflow_dir) ) return [ - sys.executable, "-m", "mlflow", "server", - "--backend-store-uri", backend_uri, - "--default-artifact-root", str(mlflow_dir / "artifacts"), - "--host", "0.0.0.0", - "--port", str(port), - "--workers", "1", + sys.executable, + "-m", + "mlflow", + "server", + "--backend-store-uri", + backend_uri, + "--default-artifact-root", + str(mlflow_dir / "artifacts"), + "--host", + "0.0.0.0", + "--port", + str(port), + "--workers", + "1", ] @@ -508,7 +569,10 @@ def _process_command(pid: int) -> str: try: result = subprocess.run( ["ps", "-p", str(pid), "-o", "command="], - capture_output=True, text=True, check=False, timeout=2.0, + capture_output=True, + text=True, + check=False, + timeout=2.0, ) except (FileNotFoundError, subprocess.TimeoutExpired): return "" @@ -555,7 +619,9 @@ def reap_runtime(runtime_file: Path) -> list[str]: for name, (pid, pgid) in pending.items(): try: os.killpg(pgid, signal.SIGKILL) - stopped.append(f"Stopped {name} (pid {pid}, SIGKILL after {_STOP_GRACE_SECONDS}s)") + stopped.append( + f"Stopped {name} (pid {pid}, SIGKILL after {_STOP_GRACE_SECONDS}s)" + ) except ProcessLookupError: stopped.append(f"Stopped {name} (pid {pid})") @@ -619,7 +685,8 @@ def start_services(config: dict) -> dict[str, int]: ) logger.info( "MLflow started (pid %d) → http://localhost:%d", - mlflow_proc.pid, mlflow_port, + mlflow_proc.pid, + mlflow_port, ) pids = {"mlflow": mlflow_proc.pid} @@ -635,11 +702,17 @@ def start_services(config: dict) -> dict[str, int]: pids["proxy"] = proxy_proc.pid ports["proxy"] = proxy_port - runtime_file.write_text(json.dumps({ - "pids": pids, - "ports": ports, - "started_at": datetime.now(timezone.utc).isoformat(), - }, indent=2) + "\n") + runtime_file.write_text( + json.dumps( + { + "pids": pids, + "ports": ports, + "started_at": datetime.now(timezone.utc).isoformat(), + }, + indent=2, + ) + + "\n" + ) if not proxy_requested: 
_wait_for_mlflow(mlflow_port, mlflow_proc, mlflow_log, timeout=30.0) @@ -650,7 +723,11 @@ def start_services(config: dict) -> dict[str, int]: def _start_proxy( - config: dict, pdir: Path, mlflow_port: int, proxy_port: int, session_id: str, + config: dict, + pdir: Path, + mlflow_port: int, + proxy_port: int, + session_id: str, ) -> subprocess.Popen: """Spawn the dsagt-proxy subprocess. @@ -671,15 +748,25 @@ def _start_proxy( ) cmd = [ - sys.executable, "-m", "dsagt.commands.proxy_server", - "--port", str(proxy_port), - "--mlflow-url", f"http://localhost:{mlflow_port}", - "--project", config["project"], - "--session", session_id, - "--records-dir", str(pdir / "trace_archive"), - "--model", llm["model"], - "--base-url", llm["base_url"], - "--provider", llm["provider"], + sys.executable, + "-m", + "dsagt.commands.proxy_server", + "--port", + str(proxy_port), + "--mlflow-url", + f"http://localhost:{mlflow_port}", + "--project", + config["project"], + "--session", + session_id, + "--records-dir", + str(pdir / "trace_archive"), + "--model", + llm["model"], + "--base-url", + llm["base_url"], + "--provider", + llm["provider"], ] # Embedding routing through the proxy is only relevant when the # project's embedding backend is ``api`` — in ``local`` mode the @@ -693,11 +780,16 @@ def _start_proxy( f"--enable-proxy with embedding.backend=api needs " f"config.embedding.{required} (got {emb.get(required)!r})" ) - cmd.extend([ - "--embedding-model", emb["model"], - "--embedding-base-url", emb["base_url"], - "--embedding-provider", emb["provider"], - ]) + cmd.extend( + [ + "--embedding-model", + emb["model"], + "--embedding-base-url", + emb["base_url"], + "--embedding-provider", + emb["provider"], + ] + ) proxy_log = pdir / "proxy.log" # The proxy needs the *real* upstream credentials in env (not the # sentinel agents see). 
os.environ already has them from the user's @@ -719,13 +811,17 @@ def _start_proxy( ) logger.info( "Proxy started (pid %d) → http://localhost:%d", - proxy_proc.pid, proxy_port, + proxy_proc.pid, + proxy_port, ) return proxy_proc def _wait_for_proxy( - port: int, proc: subprocess.Popen, log_path: Path, timeout: float = 45.0, + port: int, + proc: subprocess.Popen, + log_path: Path, + timeout: float = 45.0, ) -> None: """Poll *port* until the proxy answers, the subprocess dies, or we time out. @@ -755,7 +851,10 @@ def _wait_for_proxy( def _wait_for_mlflow( - port: int, proc: subprocess.Popen, log_path: Path, timeout: float = 30.0, + port: int, + proc: subprocess.Popen, + log_path: Path, + timeout: float = 30.0, ) -> None: """Poll *port* until MLflow answers, the subprocess dies, or we time out. @@ -787,11 +886,11 @@ def stop_services(project_name: str) -> list[str]: return reap_runtime(project_dir(project_name) / ".runtime") - # --------------------------------------------------------------------------- # Memory extraction orchestration # --------------------------------------------------------------------------- + def run_extraction(project_name: str) -> dict: """Two-phase post-session work, both best-effort. @@ -857,7 +956,8 @@ def run_extraction(project_name: str) -> dict: mlflow_port = config.get("mlflow", {}).get("port") mlflow_uri = ( - f"http://localhost:{mlflow_port}" if mlflow_port + f"http://localhost:{mlflow_port}" + if mlflow_port else os.environ.get("MLFLOW_TRACKING_URI") ) try: diff --git a/src/dsagt/skills/skill-creator/SKILL.md b/src/dsagt/skills/skill-creator/SKILL.md new file mode 100644 index 0000000..2f20e0f --- /dev/null +++ b/src/dsagt/skills/skill-creator/SKILL.md @@ -0,0 +1,65 @@ +--- +name: skill-creator +description: "Author a new agent Skill (a SKILL.md directory in the open Agent Skills format) from the Anthropic template. 
Use when the user wants to create a skill, scaffold a SKILL.md, package a repeatable workflow as a reusable skill, turn instructions into a skill, or capture a procedure so the agent can auto-invoke it later. Produces a valid SKILL.md (name + description frontmatter, optional scripts/ and references/) and saves it into the project's skills directory." +metadata: + version: "1.0" +--- + +# Skill Creator + +Scaffold a new, spec-valid agent Skill from the Anthropic template and save it so the agent can discover and auto-invoke it. + +A *skill* is a directory `<name>/SKILL.md`: YAML frontmatter (`name`, `description`) plus markdown instructions the agent follows when the description matches the task. It can bundle `scripts/` and `references/`. See [references/agent_skills_spec.md](references/agent_skills_spec.md) for the full contract. + +## Workflow + +Copy this checklist and check off steps as you go: + +``` +Progress: +- [ ] 1. Gather skill intent (name, purpose, triggers) +- [ ] 2. Draft from the template +- [ ] 3. Write the body (instructions/workflow) +- [ ] 4. Add scripts/ and references/ if needed +- [ ] 5. Validate the frontmatter +- [ ] 6. Save into the project (save_skill) +- [ ] 7. Confirm + note how it activates +``` + +### 1. Gather Intent + +Ask the user (or infer from context): +- **name** — short, lowercase, hyphenated (e.g. `convert-vasp-outputs`). This becomes the directory name and the invocable name. +- **purpose** — one sentence on what the skill does. +- **triggers** — the user requests / phrasing that should make the agent reach for this skill. These become keywords in the `description`. + +### 2. Draft From the Template + +Start from [references/SKILL_template.md](references/SKILL_template.md). Fill the frontmatter: +- `name`: must equal the directory name. +- `description`: pack it with *what it does AND when to use it* (trigger phrases) — this is the only thing the agent sees when deciding to invoke. Keep it ≤ 1536 characters. + +### 3. 
Write the Body + +After the frontmatter, write the instructions the agent will follow. Prefer a copyable checklist (like this one) for multi-step workflows. Reference bundled files by relative path, e.g. `[reference](references/notes.md)`, or run a bundled script with `${CLAUDE_SKILL_DIR}/scripts/foo.py` so paths resolve regardless of cwd. + +### 4. Add Supporting Files (optional) + +- `scripts/` — runnable helpers the body invokes. +- `references/` — long docs/templates loaded on demand (keep them OUT of SKILL.md so they cost no tokens until used). + +### 5. Validate + +Confirm before saving: +- Frontmatter is valid YAML between `---` fences. +- `name` is present, lowercase-hyphenated, and equals the intended directory name. +- `description` is present and ≤ 1536 characters. +- Any `[link](references/...)` and `${CLAUDE_SKILL_DIR}/scripts/...` paths exist. + +### 6. Save + +Save via the **`save_skill`** MCP tool (registry server) with the `spec` (frontmatter dict: `name`, `description`, optional `tags`), the `body` markdown, and any `reference_files` (a `{relative_path: contents}` map). This writes `<project_dir>/skills/<name>/` and indexes it for `search_skills`. + +### 7. Confirm + +Tell the user the skill was saved and how it activates: project skills are mirrored into the platform's native skill directory (e.g. `.claude/skills/`) at the next `dsagt start`, after which the agent auto-discovers it. To use it in the current session, restart the agent. diff --git a/src/dsagt/skills/skill-creator/references/SKILL_template.md b/src/dsagt/skills/skill-creator/references/SKILL_template.md new file mode 100644 index 0000000..99db025 --- /dev/null +++ b/src/dsagt/skills/skill-creator/references/SKILL_template.md @@ -0,0 +1,53 @@ +# SKILL.md template + +Copy the block below into `<skill-name>/SKILL.md` and fill it in. Only the +frontmatter `name` + `description` are required; everything else is +optional. 
(Based on the Anthropic skill template / open Agent Skills +standard — https://github.com/anthropics/skills/tree/main/template.) + +```markdown +--- +name: my-skill-name +description: A clear description of WHAT this skill does and WHEN to use it — include the user phrasings/triggers that should invoke it. (≤ 1536 chars; this is the only text the agent sees when deciding to invoke.) +# optional: +# tags: [domain, keyword] +# metadata: +# version: "1.0" +--- + +# My Skill Name + +One or two sentences framing the task this skill handles. + +## Workflow + +Copy this checklist and check off steps as you go: + +``` +Progress: +- [ ] 1. ... +- [ ] 2. ... +``` + +### 1. ... + +Step-by-step instructions. Reference bundled docs by relative path: +[details](references/details.md). Run bundled scripts with an absolute +skill-dir path so cwd doesn't matter: + + python ${CLAUDE_SKILL_DIR}/scripts/helper.py + +## Notes / Guidelines +- ... +``` + +## Optional bundled files + +``` +my-skill-name/ +├── SKILL.md (required) +├── references/ (long docs/templates, loaded on demand) +│ └── details.md +└── scripts/ (runnable helpers the body invokes) + └── helper.py +``` diff --git a/src/dsagt/skills/skill-creator/references/agent_skills_spec.md b/src/dsagt/skills/skill-creator/references/agent_skills_spec.md new file mode 100644 index 0000000..bd3f79a --- /dev/null +++ b/src/dsagt/skills/skill-creator/references/agent_skills_spec.md @@ -0,0 +1,48 @@ +# Agent Skills — condensed contract + +A *skill* packages instructions (and optionally code/docs) so an agent can +discover and follow a repeatable workflow. dsagt skills follow the open +Agent Skills standard, which is what Claude Code, Cursor, Codex, and +Antigravity all read — so one SKILL.md works across platforms. 
+ +## Directory layout + +``` +<skill-name>/ +├── SKILL.md # required — frontmatter + instructions +├── references/ # optional — docs/templates loaded on demand +└── scripts/ # optional — runnable helpers +``` + +- The **directory name is the invocable name** (e.g. `.claude/skills/deploy/` → `/deploy`). Keep it lowercase, hyphenated. + +## Frontmatter + +YAML between `---` fences. Common fields: + +| Field | Required | Notes | +|-------|----------|-------| +| `name` | recommended | Should equal the directory name. Lowercase-hyphenated. | +| `description` | **yes (in practice)** | What it does AND when to use it (trigger phrases). The agent sees only this when deciding to invoke. **≤ 1536 characters.** | +| `tags` | no | List of keywords; dsagt uses these for `search_skills` tag filters. | +| `metadata` | no | Free-form (e.g. `version`). Ignored by the platform. | +| `license` | no | Free-form. Ignored by the platform. | + +Unknown/extra frontmatter fields are **silently ignored** by Claude Code, so dsagt-specific fields are safe to include. + +## How discovery works + +- At session start, each installed skill's `name` + `description` are loaded into the agent's context. The full SKILL.md body loads only when the skill is invoked (lazy — zero cost until used). +- The agent auto-invokes a skill when the `description` matches the task; the user can also invoke it directly (`/skill-name`). +- A **newly created** top-level skills directory is only picked up after the agent restarts. + +## Body conventions + +- Lead with a copyable progress checklist for multi-step workflows. +- Keep long material in `references/` (loaded on demand) rather than inline, to save context tokens. +- Reference bundled files by relative path, or run scripts via `${CLAUDE_SKILL_DIR}/scripts/...` so paths resolve regardless of working directory. + +## Two tiers in dsagt + +- **Catalog** — skills indexed from external GitHub source repos, searchable via `search_skills` but not installed. Not in context. 
+- **Installed** — skills in `<project_dir>/skills/` (saved via `save_skill` or installed via `install_skill`). Mirrored into the platform's native skill dir at `dsagt start`, then natively discovered. diff --git a/tests/test_config.py b/tests/test_config.py index b616597..adfc439 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -164,6 +164,20 @@ def test_missing_project_raises(self, tmp_path): with pytest.raises(ValueError, match="project"): load_config(name) + def test_skills_block_backfilled_for_old_config(self, tmp_path): + """A config written before the skills block still gets the default.""" + name = self._write_config( + tmp_path, + "myproject", + {"project": "myproject", "agent": "claude", "llm": {"provider": "openai"}}, + ) + config = load_config(name) + sources = config["skills"]["sources"] + assert sources[0]["name"] == "scientific" + assert "K-Dense-AI" in sources[0]["url"] + assert config["skills"]["populate_native"] is True + assert config["skills"]["populate_catalog"] is True + def test_missing_agent_raises(self, tmp_path): name = self._write_config(tmp_path, "myproject", {"project": "myproject"}) with pytest.raises(ValueError, match="agent"): @@ -194,6 +208,18 @@ def test_project_dir_injected(self, tmp_path): assert config["project_dir"] == str(tmp_path / "myproject") +class TestSkillsDefaults: + + def test_defaults_has_skills(self): + from dsagt.session import DEFAULTS + + assert DEFAULTS["skills"]["sources"][0]["name"] == "scientific" + + def test_default_config_content_includes_skills(self): + body = yaml.safe_load(default_config_content("p", "claude", 5001)) + assert body["skills"]["sources"][0]["name"] == "scientific" + + # --------------------------------------------------------------------------- # Config: helpers # --------------------------------------------------------------------------- diff --git a/tests/test_knowledge_server.py b/tests/test_knowledge_server.py index 1504490..a05c35f 100644 --- a/tests/test_knowledge_server.py +++ 
b/tests/test_knowledge_server.py @@ -34,7 +34,9 @@ async def _call_tool_async(server, name: str, arguments: dict) -> dict: return json.loads(result.root.content[0].text) -async def call_tool_and_await_job(server, name: str, arguments: dict) -> tuple[dict, dict]: +async def call_tool_and_await_job( + server, name: str, arguments: dict +) -> tuple[dict, dict]: """Call a tool that starts a background job, wait for it, return (initial, final).""" initial = await _call_tool_async(server, name, arguments) assert initial["status"] == "started" @@ -50,7 +52,9 @@ async def call_tool_and_await_job(server, name: str, arguments: dict) -> tuple[d raise TimeoutError(f"Job {job_id} did not complete") -def make_search_result(text: str, source_file: str, chunk_index: int = 0, score: float = 0.9): +def make_search_result( + text: str, source_file: str, chunk_index: int = 0, score: float = 0.9 +): """Create a search result in the format KnowledgeBase.search returns.""" return { "chunk": { @@ -70,6 +74,7 @@ def make_search_result(text: str, source_file: str, chunk_index: int = 0, score: # Fixtures # --------------------------------------------------------------------------- + @pytest.fixture def mock_kb(tmp_path): """A mocked KnowledgeBase with default behaviors.""" @@ -86,7 +91,12 @@ def mock_kb(tmp_path): make_search_result("Second result text", "/path/to/file2.md", 1, 0.80), ] kb.ingest.return_value = {"collection": "new_docs", "files": 5, "chunks": 42} - kb.append.return_value = {"collection": "docs", "files": 2, "chunks_added": 10, "total_chunks": 50} + kb.append.return_value = { + "collection": "docs", + "files": 2, + "chunks_added": 10, + "total_chunks": 50, + } return kb @@ -100,6 +110,25 @@ def server(mock_kb): # kb_list_collections # --------------------------------------------------------------------------- + +class TestSkillSources: + + def test_list_skill_sources_returns_known(self, mock_kb): + mock_kb.collections = [] + server = create_knowledge_server(mock_kb) + result 
= call_tool(server, "list_skill_sources", {}) + assert "scientific" in result["known_sources"] + assert result["synced_collections"] == [] + + def test_add_skill_source_bad_source_errors(self, mock_kb): + mock_kb.collections = [] + server = create_knowledge_server(mock_kb) + result = call_tool( + server, "add_skill_source", {"source": "not-a-real-known-name"} + ) + assert "error" in result + + class TestListCollections: def test_returns_collections(self, server, mock_kb): @@ -128,14 +157,19 @@ def test_empty_collections(self, mock_kb): # kb_search # --------------------------------------------------------------------------- + class TestSearch: def test_search_success(self, server, mock_kb): """Successful search returns formatted results.""" - result = call_tool(server, "kb_search", { - "query": "how to install", - "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "how to install", + "collection": "docs", + }, + ) assert result["status"] == "ok" assert result["query"] == "how to install" @@ -150,12 +184,16 @@ def test_search_success(self, server, mock_kb): def test_search_passes_parameters(self, server, mock_kb): """Search forwards top_k and rerank to the knowledge base.""" - call_tool(server, "kb_search", { - "query": "test", - "collection": "docs", - "top_k": 10, - "rerank": False, - }) + call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + "top_k": 10, + "rerank": False, + }, + ) mock_kb.search.assert_called_once_with( query="test", @@ -166,10 +204,14 @@ def test_search_passes_parameters(self, server, mock_kb): def test_search_defaults(self, server, mock_kb): """Search uses default top_k=5 and server's use_rerank setting.""" - call_tool(server, "kb_search", { - "query": "test", - "collection": "docs", - }) + call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) mock_kb.search.assert_called_once_with( query="test", @@ -182,10 +224,14 @@ def 
test_search_nonexistent_collection(self, server, mock_kb): """Searching a missing collection returns an error.""" mock_kb.search.side_effect = ValueError("Collection 'missing' not found") - result = call_tool(server, "kb_search", { - "query": "test", - "collection": "missing", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "missing", + }, + ) assert result["status"] == "error" assert "not found" in result["error"] @@ -196,10 +242,14 @@ def test_search_rerank_score_forwarded(self, server, mock_kb): {**make_search_result("text", "file.md"), "rerank_score": 0.99}, ] - result = call_tool(server, "kb_search", { - "query": "test", - "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) assert result["results"][0]["rerank_score"] == 0.99 @@ -208,6 +258,7 @@ def test_search_rerank_score_forwarded(self, server, mock_kb): # kb_ingest (background job pattern) # --------------------------------------------------------------------------- + class TestIngest: def test_ingest_returns_started(self, server, mock_kb, tmp_path): @@ -215,9 +266,13 @@ def test_ingest_returns_started(self, server, mock_kb, tmp_path): folder = tmp_path / "new_docs" folder.mkdir() - result = call_tool(server, "kb_ingest", { - "folder_path": str(folder), - }) + result = call_tool( + server, + "kb_ingest", + { + "folder_path": str(folder), + }, + ) assert result["status"] == "started" assert "job_id" in result @@ -245,23 +300,31 @@ def test_ingest_with_file_types(self, server, mock_kb, tmp_path): async def run(): await call_tool_and_await_job( - server, "kb_ingest", { + server, + "kb_ingest", + { "folder_path": str(folder), "file_types": ["md", "txt"], - } + }, ) # New server always passes collection_name to kb.ingest mock_kb.ingest.assert_called_once_with( - folder, collection_name="docs2", file_types=["md", "txt"], + folder, + collection_name="docs2", + file_types=["md", "txt"], ) 
asyncio.run(run()) def test_ingest_folder_not_found(self, server): """Ingesting a nonexistent folder returns an error immediately.""" - result = call_tool(server, "kb_ingest", { - "folder_path": "/nonexistent/folder", - }) + result = call_tool( + server, + "kb_ingest", + { + "folder_path": "/nonexistent/folder", + }, + ) assert result["status"] == "error" assert "not found" in result["error"].lower() @@ -271,9 +334,13 @@ def test_ingest_not_a_directory(self, server, tmp_path): file_path = tmp_path / "not_a_dir.txt" file_path.write_text("I'm a file") - result = call_tool(server, "kb_ingest", { - "folder_path": str(file_path), - }) + result = call_tool( + server, + "kb_ingest", + { + "folder_path": str(file_path), + }, + ) assert result["status"] == "error" assert "Not a directory" in result["error"] @@ -308,9 +375,13 @@ def test_ingest_deconflicts_existing_collection(self, server, mock_kb, tmp_path) mock_kb.ingest.return_value = {"collection": "docs1", "files": 3, "chunks": 10} - result = call_tool(server, "kb_ingest", { - "folder_path": str(folder), - }) + result = call_tool( + server, + "kb_ingest", + { + "folder_path": str(folder), + }, + ) assert result["status"] == "started" assert result["collection"] == "docs1" @@ -331,9 +402,13 @@ def test_ingest_deconflicts_symlinked_collection(self, server, mock_kb, tmp_path (mock_kb.index_dir / "docs").symlink_to(base_dir) mock_kb.ingest.return_value = {"collection": "docs1", "files": 3, "chunks": 10} - result = call_tool(server, "kb_ingest", { - "folder_path": str(folder), - }) + result = call_tool( + server, + "kb_ingest", + { + "folder_path": str(folder), + }, + ) assert result["status"] == "started" assert result["collection"] == "docs1" @@ -347,6 +422,7 @@ def test_ingest_deconflicts_symlinked_collection(self, server, mock_kb, tmp_path # kb_job_status # --------------------------------------------------------------------------- + class TestJobStatus: def test_unknown_job(self, server): @@ -365,12 +441,17 @@ def 
test_running_job(self, server, mock_kb, tmp_path): def blocking_ingest(*args, **kwargs): time.sleep(10) return {"collection": "slow_docs", "files": 1, "chunks": 5} + mock_kb.ingest.side_effect = blocking_ingest async def run(): - initial = await _call_tool_async(server, "kb_ingest", { - "folder_path": str(folder), - }) + initial = await _call_tool_async( + server, + "kb_ingest", + { + "folder_path": str(folder), + }, + ) assert initial["status"] == "started" job_id = initial["job_id"] @@ -385,6 +466,7 @@ async def run(): # kb_append (background job pattern) # --------------------------------------------------------------------------- + class TestAppend: def test_append_returns_started(self, server, mock_kb, tmp_path): @@ -394,10 +476,14 @@ def test_append_returns_started(self, server, mock_kb, tmp_path): coll_dir.mkdir(exist_ok=True) (coll_dir / "index.faiss").write_text("fake") - result = call_tool(server, "kb_append", { - "collection": "docs", - "paths": [str(tmp_path)], - }) + result = call_tool( + server, + "kb_append", + { + "collection": "docs", + "paths": [str(tmp_path)], + }, + ) assert result["status"] == "started" assert "job_id" in result @@ -411,10 +497,12 @@ def test_append_job_completes(self, server, mock_kb, tmp_path): async def run(): initial, final = await call_tool_and_await_job( - server, "kb_append", { + server, + "kb_append", + { "collection": "docs", "paths": [str(tmp_path)], - } + }, ) assert final["status"] == "complete" assert final["result"]["chunks_added"] == 10 @@ -423,10 +511,14 @@ async def run(): def test_append_collection_not_found(self, server, mock_kb): """Appending to a nonexistent collection returns an error immediately.""" - result = call_tool(server, "kb_append", { - "collection": "nonexistent", - "paths": ["/some/path"], - }) + result = call_tool( + server, + "kb_append", + { + "collection": "nonexistent", + "paths": ["/some/path"], + }, + ) assert result["status"] == "error" assert "not found" in result["error"].lower() @@ 
-436,6 +528,7 @@ def test_append_collection_not_found(self, server, mock_kb): # kb_search — error handling (transport-closed diagnostics) # --------------------------------------------------------------------------- + class TestSearchErrorHandling: """Verify the server returns error responses (not crashes) for common failure modes that would otherwise cause 'transport closed'.""" @@ -443,12 +536,18 @@ class TestSearchErrorHandling: def test_search_httpx_connect_error(self, mock_kb): """Network unreachable during search returns error, not crash.""" import httpx + mock_kb.search.side_effect = httpx.ConnectError("Connection refused") server = create_knowledge_server(mock_kb) - result = call_tool(server, "kb_search", { - "query": "test", "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) assert result["status"] == "error" assert "Connection refused" in result["error"] @@ -456,12 +555,18 @@ def test_search_httpx_connect_error(self, mock_kb): def test_search_httpx_timeout(self, mock_kb): """Embedding API timeout during search returns error, not crash.""" import httpx + mock_kb.search.side_effect = httpx.ReadTimeout("Read timed out") server = create_knowledge_server(mock_kb) - result = call_tool(server, "kb_search", { - "query": "test", "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) assert result["status"] == "error" assert "timed out" in result["error"].lower() @@ -469,16 +574,24 @@ def test_search_httpx_timeout(self, mock_kb): def test_search_httpx_401(self, mock_kb): """Expired/invalid API key during search returns error, not crash.""" import httpx + mock_resp = MagicMock() mock_resp.status_code = 401 mock_kb.search.side_effect = httpx.HTTPStatusError( - "401 Unauthorized", request=MagicMock(), response=mock_resp, + "401 Unauthorized", + request=MagicMock(), + response=mock_resp, ) server = 
create_knowledge_server(mock_kb) - result = call_tool(server, "kb_search", { - "query": "test", "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) assert result["status"] == "error" assert "401" in result["error"] @@ -486,16 +599,24 @@ def test_search_httpx_401(self, mock_kb): def test_search_httpx_500(self, mock_kb): """Embedding API server error returns error, not crash.""" import httpx + mock_resp = MagicMock() mock_resp.status_code = 500 mock_kb.search.side_effect = httpx.HTTPStatusError( - "500 Internal Server Error", request=MagicMock(), response=mock_resp, + "500 Internal Server Error", + request=MagicMock(), + response=mock_resp, ) server = create_knowledge_server(mock_kb) - result = call_tool(server, "kb_search", { - "query": "test", "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) assert result["status"] == "error" assert "500" in result["error"] @@ -505,9 +626,14 @@ def test_search_runtime_error(self, mock_kb): mock_kb.search.side_effect = RuntimeError("FAISS segfault simulation") server = create_knowledge_server(mock_kb) - result = call_tool(server, "kb_search", { - "query": "test", "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) assert result["status"] == "error" assert "FAISS segfault" in result["error"] @@ -517,9 +643,14 @@ def test_search_os_error(self, mock_kb): mock_kb.search.side_effect = OSError("Permission denied: index.faiss") server = create_knowledge_server(mock_kb) - result = call_tool(server, "kb_search", { - "query": "test", "collection": "docs", - }) + result = call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) assert result["status"] == "error" assert "Permission denied" in result["error"] @@ -530,6 +661,7 @@ def test_search_os_error(self, mock_kb): # 
setup_runtime_kb # --------------------------------------------------------------------------- + class TestSetupRuntimeKb: def test_copies_collections(self, tmp_path): @@ -616,6 +748,7 @@ def test_does_not_overwrite_existing(self, tmp_path): # Regression: OpenMP duplicate library crash (transport closed) # --------------------------------------------------------------------------- + class TestOpenMPWorkaround: """Importing knowledge_server must set KMP_DUPLICATE_LIB_OK to prevent a fatal OpenMP crash when FAISS and sentence-transformers (PyTorch) @@ -636,6 +769,7 @@ def test_kmp_duplicate_lib_ok_is_set(self): # Regression: rerank schema default must match server config # --------------------------------------------------------------------------- + class TestRerankSchemaDefault: """The kb_search schema previously hardcoded 'default': True for the rerank parameter, causing agents to request reranking even when the @@ -665,10 +799,14 @@ def test_search_omitted_rerank_passes_none(self, mock_kb): """Omitting rerank passes None to kb.search, which resolves to kb.default_rerank internally.""" server = create_knowledge_server(mock_kb) - call_tool(server, "kb_search", { - "query": "test", - "collection": "docs", - }) + call_tool( + server, + "kb_search", + { + "query": "test", + "collection": "docs", + }, + ) mock_kb.search.assert_called_once_with( query="test", collection="docs", diff --git a/tests/test_registry_server.py b/tests/test_registry_server.py index a7e8cb2..81c5eb2 100644 --- a/tests/test_registry_server.py +++ b/tests/test_registry_server.py @@ -19,8 +19,12 @@ from mcp_helpers import call_tool_sync as call_tool -def make_spec(name="test_tool", description="A test tool", executable="echo hello", - dependencies=None): +def make_spec( + name="test_tool", + description="A test tool", + executable="echo hello", + dependencies=None, +): """Create a minimal valid tool spec.""" spec = { "name": name, @@ -59,7 +63,7 @@ def _make_server(tmp_path, tools=None): runtime_dir 
= tmp_path / "runtime" project_tools_dir = runtime_dir / "tools" project_tools_dir.mkdir(parents=True, exist_ok=True) - for spec in (tools or []): + for spec in tools or []: _write_tool(project_tools_dir, spec) reg = ToolRegistry( source_tools_dir=str(source_dir), @@ -72,6 +76,7 @@ def _make_server(tmp_path, tools=None): # Fixtures # --------------------------------------------------------------------------- + @pytest.fixture def server_and_registry(tmp_path): return _make_server(tmp_path) @@ -89,10 +94,13 @@ def registry(server_and_registry): @pytest.fixture def populated(tmp_path): - server, reg = _make_server(tmp_path, tools=[ - make_spec("tool_alpha", "Alpha tool", "python alpha.py"), - make_spec("tool_beta", "Beta data processor", "python beta.py"), - ]) + server, reg = _make_server( + tmp_path, + tools=[ + make_spec("tool_alpha", "Alpha tool", "python alpha.py"), + make_spec("tool_beta", "Beta data processor", "python beta.py"), + ], + ) return server, reg @@ -105,6 +113,7 @@ def populated_server(populated): # save_tool_spec # --------------------------------------------------------------------------- + class TestSaveToolSpec: def test_add_new_tool(self, server, registry): @@ -118,8 +127,16 @@ def test_add_new_tool(self, server, registry): def test_update_existing_tool(self, server, registry): """Saving a spec with the same name updates rather than duplicates.""" - call_tool(server, "save_tool_spec", {"spec": make_spec("my_tool", description="Version 1")}) - text = call_tool(server, "save_tool_spec", {"spec": make_spec("my_tool", description="Version 2")}) + call_tool( + server, + "save_tool_spec", + {"spec": make_spec("my_tool", description="Version 1")}, + ) + text = call_tool( + server, + "save_tool_spec", + {"spec": make_spec("my_tool", description="Version 2")}, + ) assert "updated" in text assert "1 tools" in text @@ -138,6 +155,7 @@ def test_accepts_stringified_spec(self, server, registry): """Some MCP clients (Claude Sonnet/Haiku 4.x) send 
nested-object args as JSON strings. The handler must accept both shapes.""" import json + spec = make_spec("stringy_tool") text = call_tool(server, "save_tool_spec", {"spec": json.dumps(spec)}) @@ -152,10 +170,29 @@ def test_rejects_invalid_stringified_spec(self, server, registry): assert "JSON object" in text +# --------------------------------------------------------------------------- +# install_skill +# --------------------------------------------------------------------------- + + +class TestInstallSkill: + + def test_install_skill_routes_and_reports_missing(self, server): + """install_skill is registered and reports a clean error when the + named skill isn't in any synced catalog.""" + text = call_tool( + server, + "install_skill", + {"skill_name": "zzz-definitely-not-a-real-skill-xyz"}, + ) + assert "No catalog skill" in text + + # --------------------------------------------------------------------------- # save_skill # --------------------------------------------------------------------------- + class TestSaveSkill: def test_add_new_skill_creates_files_and_indexes(self, tmp_path): @@ -168,6 +205,7 @@ def test_add_new_skill_creates_files_and_indexes(self, tmp_path): """ server, reg, kb = _make_server_with_kb(tmp_path) from dsagt.registry import SkillRegistry as _SR + skill_reg = _SR(runtime_dir=str(tmp_path / "runtime"), kb=kb) before = len(skill_reg.list_skills()) @@ -192,14 +230,22 @@ def test_update_existing_skill_preserves_body_when_omitted(self, tmp_path): """Saving a spec for an existing skill without body keeps the body.""" server, reg, kb = _make_server_with_kb(tmp_path) first_body = "# orig\n\nOriginal workflow body.\n" - call_tool(server, "save_skill", { - "spec": {"name": "wf", "description": "v1"}, - "body": first_body, - }) + call_tool( + server, + "save_skill", + { + "spec": {"name": "wf", "description": "v1"}, + "body": first_body, + }, + ) # Update the description only — body should be preserved. 
- text = call_tool(server, "save_skill", { - "spec": {"name": "wf", "description": "v2 description"}, - }) + text = call_tool( + server, + "save_skill", + { + "spec": {"name": "wf", "description": "v2 description"}, + }, + ) assert "updated" in text skill_md = tmp_path / "runtime" / "skills" / "wf" / "SKILL.md" content = skill_md.read_text() @@ -209,11 +255,15 @@ def test_update_existing_skill_preserves_body_when_omitted(self, tmp_path): def test_save_skill_writes_reference_files(self, tmp_path): """reference_files dict lands as additional files in the skill dir.""" server, reg, kb = _make_server_with_kb(tmp_path) - text = call_tool(server, "save_skill", { - "spec": {"name": "with_template", "description": "Has a template"}, - "body": "# with_template\n\nUses template.json.\n", - "reference_files": {"template.json": '{"foo": "bar"}\n'}, - }) + text = call_tool( + server, + "save_skill", + { + "spec": {"name": "with_template", "description": "Has a template"}, + "body": "# with_template\n\nUses template.json.\n", + "reference_files": {"template.json": '{"foo": "bar"}\n'}, + }, + ) assert "added" in text skill_dir = tmp_path / "runtime" / "skills" / "with_template" assert (skill_dir / "SKILL.md").exists() @@ -231,6 +281,7 @@ def test_save_skill_string_encoded_spec(self, tmp_path): # get_registry # --------------------------------------------------------------------------- + class TestGetRegistry: def test_empty_registry(self, server): @@ -253,6 +304,7 @@ def test_populated_registry(self, populated_server, populated): # search_registry # --------------------------------------------------------------------------- + class TestSearchRegistryNoKB: """search_registry with no KB configured. 
@@ -266,12 +318,16 @@ class TestSearchRegistryNoKB: def test_exact_name_lookup_works_without_kb(self, populated_server): """tool_name lookup is KB-free and must keep working.""" - text = call_tool(populated_server, "search_registry", {"tool_name": "tool_alpha"}) + text = call_tool( + populated_server, "search_registry", {"tool_name": "tool_alpha"} + ) assert "tool_alpha" in text def test_exact_name_miss_without_kb(self, populated_server): """tool_name with a non-existent name returns a clean 'no tool' message.""" - text = call_tool(populated_server, "search_registry", {"tool_name": "nonexistent"}) + text = call_tool( + populated_server, "search_registry", {"tool_name": "nonexistent"} + ) assert "No tool named 'nonexistent'" in text def test_query_search_without_kb_returns_helpful_error(self, populated_server): @@ -291,6 +347,7 @@ def test_empty_query_without_kb_returns_helpful_error(self, populated_server): # read_file # --------------------------------------------------------------------------- + class TestReadFile: def test_read_success(self, server, tmp_path): @@ -311,31 +368,44 @@ def test_read_missing_file(self, server): # run_command # --------------------------------------------------------------------------- + class TestRunCommand: def test_success(self, server): """Running a valid command returns its output.""" - text = call_tool(server, "run_command", { - "command": "echo", - "args": ["hello"], - }) + text = call_tool( + server, + "run_command", + { + "command": "echo", + "args": ["hello"], + }, + ) assert "hello" in text assert "Return code: 0" in text def test_command_not_found(self, server): """Running a nonexistent command returns not found error.""" - text = call_tool(server, "run_command", { - "command": "nonexistent_command_xyz", - }) + text = call_tool( + server, + "run_command", + { + "command": "nonexistent_command_xyz", + }, + ) assert "not found" in text def test_timeout(self, server): """A command that exceeds the timeout reports timeout.""" 
- text = call_tool(server, "run_command", { - "command": "sleep", - "args": ["30"], - "timeout": 0.1, - }) + text = call_tool( + server, + "run_command", + { + "command": "sleep", + "args": ["30"], + "timeout": 0.1, + }, + ) assert "timed out" in text @@ -343,6 +413,7 @@ def test_timeout(self, server): # save_tool_spec — dependency installation # --------------------------------------------------------------------------- + class TestSaveToolSpecDependencies: @patch("dsagt.commands.registry_server.subprocess.run") @@ -358,8 +429,15 @@ def test_deps_installed_on_save(self, mock_run, server, registry): assert "Successfully installed" in text mock_run.assert_called_once() cmd = mock_run.call_args[0][0] - assert cmd == ["uv", "pip", "install", "--python", sys.executable, - "pandas>=2.0", "numpy"] + assert cmd == [ + "uv", + "pip", + "install", + "--python", + sys.executable, + "pandas>=2.0", + "numpy", + ] @patch("dsagt.commands.registry_server.subprocess.run") def test_deps_failure_still_saves_spec(self, mock_run, server, registry): @@ -419,15 +497,19 @@ def test_uv_not_found(self, mock_run, server): # install_dependencies # --------------------------------------------------------------------------- + class TestInstallDependencies: @patch("dsagt.commands.registry_server.subprocess.run") def test_install_all(self, mock_run, tmp_path): """install_dependencies with no tool_name installs all unique deps.""" - server, reg = _make_server(tmp_path, tools=[ - make_spec("tool_a", dependencies=["pandas", "numpy"]), - make_spec("tool_b", dependencies=["numpy", "scipy"]), - ]) + server, reg = _make_server( + tmp_path, + tools=[ + make_spec("tool_a", dependencies=["pandas", "numpy"]), + make_spec("tool_b", dependencies=["numpy", "scipy"]), + ], + ) mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="") text = call_tool(server, "install_dependencies", {}) @@ -435,16 +517,27 @@ def test_install_all(self, mock_run, tmp_path): assert "tool_a" in text assert "tool_b" in 
text cmd = mock_run.call_args[0][0] - assert cmd == ["uv", "pip", "install", "--python", sys.executable, - "pandas", "numpy", "scipy"] + assert cmd == [ + "uv", + "pip", + "install", + "--python", + sys.executable, + "pandas", + "numpy", + "scipy", + ] @patch("dsagt.commands.registry_server.subprocess.run") def test_install_single_tool(self, mock_run, tmp_path): """install_dependencies with tool_name targets only that tool.""" - server, reg = _make_server(tmp_path, tools=[ - make_spec("tool_a", dependencies=["pandas"]), - make_spec("tool_b", dependencies=["scipy"]), - ]) + server, reg = _make_server( + tmp_path, + tools=[ + make_spec("tool_a", dependencies=["pandas"]), + make_spec("tool_b", dependencies=["scipy"]), + ], + ) mock_run.return_value = MagicMock(returncode=0, stdout="ok", stderr="") text = call_tool(server, "install_dependencies", {"tool_name": "tool_b"}) @@ -471,6 +564,7 @@ def test_tools_without_deps(self, tmp_path): # KB-backed tool indexing and search # --------------------------------------------------------------------------- + def _make_server_with_kb(tmp_path, tools=None): """Create (server, registry, kb) with a real local-embedding KnowledgeBase. 
@@ -484,7 +578,7 @@ def _make_server_with_kb(tmp_path, tools=None): runtime_dir = tmp_path / "runtime" project_tools_dir = runtime_dir / "tools" project_tools_dir.mkdir(parents=True, exist_ok=True) - for spec in (tools or []): + for spec in tools or []: _write_tool(project_tools_dir, spec) kb = KnowledgeBase( @@ -515,10 +609,16 @@ def test_save_tool_indexes_into_kb(self, tmp_path): server, reg, kb = _make_server_with_kb(tmp_path) - call_tool(server, "save_tool_spec", {"spec": make_spec( - name="csv_filter", - description="Filter CSV rows by column value", - )}) + call_tool( + server, + "save_tool_spec", + { + "spec": make_spec( + name="csv_filter", + description="Filter CSV rows by column value", + ) + }, + ) results = kb.search("filter", collection=TOOL_REGISTRY_COLLECTION) assert len(results) > 0 @@ -542,12 +642,20 @@ def test_search_registry_by_name_not_found(self, tmp_path): def test_search_registry_semantic(self, tmp_path): """Semantic search finds tools by description similarity.""" server, reg, kb = _make_server_with_kb(tmp_path) - call_tool(server, "save_tool_spec", {"spec": make_spec( - name="csv_filter", - description="Filter and remove rows from a CSV spreadsheet based on column values", - )}) + call_tool( + server, + "save_tool_spec", + { + "spec": make_spec( + name="csv_filter", + description="Filter and remove rows from a CSV spreadsheet based on column values", + ) + }, + ) - text = call_tool(server, "search_registry", {"query": "delete rows from tabular data"}) + text = call_tool( + server, "search_registry", {"query": "delete rows from tabular data"} + ) assert "csv_filter" in text def test_search_registry_by_tag(self, tmp_path): @@ -562,7 +670,9 @@ def test_search_registry_by_tag(self, tmp_path): spec_other["tags"] = ["data_processing"] call_tool(server, "save_tool_spec", {"spec": spec_other}) - text = call_tool(server, "search_registry", {"query": "tool", "tag": "genomics"}) + text = call_tool( + server, "search_registry", {"query": "tool", 
"tag": "genomics"} + ) assert "fastp" in text def test_reindex_all(self, tmp_path): @@ -571,7 +681,9 @@ def test_reindex_all(self, tmp_path): server, reg, kb = _make_server_with_kb( tmp_path, - tools=[make_spec(name="preexisting", description="Already registered tool")], + tools=[ + make_spec(name="preexisting", description="Already registered tool") + ], ) # Skills were copied to runtime on init but not indexed (KB was empty) @@ -592,9 +704,12 @@ def test_no_kb_query_search_returns_explicit_error(self, tmp_path): and produced dramatically worse search results without telling anyone. """ - server, reg = _make_server(tmp_path, tools=[ - make_spec(name="csv_filter", description="Filter CSV rows"), - ]) + server, reg = _make_server( + tmp_path, + tools=[ + make_spec(name="csv_filter", description="Filter CSV rows"), + ], + ) text = call_tool(server, "search_registry", {"query": "csv"}) assert "csv_filter" not in text # the substring match must NOT happen diff --git a/tests/test_skills_catalog.py b/tests/test_skills_catalog.py new file mode 100644 index 0000000..d1006bc --- /dev/null +++ b/tests/test_skills_catalog.py @@ -0,0 +1,196 @@ +"""Unit tests for the external skill catalog (fetch / index / install) and +the native-skill mirror. 
No network: ``clone_github`` is monkeypatched and +the KB is a lightweight fake that records ``add_entries`` calls.""" + +import json + +import pytest + +from dsagt.agents.base import ( + _NATIVE_DESCRIPTION_CAP, + _SKILL_MANIFEST, + _mirror_skills_to, +) +from dsagt.commands import skills_catalog as sc +from dsagt.registry import CATALOG_COLLECTION_PREFIX, catalog_collection + + +def _mkskill(d, name, desc="a short description"): + d.mkdir(parents=True, exist_ok=True) + (d / "SKILL.md").write_text( + f"---\nname: {name}\ndescription: {desc}\n---\n# {name}\nbody\n" + ) + return d + + +# --------------------------------------------------------------------------- +# slug + source resolution +# --------------------------------------------------------------------------- + + +def test_repo_slug_is_collection_safe(): + slug = sc._repo_slug("https://github.com/K-Dense-AI/scientific-agent-skills") + assert slug == "k-dense-ai-scientific-agent-skills" + assert sc._repo_slug("git@github.com:Foo/Bar.git") == "foo-bar" + + +def test_resolve_source_known_url_and_shorthand(): + assert ( + sc.resolve_source("scientific")["url"] == sc.KNOWN_SOURCES["scientific"]["url"] + ) + assert ( + sc.resolve_source("https://github.com/a/b")["url"] == "https://github.com/a/b" + ) + assert sc.resolve_source("a/b")["url"] == "https://github.com/a/b" + with pytest.raises(ValueError): + sc.resolve_source("not-a-known-name") + + +# --------------------------------------------------------------------------- +# discovery +# --------------------------------------------------------------------------- + + +def test_discover_skill_dirs_flat_and_nested(tmp_path): + root = tmp_path / "skills" + _mkskill(root / "flat", "flat") + _mkskill(root / "domain" / "nested", "nested") + # A dir whose SKILL.md has no name is ignored. 
+ bad = root / "noname" + bad.mkdir(parents=True) + (bad / "SKILL.md").write_text("---\ndescription: x\n---\nbody") + names = sorted(p.name for p in sc._discover_skill_dirs(tmp_path)) + assert names == ["flat", "nested"] + + +# --------------------------------------------------------------------------- +# find + install +# --------------------------------------------------------------------------- + + +def test_find_catalog_skill_and_ambiguity(tmp_path): + cache = tmp_path / "cache" + _mkskill(cache / "srcA" / "skills" / "alpha", "alpha") + found = sc.find_catalog_skill("alpha", cache_dir=cache) + assert found.name == "alpha" + with pytest.raises(LookupError): + sc.find_catalog_skill("missing", cache_dir=cache) + # Same skill name in a second source → ambiguous. + _mkskill(cache / "srcB" / "skills" / "alpha", "alpha") + with pytest.raises(LookupError): + sc.find_catalog_skill("alpha", cache_dir=cache) + + +def test_install_into_project_copies_subdirs(tmp_path): + cache = tmp_path / "cache" + skill = _mkskill(cache / "src" / "vasp-to-isaac", "vasp-to-isaac") + (skill / "scripts").mkdir() + (skill / "scripts" / "run.py").write_text("print(1)") + (skill / "references").mkdir() + (skill / "references" / "spec.md").write_text("# spec") + + proj = tmp_path / "proj" + proj.mkdir() + info = sc.install_into_project("vasp-to-isaac", proj, cache_dir=cache) + dest = proj / "skills" / "vasp-to-isaac" + assert info["action"] == "added" + assert (dest / "SKILL.md").exists() + assert (dest / "scripts" / "run.py").exists() + assert (dest / "references" / "spec.md").exists() + # Re-install reports "updated". 
+ assert ( + sc.install_into_project("vasp-to-isaac", proj, cache_dir=cache)["action"] + == "updated" + ) + + +# --------------------------------------------------------------------------- +# sync_source (mocked clone + fake KB) +# --------------------------------------------------------------------------- + + +class _FakeKB: + def __init__(self, index_dir): + self.index_dir = index_dir + self.collections = [] + self.adds = [] # (collection, metadatas) + + def add_entries(self, texts, collection, metadatas=None): + self.adds.append((collection, metadatas)) + if collection not in self.collections: + self.collections.append(collection) + return {"collection": collection, "entries_added": len(texts)} + + +def test_sync_source_indexes_per_source_collection(tmp_path, monkeypatch): + # Fake clone: populate dest/ with two skills. + def fake_clone(url, dest, branch="main", include=None): + sub = include[0] if include else "" + base = dest / sub if sub else dest + _mkskill(base / "s1", "s1") + _mkskill(base / "s2", "s2") + + monkeypatch.setattr("dsagt.commands.setup_core_kb.clone_github", fake_clone) + + kb = _FakeKB(tmp_path / "kb_index") + cache = tmp_path / "cache" + stats = sc.sync_source( + {"url": "https://github.com/x/y", "branch": "main", "subdir": "skills"}, + kb=kb, + cache_dir=cache, + ) + slug = sc._repo_slug("https://github.com/x/y") + coll = catalog_collection(slug) + assert stats["discovered"] == 2 and stats["indexed"] == 2 + assert coll.startswith(CATALOG_COLLECTION_PREFIX) + added_coll, metas = kb.adds[-1] + assert added_coll == coll + assert all(m["source"] == f"catalog:{slug}" for m in metas) + assert {m["skill_name"] for m in metas} == {"s1", "s2"} + + +# --------------------------------------------------------------------------- +# native mirror +# --------------------------------------------------------------------------- + + +def test_mirror_manifest_preserves_user_skills_and_reaps(tmp_path): + target = tmp_path / ".claude" / "skills" + 
target.mkdir(parents=True) + # A user-authored skill dsagt must never touch. + _mkskill(target / "user-skill", "user-skill") + + bundled = _mkskill(tmp_path / "bundled" / "skill-creator", "skill-creator") + proj = _mkskill(tmp_path / "proj" / "alpha", "alpha") + + _mirror_skills_to(target, [bundled, proj]) + assert sorted(p.name for p in target.iterdir() if p.is_dir()) == [ + "alpha", + "skill-creator", + "user-skill", + ] + manifest = json.loads((target / _SKILL_MANIFEST).read_text()) + assert manifest == ["alpha", "skill-creator"] + assert "user-skill" not in manifest + + # Re-run with skill-creator gone → reaped; user-skill preserved. + _mirror_skills_to(target, [proj]) + assert sorted(p.name for p in target.iterdir() if p.is_dir()) == [ + "alpha", + "user-skill", + ] + + +def test_mirror_truncates_long_description(tmp_path): + long_desc = "x" * (_NATIVE_DESCRIPTION_CAP + 500) + src = _mkskill(tmp_path / "src" / "big", "big", desc=long_desc) + target = tmp_path / ".claude" / "skills" + _mirror_skills_to(target, [src]) + + import yaml + + mirrored = (target / "big" / "SKILL.md").read_text() + front = yaml.safe_load(mirrored.split("---", 2)[1]) + assert len(front["description"]) <= _NATIVE_DESCRIPTION_CAP + # Source untouched. + assert len((src / "SKILL.md").read_text()) > _NATIVE_DESCRIPTION_CAP diff --git a/use_cases/isaac_skills_demo/README.md b/use_cases/isaac_skills_demo/README.md new file mode 100644 index 0000000..200de44 --- /dev/null +++ b/use_cases/isaac_skills_demo/README.md @@ -0,0 +1,159 @@ +# DSAgt Demo: Skill-Driven VASP → ISAAC Conversion + +A lightweight mock of the [`isaac_vasp`](../isaac_vasp/) workflow, built specifically to **vet the skill-management feature**. Instead of shipping a hand-written converter skill, this walkthrough has the agent **discover, install, and author skills** — and surfaces them through Claude Code's *native* skill discovery. 
+ +It uses tiny **mock VASP outputs** (`mock_data/`, a few KB) so the whole thing runs in seconds with no DFT, no NERSC, and no 32 MB OUTCAR files. + +## What this demonstrates (the new functionality) + +- **External skill catalog** — pull Agent-Skills from GitHub repos (default: K-Dense `scientific-agent-skills`, 140+ skills) into a searchable catalog that is **not** loaded into the agent's context. +- **`search_skills`** spanning installed skills *and* the catalog (catalog hits marked `[catalog]`). +- **`install_skill`** — move a chosen catalog skill into the project, then into Claude's native `.claude/skills/`. +- **`add_skill_source`** — the agent enables another source (e.g. `anthropic`) via an MCP tool call. +- **`skill-creator`** — the bundled meta-skill scaffolds a brand-new `vasp-to-isaac-mock` skill from the Anthropic template. +- **Native mirror** — installed + bundled skills appear under `.claude/skills/<name>/` (tracked by `.dsagt-managed.json`), so Claude auto-invokes them with no MCP round-trip. + +The two tiers in one sentence: **catalog = searchable but not in context; installed = native and auto-invoked.** + +## Prerequisites + +- DSAgt installed (`uv sync --all-groups`) +- Claude Code installed (`npm i -g @anthropic-ai/claude-code`) +- Valid embedding credentials (so `search_skills` works) — `EMBEDDING_*` in your shell or project config +- Git installed (catalog sync uses a shallow clone) + +## Setup + +### 1. Build the core KB + default skill catalog + +```bash +dsagt setup-kb +``` + +This indexes the bundled tools/skills **and** clones + indexes the default skill source (K-Dense scientific). To skip the catalog, pass `--no-skill-catalog`. To go faster, you can defer the catalog and add it per-project later (Step A). + +### 2. Initialize a project + +```bash +dsagt init isaac-skills-demo --agent claude +``` + +The generated `dsagt_config.yaml` already carries a `skills:` block with the `scientific` source enabled and `populate_native: true`. + +### 3.
Start the session + +```bash +dsagt start isaac-skills-demo +``` + +`dsagt start` mirrors installed + bundled skills into `.claude/skills/` **before** launching Claude, so the bundled `skill-creator` is already discoverable. Copy the mock data into the project first so the agent can reach it: + +```bash +cp -r use_cases/isaac_skills_demo/mock_data ~/dsagt-projects/isaac-skills-demo/mock_data +``` + +## Execution + +### A. Browse and install a skill from the catalog + +First confirm the catalog is searchable (these are NOT in Claude's context — they live in the KB): + +```bash +dsagt skills list isaac-skills-demo --catalog +``` + +You should see a `skills_catalog__k-dense-ai-scientific-agent-skills` collection. Now have the agent search it: + +```text +Search the skill catalog for a skill that helps work with VASP, pymatgen, or DFT materials data. List what you find and which are installable from the catalog. +``` + +The agent calls `search_skills(...)`; catalog hits are marked `[catalog · install_skill to add]`. Install one: + +```text +Install the most relevant materials/DFT skill you found from the catalog into this project. +``` + +The agent calls `install_skill(skill_name=...)`. + +**Verify:** + +```bash +ls ~/dsagt-projects/isaac-skills-demo/skills/ +``` + +The installed skill directory (with any `scripts/` and `references/`) is now under the project's `skills/`. + +### B. Add a second catalog source via the agent + +```text +Enable the "anthropic" skill source so we also have the official Anthropic skills available, then tell me how many skills that added to the catalog. +``` + +The agent calls `add_skill_source(source="anthropic")` — it clones + indexes that repo into its own catalog collection and persists the source to `dsagt_config.yaml`. Confirm: + +```text +List the skill sources currently configured and synced. +``` + +(`list_skill_sources`.) + +### C. 
Author the project-specific skill with `skill-creator` + +The real `isaac_vasp` ships a hand-written `vasp-to-isaac` converter. Here we let the agent build a mock one using the bundled meta-skill: + +```text +Use the skill-creator skill to author a new project skill named "vasp-to-isaac-mock". It should: read a mock VASP calculation directory (POSCAR + INCAR + OUTCAR) under mock_data/mock_slab/, extract the final energy, atom count, and whether it's a slab relaxation (NSW > 0), and emit a small ISAAC-style JSON record. Use mock_data/expected_isaac_record.json as the shape to target. Save it with save_skill. +``` + +The agent reads `skill-creator`'s template (`references/SKILL_template.md`) and spec, then writes `<project>/skills/vasp-to-isaac-mock/`. + +**Verify:** + +```bash +cat ~/dsagt-projects/isaac-skills-demo/skills/vasp-to-isaac-mock/SKILL.md +``` + +### D. Run the new skill on the mock data + +```text +Invoke the vasp-to-isaac-mock workflow on mock_data/mock_slab/ and write the result to audit/mock_slab_isaac.json. Then diff its structure against mock_data/expected_isaac_record.json and report any missing fields. +``` + +### E. Inspect both tiers + +```bash +# Installed (native-discoverable) skills — bundled + project + anything installed +dsagt skills list isaac-skills-demo + +# The native mirror Claude actually reads, plus the dsagt-managed manifest +ls ~/dsagt-projects/isaac-skills-demo/.claude/skills/ +cat ~/dsagt-projects/isaac-skills-demo/.claude/skills/.dsagt-managed.json +``` + +The manifest lists only the skills **dsagt** placed (`skill-creator`, the installed catalog skill, `vasp-to-isaac-mock`). Any skill you hand-create under `.claude/skills/` is never touched. + +To pick up newly-mirrored skills as native `/commands`, restart Claude (`dsagt start isaac-skills-demo` again, then relaunch). + +## Post-Conditions + +1.
The KB holds per-source catalog collections (`skills_catalog__*`) for `scientific` (+ `anthropic` after Step B), searchable via `search_skills` but absent from Claude's context. +2. A catalog skill was installed into `<project>/skills/` and mirrored into `.claude/skills/`. +3. A new `vasp-to-isaac-mock` skill, authored via `skill-creator`, exists and is native-discoverable. +4. `audit/mock_slab_isaac.json` was produced from the mock VASP directory and matches the ISAAC shape. +5. `.claude/skills/.dsagt-managed.json` tracks exactly the dsagt-placed skills. + +## Cleanup + +```bash +dsagt stop isaac-skills-demo +dsagt rm isaac-skills-demo # add -y to skip the prompt +``` + +The shared catalog cache lives at `~/dsagt-projects/.skill_sources/` and is reused across projects; delete it to force a fresh clone on the next `setup-kb` / `add_skill_source`. + +## Notes + +- `mock_data/` is intentionally tiny and **not** real DFT output — the OUTCAR is a truncated stub. It exists only to exercise the conversion skill's parse-and-emit path. +- If your embedding backend isn't configured, `search_skills` degrades to the "requires a configured knowledge base" message; `install_skill` and the native mirror still work (they're pure filesystem operations). +- Swap in other catalogs the same way: `dsagt skills add isaac-skills-demo antigravity` (or `composio`, or any `https://github.com/owner/repo`). diff --git a/use_cases/isaac_skills_demo/mock_data/expected_isaac_record.json b/use_cases/isaac_skills_demo/mock_data/expected_isaac_record.json new file mode 100644 index 0000000..5d03192 --- /dev/null +++ b/use_cases/isaac_skills_demo/mock_data/expected_isaac_record.json @@ -0,0 +1,63 @@ +{ + "isaac_record_version": "1.05", + "record_id": "mock-iro2-110-slab-0001", + "record_type": "dft_calculation", + "record_domain": "materials", + "source_type": "MOCK", + "_note": "Reference SHAPE/values for the isaac_skills_demo.
Derived from the tiny mock_slab/ stub, NOT a real DFT run.", + "timestamps": { + "created_utc": "2026-06-01T10:00:00Z" + }, + "sample": { + "material": { + "name": "IrO2 (110) slab", + "formula": "IrO2", + "provenance": "mock" + }, + "sample_form": "surface_slab" + }, + "system": { + "domain": "materials", + "technique": "DFT", + "instrument": { + "instrument_type": "compute", + "instrument_name": "VASP", + "vendor_or_project": "VASP" + }, + "configuration": { + "code_version": "6.3.2", + "compute_architecture": "cpu", + "cores": null + } + }, + "computation": { + "method": { + "family": "DFT", + "functional_class": "GGA", + "functional_name": "PBE", + "pseudopotential": "PAW", + "cutoff_eV": 520.0, + "spin_treatment": "collinear", + "hubbard_u": {"Ir": 4.0} + }, + "relaxation": { + "is_relaxation": true, + "nsw": 50, + "converged": true, + "ionic_steps": 50 + } + }, + "results": { + "total_energy_eV": -132.84210000, + "energy_sigma0_eV": -132.84210000, + "n_atoms": 12, + "n_species": {"Ir": 4, "O": 8}, + "total_magnetization_muB": 8.0123, + "max_residual_force_eV_per_A": 0.011 + }, + "assets": [ + {"name": "POSCAR", "role": "structure_input"}, + {"name": "INCAR", "role": "calc_parameters"}, + {"name": "OUTCAR", "role": "calc_output"} + ] +} diff --git a/use_cases/isaac_skills_demo/mock_data/mock_slab/INCAR b/use_cases/isaac_skills_demo/mock_data/mock_slab/INCAR new file mode 100644 index 0000000..4c80632 --- /dev/null +++ b/use_cases/isaac_skills_demo/mock_data/mock_slab/INCAR @@ -0,0 +1,18 @@ +# MOCK INCAR — slab ionic relaxation (NSW > 0 marks this as a slab calc) +SYSTEM = IrO2(110) mock slab +ISTART = 0 +ICHARG = 2 +ENCUT = 520 +ISMEAR = 0 +SIGMA = 0.05 +IBRION = 2 +NSW = 50 +ISIF = 2 +EDIFF = 1E-5 +EDIFFG = -0.02 +ISPIN = 2 +LDAU = .TRUE. 
+LDAUTYPE = 2 +LDAUL = 2 -1 +LDAUU = 4.0 0.0 +GGA = PE diff --git a/use_cases/isaac_skills_demo/mock_data/mock_slab/OUTCAR b/use_cases/isaac_skills_demo/mock_data/mock_slab/OUTCAR new file mode 100644 index 0000000..4640af5 --- /dev/null +++ b/use_cases/isaac_skills_demo/mock_data/mock_slab/OUTCAR @@ -0,0 +1,42 @@ + MOCK OUTCAR — heavily truncated stub for the skills demo. NOT real VASP output. + Only the few lines a converter typically greps for are kept; the SCF/eigenvalue + blocks that make a real OUTCAR ~250k lines are omitted on purpose. + + vasp.6.3.2 mock build + executed on LinuxIFC date 2026.06.01 10:00:00 + + INCAR: + ENCUT = 520.0 + ISPIN = 2 + NSW = 50 + LDAUU = 4.000 0.000 + + energy without entropy= -123.45678901 energy(sigma->0) = -123.40000000 + ... + FREE ENERGIE OF THE ION-ELECTRON SYSTEM (eV) + --------------------------------------------------- + free energy TOTEN = -132.10000000 eV (ionic step 1) + + energy without entropy= -131.98000000 energy(sigma->0) = -131.99000000 + + FREE ENERGIE OF THE ION-ELECTRON SYSTEM (eV) + --------------------------------------------------- + free energy TOTEN = -132.84210000 eV (ionic step 50, converged) + + energy without entropy= -132.80000000 energy(sigma->0) = -132.84210000 + + POSITION TOTAL-FORCE (eV/Angst) + ----------------------------------------------------------------------------------- + 0.00000 0.00000 7.04000 0.000000 0.000000 -0.004000 + 3.19250 3.24900 7.04000 0.000000 0.000000 0.003000 + 0.00000 0.00000 9.90000 0.000000 0.000000 0.011000 + ----------------------------------------------------------------------------------- + + magnetization (x) + number of electron 192.0000000 magnetization 8.0123000 + + General timing and accounting informations for this job: + ======================================================== + Total CPU time used (sec): 4210.123 + Elapsed time (sec): 1130.456 + reached required accuracy - stopping structural energy minimisation diff --git 
a/use_cases/isaac_skills_demo/mock_data/mock_slab/POSCAR b/use_cases/isaac_skills_demo/mock_data/mock_slab/POSCAR new file mode 100644 index 0000000..5101962 --- /dev/null +++ b/use_cases/isaac_skills_demo/mock_data/mock_slab/POSCAR @@ -0,0 +1,21 @@ +IrO2 (110) mock slab [MOCK — not real DFT input] +1.0 + 6.3850000000 0.0000000000 0.0000000000 + 0.0000000000 6.4980000000 0.0000000000 + 0.0000000000 0.0000000000 22.0000000000 + Ir O + 4 8 +Selective dynamics +Direct + 0.0000 0.0000 0.3200 F F F + 0.5000 0.5000 0.3200 F F F + 0.0000 0.0000 0.4500 T T T + 0.5000 0.5000 0.4500 T T T + 0.2500 0.2500 0.3850 T T T + 0.7500 0.7500 0.3850 T T T + 0.2500 0.7500 0.3850 T T T + 0.7500 0.2500 0.3850 T T T + 0.2500 0.2500 0.5100 T T T + 0.7500 0.7500 0.5100 T T T + 0.2500 0.7500 0.5100 T T T + 0.7500 0.2500 0.5100 T T T From c7ed5fd06bd0d60747256d97338529542dfbc894 Mon Sep 17 00:00:00 2001 From: aarontuor Date: Fri, 12 Jun 2026 10:54:34 -0700 Subject: [PATCH 2/6] fix(skills): persist CLI-added sources to config; demo prompt script First-pass vetting of use_cases/isaac_skills_demo against the real K-Dense catalog surfaced one bug: `dsagt skills add ` synced + indexed the catalog but never wrote the source into dsagt_config.yaml, so a later config-driven `dsagt skills sync` would forget it. Only the `add_skill_source` MCP tool persisted. - Move the persist logic into a shared `persist_source_to_config` helper in skills_catalog.py; call it from both the CLI add-source path and the knowledge-server `add_skill_source` handler (removes the duplicated `_persist_skill_source`). - Regression test for the helper (append + dedupe + missing-config no-op). - Add use_cases/isaac_skills_demo/PROMPTS.md: the 8-prompt hand-pass script plus first-pass results (init/mirror, 146-skill sync, search, install pymatgen, native re-mirror, add anthropic all verified). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/dsagt/commands/cli.py | 8 +++ src/dsagt/commands/knowledge_server.py | 28 ++------ src/dsagt/commands/skills_catalog.py | 25 +++++++ tests/test_skills_catalog.py | 16 +++++ use_cases/isaac_skills_demo/PROMPTS.md | 95 ++++++++++++++++++++++++++ 5 files changed, 151 insertions(+), 21 deletions(-) create mode 100644 use_cases/isaac_skills_demo/PROMPTS.md diff --git a/src/dsagt/commands/cli.py b/src/dsagt/commands/cli.py index f935846..0e0b59c 100644 --- a/src/dsagt/commands/cli.py +++ b/src/dsagt/commands/cli.py @@ -387,6 +387,8 @@ def _cmd_skills(args): from dsagt.commands.skills_catalog import ( KNOWN_SOURCES, install_into_project, + persist_source_to_config, + resolve_source, sync_source, ) from dsagt.registry import ( @@ -434,11 +436,17 @@ def _cmd_skills(args): or target.count("/") == 1 ) if is_source: + spec = resolve_source(target) + if target in KNOWN_SOURCES: + spec.setdefault("name", target) kb = kb_from_config(config) try: stats = sync_source(target, kb=kb) finally: kb.close() + persist_source_to_config( + pdir, {"name": spec.get("name", stats["slug"]), **spec} + ) print(f"Added source {stats['url']}: {stats['indexed']} skill(s) indexed.") print( "Run 'dsagt start' to mirror an installed skill natively, or " diff --git a/src/dsagt/commands/knowledge_server.py b/src/dsagt/commands/knowledge_server.py index a06716c..92643b6 100644 --- a/src/dsagt/commands/knowledge_server.py +++ b/src/dsagt/commands/knowledge_server.py @@ -569,25 +569,6 @@ async def _handle_kb_dismiss_suggestion( # --------------------------------------------------------------------------- -def _persist_skill_source(runtime_dir: Path, spec: dict) -> None: - """Append a resolved source to ``skills.sources`` in the project config. - - Dedupes by URL. No-op if the config file is missing (e.g. tests with a - bare runtime dir) — the catalog is still indexed either way. 
- """ - cfg_path = runtime_dir / "dsagt_config.yaml" - if not cfg_path.exists(): - return - cfg = yaml.safe_load(cfg_path.read_text()) or {} - skills = cfg.setdefault("skills", {}) - sources = skills.setdefault("sources", []) - if not any(s.get("url") == spec.get("url") for s in sources): - sources.append( - {k: spec[k] for k in ("name", "url", "branch", "subdir") if k in spec} - ) - cfg_path.write_text(yaml.dump(cfg, default_flow_style=False, sort_keys=False)) - - async def _handle_add_skill_source( arguments: dict, *, @@ -595,7 +576,12 @@ async def _handle_add_skill_source( runtime_dir: Path, ) -> dict: """Enable a skill source (known name or GitHub URL): clone + index the catalog.""" - from dsagt.commands.skills_catalog import KNOWN_SOURCES, resolve_source, sync_source + from dsagt.commands.skills_catalog import ( + KNOWN_SOURCES, + persist_source_to_config, + resolve_source, + sync_source, + ) source = arguments.get("source") if not source: @@ -609,7 +595,7 @@ async def _handle_add_skill_source( stats = await asyncio.to_thread(sync_source, source, kb=kb) except (ValueError, RuntimeError) as e: return {"error": str(e)} - _persist_skill_source( + persist_source_to_config( runtime_dir, {"name": spec.get("name", stats["slug"]), **spec} ) return { diff --git a/src/dsagt/commands/skills_catalog.py b/src/dsagt/commands/skills_catalog.py index c375a3b..e59c083 100644 --- a/src/dsagt/commands/skills_catalog.py +++ b/src/dsagt/commands/skills_catalog.py @@ -98,6 +98,31 @@ def resolve_source(source: str | dict) -> dict: ) +def persist_source_to_config(project_dir: str | Path, spec: dict) -> bool: + """Append a resolved source to ``skills.sources`` in the project config. + + Dedupes by URL. Returns True if the config was updated. No-op (returns + False) if the config file is missing — the catalog is still indexed + either way. 
Used by both the ``add_skill_source`` MCP tool and the + ``dsagt skills add`` CLI so a CLI-added source is re-synced by a later + config-driven ``dsagt skills sync``. + """ + import yaml + + cfg_path = Path(project_dir) / "dsagt_config.yaml" + if not cfg_path.exists(): + return False + cfg = yaml.safe_load(cfg_path.read_text()) or {} + sources = cfg.setdefault("skills", {}).setdefault("sources", []) + if any(s.get("url") == spec.get("url") for s in sources): + return False + sources.append( + {k: spec[k] for k in ("name", "url", "branch", "subdir") if k in spec} + ) + cfg_path.write_text(yaml.dump(cfg, default_flow_style=False, sort_keys=False)) + return True + + def _repo_slug(url: str) -> str: """Stable, collection-name-safe slug from a GitHub URL (``owner-repo``).""" s = url.rstrip("/") diff --git a/tests/test_skills_catalog.py b/tests/test_skills_catalog.py index d1006bc..277313a 100644 --- a/tests/test_skills_catalog.py +++ b/tests/test_skills_catalog.py @@ -34,6 +34,22 @@ def test_repo_slug_is_collection_safe(): assert sc._repo_slug("git@github.com:Foo/Bar.git") == "foo-bar" +def test_persist_source_to_config_appends_and_dedupes(tmp_path): + import yaml + + cfg = tmp_path / "dsagt_config.yaml" + cfg.write_text(yaml.dump({"project": "p", "skills": {"sources": []}})) + spec = {"name": "anthropic", "url": "https://github.com/anthropics/skills", "branch": "main"} + assert sc.persist_source_to_config(tmp_path, spec) is True + sources = yaml.safe_load(cfg.read_text())["skills"]["sources"] + assert sources[-1]["name"] == "anthropic" + # Idempotent: same URL is not appended twice. + assert sc.persist_source_to_config(tmp_path, spec) is False + assert len(yaml.safe_load(cfg.read_text())["skills"]["sources"]) == 1 + # No config file → no-op, no crash. 
+ assert sc.persist_source_to_config(tmp_path / "nope", spec) is False + + def test_resolve_source_known_url_and_shorthand(): assert ( sc.resolve_source("scientific")["url"] == sc.KNOWN_SOURCES["scientific"]["url"] diff --git a/use_cases/isaac_skills_demo/PROMPTS.md b/use_cases/isaac_skills_demo/PROMPTS.md new file mode 100644 index 0000000..b154be0 --- /dev/null +++ b/use_cases/isaac_skills_demo/PROMPTS.md @@ -0,0 +1,95 @@ +# Hand-pass prompt script — isaac_skills_demo + +The deterministic backbone (init, catalog sync, CLI `skills` commands, the +native mirror) was already vetted by a first pass — see "First-pass results" +at the bottom. This script is for the **interactive agent pass**: paste each +prompt into Claude Code (running inside the project) and check the expected +behavior. + +## Before you start + +The project `isaac-skills-demo` is already set up from the first pass: +- catalog synced (146 K-Dense + 17 Anthropic skills), +- `pymatgen` already installed and mirrored into `.claude/skills/`, +- `mock_data/` copied into the project. + +To start fresh instead, delete and rebuild: + +```bash +dsagt rm isaac-skills-demo -y +dsagt init isaac-skills-demo --agent claude +cp -r use_cases/isaac_skills_demo/mock_data ~/dsagt-projects/isaac-skills-demo/mock_data +``` + +Launch the agent: + +```bash +dsagt start isaac-skills-demo +``` + +--- + +## Prompts (paste one at a time) + +### 1 — Confirm native discovery of the bundled meta-skill +> What skills do you have available right now? List them and say which are dsagt-managed. + +*Expect:* `skill-creator`, `datacard-generator`, and (if you didn't rebuild) `pymatgen` are visible as native skills. + +### 2 — Search the catalog (NOT in context) +> Search the skill catalog for a skill that helps work with VASP, pymatgen, or DFT materials data. List what you find and which are installable from the catalog. 
+ +*Expect:* `search_skills` is called; catalog hits are tagged `[catalog · install_skill to add]`; `pymatgen` ranks at/near the top. + +### 3 — Install from the catalog +> Install the most relevant materials/DFT skill you found from the catalog into this project. + +*Expect:* `install_skill(skill_name="pymatgen")`; reply notes it'll be native after the next start. (Already installed if you didn't rebuild — it should say "updated".) + +### 4 — Add a second catalog source via MCP +> Enable the "anthropic" skill source so we also have the official Anthropic skills available, then tell me how many skills that added to the catalog. + +*Expect:* `add_skill_source(source="anthropic")` → ~17 skills indexed; the source is written into `dsagt_config.yaml`. + +### 5 — List configured/synced sources +> List the skill sources currently configured and synced. + +*Expect:* `list_skill_sources` → `scientific` + `anthropic` known/synced. + +### 6 — Author a project skill with skill-creator +> Use the skill-creator skill to author a new project skill named "vasp-to-isaac-mock". It should: read a mock VASP calculation directory (POSCAR + INCAR + OUTCAR) under mock_data/mock_slab/, extract the final energy, atom count, and whether it's a slab relaxation (NSW > 0), and emit a small ISAAC-style JSON record. Use mock_data/expected_isaac_record.json as the shape to target. Save it with save_skill. + +*Expect:* the agent reads skill-creator's template + spec, then `save_skill` writes `<project>/skills/vasp-to-isaac-mock/`. + +### 7 — Run the new skill on the mock data +> Invoke the vasp-to-isaac-mock workflow on mock_data/mock_slab/ and write the result to audit/mock_slab_isaac.json. Then diff its structure against mock_data/expected_isaac_record.json and report any missing fields. + +*Expect:* a produced `audit/mock_slab_isaac.json` with the key fields (final energy ≈ -132.8421 eV, 12 atoms, slab/NSW=50). Compare to the reference.
+ +### 8 — Inspect both tiers (run in a shell, not the agent) +```bash +dsagt skills list isaac-skills-demo +dsagt skills list isaac-skills-demo --catalog +ls ~/dsagt-projects/isaac-skills-demo/.claude/skills/ +cat ~/dsagt-projects/isaac-skills-demo/.claude/skills/.dsagt-managed.json +``` + +*Expect:* installed list includes `pymatgen` + `vasp-to-isaac-mock`; catalog lists both `skills_catalog__*` collections; the manifest tracks exactly the dsagt-placed skills (your hand-authored `.claude/skills/` entries, if any, are untouched). + +--- + +## First-pass results (already verified, no agent needed) + +| Step | Result | +|---|---| +| `dsagt init` | ✅ `.claude/skills/` mirror fired at init: `skill-creator` (+ refs) + `datacard-generator`; manifest correct; config `skills:` block present | +| `dsagt skills sync` | ✅ real clone of K-Dense, **146 skills** indexed into `skills_catalog__k-dense-ai-scientific-agent-skills` (~19s, local embeddings) | +| `dsagt skills list --catalog` | ✅ shows the catalog collection | +| `dsagt skills search "VASP pymatgen DFT materials"` | ✅ `pymatgen` top catalog hit; tiers tagged `[bundled]` / `[catalog:…]` | +| `dsagt skills add … pymatgen` | ✅ installed into `skills/pymatgen/` **with** `scripts/` + `references/` | +| start-equivalent re-mirror | ✅ `pymatgen` now in `.claude/skills/` + manifest | +| `dsagt skills add … anthropic` | ✅ second source cloned + **17 skills** indexed; source persisted to config | + +**Caveat to know:** with the default **local** embedding backend (`bge-small`), absolute search scores are low (~0.03) because short queries under-score long SKILL.md texts — *ranking* is still correct (pymatgen #1). An API embedding model scores higher. Set `EMBEDDING_*` / switch `embedding.backend` to `api` for sharper relevance. 
+ +**Fix applied during the first pass:** the CLI `dsagt skills add <source>` path now also persists the source to `dsagt_config.yaml` (previously only the MCP `add_skill_source` tool did), so a later config-driven `dsagt skills sync` re-syncs it. Regression test added. From a07109199f36aba002f0c53c24113e00880593b7 Mon Sep 17 00:00:00 2001 From: aarontuor Date: Fri, 12 Jun 2026 12:24:27 -0700 Subject: [PATCH 3/6] docs(skills): demo rebuild block must sync the catalog The isaac_skills_demo rebuild path (rm + init) left the external catalog empty because a fresh `init` copies only the shared KB, while the catalog is project-scoped. The agent's search_skills then correctly returned no catalog hits. Add the required `dsagt skills sync` step to the rebuild block, a pre-launch catalog check, and a note on the global setup-kb alternative. Co-Authored-By: Claude Opus 4.8 (1M context) --- use_cases/isaac_skills_demo/PROMPTS.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/use_cases/isaac_skills_demo/PROMPTS.md b/use_cases/isaac_skills_demo/PROMPTS.md index b154be0..cf29800 100644 --- a/use_cases/isaac_skills_demo/PROMPTS.md +++ b/use_cases/isaac_skills_demo/PROMPTS.md @@ -13,12 +13,26 @@ The project `isaac-skills-demo` is already set up from the first pass: - `pymatgen` already installed and mirrored into `.claude/skills/`, - `mock_data/` copied into the project.
-To start fresh instead, delete and rebuild: +To start fresh instead, delete and rebuild — **including the catalog sync** +(a fresh `init` only copies the *shared* KB; the external catalog is +project-scoped, so it must be synced after init or the catalog will be empty +and prompt 2 finds nothing): ```bash dsagt rm isaac-skills-demo -y dsagt init isaac-skills-demo --agent claude cp -r use_cases/isaac_skills_demo/mock_data ~/dsagt-projects/isaac-skills-demo/mock_data +dsagt skills sync isaac-skills-demo # REQUIRED: populate the catalog (146 skills) +``` + +> Alternatively, run the global `dsagt setup-kb` once (it syncs the default +> catalog into the *shared* KB, which every new `init` then copies in). The +> per-project `dsagt skills sync` above is the lighter, self-contained path. + +Confirm the catalog is present before launching: + +```bash +dsagt skills list isaac-skills-demo --catalog # expect a skills_catalog__* collection ``` Launch the agent: From 6c8854de479d0e71cd40e17e7c359d036be63b29 Mon Sep 17 00:00:00 2001 From: aarontuor Date: Tue, 23 Jun 2026 13:40:56 -0700 Subject: [PATCH 4/6] feat(skills): clarify catalog discovery and install signals An empty/unsynced external skill catalog was indistinguishable from a genuine no-match, forcing the agent through a multi-step discovery dance and a misleading "skill unusable until restart" message. 
- search_skills: when no catalog is synced, say so and point at list_skill_sources / add_skill_source instead of a bare no-match - list_skill_sources: flag each known source synced/available with its indexed count, rather than two parallel lists to cross-reference - install_skill: state the skill is usable this session immediately; restart only enables hands-free native auto-invocation - dsagt_instructions: document the catalog as opt-in and the list_skill_sources -> add_skill_source -> search_skills -> install flow Co-Authored-By: Claude Opus 4.8 (1M context) --- src/dsagt/commands/knowledge_server.py | 57 ++++++++++++++++++++++---- src/dsagt/commands/registry_server.py | 27 +++++++++--- src/dsagt/dsagt_instructions.md | 11 ++++- tests/test_knowledge_server.py | 11 +++-- tests/test_registry_server.py | 10 +++++ 5 files changed, 96 insertions(+), 20 deletions(-) diff --git a/src/dsagt/commands/knowledge_server.py b/src/dsagt/commands/knowledge_server.py index 92643b6..2bae002 100644 --- a/src/dsagt/commands/knowledge_server.py +++ b/src/dsagt/commands/knowledge_server.py @@ -607,18 +607,57 @@ async def _handle_add_skill_source( async def _handle_list_skill_sources(arguments: dict, *, kb: KnowledgeBase) -> dict: - """List known + synced skill sources and their indexed counts.""" - from dsagt.commands.skills_catalog import KNOWN_SOURCES - from dsagt.registry import CATALOG_COLLECTION_PREFIX + """List known skill sources, each flagged synced/available with its count. + + A source is ``synced`` (searchable via ``search_skills``) only after an + ``add_skill_source`` call has cloned + indexed it; otherwise it is + ``available`` (known name + URL, nothing indexed yet). Reporting the + flag + ``indexed`` count inline means the agent doesn't have to cross- + reference a separate ``synced_collections`` list to tell the difference. 
+ """ + import json + + from dsagt.commands.skills_catalog import KNOWN_SOURCES, _repo_slug + from dsagt.registry import CATALOG_COLLECTION_PREFIX, catalog_collection synced = {c for c in kb.collections if c.startswith(CATALOG_COLLECTION_PREFIX)} + + def _indexed_count(collection: str) -> int: + ids = Path(kb.index_dir) / collection / "chroma_ids.json" + try: + return len(json.loads(ids.read_text())) + except (FileNotFoundError, ValueError): + return 0 + + sources = {} + for name, s in KNOWN_SOURCES.items(): + coll = catalog_collection(_repo_slug(s["url"])) + is_synced = coll in synced + sources[name] = { + "url": s["url"], + "description": s.get("description", ""), + "synced": is_synced, + "indexed": _indexed_count(coll) if is_synced else 0, + } + + # Surface any synced catalog whose source isn't in KNOWN_SOURCES (added + # by raw GitHub URL) so the count is never silently dropped. + known_colls = { + catalog_collection(_repo_slug(s["url"])) for s in KNOWN_SOURCES.values() + } + extra = sorted(synced - known_colls) + + any_synced = any(v["synced"] for v in sources.values()) or bool(extra) return { - "known_sources": { - name: {"url": s["url"], "description": s.get("description", "")} - for name, s in KNOWN_SOURCES.items() - }, - "synced_collections": sorted(synced), - "note": "add_skill_source to enable; search_skills to browse.", + "sources": sources, + "other_synced_collections": extra, + "note": ( + "add_skill_source to sync a source whose synced=false; " + "then search_skills to browse. search_skills only sees synced sources." + if any_synced + else "No catalog synced yet — add_skill_source " + "(e.g. 'scientific') to enable one, then search_skills to browse." 
+ ), } diff --git a/src/dsagt/commands/registry_server.py b/src/dsagt/commands/registry_server.py index 3d7fd80..5a57046 100644 --- a/src/dsagt/commands/registry_server.py +++ b/src/dsagt/commands/registry_server.py @@ -333,9 +333,10 @@ async def _handle_search_skills( # ``skills_catalog__`` collection, then merge by score. Installed # skills are also natively discovered by the agent; the catalog is the # part native discovery can't do (it isn't loaded into context). - collections = [SKILLS_COLLECTION] + [ + catalog_collections = [ c for c in kb.collections if c.startswith(CATALOG_COLLECTION_PREFIX) ] + collections = [SKILLS_COLLECTION] + catalog_collections fetch_k = top_k * 3 if tag else top_k results: list[dict] = [] for coll in collections: @@ -354,6 +355,14 @@ async def _handle_search_skills( results.sort(key=lambda r: r.get("score", 0), reverse=True) results = results[:top_k] if not results: + if not catalog_collections: + return ( + "No skills found matching the query. Note: no external skill " + "catalog is synced yet, so only installed skills were searched. " + "Call list_skill_sources() to see available sources, then " + "add_skill_source(source=...) (e.g. 'scientific' for " + "materials/chem/bio) to sync one before searching again." + ) return "No skills found matching the query." summaries = [] @@ -386,8 +395,10 @@ async def _handle_install_skill( ) -> str: """Install a catalog skill into ``/skills//``. - The skill becomes natively discoverable after the next ``dsagt start`` - (which mirrors installed skills into ``.claude/skills/`` before launch). + The skill's files land on disk immediately, so the agent can use it in the + current session by reading its SKILL.md. *Native* auto-invocation requires + the next ``dsagt start`` (which mirrors installed skills into + ``.claude/skills/`` before launch) plus an agent restart. 
""" from dsagt.commands.skills_catalog import install_into_project @@ -409,9 +420,13 @@ async def _handle_install_skill( return ( f"{info['action'].capitalize()} skill '{info['name']}' at " - f"{info['dest_dir']}.\n\nIt will be available to the agent natively " - f"(.claude/skills/) on the next `dsagt start`; restart the agent to " - f"pick it up." + f"{info['dest_dir']}.\n\n" + f"Usable now — its SKILL.md and any scripts/references are already on " + f"disk in this project. To use it this session, read " + f"{info['dest_dir']}/SKILL.md and follow it; you don't need to restart.\n" + f"Restart is only for hands-free auto-invocation: the next `dsagt start` " + f"mirrors it into the platform's native skill dir (.claude/skills/), and " + f"after relaunch the agent discovers and auto-invokes it without this tool." ) diff --git a/src/dsagt/dsagt_instructions.md b/src/dsagt/dsagt_instructions.md index 385c762..cebfcdd 100644 --- a/src/dsagt/dsagt_instructions.md +++ b/src/dsagt/dsagt_instructions.md @@ -25,7 +25,16 @@ Before implementing anything, search for existing capabilities: - `search_skills(query)` — find agent skills (workflows, templates, procedures) - `get_registry()` — list all registered tools -**Skills come in two tiers.** *Installed* skills (in this project) are discovered **natively** by your platform — their names/descriptions are already in your context and you auto-invoke them; you do NOT need `search_skills` to find those. Use `search_skills` to browse the much larger *external catalog* of installable skills (entries marked `[catalog]`), which are NOT loaded into context. To add a catalog skill to the project, call `install_skill(skill_name=...)`; it becomes natively available after the next session restart. To enable another catalog source (a known name like `scientific`/`anthropic`, or a GitHub URL), call `add_skill_source(source=...)`. To author a brand-new skill, use the bundled `skill-creator` skill. 
+**Skills come in two tiers.** *Installed* skills (in this project) are discovered **natively** by your platform — their names/descriptions are already in your context and you auto-invoke them; you do NOT need `search_skills` to find those. Separately there is a much larger *external catalog* of installable skills (entries marked `[catalog]`), NOT loaded into context. + +**The external catalog is opt-in and starts empty — sources must be synced before `search_skills` can see them.** A blank/weak `search_skills` result usually means the relevant source isn't synced yet, NOT that no such skill exists. So before concluding the catalog has nothing, call `list_skill_sources()` — it reports each known source with its `synced` flag and `indexed` count. The flow: + +1. `list_skill_sources()` — see which sources are already synced vs only `available` (known name + URL, not yet indexed). For materials/chem/bio/DFT skills, the `scientific` source (K-Dense) is the one to enable. +2. `add_skill_source(source=...)` — sync a source (a known name like `scientific`/`anthropic`, or a GitHub URL). Read-only indexing step; nothing is installed into the project. Only needed for sources whose `synced` is false. +3. `search_skills(query)` — now browse the synced catalog. Entries marked `[catalog]` are installable. +4. `install_skill(skill_name=...)` — copy a catalog skill into the project. Its SKILL.md + scripts land on disk immediately, so you can **use it this session** by reading `skills/<name>/SKILL.md` and following it. A restart (next `dsagt start`) is only needed for hands-free *native* auto-invocation, not for use. + +To author a brand-new skill instead of installing one, use the bundled `skill-creator` skill. **When the user indicates they want a specific tool used** — phrasings like "use tool `foo`", "use `foo` from the registry", "run `foo`", or similar — look it up first (`search_registry(tool_name=...)` for exact match, `get_registry()` to browse).
Read the returned spec's `executable` field and each parameter's `cli` field, then invoke via your shell. Do not substitute your own file/shell tools for a task a registered tool can do. (See section 1b for the verbatim-`executable` rule.) diff --git a/tests/test_knowledge_server.py b/tests/test_knowledge_server.py index a05c35f..49c4cba 100644 --- a/tests/test_knowledge_server.py +++ b/tests/test_knowledge_server.py @@ -13,8 +13,7 @@ import asyncio import json import time -from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock import pytest import mcp.types as types @@ -117,8 +116,12 @@ def test_list_skill_sources_returns_known(self, mock_kb): mock_kb.collections = [] server = create_knowledge_server(mock_kb) result = call_tool(server, "list_skill_sources", {}) - assert "scientific" in result["known_sources"] - assert result["synced_collections"] == [] + assert "scientific" in result["sources"] + # Nothing synced → every known source flagged available, not synced. 
+ assert result["sources"]["scientific"]["synced"] is False + assert result["sources"]["scientific"]["indexed"] == 0 + assert result["other_synced_collections"] == [] + assert "scientific" in result["note"] def test_add_skill_source_bad_source_errors(self, mock_kb): mock_kb.collections = [] diff --git a/tests/test_registry_server.py b/tests/test_registry_server.py index 81c5eb2..206ff46 100644 --- a/tests/test_registry_server.py +++ b/tests/test_registry_server.py @@ -624,6 +624,16 @@ def test_save_tool_indexes_into_kb(self, tmp_path): assert len(results) > 0 assert any("csv_filter" in r["chunk"].get("text", "") for r in results) + def test_search_skills_empty_catalog_hints_to_sync(self, tmp_path): + """With no catalog synced, search_skills explains how to enable one + instead of returning a bare 'no match' the agent reads as exhausted.""" + server, reg, kb = _make_server_with_kb(tmp_path) + + text = call_tool(server, "search_skills", {"query": "vasp pymatgen dft"}) + assert "No skills found" in text + assert "no external skill catalog is synced" in text.lower() + assert "add_skill_source" in text + def test_search_registry_by_name(self, tmp_path): """Exact tool_name lookup returns the tool.""" server, reg, kb = _make_server_with_kb(tmp_path) From 0aea670d88486f2c378b19721ca53b0e7cdd9060 Mon Sep 17 00:00:00 2001 From: aarontuor Date: Tue, 23 Jun 2026 13:41:09 -0700 Subject: [PATCH 5/6] chore(release): 0.2.0 with single-sourced version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bump to 0.2.0 and make dsagt.__version__ the single source of truth — pyproject reads it via setuptools dynamic metadata, so future bumps touch one line. Add CHANGELOG (Keep a Changelog format). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 49 +++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 5 ++++- src/dsagt/__init__.py | 4 +++- 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..92eb9e2 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,49 @@ +# Changelog + +All notable changes to DSAgt are documented here. The format is based on +[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project +adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.2.0] - 2026-06-23 + +### Added +- External skill catalogs: discover and install agent skills from GitHub + sources via `add_skill_source`, `search_skills`, and `install_skill` (plus + the `dsagt skills sync/add/list/search` CLI), backed by per-source ChromaDB + collections. +- Native skill discovery: installed and bundled skills are mirrored into the + agent's native skill directory (e.g. `.claude/skills/`) at init/start. +- `skill-creator` bundled skill for authoring new skills from the Anthropic + template. +- Install-from-GitHub instructions for non-developers (`pip install + git+https://github.com/AI-ModCon/dsagt.git` into any Python 3.12/3.13 + environment) in the README and docs. + +### Changed +- `search_skills` now reports when no external catalog is synced instead of a + bare "no match", and `list_skill_sources` flags each known source as + `synced`/available with its indexed count. +- `install_skill` clarifies that an installed skill is usable in the current + session immediately — a restart is only needed for hands-free native + auto-invocation. +- The package version is single-sourced from `dsagt.__version__` (pyproject + reads it via setuptools dynamic metadata). 
+- Documentation home page (`docs/index.md`) pulls the supported-agents table + and install instructions directly from the README via the + `mkdocs-include-markdown` plugin, so the two no longer drift. + +### Fixed +- CLI-added skill sources are now persisted to the project config. + +## [0.1.0] - 2026-01-11 + +### Added +- Initial release: registry and knowledge MCP servers, BYOA per-agent config + generation, MLflow/OTel observability, the tool/skill registry, execution + provenance, and explicit + episodic memory. + +[Unreleased]: https://github.com/AI-ModCon/dsagt/compare/v0.2.0...HEAD +[0.2.0]: https://github.com/AI-ModCon/dsagt/releases/tag/v0.2.0 +[0.1.0]: https://github.com/AI-ModCon/dsagt/releases/tag/v0.1.0 diff --git a/pyproject.toml b/pyproject.toml index bcd0c77..0c9d805 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dsagt" -version = "0.1.0" +dynamic = ["version"] description = "DataSmith Agent - AI-assisted data pipeline builder" readme = "README.md" requires-python = ">=3.12,<3.14" @@ -81,6 +81,9 @@ build-backend = "setuptools.build_meta" [tool.setuptools] package-dir = {"" = "src"} +[tool.setuptools.dynamic] +version = {attr = "dsagt.__version__"} + [tool.setuptools.packages.find] where = ["src"] diff --git a/src/dsagt/__init__.py b/src/dsagt/__init__.py index 5ad0529..8337be1 100644 --- a/src/dsagt/__init__.py +++ b/src/dsagt/__init__.py @@ -4,7 +4,9 @@ AI-assisted data pipeline builder for MCP-compatible agents. """ -__version__ = "0.1.0" +# Single source of truth for the package version: pyproject.toml reads this +# via `[tool.setuptools.dynamic] version = {attr = "dsagt.__version__"}`. +__version__ = "0.2.0" # Cap CPU thread count for embedding / tokenization libraries before any # heavy imports happen. 
Without this, PyTorch / sentence-transformers / From e36f18dd7276af3dc964e4ac6c6dd66b45f164e8 Mon Sep 17 00:00:00 2001 From: aarontuor Date: Tue, 23 Jun 2026 13:41:25 -0700 Subject: [PATCH 6/6] docs: add pip-from-github install, sync README/docs, build on uv - README/docs: document the non-developer install (pip install git+https://github.com/AI-ModCon/dsagt.git into any 3.12/3.13 env); note uv is dev/CI-only and conda/venv both work - de-duplicate the supported-agents table and install block via mkdocs-include-markdown so docs/index.md pulls them from the README - correct the Python prerequisite to 3.12/3.13 - cli.md: drop the uv-sync-specific install assumption - docs CI builds with the locked docs dependency group via uv Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/docs.yml | 9 ++++++--- README.md | 41 +++++++++++++++++++++++++++++++++++++- docs/cli.md | 2 +- docs/index.md | 31 ++++++++++++++++++---------- mkdocs.yml | 4 ++++ pyproject.toml | 1 + 6 files changed, 73 insertions(+), 15 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0cae487..9ed5cb7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -21,12 +21,15 @@ jobs: with: python-version: "3.12" + - name: Install uv + uses: astral-sh/setup-uv@v5 + - name: Install docs dependencies - run: pip install mkdocs-material + run: uv sync --group docs - name: Build docs - run: mkdocs build --strict + run: uv run mkdocs build --strict - name: Deploy to GitHub Pages if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request' - run: mkdocs gh-deploy --force + run: uv run mkdocs gh-deploy --force diff --git a/README.md b/README.md index de1bc88..5f0b8e7 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,9 @@ DSAgt connects an MCP-compatible AI coding agent to tool registration, a semantic knowledge base, execution provenance, and observability infrastructure. 
DSAgt provides data-pipeline scaffolding around a user's existing agent CLI or VS Code extension (Claude Code, Goose, Codex, …); -**Prerequisites:** Python 3.10–3.13, [uv](https://github.com/astral-sh/uv), and one of the supported agent platforms below — already installed and authenticated against whatever LLM provider you intend to use. +**Prerequisites:** Python 3.12 or 3.13, and one of the supported agent platforms below — already installed and authenticated against whatever LLM provider you intend to use. ([uv](https://github.com/astral-sh/uv) is only needed for the development install.) + | Agent | Install | Verify | |-------|---------|--------| | [Claude Code](https://github.com/anthropics/claude-code) | `npm i -g @anthropic-ai/claude-code` | `claude --version` | @@ -16,6 +17,44 @@ DSAgt connects an MCP-compatible AI coding agent to tool registration, a semanti | [opencode](https://github.com/sst/opencode) | See [opencode docs](https://opencode.ai/docs/) | `opencode --version` | | [Roo Code](https://github.com/RooCodeInc/Roo-Code) | `npm i -g @roo-code/cli` | `roo --version` | | [Cline](https://github.com/cline/cline) | `npm i -g cline` | `cline --version` | + + +## Installation + +### For use (no development) + + +If you just want to *run* DSAgt against your own data and agent — no repo checkout, no `uv` — install it straight from GitHub into a virtual environment. Any Python 3.12/3.13 environment works (`venv`, conda, etc.) — which environment manager you use is up to you; the `pip install git+…` step itself is the only part that is officially supported. + +```bash +python3.12 -m venv ~/.venvs/dsagt # or: conda create -n dsagt python=3.12 && conda activate dsagt +source ~/.venvs/dsagt/bin/activate # (Windows venv: ~\.venvs\dsagt\Scripts\activate) +pip install "git+https://github.com/AI-ModCon/dsagt.git" +dsagt --version # 0.2.0 +``` + +This puts the `dsagt` CLI (and the `dsagt-run` / `dsagt-*-server` helpers) on your PATH.
Then build the shared knowledge base once and create your first project: + +```bash +dsagt setup-kb # bundled tools + skills + reference corpora + # (downloads a ~130 MB local embedder on first run) +dsagt init my-project --agent claude # or: goose / codex / opencode / roo / cline +dsagt start my-project +``` + +To upgrade later, reinstall and re-run `setup-kb` to pick up new bundled tools/skills: + +```bash +pip install --upgrade "git+https://github.com/AI-ModCon/dsagt.git" +dsagt setup-kb +``` + +> Pin to a specific release once tags are published, e.g. `pip install "git+https://github.com/AI-ModCon/dsagt.git@v0.2.0"`. + + +### For development + +Clone the repo and use `uv` (editable install with the full test suite) — see [Quick Start](#quick-start) below. ## Quick Start diff --git a/docs/cli.md b/docs/cli.md index d4b24e4..1866a7e 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1,6 +1,6 @@ # CLI Reference -All commands are available after running `uv sync` and activating the virtual environment (`source .venv/bin/activate`). +All commands are available after [installation](index.md#installation) and activating your virtual environment. 
## Project Management diff --git a/docs/index.md b/docs/index.md index ff5eaa6..80a3fca 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,23 +6,34 @@ DSAgt connects an MCP-compatible AI coding agent to tool registration, a semanti ## Supported Agents -| Agent | Install | Verify | -|-------|---------|--------| -| [Claude Code](https://github.com/anthropics/claude-code) | `npm i -g @anthropic-ai/claude-code` | `claude --version` | -| [Goose](https://github.com/block/goose) | See [Goose docs](https://github.com/block/goose#installation) | `goose --version` | -| [Codex](https://github.com/openai/codex) | `npm i -g @openai/codex` | `codex --version` | -| [opencode](https://github.com/sst/opencode) | See [opencode docs](https://opencode.ai/docs/) | `opencode --version` | -| [Roo Code](https://github.com/RooCodeInc/Roo-Code) | `npm i -g @roo-code/cli` | `roo --version` | -| [Cline](https://github.com/cline/cline) | `npm i -g cline` | `cline --version` | + +{% + include-markdown "../README.md" + start="" + end="" +%} ## Prerequisites -- Python 3.12–3.13 -- [uv](https://github.com/astral-sh/uv) +- Python 3.12 or 3.13 - One of the supported agent platforms above, installed and authenticated against your LLM provider +- [uv](https://github.com/astral-sh/uv) — only for the development install ## Installation +### For use (no development) + + +{% + include-markdown "../README.md" + start="" + end="" +%} + +### For development + +Clone the repo and use `uv` (editable install; add `--all-groups` for the test suite): + ```bash git clone https://github.com/AI-ModCon/dsagt.git cd dsagt diff --git a/mkdocs.yml b/mkdocs.yml index 5fd83a0..dda4d9d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -31,6 +31,10 @@ theme: - content.code.copy - content.code.annotate +plugins: + - search + - include-markdown + markdown_extensions: - admonition - pymdownx.details diff --git a/pyproject.toml b/pyproject.toml index 0c9d805..c0e68d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 
@@ dev = [ ] docs = [ "mkdocs-material>=9.5", + "mkdocs-include-markdown-plugin>=6.0", ] [build-system]