Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion hermes_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import logging
import random
import re
import shutil
import sqlite3
import threading
import time
Expand Down Expand Up @@ -71,6 +72,7 @@
# filesystem-incompat warning on every connection, filling errors.log.
_wal_fallback_warned_paths: set[str] = set()
_wal_fallback_warned_lock = threading.Lock()
_SESSION_ARTIFACT_SEGMENT_PATTERN = re.compile(r"[^A-Za-z0-9._-]+")

_FTS_TRIGGERS = (
"messages_fts_insert",
Expand Down Expand Up @@ -99,6 +101,12 @@ def _set_last_init_error(msg: Optional[str]) -> None:
_last_init_error = msg


def _safe_session_artifact_segment(session_id: str) -> str:
value = _SESSION_ARTIFACT_SEGMENT_PATTERN.sub("_", str(session_id or "").strip())
value = value.strip("._-")[:80]
return value or "session"


def get_last_init_error() -> Optional[str]:
"""Return the most recent state.db init failure, if any.

Expand Down Expand Up @@ -3368,7 +3376,9 @@ def _remove_session_files(sessions_dir: Optional[Path], session_id: str) -> None
"""Remove on-disk transcript files for a session.

Cleans up ``{session_id}.json``, ``{session_id}.jsonl``, and any
``request_dump_{session_id}_*.json`` files left by the gateway.
``request_dump_{session_id}_*.json`` files left by the gateway, plus
the controlled ``{session_id}.artifacts`` directory used for report
artifacts.
Silently skips files that don't exist and swallows OSError so a
filesystem hiccup never blocks a DB operation.
"""
Expand All @@ -3389,6 +3399,14 @@ def _remove_session_files(sessions_dir: Optional[Path], session_id: str) -> None
pass
except OSError:
pass
artifacts_dir = sessions_dir / f"{_safe_session_artifact_segment(session_id)}.artifacts"
try:
if artifacts_dir.is_symlink() or artifacts_dir.is_file():
artifacts_dir.unlink(missing_ok=True)
elif artifacts_dir.exists():
shutil.rmtree(artifacts_dir)
except OSError:
pass

def delete_session(
self,
Expand Down
36 changes: 35 additions & 1 deletion runtime_manager/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

try:
from fastapi import FastAPI, Header, HTTPException, Query
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from pydantic import BaseModel, ConfigDict, Field
except ImportError as exc: # pragma: no cover - runtime dependency
raise SystemExit(
Expand Down Expand Up @@ -210,6 +210,40 @@ async def delete_session(
except RuntimeError as exc:
raise HTTPException(status_code=409, detail=str(exc)) from exc

@app.get("/agent/sessions/{session_id}/artifacts/{artifact_id}")
async def get_artifact(
session_id: str,
artifact_id: str,
user_id: str = Query(...),
run_id: str = Query(...),
authorization: str | None = Header(default=None),
):
state = app.state.runtime_manager
await _authorize(state, authorization)
try:
path, metadata = state.manager.get_artifact(
user_id=user_id,
session_id=session_id,
run_id=run_id,
artifact_id=artifact_id,
)
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
except FileNotFoundError as exc:
raise HTTPException(status_code=404, detail=str(exc)) from exc
return FileResponse(
path,
media_type=metadata.get("mimeType") or "application/octet-stream",
filename=metadata.get("fileName") or "artifact",
content_disposition_type="attachment",
headers={
"X-Hermes-Artifact-Id": str(metadata.get("artifactId") or ""),
"X-Hermes-Artifact-Sha256": str(metadata.get("sha256") or ""),
"X-Hermes-Artifact-Kind": str(metadata.get("kind") or ""),
"X-Hermes-Artifact-Source": str(metadata.get("source") or ""),
},
)

return app


Expand Down
161 changes: 161 additions & 0 deletions runtime_manager/artifacts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
from __future__ import annotations

import hashlib
import mimetypes
import re
from pathlib import Path
from typing import Any

_SEGMENT_PATTERN = re.compile(r"[^A-Za-z0-9._-]+")
_BROWSABLE_MIME_PREFIXES = ("text/",)
_BROWSABLE_MIME_TYPES = {
"application/pdf",
}


def artifact_dir_for(user_home: Path, session_id: str, run_id: str) -> Path:
session_segment = safe_artifact_segment(session_id, fallback="session")
run_segment = safe_artifact_segment(run_id, fallback="run")
return user_home / "sessions" / f"{session_segment}.artifacts" / run_segment


def safe_artifact_segment(value: Any, *, fallback: str) -> str:
text = str(value or "").strip()
text = _SEGMENT_PATTERN.sub("_", text)
text = text.strip("._-")[:80]
return text or fallback


def metadata_for_artifact_file(
path: Path,
artifact_root: Path,
*,
source: str = "tool",
kind: str = "report",
artifact_id: str | None = None,
summary: str | None = None,
) -> dict[str, Any]:
root = artifact_root.resolve()
file_path = path.resolve()
file_path.relative_to(root)
if not file_path.is_file():
raise FileNotFoundError(str(path))

size_bytes, digest = file_digest(file_path)
file_name = file_path.name
mime_type = infer_mime_type(file_path)
resolved_artifact_id = artifact_id or artifact_id_for_file(file_path, artifact_root, digest)
return {
"artifactId": resolved_artifact_id,
"fileName": file_name,
"sizeBytes": size_bytes,
"mimeType": mime_type,
"sha256": digest,
"kind": kind,
"source": source,
"summary": summary or f"Generated report file: {file_name}",
"canDownload": True,
"canBrowse": can_browse_mime_type(mime_type),
}


def file_digest(path: Path) -> tuple[int, str]:
digest = hashlib.sha256()
size_bytes = 0
with path.open("rb") as handle:
while True:
chunk = handle.read(1024 * 1024)
if not chunk:
break
size_bytes += len(chunk)
digest.update(chunk)
return size_bytes, digest.hexdigest()


def artifact_id_for_file(path: Path, artifact_root: Path, sha256: str) -> str:
try:
relative = path.resolve().relative_to(artifact_root.resolve())
except ValueError:
relative = path.name
stem = Path(str(relative)).stem or Path(str(relative)).name
name_segment = safe_artifact_segment(stem, fallback="artifact")[:48]
return f"{name_segment}-{sha256[:16]}"


def infer_mime_type(path: Path) -> str:
guessed, _ = mimetypes.guess_type(str(path))
if guessed == "text/html":
return "text/html; charset=utf-8"
if guessed and guessed.startswith("text/"):
return f"{guessed}; charset=utf-8"
if guessed:
return guessed
return "text/plain; charset=utf-8"


def can_browse_mime_type(mime_type: str) -> bool:
normalized = str(mime_type or "").split(";", 1)[0].strip().lower()
if not normalized:
return False
return normalized in _BROWSABLE_MIME_TYPES or normalized.startswith(_BROWSABLE_MIME_PREFIXES)


def discover_artifacts(
artifact_root: Path,
*,
seen_artifact_ids: set[str] | None = None,
max_files: int = 50,
) -> list[dict[str, Any]]:
try:
root = artifact_root.resolve()
except Exception:
return []
if not root.is_dir():
return []

seen = seen_artifact_ids if seen_artifact_ids is not None else set()
artifacts: list[dict[str, Any]] = []
try:
candidates = sorted(root.rglob("*"))
except OSError:
return []
for path in candidates:
if len(artifacts) >= max_files:
break
try:
metadata = metadata_for_artifact_file(path, root)
except (FileNotFoundError, OSError, ValueError):
continue
artifact_id = metadata["artifactId"]
if artifact_id in seen:
continue
seen.add(artifact_id)
artifacts.append(metadata)
return artifacts


def find_artifact_file(
artifact_root: Path,
artifact_id: str,
) -> tuple[Path, dict[str, Any]] | None:
requested_id = str(artifact_id or "").strip()
if not requested_id or "/" in requested_id or "\\" in requested_id:
return None
try:
root = artifact_root.resolve()
except Exception:
return None
if not root.is_dir():
return None
try:
candidates = sorted(root.rglob("*"))
except OSError:
return None
for path in candidates:
try:
metadata = metadata_for_artifact_file(path, root)
except (FileNotFoundError, OSError, ValueError):
continue
if path.stem == requested_id or metadata["artifactId"] == requested_id:
return path.resolve(), metadata
return None
48 changes: 48 additions & 0 deletions runtime_manager/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import json
import logging
import os
import re
import shutil
import sys
import time
import uuid
Expand All @@ -12,11 +14,13 @@

from hermes_state import SessionDB

from .artifacts import artifact_dir_for, find_artifact_file
from .cloud_kubeconfig import CloudKubeconfigResolver
from .profile_resolver import RuntimeProfileResolver
from .registry import RunHandle, RunRegistry

logger = logging.getLogger(__name__)
_SESSION_ARTIFACT_SEGMENT_PATTERN = re.compile(r"[^A-Za-z0-9._-]+")

class RuntimeManager:
def __init__(
Expand Down Expand Up @@ -109,6 +113,9 @@ async def start_run(self, payload: dict[str, Any]) -> RunHandle:

try:
proc_env = resolved.worker_env.copy()
artifact_dir = artifact_dir_for(resolved.user_home, resolved.session_id, run_id)
artifact_dir.mkdir(parents=True, mode=0o700, exist_ok=True)
proc_env["HERMES_ARTIFACT_DIR"] = str(artifact_dir)
proc = await asyncio.create_subprocess_exec(
self.python_executable,
str(self.worker_script),
Expand Down Expand Up @@ -146,6 +153,7 @@ async def start_run(self, payload: dict[str, Any]) -> RunHandle:
"skip_context_files": bool(payload.get("skip_context_files", True)),
"max_iterations": resolved.max_iterations,
"metadata": payload.get("metadata") or {},
"artifact_dir": str(artifact_dir),
}
assert proc.stdin is not None
proc.stdin.write((json.dumps(worker_request, ensure_ascii=False) + "\n").encode("utf-8"))
Expand Down Expand Up @@ -268,6 +276,30 @@ async def delete_session(self, *, user_id: str, session_id: str) -> dict[str, An
"deleted": bool(deleted_from_db or deleted_files or removed_runs),
}

def get_artifact(
self,
*,
user_id: str,
session_id: str,
run_id: str,
artifact_id: str,
) -> tuple[Path, dict[str, Any]]:
user_id = self.resolver.validate_user_id(user_id)
session_id = str(session_id or "").strip()
run_id = str(run_id or "").strip()
if not session_id:
raise ValueError("session_id is required")
if not run_id:
raise ValueError("run_id is required")
if not artifact_id:
raise ValueError("artifact_id is required")

user_home = self.resolver.resolve(user_id, create=False)
found = find_artifact_file(artifact_dir_for(user_home, session_id, run_id), artifact_id)
if found is None:
raise FileNotFoundError("artifact not found")
return found

async def _pump_stdout(self, handle: RunHandle) -> None:
proc = handle.process
assert proc is not None and proc.stdout is not None
Expand Down Expand Up @@ -401,4 +433,20 @@ def _remove_session_files(sessions_dir: Path, session_id: str) -> bool:
removed = True
except OSError:
pass
artifacts_dir = sessions_dir / f"{_safe_session_artifact_segment(session_id)}.artifacts"
if artifacts_dir.exists() or artifacts_dir.is_symlink():
try:
if artifacts_dir.is_symlink() or artifacts_dir.is_file():
artifacts_dir.unlink()
else:
shutil.rmtree(artifacts_dir)
removed = True
except OSError:
pass
return removed


def _safe_session_artifact_segment(session_id: str) -> str:
value = _SESSION_ARTIFACT_SEGMENT_PATTERN.sub("_", str(session_id or "").strip())
value = value.strip("._-")[:80]
return value or "session"
Loading
Loading