Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
359 changes: 299 additions & 60 deletions CONTRIBUTING.md

Large diffs are not rendered by default.

17 changes: 9 additions & 8 deletions codec_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import json
import os
import re
import secrets
import time
from dataclasses import dataclass, field
from datetime import datetime
Expand Down Expand Up @@ -84,16 +83,18 @@ def _serper_api_key() -> str:
# called outside a crew) sets _correlation_id_var; nested emits inherit it
# automatically. Using contextvars keeps the ID intact across asyncio task
# boundaries and run_in_executor calls. See design §1.4.
_correlation_id_var: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
"codec_agents_correlation_id", default=None
#
# A5 / SR-5: the canonical home for this contextvar moved to codec_audit so
# downstream readers (codec_ask_user, codec_observer, codec_triggers) can
# import it without dragging codec_agents into a cycle. Re-exported here for
# back-compat with any external importer that grabbed
# `codec_agents._correlation_id_var` directly.
from codec_audit import (
_correlation_id_var as _correlation_id_var, # noqa: F401 — re-export
_new_correlation_id as _new_correlation_id, # noqa: F401 — re-export
)


def _new_correlation_id() -> str:
"""Generate a 12-char lowercase-hex correlation_id (6 bytes)."""
return secrets.token_hex(6)


def _audit(event_type: str, **kwargs):
"""Shim over codec_audit.audit() for crew/agent runtime events.

Expand Down
17 changes: 7 additions & 10 deletions codec_ask_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,20 +333,17 @@ def _deadline_iso(timeout_seconds: int) -> str:
# ── Correlation_id discovery from contextvars (Step 1 §1.4) ───────────────────
def _current_correlation_id() -> Optional[str]:
"""Read the wrapping operation's correlation_id from whichever module's
contextvar happens to be set. ``codec_agents._correlation_id_var`` is
set by Crew.run / Agent.run; ``codec_voice._voice_correlation_id_var``
is set by VoicePipeline.run. Try both — first non-None wins.
contextvar happens to be set. The general one (Crew/Agent) and the
voice-session one both live in codec_audit (A5 / SR-5) — we import
one-way down to the foundation layer instead of reaching back into
codec_agents / codec_voice (which would create import cycles).
"""
try:
from codec_agents import _correlation_id_var as _cv1
v = _cv1.get()
from codec_audit import _correlation_id_var, _voice_correlation_id_var
v = _correlation_id_var.get()
if v:
return v
except Exception:
pass
try:
from codec_voice import _voice_correlation_id_var as _cv2
v = _cv2.get()
v = _voice_correlation_id_var.get()
if v:
return v
except Exception:
Expand Down
2 changes: 1 addition & 1 deletion codec_audit.html
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ <h3>No events found</h3>
html += `<div class="event-row ${levelClass(ev)} ${isExpanded ? 'expanded' : ''}" onclick="expandEvent(${i})">
<span class="ev-time">${formatTimestamp(ev.ts)}</span>
<span class="ev-dot dot-${cat}"></span>
<span class="ev-source">${ev.src || cat}</span>
<span class="ev-source">${escapeHtml(ev.src || cat)}</span>
<span class="ev-summary"><span class="ev-msg">${escapeHtml(ev.sum || ev.message || '--')}</span></span>
<span class="ev-chevron">&#9656;</span>
</div>
Expand Down
33 changes: 33 additions & 0 deletions codec_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,55 @@
"""
from __future__ import annotations

import contextvars
import hashlib
import hmac as _hmac
import json
import logging
import os
import re
import secrets
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

# H-3 (PR-4E): the cross-process flock primitive (stdlib-only; does NOT import
# codec_audit, so no cycle with this foundation module). Used in _write to
# serialize rotation + append across all PM2 daemons.
import codec_jsonstore

# ── Correlation ID contextvars (A5 / SR-5) ─────────────────────────────────────
# Canonical home for the cross-module correlation_id state. Previously these
# lived in codec_agents (general) and codec_voice (voice-scoped), forcing
# codec_ask_user / codec_observer / codec_triggers to import from those
# modules to read the contextvar — creating 3 of the 4 documented import
# cycles. With both vars hosted here in the foundation layer, every reader
# imports one-way down (toward codec_audit) and the cycles vanish.
#
# `_correlation_id_var` is the general 12-hex contextvar set by Crew.run /
# Agent.run / agent_runner. tool_call / tool_result / hook_fired / audit
# emit nested inside a wrapping operation inherit it.
#
# `_voice_correlation_id_var` is a parallel var set by VoicePipeline.run for
# voice-session-scoped operations. Voice sessions can long outlive any one
# crew or agent run; the separate var prevents cross-contamination.
_correlation_id_var: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
"codec_correlation_id", default=None
)
_voice_correlation_id_var: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
"codec_voice_correlation_id", default=None
)


def _new_correlation_id() -> str:
"""12-character lowercase-hex correlation_id from secrets.token_hex(6).
Mirrors the Step 1 §1.4 contract: short enough for inline logging,
enough entropy that collisions inside the rotation window are negligible.
"""
return secrets.token_hex(6)

# ── Storage ────────────────────────────────────────────────────────────────────
_AUDIT_DIR = Path(os.path.expanduser("~/.codec"))
_AUDIT_DIR.mkdir(parents=True, exist_ok=True)
Expand Down
18 changes: 12 additions & 6 deletions codec_dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -1699,7 +1699,7 @@ <h2>MENU</h2>
if (Object.keys(_promptsData).length === 0) throw new Error('No prompts found');
renderPrompts();
} catch(e) {
container.innerHTML = '<div style="color:var(--text-dim);font-size:12px;padding:8px">Failed to load prompts: ' + e.message + '. Try restarting the dashboard.</div>';
container.innerHTML = '<div style="color:var(--text-dim);font-size:12px;padding:8px">Failed to load prompts: ' + _escHtml(e.message) + '. Try restarting the dashboard.</div>';
}
}
function renderPrompts() {
Expand All @@ -1712,12 +1712,18 @@ <h2>MENU</h2>
var modClass = p.modified ? ' modified' : '';
var modBadge = p.modified ? '<span class="prompt-modified-badge">edited</span>' : '';
var charCount = (p.value || '').length;
html += '<div class="prompt-card' + modClass + '" id="prompt-' + key + '">' +
'<div class="prompt-header" onclick="togglePrompt(\'' + key + '\')">' +
// A4 / XSS hardening: prompt metadata (label/description/file) is sourced
// from ~/.codec/prompt_overrides.json. If an attacker ever lands a write to
// that file (via a /api/save_file bypass — see A1 — or a future plugin
// misstep), unescaped metadata becomes stored XSS on this settings panel.
// Escape every interpolated field; the textarea content already routes
// through _escHtml.
html += '<div class="prompt-card' + modClass + '" id="prompt-' + _escHtml(key) + '">' +
'<div class="prompt-header" onclick="togglePrompt(\'' + _escHtml(key) + '\')">' +
'<div style="flex:1;min-width:0">' +
'<div class="prompt-label">' + p.label + ' ' + modBadge + '</div>' +
'<div class="prompt-desc">' + p.description + '</div>' +
'<div class="prompt-file">' + p.file + ' — ' + charCount + ' chars</div>' +
'<div class="prompt-label">' + _escHtml(p.label || '') + ' ' + modBadge + '</div>' +
'<div class="prompt-desc">' + _escHtml(p.description || '') + '</div>' +
'<div class="prompt-file">' + _escHtml(p.file || '') + ' — ' + charCount + ' chars</div>' +
'</div>' +
'<svg class="prompt-chevron" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polyline points="6 9 12 15 18 9"/></svg>' +
'</div>' +
Expand Down
119 changes: 105 additions & 14 deletions codec_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -1947,24 +1947,108 @@ async def run_code(request: Request):
except OSError as e:
log.debug(f"Temp file cleanup failed for {_p}: {e}")

# /api/save_file safety check — mirrors PR-1C's `file_write` skill blocklist.
# A1 / SR-8: previously `~/.codec` was in the allowlist, which let any
# authenticated POST drop a malicious plugin into ~/.codec/plugins/ + add its
# hash to plugins.allowlist → RCE on next dispatch tick. The skill-side
# blocklist (skills/file_write.py:62-104) refuses all of:
# - the macOS system tree (/System, /Library, /usr, /bin, /sbin, /etc, …)
# - the entire ~/.codec/ tree (skills, plugins, oauth_state.json, audit.log,
# config.json, memory.db, agents/, plugins.allowlist, …)
# - the repo's built-in skills/ directory
# - sensitive filename patterns (.ssh, .env, credentials, id_rsa, token, …)
# - sensitive extensions (.pem, .key, .p12, .pfx, .keystore)
# This HTTP endpoint must apply the same set. Replicated inline (rather than
# importing from the skill module) so the dashboard's safety surface doesn't
# depend on skill-loader timing. Keep in sync with skills/file_write.py.
_SAVE_FILE_BLOCKED_SYSTEM_ROOTS = [
"/System", "/Library", "/usr", "/bin", "/sbin", "/etc",
"/var", "/dev", "/Volumes",
]
_SAVE_FILE_BLOCKED_FILENAME_PATTERNS = [
".ssh", ".gnupg", ".env", "credentials", "secrets", "secret",
".aws", ".gcloud", ".kube", "id_rsa", "id_ed25519", "id_dsa",
".netrc", ".npmrc", ".pypirc", "keychain", "password", "token",
"api_key", "apikey", "private_key",
]
_SAVE_FILE_BLOCKED_EXTS = [".pem", ".key", ".p12", ".pfx", ".keystore"]

def _save_file_blocked_roots():
"""Realpath-resolved blocklist. Built once on first call (module-load
avoidance — keeps dashboard import side-effect-free)."""
roots = []
for p in _SAVE_FILE_BLOCKED_SYSTEM_ROOTS:
try:
roots.append(os.path.realpath(p))
except Exception:
roots.append(p)
# CODEC's own state directory + repo's built-in skills/ tree.
roots.append(os.path.realpath(os.path.expanduser("~/.codec")))
roots.append(os.path.realpath(os.path.join(
os.path.dirname(os.path.abspath(__file__)), "skills")))
return roots


_SAVE_FILE_BLOCKED_ROOTS_CACHE = None
_SAVE_FILE_TMP_REAL = os.path.realpath("/tmp")
_SAVE_FILE_HOME_REAL = os.path.realpath(os.path.expanduser("~"))


def _save_file_is_safe(path):
"""Return (ok, reason). Mirrors skills/file_write._is_safe_target."""
global _SAVE_FILE_BLOCKED_ROOTS_CACHE
if _SAVE_FILE_BLOCKED_ROOTS_CACHE is None:
_SAVE_FILE_BLOCKED_ROOTS_CACHE = _save_file_blocked_roots()
if not path:
return False, "Empty path"
expanded = os.path.expanduser(path)
try:
real_path = os.path.realpath(expanded)
except Exception:
real_path = expanded
base_lower = os.path.basename(real_path).lower()
for pat in _SAVE_FILE_BLOCKED_FILENAME_PATTERNS:
if pat in base_lower:
return False, f"Blocked filename pattern: {pat!r}"
for ext in _SAVE_FILE_BLOCKED_EXTS:
if base_lower.endswith(ext):
return False, f"Blocked extension: {ext}"
for blocked in _SAVE_FILE_BLOCKED_ROOTS_CACHE:
if real_path == blocked or real_path.startswith(blocked + os.sep):
return False, f"Blocked path: {blocked}"
under_home = (real_path == _SAVE_FILE_HOME_REAL or
real_path.startswith(_SAVE_FILE_HOME_REAL + os.sep))
under_tmp = (real_path == _SAVE_FILE_TMP_REAL or
real_path.startswith(_SAVE_FILE_TMP_REAL + os.sep))
if not (under_home or under_tmp):
return False, f"Target must live under $HOME or /tmp (got: {real_path})"
return True, ""


@app.post("/api/save_file")
async def save_file(request: Request):
body = await request.json()
filename = os.path.basename(body.get("filename", "untitled.py"))
content = body.get("content", "")
ALLOWED_SAVE_DIRS = [
os.path.expanduser("~/codec-workspace"),
os.path.expanduser("~/.codec"),
os.path.expanduser("~/Desktop"),
os.path.expanduser("~/Documents"),
]
directory = os.path.realpath(os.path.expanduser(body.get("directory", "~/codec-workspace")))
if not any(directory.startswith(allowed) for allowed in ALLOWED_SAVE_DIRS):
return JSONResponse({"error": "Directory not allowed"}, status_code=403)
directory = os.path.realpath(os.path.expanduser(
body.get("directory", "~/codec-workspace")))
target_path = os.path.join(directory, filename)
ok, reason = _save_file_is_safe(target_path)
if not ok:
try:
log_event("save_file_blocked", "codec-dashboard",
f"/api/save_file refused: {reason}",
extra={"requested_path": target_path, "reason": reason},
outcome="denied", level="warning")
except Exception:
pass
return JSONResponse(
{"error": "Directory not allowed", "reason": reason},
status_code=403)
os.makedirs(directory, exist_ok=True)
path = os.path.join(directory, filename)
with open(path, "w") as f: f.write(content)
return {"path": path, "size": len(content)}
with open(target_path, "w") as f:
f.write(content)
return {"path": target_path, "size": len(content)}


# (Skills endpoints moved to routes/skills.py)
Expand Down Expand Up @@ -3697,7 +3781,14 @@ async def _bg_heartbeat():


async def _bg_watcher():
"""Poll for draft tasks every 200ms."""
"""Poll for draft tasks every 1s.

A10 / SR-13: was 200ms — that's 432k stat()+exists() calls/day per
customer for a file that changes <100×/day. 1s drops it 5× to ~86k
while keeping draft-task pickup latency within UX comfort (a draft
overlay closing 0.5-1s after a paste is indistinguishable from instant
to the operator).
"""
from codec_watcher import TASK_FILE, handle_draft
_bg_status["watcher"]["running"] = True
log.info("[WATCHER] Background service started")
Expand All @@ -3716,7 +3807,7 @@ async def _bg_watcher():
except Exception as e:
_bg_status["watcher"]["errors"] += 1
log.error(f"[WATCHER] Error: {e}")
await asyncio.sleep(0.2)
await asyncio.sleep(1.0)
_bg_status["watcher"]["running"] = False


Expand Down
12 changes: 7 additions & 5 deletions codec_heartbeat.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
def check_pending_tasks():
"""Check memory for tasks that were saved for later"""
try:
conn = sqlite3.connect(DB_PATH)
conn = sqlite3.connect(DB_PATH, timeout=5.0); conn.execute("PRAGMA busy_timeout=5000")
# Find messages containing "task" or "later" or "remind" from last 24h
cutoff = (datetime.now() - timedelta(hours=24)).isoformat()
rows = conn.execute("""
Expand Down Expand Up @@ -90,7 +90,7 @@ def check_system_health():
def check_memory_stats():
"""Report memory database stats + size monitoring"""
try:
conn = sqlite3.connect(DB_PATH)
conn = sqlite3.connect(DB_PATH, timeout=5.0); conn.execute("PRAGMA busy_timeout=5000")
total = conn.execute("SELECT COUNT(*) FROM conversations").fetchone()[0]
sessions = conn.execute("SELECT COUNT(DISTINCT session_id) FROM conversations").fetchone()[0]
latest = conn.execute("SELECT timestamp FROM conversations ORDER BY id DESC LIMIT 1").fetchone()
Expand Down Expand Up @@ -132,8 +132,10 @@ def backup_memory_db():

try:
# Use SQLite backup API for safe copy (handles WAL mode)
src = sqlite3.connect(DB_PATH)
dst = sqlite3.connect(backup_path)
src = sqlite3.connect(DB_PATH, timeout=5.0)
src.execute("PRAGMA busy_timeout=5000")
dst = sqlite3.connect(backup_path, timeout=5.0)
dst.execute("PRAGMA busy_timeout=5000")
src.backup(dst)
dst.close()
src.close()
Expand Down Expand Up @@ -195,7 +197,7 @@ def execute_pending_tasks():
against DANGEROUS_PATTERNS before execution.
"""
try:
conn = sqlite3.connect(DB_PATH)
conn = sqlite3.connect(DB_PATH, timeout=5.0); conn.execute("PRAGMA busy_timeout=5000")
cutoff = (datetime.now() - timedelta(hours=24)).isoformat()

# Tightened patterns: require explicit prefix or very specific phrasing
Expand Down
25 changes: 18 additions & 7 deletions codec_jsonstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,31 @@ def atomic_write_json(
def file_lock(path: Any) -> Iterator[None]:
"""Exclusive cross-process lock on `<path>.lock` for the duration of the
block. Use around a read-modify-write of `path` so concurrent daemons
serialize instead of clobbering each other."""
serialize instead of clobbering each other.

A7 / SR-11: the `open()` is now inside the try-block, paired with a
finally close. Previously, if a path issue caused open() to succeed but
something raised between open() and the body's try-block, the lock-
sidecar file handle leaked in LOCK_EX state until GC.
"""
path = str(path)
directory = os.path.dirname(path) or "."
os.makedirs(directory, exist_ok=True)
lock_file = open(path + ".lock", "w")
lock_file = None
try:
lock_file = open(path + ".lock", "w")
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
yield
finally:
try:
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
except Exception:
pass
lock_file.close()
if lock_file is not None:
try:
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
except Exception:
pass
try:
lock_file.close()
except Exception:
pass


def read_modify_write(
Expand Down
Loading