diff --git a/Cargo.toml b/Cargo.toml
index 3907eee5..b4955a29 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -69,6 +69,8 @@ url = "2.5"
 regress = "0.10"
 include_dir = "0.7"
 base64 = "0.22"
+globset = "0.4"
+mime_guess = "2.0"
 
 # Code generation (build deps)
 typify = "0.4"
diff --git a/server/packages/sandbox-agent/Cargo.toml b/server/packages/sandbox-agent/Cargo.toml
index 5f45ad01..9690a7cc 100644
--- a/server/packages/sandbox-agent/Cargo.toml
+++ b/server/packages/sandbox-agent/Cargo.toml
@@ -36,6 +36,8 @@ tracing-logfmt.workspace = true
 tracing-subscriber.workspace = true
 include_dir.workspace = true
 base64.workspace = true
+globset.workspace = true
+mime_guess.workspace = true
 tempfile = { workspace = true, optional = true }
 
 [target.'cfg(unix)'.dependencies]
diff --git a/server/packages/sandbox-agent/src/filesystem.rs b/server/packages/sandbox-agent/src/filesystem.rs
new file mode 100644
index 00000000..12d3efc2
--- /dev/null
+++ b/server/packages/sandbox-agent/src/filesystem.rs
@@ -0,0 +1,448 @@
+//! Read-only workspace filesystem helpers backing the OpenCode-compatible
+//! `/file/*` endpoints: directory listing, ranged file reads, and a thin
+//! `git status` wrapper. All paths are confined to a single workspace root.
+
+use std::collections::VecDeque;
+use std::fs;
+use std::path::{Component, Path, PathBuf};
+use std::process::Command;
+use std::time::UNIX_EPOCH;
+
+use base64::{engine::general_purpose::STANDARD, Engine as _};
+use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
+use mime_guess::MimeGuess;
+use serde::Serialize;
+
+use sandbox_agent_error::SandboxError;
+
+/// Byte range for a partial file read. `None` bounds default to the start
+/// and end of the file respectively; the range is half-open `[start, end)`.
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct FileReadRange {
+    pub start: Option<u64>,
+    pub end: Option<u64>,
+}
+
+/// Options for reading a single workspace file.
+#[derive(Debug, Clone)]
+pub(crate) struct FileReadOptions {
+    pub path: String,
+    pub range: Option<FileReadRange>,
+}
+
+/// Options for listing workspace directory entries.
+#[derive(Debug, Clone)]
+pub(crate) struct FileListOptions {
+    pub path: String,
+    /// Optional glob filter, matched against workspace-relative paths.
+    pub glob: Option<String>,
+    /// Maximum traversal depth; defaults to 1 (immediate children only).
+    pub depth: Option<usize>,
+    pub include_hidden: bool,
+    pub directories_only: bool,
+}
+
+/// One directory entry returned by [`WorkspaceFilesystem::list`].
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct WorkspaceFileNode {
+    pub name: String,
+    /// Path relative to the workspace root.
+    pub path: String,
+    pub absolute: String,
+    #[serde(rename = "type")]
+    pub entry_type: String,
+    pub ignored: bool,
+}
+
+/// File content payload: UTF-8 content is returned as text, anything else
+/// is base64-encoded with `encoding = "base64"`.
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct WorkspaceFileContent {
+    #[serde(rename = "type")]
+    pub content_type: String,
+    pub content: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub encoding: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub mime_type: Option<String>,
+}
+
+/// Version-control status for a single path. `added`/`removed` line counts
+/// are currently always reported as 0 (no diff stats are computed yet).
+#[derive(Debug, Serialize, Clone)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct WorkspaceVcsStatus {
+    pub status: String,
+    pub added: i64,
+    pub removed: i64,
+}
+
+/// Filesystem + VCS status for a single workspace path.
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub(crate) struct WorkspaceFileStatus {
+    pub path: String,
+    pub exists: bool,
+    #[serde(rename = "type")]
+    pub entry_type: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub size: Option<u64>,
+    /// Modification time in milliseconds since the Unix epoch.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub modified: Option<i64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub vcs: Option<WorkspaceVcsStatus>,
+}
+
+/// Stateless factory producing [`WorkspaceFilesystem`] handles scoped to a
+/// workspace root. Kept as a struct so it can live on `SessionManager`.
+#[derive(Debug, Clone, Default)]
+pub(crate) struct WorkspaceFilesystemService;
+
+impl WorkspaceFilesystemService {
+    pub(crate) fn new() -> Self {
+        Self
+    }
+
+    /// Create a filesystem view rooted at `root`; fails if `root` is not an
+    /// existing directory.
+    pub(crate) fn scoped(
+        &self,
+        root: impl Into<PathBuf>,
+    ) -> Result<WorkspaceFilesystem, SandboxError> {
+        WorkspaceFilesystem::new(root.into())
+    }
+}
+
+/// Read-only filesystem view confined to a single workspace root.
+#[derive(Debug, Clone)]
+pub(crate) struct WorkspaceFilesystem {
+    root: PathBuf,
+}
+
+impl WorkspaceFilesystem {
+    fn new(root: PathBuf) -> Result<Self, SandboxError> {
+        // Canonicalize when possible so later prefix checks compare
+        // like-for-like; fall back to the raw path if canonicalization fails
+        // (the existence check below then reports the error).
+        let root = fs::canonicalize(&root).unwrap_or(root);
+        if !root.exists() {
+            return Err(SandboxError::InvalidRequest {
+                message: "workspace root does not exist".to_string(),
+            });
+        }
+        if !root.is_dir() {
+            return Err(SandboxError::InvalidRequest {
+                message: "workspace root is not a directory".to_string(),
+            });
+        }
+        Ok(Self { root })
+    }
+
+    pub(crate) fn root(&self) -> &Path {
+        &self.root
+    }
+
+    /// Breadth-first directory listing up to `options.depth` levels deep,
+    /// optionally filtered by glob and hidden/directories-only flags.
+    /// Results are sorted by workspace-relative path.
+    pub(crate) fn list(
+        &self,
+        options: FileListOptions,
+    ) -> Result<Vec<WorkspaceFileNode>, SandboxError> {
+        let path = options.path.trim();
+        if path.is_empty() {
+            return Err(SandboxError::InvalidRequest {
+                message: "path is required".to_string(),
+            });
+        }
+        let directory = self.resolve_path(path, false)?;
+        let metadata = fs::metadata(&directory).map_err(|err| SandboxError::InvalidRequest {
+            message: format!("failed to access directory: {err}"),
+        })?;
+        if !metadata.is_dir() {
+            return Err(SandboxError::InvalidRequest {
+                message: "path is not a directory".to_string(),
+            });
+        }
+
+        let matcher = build_glob_matcher(options.glob.as_deref())?;
+        let max_depth = options.depth.unwrap_or(1);
+        let mut queue = VecDeque::new();
+        let mut entries = Vec::new();
+        queue.push_back((directory, 0usize));
+
+        while let Some((current_dir, depth)) = queue.pop_front() {
+            if depth >= max_depth {
+                continue;
+            }
+            let read_dir =
+                fs::read_dir(&current_dir).map_err(|err| SandboxError::InvalidRequest {
+                    message: format!("failed to read directory: {err}"),
+                })?;
+
+            for entry in read_dir {
+                let entry = entry.map_err(|err| SandboxError::InvalidRequest {
+                    message: format!("failed to read directory entry: {err}"),
+                })?;
+                let file_name = entry.file_name();
+                let name = file_name.to_string_lossy().to_string();
+                if !options.include_hidden && name.starts_with('.') {
+                    continue;
+                }
+                let file_type = entry
+                    .file_type()
+                    .map_err(|err| SandboxError::InvalidRequest {
+                        message: format!("failed to read file type: {err}"),
+                    })?;
+                let entry_path = entry.path();
+
+                // Redundant with the name check above (same file name); kept
+                // as a cheap safeguard for hidden directories.
+                if file_type.is_dir() && !options.include_hidden && is_hidden_dir(&entry_path) {
+                    continue;
+                }
+
+                let relative_path = path_relative_to_root(&self.root, &entry_path)?;
+                if let Some(matcher) = matcher.as_ref() {
+                    if !matcher.is_match(relative_path.as_str()) {
+                        // Non-matching directories are still descended into so
+                        // deeper matches are not lost.
+                        if file_type.is_dir() && depth + 1 < max_depth {
+                            queue.push_back((entry_path.clone(), depth + 1));
+                        }
+                        continue;
+                    }
+                }
+
+                if options.directories_only && !file_type.is_dir() {
+                    continue;
+                }
+
+                let entry_type = if file_type.is_dir() {
+                    "directory"
+                } else {
+                    "file"
+                };
+
+                entries.push(WorkspaceFileNode {
+                    name,
+                    path: relative_path,
+                    absolute: entry_path.to_string_lossy().to_string(),
+                    entry_type: entry_type.to_string(),
+                    ignored: false,
+                });
+
+                if file_type.is_dir() && depth + 1 < max_depth {
+                    queue.push_back((entry_path, depth + 1));
+                }
+            }
+        }
+
+        entries.sort_by(|a, b| a.path.cmp(&b.path));
+        Ok(entries)
+    }
+
+    /// Read a file, applying an optional byte range. UTF-8 content is
+    /// returned verbatim as text; anything else is base64-encoded.
+    pub(crate) fn read(
+        &self,
+        options: FileReadOptions,
+    ) -> Result<WorkspaceFileContent, SandboxError> {
+        let path = options.path.trim();
+        if path.is_empty() {
+            return Err(SandboxError::InvalidRequest {
+                message: "path is required".to_string(),
+            });
+        }
+        let file_path = self.resolve_path(path, false)?;
+        let metadata = fs::metadata(&file_path).map_err(|err| SandboxError::InvalidRequest {
+            message: format!("failed to access file: {err}"),
+        })?;
+        if !metadata.is_file() {
+            return Err(SandboxError::InvalidRequest {
+                message: "path is not a file".to_string(),
+            });
+        }
+        let mut bytes = fs::read(&file_path).map_err(|err| SandboxError::InvalidRequest {
+            message: format!("failed to read file: {err}"),
+        })?;
+
+        if let Some(range) = options.range {
+            bytes = apply_byte_range(bytes, range)?;
+        }
+
+        let mime = MimeGuess::from_path(&file_path)
+            .first_or_octet_stream()
+            .essence_str()
+            .to_string();
+        // Avoid cloning the whole buffer just to probe UTF-8 validity:
+        // `from_utf8` hands the bytes back on failure via `into_bytes`.
+        match String::from_utf8(bytes) {
+            Ok(text) => Ok(WorkspaceFileContent {
+                content_type: "text".to_string(),
+                content: text,
+                encoding: None,
+                mime_type: Some(mime),
+            }),
+            Err(err) => Ok(WorkspaceFileContent {
+                content_type: "binary".to_string(),
+                content: STANDARD.encode(err.into_bytes()),
+                encoding: Some("base64".to_string()),
+                mime_type: Some(mime),
+            }),
+        }
+    }
+
+    /// Run `git status --porcelain=v1 -z` for the workspace and map each
+    /// record to a [`WorkspaceFileStatus`]. Returns an empty list when the
+    /// root is not a git checkout.
+    pub(crate) fn status(&self) -> Result<Vec<WorkspaceFileStatus>, SandboxError> {
+        if !self.root.join(".git").exists() {
+            return Ok(Vec::new());
+        }
+        let output = Command::new("git")
+            .arg("status")
+            .arg("--porcelain=v1")
+            .arg("-z")
+            .current_dir(&self.root)
+            .output()
+            .map_err(|err| SandboxError::StreamError {
+                message: format!("failed to run git status: {err}"),
+            })?;
+        if !output.status.success() {
+            return Err(SandboxError::StreamError {
+                message: format!("git status failed: {}", output.status),
+            });
+        }
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let mut entries = Vec::new();
+        let mut fields = stdout.split('\0').filter(|field| !field.is_empty());
+        while let Some(record) = fields.next() {
+            let (status_code, path) = parse_git_porcelain_entry(record);
+            // With `-z`, rename/copy records emit the *source* path as a
+            // separate NUL-terminated field; consume it here so it is not
+            // misparsed as an independent entry.
+            if status_code.starts_with('R') || status_code.starts_with('C') {
+                let _ = fields.next();
+            }
+            let Some(path) = path else {
+                continue;
+            };
+            let status = map_git_status(status_code);
+            let absolute = self.root.join(&path);
+            let (exists, entry_type, size, modified) = file_metadata(&absolute);
+            entries.push(WorkspaceFileStatus {
+                path,
+                exists,
+                entry_type,
+                size,
+                modified,
+                vcs: Some(WorkspaceVcsStatus {
+                    status,
+                    added: 0,
+                    removed: 0,
+                }),
+            });
+        }
+
+        Ok(entries)
+    }
+
+    /// Resolve a user-supplied path against the workspace root, rejecting
+    /// `..` components and anything that resolves outside the root.
+    fn resolve_path(&self, input: &str, allow_missing: bool) -> Result<PathBuf, SandboxError> {
+        let input_path = PathBuf::from(input);
+        if input_path
+            .components()
+            .any(|component| matches!(component, Component::ParentDir))
+        {
+            return Err(SandboxError::InvalidRequest {
+                message: "path traversal is not allowed".to_string(),
+            });
+        }
+
+        let joined = if input_path.is_absolute() {
+            input_path
+        } else {
+            self.root.join(input_path)
+        };
+
+        // NOTE(review): when canonicalization fails (e.g. missing file) the
+        // unresolved path is checked instead, so a symlink *inside* the
+        // workspace can still point outside it — confirm this is acceptable.
+        let normalized = if allow_missing {
+            normalize_path(&joined)
+        } else {
+            fs::canonicalize(&joined).unwrap_or(joined)
+        };
+
+        if !normalized.starts_with(&self.root) {
+            return Err(SandboxError::InvalidRequest {
+                message: "path is outside the workspace".to_string(),
+            });
+        }
+
+        Ok(normalized)
+    }
+}
+
+/// Build a single-pattern glob matcher; `*` does not cross `/` boundaries.
+fn build_glob_matcher(glob: Option<&str>) -> Result<Option<GlobSet>, SandboxError> {
+    let Some(pattern) = glob else {
+        return Ok(None);
+    };
+    let mut builder = GlobSetBuilder::new();
+    let glob = GlobBuilder::new(pattern)
+        .literal_separator(true)
+        .build()
+        .map_err(|err| SandboxError::InvalidRequest {
+            message: format!("invalid glob pattern: {err}"),
+        })?;
+    builder.add(glob);
+    let set = builder
+        .build()
+        .map_err(|err| SandboxError::InvalidRequest {
+            message: format!("invalid glob matcher: {err}"),
+        })?;
+    Ok(Some(set))
+}
+
+/// Slice `bytes` to the half-open range `[start, end)`, validating bounds.
+fn apply_byte_range(bytes: Vec<u8>, range: FileReadRange) -> Result<Vec<u8>, SandboxError> {
+    let len = bytes.len() as u64;
+    let start = range.start.unwrap_or(0);
+    let end = range.end.unwrap_or(len);
+    if start > end || end > len {
+        return Err(SandboxError::InvalidRequest {
+            message: "invalid byte range".to_string(),
+        });
+    }
+    Ok(bytes[start as usize..end as usize].to_vec())
+}
+
+/// Lexically normalize a path (collapse `.` and `..`) without touching disk.
+fn normalize_path(path: &Path) -> PathBuf {
+    let mut normalized = PathBuf::new();
+    for component in path.components() {
+        match component {
+            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
+            Component::RootDir => normalized.push(Path::new(std::path::MAIN_SEPARATOR_STR)),
+            Component::CurDir => {}
+            Component::ParentDir => {
+                normalized.pop();
+            }
+            Component::Normal(value) => normalized.push(value),
+        }
+    }
+    normalized
+}
+
+/// Return `path` relative to `root` as a lossy UTF-8 string.
+fn path_relative_to_root(root: &Path, path: &Path) -> Result<String, SandboxError> {
+    let relative = path
+        .strip_prefix(root)
+        .map_err(|_| SandboxError::InvalidRequest {
+            message: "path is outside the workspace".to_string(),
+        })?;
+    Ok(relative.to_string_lossy().to_string())
+}
+
+/// True when the final path component starts with a dot.
+fn is_hidden_dir(path: &Path) -> bool {
+    path.file_name()
+        .and_then(|name| name.to_str())
+        .map(|name| name.starts_with('.'))
+        .unwrap_or(false)
+}
+
+/// Best-effort metadata lookup: (exists, entry type, size, mtime millis).
+/// Missing paths report as a non-existent "file".
+fn file_metadata(path: &Path) -> (bool, String, Option<u64>, Option<i64>) {
+    let Ok(metadata) = fs::metadata(path) else {
+        return (false, "file".to_string(), None, None);
+    };
+    let entry_type = if metadata.is_dir() {
+        "directory"
+    } else {
+        "file"
+    };
+    let modified = metadata
+        .modified()
+        .ok()
+        .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
+        .map(|duration| duration.as_millis() as i64);
+    (true, entry_type.to_string(), Some(metadata.len()), modified)
+}
+
+/// Split one porcelain-v1 record into (two-char status code, path).
+/// Record layout is `XY path`; records shorter than 3 bytes are skipped.
+fn parse_git_porcelain_entry(entry: &str) -> (&str, Option<String>) {
+    if entry.len() < 3 {
+        return ("", None);
+    }
+    let status = &entry[0..2];
+    let path = entry[3..].trim();
+    if path.is_empty() {
+        return (status, None);
+    }
+    // The " -> " form only appears without `-z`; kept defensively.
+    if let Some((_, new_path)) = path.split_once(" -> ") {
+        return (status, Some(new_path.to_string()));
+    }
+    (status, Some(path.to_string()))
+}
+
+/// Collapse a porcelain status code into added/deleted/modified buckets.
+fn map_git_status(status: &str) -> String {
+    if status.contains('D') {
+        return "deleted".to_string();
+    }
+    if status.contains('A') || status.contains('?') {
+        return "added".to_string();
+    }
+    "modified".to_string()
+}
diff --git a/server/packages/sandbox-agent/src/lib.rs b/server/packages/sandbox-agent/src/lib.rs
index 8c113431..72361053 100644
--- a/server/packages/sandbox-agent/src/lib.rs
+++ b/server/packages/sandbox-agent/src/lib.rs
@@ -2,6 +2,7 @@
 mod agent_server_logs;
 pub mod credentials;
+pub(crate) mod filesystem;
 pub mod opencode_compat;
 pub mod router;
 pub mod server_logs;
diff --git a/server/packages/sandbox-agent/src/opencode_compat.rs b/server/packages/sandbox-agent/src/opencode_compat.rs
index 55b70505..7642ed18 100644
--- a/server/packages/sandbox-agent/src/opencode_compat.rs
+++ b/server/packages/sandbox-agent/src/opencode_compat.rs
@@ -13,7 +13,7 @@ use std::str::FromStr;
 use axum::extract::{Path, Query, State};
 use axum::http::{HeaderMap, StatusCode};
 use axum::response::sse::{Event, KeepAlive};
-use axum::response::{IntoResponse, Sse};
+use axum::response::{IntoResponse, Response, Sse};
 use axum::routing::{get, patch, post, put};
 use axum::{Json, Router};
 use futures::stream;
@@ -23,6 +23,7 @@ use tokio::sync::{broadcast, Mutex};
 use tokio::time::interval;
 use utoipa::{IntoParams, OpenApi,
ToSchema};
 
+use crate::filesystem::{FileListOptions, FileReadOptions, FileReadRange, WorkspaceFileStatus};
 use crate::router::{AppState, CreateSessionRequest, PermissionReply};
 use sandbox_agent_error::SandboxError;
 use sandbox_agent_agent_management::agents::AgentId;
@@ -491,10 +492,22 @@ struct FindSymbolsQuery {
     query: Option<String>,
 }
 
+/// Query parameters for `GET /file/list`.
+#[derive(Debug, Deserialize, IntoParams)]
+struct FileListQuery {
+    directory: Option<String>,
+    path: Option<String>,
+    glob: Option<String>,
+    depth: Option<usize>,
+    hidden: Option<bool>,
+    directories: Option<bool>,
+}
+
 #[derive(Debug, Deserialize, IntoParams)]
 struct FileContentQuery {
     directory: Option<String>,
     path: Option<String>,
+    start: Option<u64>,
+    end: Option<u64>,
 }
 
@@ -769,6 +782,37 @@ fn sandbox_error_response(err: SandboxError) -> (StatusCode, Json<serde_json::Value>) {
     }
 }
 
+/// Map filesystem errors onto opencode-compatible HTTP responses:
+/// invalid requests become 400s, stream/other errors become 500s.
+fn filesystem_error_response(err: SandboxError) -> Response {
+    match err {
+        SandboxError::InvalidRequest { message } => bad_request(&message).into_response(),
+        SandboxError::StreamError { message } => internal_error(&message).into_response(),
+        other => internal_error(&other.to_string()).into_response(),
+    }
+}
+
+/// Wire shape the opencode SDK expects from `GET /file/status`.
+#[derive(Debug, Serialize)]
+struct OpenCodeFileStatusEntry {
+    path: String,
+    added: i64,
+    removed: i64,
+    status: String,
+}
+
+/// Flatten workspace statuses, dropping entries without VCS information.
+fn opencode_status_entries(entries: Vec<WorkspaceFileStatus>) -> Vec<OpenCodeFileStatusEntry> {
+    entries
+        .into_iter()
+        .filter_map(|entry| {
+            let vcs = entry.vcs?;
+            Some(OpenCodeFileStatusEntry {
+                path: entry.path,
+                added: vcs.added,
+                removed: vcs.removed,
+                status: vcs.status,
+            })
+        })
+        .collect()
+}
+
 fn parse_permission_reply_value(value: Option<&str>) -> Result<PermissionReply, SandboxError> {
     let value = value.unwrap_or("once").to_ascii_lowercase();
     match value.as_str() {
@@ -3754,8 +3798,35 @@ async fn oc_pty_connect(Path(_pty_id): Path<String>) -> impl IntoResponse {
     responses((status = 200)),
     tag = "opencode"
 )]
-async fn oc_file_list() -> impl IntoResponse {
-    (StatusCode::OK, Json(json!([])))
+// NOTE(review): extractor type parameters were reconstructed from usage
+// (`state.opencode`, `state.inner`); confirm against build_opencode_router.
+async fn oc_file_list(
+    State(state): State<Arc<OpenCodeAppState>>,
+    headers: HeaderMap,
+    Query(query): Query<FileListQuery>,
+) -> impl IntoResponse {
+    let Some(path) = query.path else {
+        return bad_request("path is required").into_response();
+    };
+    let directory = state.opencode.directory_for(&headers, query.directory.as_ref());
+    let filesystem = match state
+        .inner
+        .session_manager()
+        .workspace_filesystem()
+        .scoped(directory)
+    {
+        Ok(filesystem) => filesystem,
+        Err(err) => return filesystem_error_response(err),
+    };
+    let options = FileListOptions {
+        path,
+        glob: query.glob,
+        depth: query.depth,
+        include_hidden: query.hidden.unwrap_or(false),
+        directories_only: query.directories.unwrap_or(false),
+    };
+    match filesystem.list(options) {
+        Ok(entries) => (StatusCode::OK, Json(entries)).into_response(),
+        Err(err) => filesystem_error_response(err),
+    }
 }
 
 #[utoipa::path(
@@ -3764,18 +3835,37 @@ async fn oc_file_list() -> impl IntoResponse {
     responses((status = 200)),
     tag = "opencode"
 )]
-async fn oc_file_content(Query(query): Query<FileContentQuery>) -> impl IntoResponse {
-    if query.path.is_none() {
+// NOTE(review): extractor type parameters reconstructed — see oc_file_list.
+async fn oc_file_content(
+    State(state): State<Arc<OpenCodeAppState>>,
+    headers: HeaderMap,
+    Query(query): Query<FileContentQuery>,
+) -> impl IntoResponse {
+    let Some(path) = query.path else {
         return bad_request("path is required").into_response();
+    };
+    let directory = state.opencode.directory_for(&headers, query.directory.as_ref());
+    let filesystem = match state
+        .inner
+        .session_manager()
+        .workspace_filesystem()
+        .scoped(directory)
+    {
+        Ok(filesystem) => filesystem,
+        Err(err) => return filesystem_error_response(err),
+    };
+    // Only construct a range when the caller supplied at least one bound.
+    let range = if query.start.is_some() || query.end.is_some() {
+        Some(FileReadRange {
+            start: query.start,
+            end: query.end,
+        })
+    } else {
+        None
+    };
+    let options = FileReadOptions { path, range };
+    match filesystem.read(options) {
+        Ok(content) => (StatusCode::OK, Json(content)).into_response(),
+        Err(err) => filesystem_error_response(err),
     }
-    (
-        StatusCode::OK,
-        Json(json!({
-            "type": "text",
-            "content": "",
-        })),
-    )
-    .into_response()
 }
 
 #[utoipa::path(
@@ -3784,8 +3874,28 @@ async fn oc_file_content(Query(query): Query<FileContentQuery>) -> impl IntoResponse {
     responses((status = 200)),
     tag = "opencode"
 )]
-async fn oc_file_status() -> impl IntoResponse {
-    (StatusCode::OK, Json(json!([]))).into_response()
+// NOTE(review): extractor type parameters reconstructed — see oc_file_list.
+async fn oc_file_status(
+    State(state): State<Arc<OpenCodeAppState>>,
+    headers: HeaderMap,
+    Query(query): Query<FileContentQuery>,
+) -> impl IntoResponse {
+    let directory = state.opencode.directory_for(&headers, query.directory.as_ref());
+    let filesystem = match state
+        .inner
+        .session_manager()
+        .workspace_filesystem()
+        .scoped(directory)
+    {
+        Ok(filesystem) => filesystem,
+        Err(err) => return filesystem_error_response(err),
+    };
+    match filesystem.status() {
+        Ok(entries) => {
+            let files = opencode_status_entries(entries);
+            // Serialize the typed entries directly; re-encoding through
+            // `json!` would serialize twice for no benefit.
+            (StatusCode::OK, Json(files)).into_response()
+        }
+        Err(err) => filesystem_error_response(err),
+    }
 }
 
 #[utoipa::path(
diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs
index 3ca437a7..d273b084 100644
--- a/server/packages/sandbox-agent/src/router.rs
+++ b/server/packages/sandbox-agent/src/router.rs
@@ -39,6 +39,7 @@ use tracing::Span;
 use utoipa::{Modify, OpenApi, ToSchema};
 
 use crate::agent_server_logs::AgentServerLogs;
+use crate::filesystem::WorkspaceFilesystemService;
 use crate::opencode_compat::{build_opencode_router, OpenCodeAppState};
 use crate::ui;
 use sandbox_agent_agent_management::agents::{
@@ -818,6 +819,7 @@ pub(crate) struct SessionManager {
     sessions: Mutex<Vec<Session>>,
     server_manager: Arc<ServerManager>,
     http_client: Client,
+    filesystem: WorkspaceFilesystemService,
 }
 
/// Shared Codex app-server process that handles multiple sessions via JSON-RPC.
@@ -1538,6 +1540,7 @@ impl SessionManager {
             sessions: Mutex::new(Vec::new()),
             server_manager,
             http_client: Client::new(),
+            filesystem: WorkspaceFilesystemService::new(),
         }
     }
 
@@ -1562,6 +1565,10 @@ impl SessionManager {
         logs.read_stderr()
     }
 
+    /// Shared workspace filesystem service backing the `/file/*` endpoints.
+    pub(crate) fn workspace_filesystem(&self) -> &WorkspaceFilesystemService {
+        &self.filesystem
+    }
+
     pub(crate) async fn create_session(
         self: &Arc<Self>,
         session_id: String,
diff --git a/server/packages/sandbox-agent/tests/opencode-compat/filesystem.test.ts b/server/packages/sandbox-agent/tests/opencode-compat/filesystem.test.ts
new file mode 100644
index 00000000..68f174dd
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/opencode-compat/filesystem.test.ts
@@ -0,0 +1,73 @@
+import { describe, it, expect, beforeAll, afterEach, beforeEach } from "vitest";
+import { createOpencodeClient, type OpencodeClient } from "@opencode-ai/sdk";
+import { spawnSandboxAgent, buildSandboxAgent, type SandboxAgentHandle } from "./helpers/spawn";
+import { mkdtemp, mkdir, writeFile, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+describe("OpenCode-compatible filesystem API", () => {
+  let handle: SandboxAgentHandle;
+  let client: OpencodeClient;
+  let tempDir: string;
+
+  beforeAll(async () => {
+    await buildSandboxAgent();
+  });
+
+  beforeEach(async () => {
+    tempDir = await mkdtemp(join(tmpdir(), "opencode-fs-"));
+    await writeFile(join(tempDir, "hello.txt"), "hello world\n");
+    await mkdir(join(tempDir, "nested"), { recursive: true });
+    await writeFile(join(tempDir, "nested", "child.txt"), "child content\n");
+
+    handle = await spawnSandboxAgent({
+      opencodeCompat: true,
+      env: {
+        OPENCODE_COMPAT_DIRECTORY: tempDir,
+        OPENCODE_COMPAT_WORKTREE: tempDir,
+      },
+    });
+
+    client = createOpencodeClient({
+      baseUrl: `${handle.baseUrl}/opencode`,
+      headers: { Authorization: `Bearer ${handle.token}` },
+    });
+  });
+
+  afterEach(async () => {
+    await handle?.dispose();
+    if (tempDir) {
+      await rm(tempDir, { recursive: true, force: true });
+    }
+  });
+
+  it("lists files within the workspace", async () => {
+    const response = await client.file.list({
+      query: { path: "." },
+    });
+
+    expect(response.data).toBeDefined();
+    expect(Array.isArray(response.data)).toBe(true);
+    const paths = (response.data ?? []).map((entry) => entry.path);
+    expect(paths).toContain("hello.txt");
+    expect(paths).toContain("nested");
+  });
+
+  it("reads file content", async () => {
+    const response = await client.file.read({
+      query: { path: "hello.txt" },
+    });
+
+    expect(response.data).toBeDefined();
+    expect(response.data?.type).toBe("text");
+    expect(response.data?.content).toContain("hello world");
+  });
+
+  it("rejects paths outside the workspace", async () => {
+    const response = await client.file.read({
+      query: { path: "../outside.txt" },
+    });
+
+    expect(response.error).toBeDefined();
+  });
+});