diff --git a/Cargo.toml b/Cargo.toml index 74906c3a..4ce3cf60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ zip = { version = "0.6", default-features = false, features = ["deflate"] } # Misc url = "2.5" +regex = "1.10" regress = "0.10" include_dir = "0.7" base64 = "0.22" diff --git a/server/packages/sandbox-agent/Cargo.toml b/server/packages/sandbox-agent/Cargo.toml index 703c87c6..54803421 100644 --- a/server/packages/sandbox-agent/Cargo.toml +++ b/server/packages/sandbox-agent/Cargo.toml @@ -36,6 +36,7 @@ tracing-logfmt.workspace = true tracing-subscriber.workspace = true include_dir.workspace = true base64.workspace = true +regex.workspace = true tempfile = { workspace = true, optional = true } [target.'cfg(unix)'.dependencies] diff --git a/server/packages/sandbox-agent/src/lib.rs b/server/packages/sandbox-agent/src/lib.rs index 8c113431..459b7c2d 100644 --- a/server/packages/sandbox-agent/src/lib.rs +++ b/server/packages/sandbox-agent/src/lib.rs @@ -4,6 +4,7 @@ mod agent_server_logs; pub mod credentials; pub mod opencode_compat; pub mod router; +pub(crate) mod search; pub mod server_logs; pub mod telemetry; pub mod ui; diff --git a/server/packages/sandbox-agent/src/opencode_compat.rs b/server/packages/sandbox-agent/src/opencode_compat.rs index 427d440c..62520953 100644 --- a/server/packages/sandbox-agent/src/opencode_compat.rs +++ b/server/packages/sandbox-agent/src/opencode_compat.rs @@ -6,6 +6,7 @@ use std::collections::HashMap; use std::convert::Infallible; +use std::path::PathBuf; use std::str::FromStr; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; @@ -24,6 +25,7 @@ use tokio::time::interval; use utoipa::{IntoParams, OpenApi, ToSchema}; use crate::router::{AppState, CreateSessionRequest, PermissionReply}; +use crate::search::{FileSearchType, SearchFileParams, SearchSymbolParams, SearchTextParams}; use sandbox_agent_agent_management::agents::AgentId; use sandbox_agent_error::SandboxError; use 
sandbox_agent_universal_agent_schema::{ @@ -471,18 +473,26 @@ struct ToolQuery { struct FindTextQuery { directory: Option, pattern: Option, + #[serde(rename = "caseSensitive")] + case_sensitive: Option, + limit: Option, } #[derive(Debug, Deserialize, IntoParams)] struct FindFilesQuery { directory: Option, query: Option, + dirs: Option, + #[serde(rename = "type")] + kind: Option, + limit: Option, } #[derive(Debug, Deserialize, IntoParams)] struct FindSymbolsQuery { directory: Option, query: Option, + limit: Option, } #[derive(Debug, Deserialize, IntoParams)] @@ -3855,11 +3865,37 @@ async fn oc_file_status() -> impl IntoResponse { responses((status = 200)), tag = "opencode" )] -async fn oc_find_text(Query(query): Query) -> impl IntoResponse { - if query.pattern.is_none() { +async fn oc_find_text( + State(state): State>, + headers: HeaderMap, + Query(query): Query, +) -> impl IntoResponse { + let Some(pattern) = query.pattern else { return bad_request("pattern is required").into_response(); + }; + + let directory = state + .opencode + .directory_for(&headers, query.directory.as_ref()); + let worktree = state.opencode.worktree_for(&directory); + let search_params = SearchTextParams { + root: PathBuf::from(worktree), + directory: PathBuf::from(directory), + pattern, + case_sensitive: query.case_sensitive, + limit: query.limit, + }; + + match state + .inner + .session_manager() + .search_service() + .search_text(search_params) + .await + { + Ok(matches) => (StatusCode::OK, Json(matches)).into_response(), + Err(err) => sandbox_error_response(err).into_response(), } - (StatusCode::OK, Json(json!([]))).into_response() } #[utoipa::path( @@ -3868,11 +3904,52 @@ async fn oc_find_text(Query(query): Query) -> impl IntoResponse { responses((status = 200)), tag = "opencode" )] -async fn oc_find_files(Query(query): Query) -> impl IntoResponse { - if query.query.is_none() { +async fn oc_find_files( + State(state): State>, + headers: HeaderMap, + Query(query): Query, +) -> impl 
IntoResponse { + let Some(query_value) = query.query else { return bad_request("query is required").into_response(); + }; + + let include_dirs = match query.dirs.as_deref() { + Some("true") => Some(true), + Some("false") => Some(false), + Some(_) => return bad_request("dirs must be true or false").into_response(), + None => None, + }; + + let file_type = match query.kind.as_deref() { + Some("file") => Some(FileSearchType::File), + Some("directory") => Some(FileSearchType::Directory), + Some(_) => return bad_request("type must be file or directory").into_response(), + None => None, + }; + + let directory = state + .opencode + .directory_for(&headers, query.directory.as_ref()); + let worktree = state.opencode.worktree_for(&directory); + let search_params = SearchFileParams { + root: PathBuf::from(worktree), + directory: PathBuf::from(directory), + query: query_value, + include_dirs, + file_type, + limit: query.limit, + }; + + match state + .inner + .session_manager() + .search_service() + .search_files(search_params) + .await + { + Ok(results) => (StatusCode::OK, Json(results)).into_response(), + Err(err) => sandbox_error_response(err).into_response(), } - (StatusCode::OK, Json(json!([]))).into_response() } #[utoipa::path( @@ -3881,11 +3958,36 @@ async fn oc_find_files(Query(query): Query) -> impl IntoResponse responses((status = 200)), tag = "opencode" )] -async fn oc_find_symbols(Query(query): Query) -> impl IntoResponse { - if query.query.is_none() { +async fn oc_find_symbols( + State(state): State>, + headers: HeaderMap, + Query(query): Query, +) -> impl IntoResponse { + let Some(query_value) = query.query else { return bad_request("query is required").into_response(); + }; + + let directory = state + .opencode + .directory_for(&headers, query.directory.as_ref()); + let worktree = state.opencode.worktree_for(&directory); + let search_params = SearchSymbolParams { + root: PathBuf::from(worktree), + directory: PathBuf::from(directory), + query: query_value, + 
limit: query.limit, + }; + + match state + .inner + .session_manager() + .search_service() + .search_symbols(search_params) + .await + { + Ok(results) => (StatusCode::OK, Json(results)).into_response(), + Err(err) => sandbox_error_response(err).into_response(), } - (StatusCode::OK, Json(json!([]))).into_response() } #[utoipa::path( diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 92460d55..dee8ed1b 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -40,6 +40,7 @@ use utoipa::{Modify, OpenApi, ToSchema}; use crate::agent_server_logs::AgentServerLogs; use crate::opencode_compat::{build_opencode_router, OpenCodeAppState}; +use crate::search::SearchService; use crate::ui; use sandbox_agent_agent_management::agents::{ AgentError as ManagerError, AgentId, AgentManager, InstallOptions, SpawnOptions, StreamingSpawn, @@ -818,6 +819,7 @@ pub(crate) struct SessionManager { sessions: Mutex>, server_manager: Arc, http_client: Client, + search: SearchService, } /// Shared Codex app-server process that handles multiple sessions via JSON-RPC. 
@@ -1538,9 +1540,14 @@ impl SessionManager { sessions: Mutex::new(Vec::new()), server_manager, http_client: Client::new(), + search: SearchService::new(), } } + pub(crate) fn search_service(&self) -> SearchService { + self.search.clone() + } + fn session_ref<'a>(sessions: &'a [SessionState], session_id: &str) -> Option<&'a SessionState> { sessions .iter() diff --git a/server/packages/sandbox-agent/src/search.rs b/server/packages/sandbox-agent/src/search.rs new file mode 100644 index 00000000..3ba26d61 --- /dev/null +++ b/server/packages/sandbox-agent/src/search.rs @@ -0,0 +1,909 @@ +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::hash::{Hash, Hasher}; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::{Arc, Mutex}; +use std::time::SystemTime; + +use regex::{Regex, RegexBuilder}; +use serde::{Deserialize, Serialize}; +use tokio::task; + +use sandbox_agent_error::SandboxError; + +const DEFAULT_TEXT_LIMIT: usize = 200; +const DEFAULT_FILE_LIMIT: usize = 200; +const DEFAULT_SYMBOL_LIMIT: usize = 200; +const MAX_TEXT_LIMIT: usize = 500; +const MAX_FILE_LIMIT: usize = 200; +const MAX_SYMBOL_LIMIT: usize = 200; +const RIPGREP_NOT_AVAILABLE: &str = "ripgrep not available"; + +const SYMBOL_KIND_CLASS: u32 = 5; +const SYMBOL_KIND_METHOD: u32 = 6; +const SYMBOL_KIND_INTERFACE: u32 = 11; +const SYMBOL_KIND_FUNCTION: u32 = 12; +const SYMBOL_KIND_VARIABLE: u32 = 13; +const SYMBOL_KIND_CONSTANT: u32 = 14; +const SYMBOL_KIND_ENUM: u32 = 10; +const SYMBOL_KIND_STRUCT: u32 = 23; +const SYMBOL_KIND_TYPE_PARAMETER: u32 = 26; + +#[derive(Clone, Debug)] +pub(crate) struct SearchService { + symbol_cache: Arc>, +} + +impl SearchService { + pub fn new() -> Self { + Self { + symbol_cache: Arc::new(Mutex::new(SymbolCache::default())), + } + } + + pub async fn search_text( + &self, + params: SearchTextParams, + ) -> Result, SandboxError> { + task::spawn_blocking(move || search_text_sync(params)) + .await + .map_err(|err| SandboxError::StreamError { 
+ message: err.to_string(), + })? + } + + pub async fn search_files( + &self, + params: SearchFileParams, + ) -> Result, SandboxError> { + task::spawn_blocking(move || search_files_sync(params)) + .await + .map_err(|err| SandboxError::StreamError { + message: err.to_string(), + })? + } + + pub async fn search_symbols( + &self, + params: SearchSymbolParams, + ) -> Result, SandboxError> { + let cache = self.symbol_cache.clone(); + task::spawn_blocking(move || search_symbols_sync(cache, params)) + .await + .map_err(|err| SandboxError::StreamError { + message: err.to_string(), + })? + } +} + +#[derive(Clone, Debug)] +pub(crate) struct SearchTextParams { + pub root: PathBuf, + pub directory: PathBuf, + pub pattern: String, + pub case_sensitive: Option, + pub limit: Option, +} + +#[derive(Clone, Debug)] +pub(crate) struct SearchFileParams { + pub root: PathBuf, + pub directory: PathBuf, + pub query: String, + pub include_dirs: Option, + pub file_type: Option, + pub limit: Option, +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum FileSearchType { + File, + Directory, +} + +#[derive(Clone, Debug)] +pub(crate) struct SearchSymbolParams { + pub root: PathBuf, + pub directory: PathBuf, + pub query: String, + pub limit: Option, +} + +#[derive(Clone, Debug, Serialize)] +pub(crate) struct TextMatch { + pub path: TextValue, + pub lines: TextValue, + pub line_number: u64, + pub absolute_offset: u64, + pub submatches: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub(crate) struct TextValue { + pub text: String, +} + +#[derive(Clone, Debug, Serialize)] +pub(crate) struct TextSubmatch { + #[serde(rename = "match")] + pub match_text: TextValue, + pub start: u64, + pub end: u64, +} + +#[derive(Clone, Debug, Serialize)] +pub(crate) struct Symbol { + pub name: String, + pub kind: u32, + pub location: SymbolLocation, +} + +#[derive(Clone, Debug, Serialize)] +pub(crate) struct SymbolLocation { + pub uri: String, + pub range: Range, +} + +#[derive(Clone, Debug, Serialize)] +pub(crate) 
struct Range { + pub start: Position, + pub end: Position, +} + +#[derive(Clone, Debug, Serialize)] +pub(crate) struct Position { + pub line: u32, + pub character: u32, +} + +#[derive(Debug, Default)] +struct SymbolCache { + roots: HashMap, +} + +#[derive(Debug, Default)] +struct SymbolIndex { + fingerprint: u64, + symbols: Vec, +} + +#[derive(Clone, Debug)] +struct SymbolPattern { + regex: Regex, + kind: u32, +} + +fn search_text_sync(params: SearchTextParams) -> Result, SandboxError> { + if params.pattern.trim().is_empty() { + return Err(SandboxError::InvalidRequest { + message: "pattern is required".to_string(), + }); + } + let scope = resolve_scope(¶ms.root, ¶ms.directory)?; + let limit = clamp_limit(params.limit, DEFAULT_TEXT_LIMIT, MAX_TEXT_LIMIT); + + match rg_search(&scope, ¶ms.pattern, params.case_sensitive, limit) { + Ok(matches) => Ok(matches), + Err(SandboxError::StreamError { message }) + if message == RIPGREP_NOT_AVAILABLE => + { + search_text_fallback(&scope, ¶ms.pattern, params.case_sensitive, limit) + } + Err(err) => Err(err), + } +} + +fn search_files_sync(params: SearchFileParams) -> Result, SandboxError> { + let scope = resolve_scope(¶ms.root, ¶ms.directory)?; + let limit = clamp_limit(params.limit, DEFAULT_FILE_LIMIT, MAX_FILE_LIMIT); + + if params.query.trim().is_empty() { + return Err(SandboxError::InvalidRequest { + message: "query is required".to_string(), + }); + } + + let matcher = build_file_matcher(¶ms.query)?; + let include_dirs = match params.file_type { + Some(FileSearchType::File) => false, + Some(FileSearchType::Directory) => true, + None => params.include_dirs.unwrap_or(false), + }; + let only_dirs = matches!(params.file_type, Some(FileSearchType::Directory)); + let only_files = matches!(params.file_type, Some(FileSearchType::File)); + + let mut results = Vec::new(); + + walk_dir(&scope.directory, |path, file_type| { + if results.len() >= limit { + return WalkAction::Stop; + } + if file_type.is_dir() { + if should_skip_dir(path) { 
+ return WalkAction::Skip; + } + if include_dirs || only_dirs { + let rel = relative_path(&scope.root, path); + if matcher.is_match(&rel) { + results.push(rel); + } + } + return WalkAction::Continue; + } + + if file_type.is_file() { + if only_dirs { + return WalkAction::Continue; + } + if only_files || !only_dirs { + let rel = relative_path(&scope.root, path); + if matcher.is_match(&rel) { + results.push(rel); + } + } + } + WalkAction::Continue + })?; + + Ok(results) +} + +fn search_symbols_sync( + cache: Arc>, + params: SearchSymbolParams, +) -> Result, SandboxError> { + let scope = resolve_scope(¶ms.root, ¶ms.directory)?; + if params.query.trim().is_empty() { + return Err(SandboxError::InvalidRequest { + message: "query is required".to_string(), + }); + } + + let limit = clamp_limit(params.limit, DEFAULT_SYMBOL_LIMIT, MAX_SYMBOL_LIMIT); + let query = params.query.to_lowercase(); + + let mut cache_guard = cache + .lock() + .map_err(|_| SandboxError::StreamError { + message: "symbol cache poisoned".to_string(), + })?; + let entry = cache_guard + .roots + .entry(scope.directory.clone()) + .or_insert_with(SymbolIndex::default); + update_symbol_index(entry, &scope.directory)?; + + let mut results = Vec::new(); + for symbol in entry.symbols.iter() { + if results.len() >= limit { + break; + } + if symbol.name.to_lowercase().contains(&query) { + results.push(symbol.clone()); + } + } + + Ok(results) +} + +struct SearchScope { + root: PathBuf, + directory: PathBuf, +} + +fn resolve_scope(root: &Path, directory: &Path) -> Result { + let root_abs = fs::canonicalize(root).map_err(|_| SandboxError::InvalidRequest { + message: "root directory not found".to_string(), + })?; + + let directory_path = if directory.is_absolute() { + directory.to_path_buf() + } else { + root_abs.join(directory) + }; + + let directory_abs = fs::canonicalize(&directory_path).map_err(|_| SandboxError::InvalidRequest { + message: "directory not found".to_string(), + })?; + + if 
!directory_abs.starts_with(&root_abs) { + return Err(SandboxError::InvalidRequest { + message: "directory escapes worktree".to_string(), + }); + } + + Ok(SearchScope { + root: root_abs, + directory: directory_abs, + }) +} + +fn clamp_limit(limit: Option, default_limit: usize, max_limit: usize) -> usize { + let limit = limit.unwrap_or(default_limit); + let limit = limit.max(1).min(max_limit); + limit +} + +fn rg_search( + scope: &SearchScope, + pattern: &str, + case_sensitive: Option, + limit: usize, +) -> Result, SandboxError> { + let mut cmd = Command::new("rg"); + cmd.arg("--json"); + match case_sensitive { + Some(true) => { + cmd.arg("--case-sensitive"); + } + Some(false) => { + cmd.arg("--ignore-case"); + } + None => { + cmd.arg("--smart-case"); + } + } + cmd.arg(pattern); + + let relative = scope + .directory + .strip_prefix(&scope.root) + .unwrap_or(&scope.directory); + if relative.as_os_str().is_empty() { + cmd.arg("."); + } else { + cmd.arg(relative); + } + + let output = cmd.current_dir(&scope.root).output(); + let output = match output { + Ok(output) => output, + Err(err) => { + if err.kind() == std::io::ErrorKind::NotFound { + return Err(SandboxError::StreamError { + message: RIPGREP_NOT_AVAILABLE.to_string(), + }); + } + return Err(SandboxError::StreamError { + message: err.to_string(), + }); + } + }; + + if !output.status.success() { + if output.status.code() == Some(1) { + return Ok(Vec::new()); + } + let stderr = String::from_utf8_lossy(&output.stderr); + let message = stderr.trim(); + if !message.is_empty() { + return Err(SandboxError::InvalidRequest { + message: message.to_string(), + }); + } + return Err(SandboxError::StreamError { + message: "ripgrep failed".to_string(), + }); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut matches = Vec::new(); + for line in stdout.lines() { + if matches.len() >= limit { + break; + } + let Ok(event) = serde_json::from_str::(line) else { + continue; + }; + if event.event_type != "match" { 
+ continue; + } + let Some(data) = event.data else { + continue; + }; + let path = data.path.text; + let rel_path = normalize_path_string(&scope.root, &PathBuf::from(path)); + let submatches = data + .submatches + .into_iter() + .map(|sub| TextSubmatch { + match_text: TextValue { text: sub.match_text.text }, + start: sub.start as u64, + end: sub.end as u64, + }) + .collect(); + matches.push(TextMatch { + path: TextValue { text: rel_path }, + lines: TextValue { text: data.lines.text }, + line_number: data.line_number as u64, + absolute_offset: data.absolute_offset as u64, + submatches, + }); + } + + Ok(matches) +} + +fn search_text_fallback( + scope: &SearchScope, + pattern: &str, + case_sensitive: Option, + limit: usize, +) -> Result, SandboxError> { + let regex = build_text_regex(pattern, case_sensitive)?; + let mut matches = Vec::new(); + + walk_dir(&scope.directory, |path, file_type| { + if matches.len() >= limit { + return WalkAction::Stop; + } + if file_type.is_dir() { + if should_skip_dir(path) { + return WalkAction::Skip; + } + return WalkAction::Continue; + } + if !file_type.is_file() { + return WalkAction::Continue; + } + let Ok(content) = fs::read_to_string(path) else { + return WalkAction::Continue; + }; + + let mut absolute_offset = 0u64; + for (line_index, line) in content.split_inclusive('\n').enumerate() { + let line_text = line.trim_end_matches(['\n', '\r']); + let mut submatches = Vec::new(); + for mat in regex.find_iter(line_text) { + if matches.len() >= limit { + break; + } + submatches.push(TextSubmatch { + match_text: TextValue { + text: mat.as_str().to_string(), + }, + start: mat.start() as u64, + end: mat.end() as u64, + }); + } + if !submatches.is_empty() { + matches.push(TextMatch { + path: TextValue { + text: relative_path(&scope.root, path), + }, + lines: TextValue { + text: line_text.to_string(), + }, + line_number: (line_index + 1) as u64, + absolute_offset, + submatches, + }); + } + absolute_offset += line.as_bytes().len() as u64; + if 
/// Smart-case probe used when no explicit case sensitivity was requested:
/// returns `true` when `pattern` contains an uppercase *literal* character.
///
/// Uppercase is decided by Unicode (`char::is_uppercase`), not just ASCII.
/// Characters that belong to a backslash escape are not literals and are
/// ignored: the character right after `\` is skipped, and for `\p` / `\P`
/// the class name (`\pL` or `\p{...}`) is skipped as well. This keeps the
/// in-process fallback aligned with ripgrep's `--smart-case`, where `\S+foo`
/// and `foo\pL` are still searched case-insensitively.
fn contains_uppercase(pattern: &str) -> bool {
    let mut chars = pattern.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            if ch.is_uppercase() {
                return true;
            }
            continue;
        }

        // `ch` opens an escape sequence: consume the escaped character.
        if let Some('p' | 'P') = chars.next() {
            // Unicode class: either one letter (`\pL`) or a braced name (`\p{Lu}`).
            if chars.next() == Some('{') {
                let _ = chars.by_ref().find(|&c| c == '}');
            }
        }
    }
    false
}
=> regex.push('.'), + '[' => { + regex.push('['); + while let Some(next) = chars.next() { + regex.push(next); + if next == ']' { + break; + } + } + } + _ => regex.push_str(®ex::escape(&ch.to_string())), + } + } + regex.push('$'); + + Regex::new(®ex).map_err(|err| SandboxError::InvalidRequest { + message: err.to_string(), + }) +} + +fn update_symbol_index(index: &mut SymbolIndex, directory: &Path) -> Result<(), SandboxError> { + let mut fingerprint = std::collections::hash_map::DefaultHasher::new(); + let mut files = Vec::new(); + + walk_dir(directory, |path, file_type| { + if file_type.is_dir() { + if should_skip_dir(path) { + return WalkAction::Skip; + } + return WalkAction::Continue; + } + if !file_type.is_file() { + return WalkAction::Continue; + } + + if !is_supported_symbol_file(path) { + return WalkAction::Continue; + } + + if let Ok(metadata) = fs::metadata(path) { + if let Ok(modified) = metadata.modified() { + path.hash(&mut fingerprint); + modified + .duration_since(SystemTime::UNIX_EPOCH) + .ok() + .map(|d| d.as_secs()) + .hash(&mut fingerprint); + } + } + files.push(path.to_path_buf()); + + WalkAction::Continue + })?; + + let new_fingerprint = fingerprint.finish(); + if new_fingerprint == index.fingerprint { + return Ok(()); + } + + let mut symbols = Vec::new(); + for path in files { + let Ok(content) = fs::read_to_string(&path) else { + continue; + }; + symbols.extend(extract_symbols_for_file(&path, &content)); + } + + index.fingerprint = new_fingerprint; + index.symbols = symbols; + + Ok(()) +} + +fn extract_symbols_for_file(path: &Path, content: &str) -> Vec { + let Some(ext) = path.extension().and_then(|v| v.to_str()) else { + return Vec::new(); + }; + let patterns = symbol_patterns_for_extension(ext); + if patterns.is_empty() { + return Vec::new(); + } + + let uri = path_to_file_uri(path); + let mut symbols = Vec::new(); + + for (line_index, line) in content.lines().enumerate() { + for pattern in &patterns { + for caps in 
pattern.regex.captures_iter(line) { + let Some(matched) = caps.get(1) else { + continue; + }; + let name = matched.as_str(); + let start = matched.start() as u32; + let end = matched.end() as u32; + symbols.push(Symbol { + name: name.to_string(), + kind: pattern.kind, + location: SymbolLocation { + uri: uri.clone(), + range: Range { + start: Position { + line: line_index as u32, + character: start, + }, + end: Position { + line: line_index as u32, + character: end, + }, + }, + }, + }); + } + } + } + + symbols +} + +fn symbol_patterns_for_extension(ext: &str) -> Vec { + match ext { + "rs" => rust_symbol_patterns(), + "js" | "jsx" | "ts" | "tsx" => js_symbol_patterns(), + "py" => python_symbol_patterns(), + "go" => go_symbol_patterns(), + _ => Vec::new(), + } +} + +fn rust_symbol_patterns() -> Vec { + vec![ + SymbolPattern { + regex: Regex::new(r"^\s*(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)") + .unwrap(), + kind: SYMBOL_KIND_FUNCTION, + }, + SymbolPattern { + regex: Regex::new(r"^\s*(?:pub\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap(), + kind: SYMBOL_KIND_STRUCT, + }, + SymbolPattern { + regex: Regex::new(r"^\s*(?:pub\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap(), + kind: SYMBOL_KIND_ENUM, + }, + SymbolPattern { + regex: Regex::new(r"^\s*(?:pub\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap(), + kind: SYMBOL_KIND_INTERFACE, + }, + SymbolPattern { + regex: Regex::new(r"^\s*(?:pub\s+)?const\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap(), + kind: SYMBOL_KIND_CONSTANT, + }, + ] +} + +fn js_symbol_patterns() -> Vec { + vec![ + SymbolPattern { + regex: Regex::new( + r"^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)", + ) + .unwrap(), + kind: SYMBOL_KIND_FUNCTION, + }, + SymbolPattern { + regex: Regex::new(r"^\s*(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)") + .unwrap(), + kind: SYMBOL_KIND_CLASS, + }, + SymbolPattern { + regex: Regex::new( + r"^\s*(?:export\s+)?interface\s+([A-Za-z_$][A-Za-z0-9_$]*)", + ) + .unwrap(), + kind: 
SYMBOL_KIND_INTERFACE, + }, + SymbolPattern { + regex: Regex::new(r"^\s*(?:export\s+)?type\s+([A-Za-z_$][A-Za-z0-9_$]*)").unwrap(), + kind: SYMBOL_KIND_TYPE_PARAMETER, + }, + SymbolPattern { + regex: Regex::new( + r"^\s*(?:export\s+)?const\s+([A-Za-z_$][A-Za-z0-9_$]*)", + ) + .unwrap(), + kind: SYMBOL_KIND_CONSTANT, + }, + SymbolPattern { + regex: Regex::new( + r"^\s*(?:export\s+)?(?:let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)", + ) + .unwrap(), + kind: SYMBOL_KIND_VARIABLE, + }, + ] +} + +fn python_symbol_patterns() -> Vec { + vec![ + SymbolPattern { + regex: Regex::new(r"^\s*def\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap(), + kind: SYMBOL_KIND_FUNCTION, + }, + SymbolPattern { + regex: Regex::new(r"^\s*class\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap(), + kind: SYMBOL_KIND_CLASS, + }, + ] +} + +fn go_symbol_patterns() -> Vec { + vec![ + SymbolPattern { + regex: Regex::new(r"^\s*func\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap(), + kind: SYMBOL_KIND_FUNCTION, + }, + SymbolPattern { + regex: Regex::new( + r"^\s*func\s*\(.*?\)\s*([A-Za-z_][A-Za-z0-9_]*)", + ) + .unwrap(), + kind: SYMBOL_KIND_METHOD, + }, + SymbolPattern { + regex: Regex::new(r"^\s*type\s+([A-Za-z_][A-Za-z0-9_]*)\s+struct").unwrap(), + kind: SYMBOL_KIND_STRUCT, + }, + SymbolPattern { + regex: Regex::new(r"^\s*type\s+([A-Za-z_][A-Za-z0-9_]*)\s+interface").unwrap(), + kind: SYMBOL_KIND_INTERFACE, + }, + ] +} + +fn is_supported_symbol_file(path: &Path) -> bool { + match path.extension().and_then(|v| v.to_str()) { + Some("rs" | "js" | "jsx" | "ts" | "tsx" | "py" | "go") => true, + _ => false, + } +} + +fn relative_path(root: &Path, path: &Path) -> String { + normalize_path_string(root, path) +} + +fn normalize_path_string(root: &Path, path: &Path) -> String { + let candidate = if path.is_absolute() { + path.to_path_buf() + } else { + root.join(path) + }; + let rel = candidate.strip_prefix(root).unwrap_or(candidate.as_path()); + rel.to_string_lossy().replace('\\', "/") +} + +fn path_to_file_uri(path: &Path) -> String { + let raw = 
/// Percent-encodes a `/`-separated path for use inside a `file://` URI.
///
/// ASCII letters, digits and `/ - . _ ~` are copied through unchanged; every
/// other byte of the UTF-8 input is written as `%XX` with uppercase hex
/// digits.
fn percent_encode_path(path: &str) -> String {
    use std::fmt::Write;

    // Every input byte yields at least one output byte.
    let mut out = String::with_capacity(path.len());
    for &byte in path.as_bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'/' | b'-' | b'.' | b'_' | b'~') {
            out.push(char::from(byte));
        } else {
            // Formatting straight into `out` avoids a temporary `String` per
            // escaped byte; writing to a `String` cannot fail.
            let _ = write!(out, "%{:02X}", byte);
        }
    }
    out
}
/// One element of the `submatches` array carried by a ripgrep `--json`
/// `match` event. `rg_search` copies these fields one-to-one into
/// `TextSubmatch`.
#[derive(Debug, Deserialize)]
struct RgSubmatch {
    /// The matched text; the JSON key is `match`, which is a Rust keyword,
    /// hence the rename.
    #[serde(rename = "match")]
    match_text: RgText,
    /// Start of the match as reported by ripgrep.
    // NOTE(review): presumably a byte offset into the event's `lines` text —
    // confirm against ripgrep's JSON output documentation.
    start: u64,
    /// End of the match as reported by ripgrep (same unit as `start`).
    end: u64,
}
afterEach(async () => { + await handle?.dispose(); + if (fixtureDir) { + await rm(fixtureDir, { recursive: true, force: true }); + } + }); + + it("finds text matches", async () => { + const response = await client.find.text({ + query: { pattern: "needle" }, + }); + + expect(response.error).toBeUndefined(); + expect(response.data?.length).toBeGreaterThan(0); + + const match = response.data?.find((entry) => entry.path.text.endsWith("src/lib.rs")); + expect(match).toBeDefined(); + expect(match?.lines.text).toContain("needle"); + }); + + it("finds files", async () => { + const response = await client.find.files({ + query: { query: "lib.rs" }, + }); + + expect(response.error).toBeUndefined(); + expect(response.data).toContain("src/lib.rs"); + }); + + it("finds symbols", async () => { + const response = await client.find.symbols({ + query: { query: "greet" }, + }); + + expect(response.error).toBeUndefined(); + const symbols = response.data ?? []; + expect(symbols.some((symbol) => symbol.name === "greet")).toBe(true); + }); +});