diff --git a/Cargo.lock b/Cargo.lock index 57da983b..2fbfe22f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -964,7 +964,7 @@ dependencies = [ "codex-utils-tokenizer", "codex-windows-sandbox", "core-foundation 0.9.4", - "dirs", + "dirs 6.0.0", "dunce", "env-flags", "eventsource-stream", @@ -1106,7 +1106,7 @@ dependencies = [ "axum", "codex-keyring-store", "codex-protocol", - "dirs", + "dirs 6.0.0", "futures", "keyring", "mcp-types", @@ -1488,13 +1488,22 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" dependencies = [ - "dirs-sys", + "dirs-sys 0.5.0", ] [[package]] @@ -1507,6 +1516,18 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", +] + [[package]] name = "dirs-sys" version = "0.5.0" @@ -3050,6 +3071,7 @@ dependencies = [ "chrono", "clap", "config", + "dirs 5.0.1", "home", "nocodo-agents", "nocodo-llm-sdk", diff --git a/gui/vite.config.ts b/gui/vite.config.ts index d3e3a72f..6203cecc 100644 --- a/gui/vite.config.ts +++ b/gui/vite.config.ts @@ -6,7 +6,7 @@ import tailwindcss from '@tailwindcss/vite'; export default defineConfig({ plugins: [devtools(), tailwindcss(), solidPlugin()], server: { - port: 3000, + port: 9010, }, build: { target: 'esnext', diff --git a/nocodo-agents/src/lib.rs b/nocodo-agents/src/lib.rs index 91f24098..724aa2da 100644 --- a/nocodo-agents/src/lib.rs +++ b/nocodo-agents/src/lib.rs @@ -3,6 +3,7 @@ pub mod config; pub mod database; pub mod factory; pub mod imap_email; +pub mod pdftotext; pub mod requirements_gathering; pub mod settings_management; pub mod sqlite_reader; @@ -28,6 +29,7 @@ pub enum AgentTool { AskUser, Sqlite3Reader, ImapReader, + PdfToText, } impl AgentTool { @@ -43,6 +45,7 @@ impl AgentTool { AgentTool::AskUser => "ask_user", AgentTool::Sqlite3Reader => "sqlite3_reader", AgentTool::ImapReader => "imap_reader", + AgentTool::PdfToText => "pdftotext", } } @@ -111,6 +114,11 @@ impl AgentTool { serde_json::from_value(arguments)?; ToolRequest::ImapReader(req) } + "pdftotext" => { + let req: nocodo_tools::types::PdfToTextRequest = + serde_json::from_value(arguments)?; + ToolRequest::PdfToText(req) + } _ => anyhow::bail!("Unknown tool: {}", name), }; @@ -151,6 +159,23 @@ pub fn format_tool_response(response: &nocodo_tools::types::ToolResponse) -> Str ) } } + ToolResponse::PdfToText(r) => { + if r.success { + if let Some(content) = &r.content { + format!("PDF text extraction successful:\n{}", content) + } else if let Some(output_path) = &r.output_path { + format!( + "PDF text extraction successful: {} bytes written to {}", + r.bytes_written.unwrap_or(0), + output_path + ) + } else { + r.message.clone() + } + } else { + format!("PDF text extraction failed: {}", r.message) + } + } ToolResponse::Error(e) => format!("Error: {}", e.message), } } diff --git a/nocodo-agents/src/pdftotext/mod.rs b/nocodo-agents/src/pdftotext/mod.rs new file mode 100644 index 00000000..2e78b730 --- /dev/null +++ b/nocodo-agents/src/pdftotext/mod.rs @@ -0,0 +1,569 @@ +use crate::{database::Database, Agent, AgentTool}; +use anyhow::{self, Context}; +use async_trait::async_trait; +use nocodo_llm_sdk::client::LlmClient; +use nocodo_llm_sdk::tools::{ToolCall, ToolChoice}; +use nocodo_llm_sdk::types::{CompletionRequest, ContentBlock, Message, Role}; +use nocodo_tools::{ + bash::{BashExecutor, BashPermissions}, + ToolExecutor, +}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Instant; + +/// Agent specialized in extracting text from PDFs using pdftotext and qpdf +pub struct PdfToTextAgent { + client: Arc, + database: Arc, + tool_executor: Arc, + #[allow(dead_code)] // Stored for reference, used during construction + pdf_path: PathBuf, + #[allow(dead_code)] // Used in system prompt generation during construction + pdf_filename: String, + system_prompt: String, +} + +impl PdfToTextAgent { + /// Create a new PdfToTextAgent + /// + /// # Arguments + /// * `client` - LLM client for AI inference + /// * `database` - Database for session/message tracking + /// * `pdf_path` - Path to the PDF file to process + /// + /// # Security + /// The agent is configured with restricted bash access: + /// - Only the `pdftotext` and `qpdf` commands are allowed + /// - All other bash commands are denied + /// - File operations are restricted to the PDF's directory + /// + /// # Pre-conditions + /// - pdftotext (poppler-utils) must be installed on the system + /// - qpdf must be installed for page extraction operations + /// - Run `pdftotext -v` and `qpdf --version` to verify installation + /// - The PDF file must exist + pub fn new( + client: Arc, + database: Arc, + pdf_path: PathBuf, + ) -> anyhow::Result { + // Validate PDF path exists + if !pdf_path.exists() { + anyhow::bail!("PDF file does not exist: {}", pdf_path.display()); + } + + // Extract filename and directory + let pdf_filename = pdf_path + .file_name() + .ok_or_else(|| anyhow::anyhow!("Invalid PDF path - no filename"))? + .to_string_lossy() + .to_string(); + + let base_path = pdf_path + .parent() + .ok_or_else(|| anyhow::anyhow!("Invalid PDF path - no parent directory"))? + .to_path_buf(); + + // Create restricted bash permissions (only pdftotext and qpdf commands) + let bash_perms = BashPermissions::minimal(vec!["pdftotext", "qpdf"]); + let bash_executor = BashExecutor::new(bash_perms, 120)?; + + // Create tool executor with restricted bash + let tool_executor = Arc::new( + ToolExecutor::builder() + .base_path(base_path) + .bash_executor(Some(Box::new(bash_executor))) + .build(), + ); + + let system_prompt = generate_system_prompt(&pdf_filename); + + Ok(Self { + client, + database, + tool_executor, + pdf_path, + pdf_filename, + system_prompt, + }) + } + + /// Get tool definitions for this agent + fn get_tool_definitions(&self) -> Vec { + self.tools() + .into_iter() + .map(|tool| tool.to_tool_definition()) + .collect() + } + + /// Build messages from session history + fn build_messages(&self, session_id: i64) -> anyhow::Result> { + let db_messages = self.database.get_messages(session_id)?; + + db_messages + .into_iter() + .map(|msg| { + let role = match msg.role.as_str() { + "user" => Role::User, + "assistant" => Role::Assistant, + "system" => Role::System, + "tool" => Role::User, + _ => Role::User, + }; + + Ok(Message { + role, + content: vec![ContentBlock::Text { text: msg.content }], + }) + }) + .collect() + } + + /// Execute a tool call + async fn execute_tool_call( + &self, + session_id: i64, + message_id: Option, + tool_call: &ToolCall, + ) -> anyhow::Result<()> { + // 1. Parse LLM tool call into typed ToolRequest + let tool_request = + AgentTool::parse_tool_call(tool_call.name(), tool_call.arguments().clone())?; + + // 2. Record tool call in database + let call_id = self.database.create_tool_call( + session_id, + message_id, + tool_call.id(), + tool_call.name(), + tool_call.arguments().clone(), + )?; + + // 3. Execute tool + let start = Instant::now(); + let result: anyhow::Result = + self.tool_executor.execute(tool_request).await; + let execution_time = start.elapsed().as_millis() as i64; + + // 4. Update database with result + match result { + Ok(response) => { + let response_json = serde_json::to_value(&response)?; + self.database + .complete_tool_call(call_id, response_json.clone(), execution_time)?; + + let result_text = crate::format_tool_response(&response); + let message_to_llm = format!("Tool {} result:\n{}", tool_call.name(), result_text); + + tracing::debug!( + tool_name = tool_call.name(), + tool_id = tool_call.id(), + execution_time_ms = execution_time, + "Tool execution completed successfully" + ); + + self.database + .create_message(session_id, "tool", &message_to_llm)?; + } + Err(e) => { + let error_msg = format!("{:?}", e); + self.database.fail_tool_call(call_id, &error_msg)?; + + let error_message_to_llm = + format!("Tool {} failed: {}", tool_call.name(), error_msg); + + tracing::debug!( + tool_name = tool_call.name(), + tool_id = tool_call.id(), + error = %error_msg, + "Tool execution failed" + ); + + self.database + .create_message(session_id, "tool", &error_message_to_llm)?; + } + } + + Ok(()) + } +} + +#[async_trait] +impl Agent for PdfToTextAgent { + fn objective(&self) -> &str { + "Extract text from PDF files using pdftotext with layout preservation and optional page selection using qpdf" + } + + fn system_prompt(&self) -> String { + self.system_prompt.clone() + } + + fn pre_conditions(&self) -> Option> { + Some(vec![ + "pdftotext (poppler-utils) must be installed on the system".to_string(), + "qpdf must be installed for page extraction operations".to_string(), + "Run 'pdftotext -v' to verify pdftotext installation".to_string(), + "Run 'qpdf --version' to verify qpdf installation".to_string(), + "Install with: brew install poppler qpdf (macOS) or apt-get install poppler-utils qpdf (Linux)" + .to_string(), + ]) + } + + fn tools(&self) -> Vec { + vec![ + AgentTool::Bash, // Only pdftotext and qpdf commands allowed + AgentTool::ReadFile, // Read extracted text + AgentTool::WriteFile, // Write cleaned results + ] + } + + async fn execute(&self, user_prompt: &str, _session_id: i64) -> anyhow::Result { + // 1. Create session + let session_id = self.database.create_session( + "pdftotext", + self.client.provider_name(), + self.client.model_name(), + Some(&self.system_prompt), + user_prompt, + None, // No config for PdfToTextAgent + )?; + + // 2. Create initial user message + self.database + .create_message(session_id, "user", user_prompt)?; + + // 3. Get tool definitions + let tools = self.get_tool_definitions(); + + // 4. Execution loop (max 30 iterations) + let mut iteration = 0; + let max_iterations = 30; + + loop { + iteration += 1; + if iteration > max_iterations { + let error = "Maximum iteration limit reached"; + self.database.fail_session(session_id, error)?; + return Err(anyhow::anyhow!(error)); + } + + // 5. Build request with conversation history + let messages = self.build_messages(session_id)?; + + let request = CompletionRequest { + messages, + max_tokens: 4000, + model: self.client.model_name().to_string(), + system: Some(self.system_prompt()), + temperature: Some(0.7), + top_p: None, + stop_sequences: None, + tools: Some(tools.clone()), + tool_choice: Some(ToolChoice::Auto), + response_format: None, + }; + + // 6. Call LLM + let response = self.client.complete(request).await?; + + // 7. Extract text and save assistant message + let text = extract_text_from_content(&response.content); + let message_id = self + .database + .create_message(session_id, "assistant", &text)?; + + // 8. Check for tool calls + if let Some(tool_calls) = response.tool_calls { + if tool_calls.is_empty() { + self.database.complete_session(session_id, &text)?; + return Ok(text); + } + + // 9. Execute tools + for tool_call in tool_calls { + self.execute_tool_call(session_id, Some(message_id), &tool_call) + .await?; + } + } else { + self.database.complete_session(session_id, &text)?; + return Ok(text); + } + } + } +} + +/// Generate system prompt for PdfToTextAgent +fn generate_system_prompt(pdf_filename: &str) -> String { + format!( + r#"You are a PDF text extraction specialist. Your task is to extract text from the PDF file "{}" and optionally clean and format the extracted text. + +You have access to these tools: +1. bash - ONLY for running pdftotext and qpdf commands +2. read_file - To read extracted text files +3. write_file - To write cleaned results (optional) + +# PDF File + +The PDF file to process is: {} + +# Available Commands + +## pdftotext - Extract text from PDF + +Basic usage: +pdftotext [options] {} + +Key options: +- -layout : Maintain original physical layout (RECOMMENDED for preserving formatting) +- -f : First page to convert +- -l : Last page to convert +- -nopgbrk : Don't insert page breaks between pages +- -enc : Output text encoding (default: UTF-8) +- -raw : Keep strings in content stream order (alternative to -layout) + +The -layout flag is HIGHLY RECOMMENDED as it preserves the original formatting, tables, and structure. + +Examples: +- Extract all pages with layout: pdftotext -layout {} output.txt +- Extract pages 1-5: pdftotext -layout -f 1 -l 5 {} output.txt +- Extract without page breaks: pdftotext -layout -nopgbrk {} output.txt + +## qpdf - Extract specific pages to a new PDF + +Use qpdf when the user wants to extract specific pages BEFORE text extraction. + +Basic usage: +qpdf {} --pages . -- output.pdf + +Page range syntax: +- Single page: 1 +- Range: 1-5 +- Multiple ranges: 1-3,7-10 +- From end: r1 (last page), r2 (second to last) +- Last page: z + +Examples: +- Extract pages 1-5: qpdf {} --pages . 1-5 -- pages_1-5.pdf +- Extract pages 2,4,6: qpdf {} --pages . 2,4,6 -- selected_pages.pdf +- Extract last 3 pages: qpdf {} --pages . r3-r1 -- last_3_pages.pdf + +# Workflow + +## Simple extraction (most common): +1. Run: pdftotext -layout {} output.txt +2. Read: output.txt +3. Present the extracted text to the user + +## Extract specific pages (if user requests): +Option A: Use pdftotext -f and -l flags directly +1. Run: pdftotext -layout -f 1 -l 5 {} output.txt +2. Read: output.txt +3. Present the extracted text + +Option B: Use qpdf first, then pdftotext +1. Run: qpdf {} --pages . 1-5 -- pages_1-5.pdf +2. Run: pdftotext -layout pages_1-5.pdf output.txt +3. Read: output.txt +4. Present the extracted text + +## Clean and format (if user requests): +1. Extract text using pdftotext +2. Read the output file +3. Analyze and clean the text: + - Fix common extraction errors + - Improve formatting and structure + - Remove artifacts or noise + - Preserve intended structure (tables, paragraphs, lists) +4. Present cleaned text to user +5. Optionally write cleaned result to a file if requested + +# Example Interactions + +User: "Extract text from this PDF" +1. Run: pdftotext -layout {} output.txt +2. Read: output.txt +3. Present the extracted text + +User: "Extract text from pages 1-10" +1. Run: pdftotext -layout -f 1 -l 10 {} output.txt +2. Read: output.txt +3. Present the extracted text + +User: "Extract and clean the text from pages 5-15" +1. Run: pdftotext -layout -f 5 -l 15 {} output.txt +2. Read: output.txt +3. Analyze and clean the text +4. Present cleaned text to user + +User: "Extract page 3 only" +1. Run: pdftotext -layout -f 3 -l 3 {} page_3.txt +2. Read: page_3.txt +3. Present the extracted text + +# Important Notes + +- You can ONLY run pdftotext and qpdf commands (no other bash commands will work) +- The PDF file is: {} +- ALWAYS use -layout flag with pdftotext to preserve formatting (unless user explicitly asks not to) +- pdftotext creates output files automatically (don't need to redirect with >) +- Page numbers start at 1 +- For page extraction, using pdftotext -f/-l is usually simpler than qpdf +- Use qpdf when you need complex page selection (e.g., non-contiguous pages like 1,5,10) +"#, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename + ) +} + +/// Helper function to extract text from content blocks +fn extract_text_from_content(content: &[ContentBlock]) -> String { + content + .iter() + .filter_map(|block| match block { + ContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") +} + +/// Verify that pdftotext is installed and accessible +pub fn verify_pdftotext_installation() -> anyhow::Result { + use std::process::Command; + + let output = Command::new("pdftotext") + .arg("-v") + .output() + .context("Failed to execute 'pdftotext -v'. Is pdftotext (poppler-utils) installed?")?; + + // pdftotext -v outputs to stderr + let version_info = String::from_utf8_lossy(&output.stderr).to_string(); + + if version_info.is_empty() { + anyhow::bail!("pdftotext command did not return version information"); + } + + Ok(version_info) +} + +/// Verify that qpdf is installed and accessible +pub fn verify_qpdf_installation() -> anyhow::Result { + use std::process::Command; + + let output = Command::new("qpdf") + .arg("--version") + .output() + .context("Failed to execute 'qpdf --version'. Is qpdf installed?")?; + + if !output.status.success() { + anyhow::bail!( + "qpdf command failed. stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + + let version_info = String::from_utf8_lossy(&output.stdout).to_string(); + + Ok(version_info) +} + +impl PdfToTextAgent { + /// Verify pre-conditions before creating agent + pub fn verify_preconditions() -> anyhow::Result<()> { + // Check pdftotext + match verify_pdftotext_installation() { + Ok(version) => { + tracing::info!( + "pdftotext found: {}", + version.lines().next().unwrap_or("") + ); + } + Err(e) => { + anyhow::bail!( + "pdftotext is not installed or not accessible.\n\ + Error: {}\n\ + \n\ + Installation instructions:\n\ + - macOS: brew install poppler\n\ + - Ubuntu/Debian: sudo apt-get install poppler-utils\n\ + - Windows: Download from https://blog.alivate.com.au/poppler-windows/\n\ + \n\ + After installation, verify with: pdftotext -v", + e + ) + } + } + + // Check qpdf + match verify_qpdf_installation() { + Ok(version) => { + tracing::info!("qpdf found: {}", version.lines().next().unwrap_or("")); + } + Err(e) => { + anyhow::bail!( + "qpdf is not installed or not accessible.\n\ + Error: {}\n\ + \n\ + Installation instructions:\n\ + - macOS: brew install qpdf\n\ + - Ubuntu/Debian: sudo apt-get install qpdf\n\ + - Windows: Download from https://github.com/qpdf/qpdf/releases\n\ + \n\ + After installation, verify with: qpdf --version", + e + ) + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_agent_creation() { + // Note: This test requires setting up mock dependencies + // Skipping for now due to complexity of mocking LlmClient and Database + } + + #[test] + fn test_system_prompt_generation() { + let prompt = generate_system_prompt("test.pdf"); + assert!(prompt.contains("pdftotext")); + assert!(prompt.contains("qpdf")); + assert!(prompt.contains("-layout")); + assert!(prompt.contains("test.pdf")); + } + + #[test] + fn test_verify_pdftotext_installation() { + // This test will pass if pdftotext is installed + let _result = verify_pdftotext_installation(); + } + + #[test] + fn test_verify_qpdf_installation() { + // This test will pass if qpdf is installed + let _result = verify_qpdf_installation(); + } +} diff --git a/nocodo-agents/src/tools/llm_schemas.rs b/nocodo-agents/src/tools/llm_schemas.rs index d67e68e3..f9017930 100644 --- a/nocodo-agents/src/tools/llm_schemas.rs +++ b/nocodo-agents/src/tools/llm_schemas.rs @@ -122,6 +122,49 @@ pub fn create_tool_definitions() -> Vec { imap_schema, ).expect("Failed to create imap_reader tool schema"); + let pdftotext_schema = serde_json::json!({ + "type": "object", + "required": ["file_path"], + "properties": { + "file_path": { + "type": "string", + "description": "Path to the PDF file to extract text from" + }, + "output_path": { + "type": "string", + "description": "Optional output file path. If not specified, text is returned in the response" + }, + "preserve_layout": { + "type": "boolean", + "description": "Preserve original physical layout (default: true). Uses pdftotext -layout flag", + "default": true + }, + "first_page": { + "type": "integer", + "description": "First page to convert (optional, 1-based index)" + }, + "last_page": { + "type": "integer", + "description": "Last page to convert (optional, 1-based index)" + }, + "encoding": { + "type": "string", + "description": "Output text encoding (default: UTF-8)" + }, + "no_page_breaks": { + "type": "boolean", + "description": "Don't insert page breaks between pages (default: false)", + "default": false + } + } + }); + + let pdftotext_tool = Tool::from_json_schema( + "pdftotext".to_string(), + "Extract text from PDF files using pdftotext. Supports layout preservation, page range selection, and various encoding options. Use preserve_layout=true (default) to maintain formatting.".to_string(), + pdftotext_schema, + ).expect("Failed to create pdftotext tool schema"); + vec![ Tool::from_type::() .name("list_files") @@ -155,6 +198,7 @@ pub fn create_tool_definitions() -> Vec { .build(), sqlite_tool, imap_tool, + pdftotext_tool, ] } diff --git a/nocodo-api/src/config.rs b/nocodo-api/src/config.rs index 7f88c54f..ddf5864e 100644 --- a/nocodo-api/src/config.rs +++ b/nocodo-api/src/config.rs @@ -51,7 +51,7 @@ impl Default for ApiConfig { port: 8080, }, database: DatabaseConfig { - path: get_default_db_path(), + path: get_default_db_path().expect("Failed to detect OS data directory"), }, api_keys: None, llm: None, @@ -64,7 +64,7 @@ impl Default for ApiConfig { impl ApiConfig { pub fn load() -> Result<(Self, PathBuf), ConfigError> { - let config_path = get_config_path(); + let config_path = get_config_path()?; // Create config directory if it doesn't exist if let Some(parent) = config_path.parent() { @@ -75,13 +75,15 @@ impl ApiConfig { // Create default config file if it doesn't exist if !config_path.exists() { - let default_config = r#" + let default_db_path = get_default_db_path()?; + let default_config = format!( + r#" [server] host = "127.0.0.1" port = 8080 [database] -path = "~/.local/share/nocodo/api.db" +path = "{}" [cors] allowed_origins = ["http://localhost:3000"] @@ -97,7 +99,9 @@ allowed_origins = ["http://localhost:3000"] # zai_api_key = "your-zai-key" # zai_coding_plan = true # zen_api_key = "your-zen-key" -"#; +"#, + default_db_path.display() + ); std::fs::write(&config_path, default_config).map_err(|e| { ConfigError::Message(format!("Failed to write default config: {e}")) })?; @@ -122,18 +126,14 @@ allowed_origins = ["http://localhost:3000"] } } -fn get_config_path() -> PathBuf { - if let Some(config_dir) = dirs::config_dir() { - config_dir.join("nocodo/api.toml") - } else { - PathBuf::from("api.toml") - } +fn get_config_path() -> Result { + dirs::config_dir() + .map(|dir| dir.join("nocodo/api.toml")) + .ok_or_else(|| ConfigError::Message("Failed to detect OS config directory".to_string())) } -fn get_default_db_path() -> PathBuf { - if let Some(data_dir) = dirs::data_local_dir() { - data_dir.join("nocodo/api.db") - } else { - PathBuf::from("api.db") - } +fn get_default_db_path() -> Result { + dirs::data_local_dir() + .map(|dir| dir.join("nocodo/api.db")) + .ok_or_else(|| ConfigError::Message("Failed to detect OS data directory".to_string())) } diff --git a/nocodo-api/src/handlers/agent_execution/mod.rs b/nocodo-api/src/handlers/agent_execution/mod.rs index 6a44b71c..5a6ed8e6 100644 --- a/nocodo-api/src/handlers/agent_execution/mod.rs +++ b/nocodo-api/src/handlers/agent_execution/mod.rs @@ -1,5 +1,6 @@ pub mod codebase_analysis_agent; pub mod imap_email_agent; +pub mod pdftotext_agent; pub mod requirements_gathering_agent; pub mod settings_management_agent; pub mod sqlite_agent; diff --git a/nocodo-api/src/handlers/agent_execution/pdftotext_agent.rs b/nocodo-api/src/handlers/agent_execution/pdftotext_agent.rs new file mode 100644 index 00000000..ebe2ddb1 --- /dev/null +++ b/nocodo-api/src/handlers/agent_execution/pdftotext_agent.rs @@ -0,0 +1,97 @@ +use crate::helpers::agents::create_pdftotext_agent; +use crate::models::ErrorResponse; +use actix_web::{post, web, HttpResponse, Responder}; +use nocodo_agents::Agent; +use serde_json::json; +use shared_types::{AgentConfig, AgentExecutionRequest, AgentExecutionResponse}; +use std::sync::Arc; +use tracing::{error, info}; + +#[post("/agents/pdftotext/execute")] +pub async fn execute_pdftotext_agent( + req: web::Json, + llm_client: web::Data>, + database: web::Data>, +) -> impl Responder { + let pdf_path = match &req.config { + AgentConfig::PdfToText(config) => config.pdf_path.clone(), + _ => { + error!(config_type = ?req.config, "Invalid config type for PdfToText agent"); + return HttpResponse::BadRequest().json(ErrorResponse { + error: "Expected PdfToText agent config".to_string(), + }); + } + }; + + info!( + user_prompt = %req.user_prompt, + pdf_path = %pdf_path, + "Executing PdfToText agent" + ); + + let user_prompt = req.user_prompt.clone(); + let agent_name = "pdftotext".to_string(); + + let config = json!(&req.config); + + let provider = llm_client.provider_name().to_string(); + let model = llm_client.model_name().to_string(); + + let session_id = match database.create_session( + &agent_name, + &provider, + &model, + None, + &user_prompt, + Some(config), + ) { + Ok(id) => id, + Err(e) => { + error!(error = %e, "Failed to create session"); + return HttpResponse::InternalServerError().json(ErrorResponse { + error: format!("Failed to create session: {}", e), + }); + } + }; + + // Return immediately with session_id and spawn background task + let llm_client_clone = llm_client.get_ref().clone(); + let database_clone = database.get_ref().clone(); + let pdf_path_clone = pdf_path.clone(); + let user_prompt_clone = user_prompt.clone(); + + tokio::spawn(async move { + let agent = + match create_pdftotext_agent(&llm_client_clone, &database_clone, &pdf_path_clone).await + { + Ok(agent) => agent, + Err(e) => { + error!(error = %e, session_id = session_id, "Failed to create PdfToText agent"); + let _ = database_clone + .fail_session(session_id, &format!("Failed to create agent: {}", e)); + return; + } + }; + + match agent.execute(&user_prompt_clone, session_id).await { + Ok(result) => { + info!(result = %result, session_id = session_id, "Agent execution completed successfully"); + if let Err(e) = database_clone.complete_session(session_id, &result) { + error!(error = %e, session_id = session_id, "Failed to complete session"); + } + } + Err(e) => { + error!(error = %e, session_id = session_id, "Agent execution failed"); + let _ = + database_clone.fail_session(session_id, &format!("Execution failed: {}", e)); + } + } + }); + + HttpResponse::Ok().json(AgentExecutionResponse { + session_id, + agent_name, + status: "running".to_string(), + result: String::new(), + }) +} diff --git a/nocodo-api/src/helpers/agents.rs b/nocodo-api/src/helpers/agents.rs index 038dd963..7cbb0d40 100644 --- a/nocodo-api/src/helpers/agents.rs +++ b/nocodo-api/src/helpers/agents.rs @@ -59,6 +59,14 @@ pub fn list_supported_agents() -> Vec { .to_string(), enabled: true, }, + AgentInfo { + id: "pdftotext".to_string(), + name: "PDF to Text Agent".to_string(), + description: + "Agent for extracting text from PDF files using pdftotext with layout preservation and page selection capabilities" + .to_string(), + enabled: true, + }, ] } @@ -258,3 +266,28 @@ pub fn create_imap_agent( Ok(agent) } + +/// Creates a PDF to Text agent +/// +/// # Arguments +/// +/// * `llm_client` - The LLM client to use for the agent +/// * `database` - Shared database for session persistence +/// * `pdf_path` - Path to the PDF file to process +/// +/// # Returns +/// +/// A PDF to Text agent instance +pub async fn create_pdftotext_agent( + llm_client: &Arc, + database: &Arc, + pdf_path: &str, +) -> anyhow::Result { + let agent = nocodo_agents::pdftotext::PdfToTextAgent::new( + llm_client.clone(), + database.clone(), + std::path::PathBuf::from(pdf_path), + )?; + + Ok(agent) +} diff --git a/nocodo-api/src/main.rs b/nocodo-api/src/main.rs index 22e66a77..6106cfca 100644 --- a/nocodo-api/src/main.rs +++ b/nocodo-api/src/main.rs @@ -64,11 +64,11 @@ async fn main() -> Result<(), anyhow::Error> { .expect("Failed to acquire config read lock"); let llm_client = helpers::llm::create_llm_client(&config).expect("Failed to create LLM client"); let db_path = config.database.path.clone(); + let bind_addr = format!("{}:{}", config.server.host, config.server.port); drop(config); let (db_conn, db) = helpers::database::initialize_database(&db_path).expect("Failed to initialize database"); - let bind_addr = "127.0.0.1:8080"; info!("Starting nocodo-api server at http://{}", bind_addr); let cors_config = app_config @@ -116,6 +116,7 @@ async fn main() -> Result<(), anyhow::Error> { handlers::agent_execution::workflow_creation_agent::execute_workflow_creation_agent, ) .service(handlers::agent_execution::imap_email_agent::execute_imap_agent) + .service(handlers::agent_execution::pdftotext_agent::execute_pdftotext_agent) .service(handlers::sessions::list_sessions) .service(handlers::sessions::get_session) .service(handlers::sessions::get_pending_questions) diff --git a/nocodo-tools/src/lib.rs b/nocodo-tools/src/lib.rs index 286a7812..628f22b7 100644 --- a/nocodo-tools/src/lib.rs +++ b/nocodo-tools/src/lib.rs @@ -3,6 +3,7 @@ pub mod filesystem; pub mod grep; pub mod hackernews; pub mod imap; +pub mod pdftotext; pub mod sqlite_reader; pub mod tool_error; pub mod tool_executor; diff --git a/nocodo-tools/src/pdftotext/mod.rs b/nocodo-tools/src/pdftotext/mod.rs new file mode 100644 index 00000000..2408c4bc --- /dev/null +++ b/nocodo-tools/src/pdftotext/mod.rs @@ -0,0 +1,154 @@ +use crate::tool_error::ToolError; +use crate::types::{PdfToTextRequest, PdfToTextResponse}; +use std::path::Path; +use std::process::Command; + +/// Execute pdftotext command to extract text from PDF +pub fn execute_pdftotext(request: PdfToTextRequest) -> Result { + // Validate PDF file exists + let pdf_path = Path::new(&request.file_path); + if !pdf_path.exists() { + return Err(ToolError::InvalidInput(format!( + "PDF file does not exist: {}", + request.file_path + ))); + } + + // Build pdftotext command + let mut cmd = Command::new("pdftotext"); + + // Add layout preservation flag (default: true) + if request.preserve_layout { + cmd.arg("-layout"); + } + + // Add page range if specified + if let Some(first_page) = request.first_page { + cmd.arg("-f").arg(first_page.to_string()); + } + if let Some(last_page) = request.last_page { + cmd.arg("-l").arg(last_page.to_string()); + } + + // Add encoding if specified + if let Some(ref encoding) = request.encoding { + cmd.arg("-enc").arg(encoding); + } + + // Add no page breaks flag if specified + if request.no_page_breaks { + cmd.arg("-nopgbrk"); + } + + // Add input file + cmd.arg(&request.file_path); + + // Determine output: file or stdout + let output_to_stdout = request.output_path.is_none(); + if output_to_stdout { + // Output to stdout (use "-" as output file) + cmd.arg("-"); + } else if let Some(ref output_path) = request.output_path { + cmd.arg(output_path); + } + + // Execute command + let output = cmd.output().map_err(|e| { + ToolError::ExecutionError(format!( + "Failed to execute pdftotext command. Is pdftotext installed? Error: {}", + e + )) + })?; + + // Check exit status + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(ToolError::ExecutionError(format!( + "pdftotext command failed: {}", + stderr + ))); + } + + // Build response + if output_to_stdout { + let content = String::from_utf8_lossy(&output.stdout).to_string(); + Ok(PdfToTextResponse { + content: Some(content.clone()), + output_path: None, + bytes_written: None, + success: true, + message: format!("Successfully extracted {} bytes of text", content.len()), + }) + } else { + let output_path = request.output_path.unwrap(); + let bytes_written = std::fs::metadata(&output_path) + .map(|m| m.len() as usize) + .unwrap_or(0); + Ok(PdfToTextResponse { + content: None, + output_path: Some(output_path.clone()), + bytes_written: Some(bytes_written), + success: true, + message: format!("Successfully wrote {} bytes to {}", bytes_written, output_path), + }) + } +} + +/// Verify that pdftotext is installed +pub fn verify_pdftotext_installation() -> anyhow::Result { + let output = Command::new("pdftotext") + .arg("-v") + .output() + .map_err(|e| anyhow::anyhow!("Failed to execute 'pdftotext -v'. Is pdftotext installed? Error: {}", e))?; + + // pdftotext -v outputs to stderr + let version_info = String::from_utf8_lossy(&output.stderr).to_string(); + + if version_info.is_empty() { + anyhow::bail!("pdftotext command did not return version information"); + } + + Ok(version_info) +} + +/// Verify that qpdf is installed +pub fn verify_qpdf_installation() -> anyhow::Result { + let output = Command::new("qpdf") + .arg("--version") + .output() + .map_err(|e| anyhow::anyhow!("Failed to execute 'qpdf --version'. Is qpdf installed? Error: {}", e))?; + + if !output.status.success() { + anyhow::bail!( + "qpdf command failed. stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + + let version_info = String::from_utf8_lossy(&output.stdout).to_string(); + + Ok(version_info) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_verify_pdftotext_installation() { + // This test will pass if pdftotext is installed + let result = verify_pdftotext_installation(); + if result.is_ok() { + println!("pdftotext version: {}", result.unwrap()); + } + } + + #[test] + fn test_verify_qpdf_installation() { + // This test will pass if qpdf is installed + let result = verify_qpdf_installation(); + if result.is_ok() { + println!("qpdf version: {}", result.unwrap()); + } + } +} diff --git a/nocodo-tools/src/tool_executor.rs b/nocodo-tools/src/tool_executor.rs index 6c097ec1..dfd8a6a8 100644 --- a/nocodo-tools/src/tool_executor.rs +++ b/nocodo-tools/src/tool_executor.rs @@ -10,6 +10,7 @@ use crate::filesystem::{apply_patch, list_files, read_file, write_file}; use crate::grep; use crate::hackernews; use crate::imap; +use crate::pdftotext; use crate::sqlite_reader; use crate::user_interaction; @@ -86,6 +87,9 @@ impl ToolExecutor { ToolRequest::ImapReader(req) => imap::execute_imap_reader(req) .await .map_err(|e| anyhow::anyhow!(e)), + ToolRequest::PdfToText(req) => pdftotext::execute_pdftotext(req) + .map(ToolResponse::PdfToText) + .map_err(|e| anyhow::anyhow!(e)), } } @@ -106,6 +110,7 @@ impl ToolExecutor { ToolResponse::Sqlite3Reader(response) => serde_json::to_value(response)?, ToolResponse::HackerNewsResponse(response) => serde_json::to_value(response)?, ToolResponse::ImapReader(response) => serde_json::to_value(response)?, + ToolResponse::PdfToText(response) => serde_json::to_value(response)?, ToolResponse::Error(response) => serde_json::to_value(response)?, }; diff --git a/nocodo-tools/src/types/core.rs b/nocodo-tools/src/types/core.rs index 68eca6e8..13c5c966 100644 --- a/nocodo-tools/src/types/core.rs +++ b/nocodo-tools/src/types/core.rs @@ -26,6 +26,8 @@ pub enum ToolRequest { HackerNewsRequest(super::hackernews::HackerNewsRequest), #[serde(rename = "imap_reader")] ImapReader(super::imap::ImapReaderRequest), + #[serde(rename = "pdftotext")] + PdfToText(super::pdftotext::PdfToTextRequest), } /// Tool response enum containing all possible tool results @@ -52,6 +54,8 @@ pub enum ToolResponse { HackerNewsResponse(super::hackernews::HackerNewsResponse), #[serde(rename = "imap_reader")] ImapReader(super::imap::ImapReaderResponse), + #[serde(rename = "pdftotext")] + PdfToText(super::pdftotext::PdfToTextResponse), #[serde(rename = "error")] Error(ToolErrorResponse), } diff --git a/nocodo-tools/src/types/mod.rs b/nocodo-tools/src/types/mod.rs index 861ef0f4..70a14efe 100644 --- a/nocodo-tools/src/types/mod.rs +++ b/nocodo-tools/src/types/mod.rs @@ -4,6 +4,7 @@ pub mod filesystem; pub mod grep; pub mod hackernews; pub mod imap; +pub mod pdftotext; pub mod sqlite_reader; // Re-export commonly used types @@ -17,6 +18,7 @@ pub use filesystem::{ pub use grep::{GrepMatch, GrepRequest, GrepResponse}; pub use hackernews::{DownloadState, FetchMode, HackerNewsRequest, HackerNewsResponse, StoryType}; pub use imap::{ImapOperation, ImapReaderRequest, ImapReaderResponse, SearchCriteria}; +pub use pdftotext::{PdfToTextRequest, PdfToTextResponse}; pub use sqlite_reader::{Sqlite3ReaderRequest, Sqlite3ReaderResponse, SqliteMode}; // Re-export user interaction types from shared-types diff --git a/nocodo-tools/src/types/pdftotext.rs b/nocodo-tools/src/types/pdftotext.rs new file mode 100644 index 00000000..35fe646a --- /dev/null +++ b/nocodo-tools/src/types/pdftotext.rs @@ -0,0 +1,57 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Request to extract text from a PDF file using pdftotext +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct PdfToTextRequest { + /// Path to the PDF file to extract text from + pub file_path: String, + + /// Optional output file path. If not specified, output is returned in response + #[serde(skip_serializing_if = "Option::is_none")] + pub output_path: Option, + + /// Preserve original physical layout (default: true) + /// Uses pdftotext -layout flag + #[serde(default = "default_true")] + pub preserve_layout: bool, + + /// First page to convert (optional) + #[serde(skip_serializing_if = "Option::is_none")] + pub first_page: Option, + + /// Last page to convert (optional) + #[serde(skip_serializing_if = "Option::is_none")] + pub last_page: Option, + + /// Output text encoding (default: UTF-8) + #[serde(skip_serializing_if = "Option::is_none")] + pub encoding: Option, + + /// Don't insert page breaks between pages + #[serde(default)] + pub no_page_breaks: bool, +} + +fn default_true() -> bool { + true +} + +/// Response from PDF to text extraction +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PdfToTextResponse { + /// Extracted text content (if output_path was not specified) + pub content: Option, + + /// Path to the output file (if output_path was specified) + pub output_path: Option, + + /// Number of bytes written (if output_path was specified) + pub bytes_written: Option, + + /// Success status + pub success: bool, + + /// Any error or informational message + pub message: String, +} diff --git a/shared-types/src/agent.rs b/shared-types/src/agent.rs index 34870f64..e5cc2b1c 100644 --- a/shared-types/src/agent.rs +++ b/shared-types/src/agent.rs @@ -66,6 +66,13 @@ pub struct ImapAgentConfig { pub password: String, } +/// Configuration for PDF to Text agent +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export)] +pub struct PdfToTextAgentConfig { + pub pdf_path: String, +} + /// Type of setting value #[derive(Debug, Clone, Serialize, Deserialize, TS)] #[ts(export)] @@ -118,6 +125,8 @@ pub enum AgentConfig { SettingsManagement(SettingsManagementAgentConfig), #[serde(rename = "imap")] Imap(ImapAgentConfig), + #[serde(rename = "pdftotext")] + PdfToText(PdfToTextAgentConfig), } /// Generic agent execution request with type-safe config