From 6cd5cbd174335601e4de04110c3ae1cd29f64a94 Mon Sep 17 00:00:00 2001 From: Sumit Datta Date: Sat, 31 Jan 2026 14:48:34 +0530 Subject: [PATCH 1/5] Fix nocodo-api server to use configured host and port Previously, the server was hardcoded to bind to 127.0.0.1:8080, ignoring the configuration file settings. Additionally, config paths were hardcoded for Linux-style directories. Changes: - Use config.server.host and config.server.port for binding - Add dirs crate for cross-platform config/data directory detection - Replace hardcoded ~/.config and ~/.local/share paths with dirs::config_dir() and dirs::data_local_dir() - Generate default database path dynamically in config template The server now correctly loads configuration from: - macOS: ~/Library/Application Support/nocodo/api.toml - Linux: ~/.config/nocodo/api.toml - Windows: %APPDATA%\nocodo\api.toml Co-Authored-By: Claude Sonnet 4.5 --- Cargo.lock | 1 + nocodo-api/Cargo.toml | 1 + nocodo-api/src/config.rs | 18 +++++++++++++----- nocodo-api/src/main.rs | 2 +- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 57da983b..d539ae4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3050,6 +3050,7 @@ dependencies = [ "chrono", "clap", "config", + "dirs", "home", "nocodo-agents", "nocodo-llm-sdk", diff --git a/nocodo-api/Cargo.toml b/nocodo-api/Cargo.toml index f0bd83af..e2abcb74 100644 --- a/nocodo-api/Cargo.toml +++ b/nocodo-api/Cargo.toml @@ -21,6 +21,7 @@ tracing-subscriber.workspace = true chrono.workspace = true anyhow.workspace = true home = "0.5" +dirs = "6.0" rusqlite = { version = "0.37", features = ["bundled"] } toml = "0.9" config.workspace = true diff --git a/nocodo-api/src/config.rs b/nocodo-api/src/config.rs index eab887b8..81f98380 100644 --- a/nocodo-api/src/config.rs +++ b/nocodo-api/src/config.rs @@ -75,13 +75,15 @@ impl ApiConfig { // Create default config file if it doesn't exist if !config_path.exists() { - let default_config = r#" + let default_db_path = 
get_default_db_path(); + let default_config = format!( + r#" [server] host = "127.0.0.1" port = 8080 [database] -path = "~/.local/share/nocodo/api.db" +path = "{}" [cors] allowed_origins = ["http://localhost:3000"] @@ -97,7 +99,9 @@ allowed_origins = ["http://localhost:3000"] # zai_api_key = "your-zai-key" # zai_coding_plan = true # zen_api_key = "your-zen-key" -"#; +"#, + default_db_path.display().to_string().replace('\\', "\\\\").replace('"', "\\\"") + ); std::fs::write(&config_path, default_config).map_err(|e| { ConfigError::Message(format!("Failed to write default config: {e}")) })?; @@ -123,7 +127,9 @@ allowed_origins = ["http://localhost:3000"] } fn get_config_path() -> PathBuf { - if let Some(home) = home::home_dir() { + if let Some(config_dir) = dirs::config_dir() { + config_dir.join("nocodo/api.toml") + } else if let Some(home) = home::home_dir() { home.join(".config/nocodo/api.toml") } else { PathBuf::from("api.toml") @@ -131,7 +137,9 @@ fn get_config_path() -> PathBuf { } fn get_default_db_path() -> PathBuf { - if let Some(home) = home::home_dir() { + if let Some(data_dir) = dirs::data_local_dir() { + data_dir.join("nocodo/api.db") + } else if let Some(home) = home::home_dir() { home.join(".local/share/nocodo/api.db") } else { PathBuf::from("api.db") diff --git a/nocodo-api/src/main.rs b/nocodo-api/src/main.rs index 22e66a77..0818e8a5 100644 --- a/nocodo-api/src/main.rs +++ b/nocodo-api/src/main.rs @@ -64,11 +64,11 @@ async fn main() -> Result<(), anyhow::Error> { .expect("Failed to acquire config read lock"); let llm_client = helpers::llm::create_llm_client(&config).expect("Failed to create LLM client"); let db_path = config.database.path.clone(); + let bind_addr = format!("{}:{}", config.server.host, config.server.port); drop(config); let (db_conn, db) = helpers::database::initialize_database(&db_path).expect("Failed to initialize database"); - let bind_addr = "127.0.0.1:8080"; info!("Starting nocodo-api server at http://{}", bind_addr); let cors_config = app_config From
36bbdbfcccb91c94ae6bf4654a5022f89c8534d6 Mon Sep 17 00:00:00 2001 From: Sumit Datta Date: Sat, 31 Jan 2026 17:09:48 +0530 Subject: [PATCH 2/5] Add PDF to text extraction agent and tools Implement pdftotext agent with qpdf support for extracting text from PDF files. The agent preserves layout by default and supports page range selection. Features: - pdftotext executor with layout preservation - qpdf integration for complex page extraction - Restricted bash access (only pdftotext and qpdf commands) - Comprehensive system prompt with usage examples - Page range selection via -f/-l flags or qpdf - Optional text cleaning and formatting - Installation verification for both tools Co-Authored-By: Claude Sonnet 4.5 --- nocodo-agents/src/lib.rs | 25 + nocodo-agents/src/pdftotext/mod.rs | 569 ++++++++++++++++++ nocodo-agents/src/tools/llm_schemas.rs | 44 ++ .../src/handlers/agent_execution/mod.rs | 1 + .../agent_execution/pdftotext_agent.rs | 97 +++ nocodo-api/src/helpers/agents.rs | 33 + nocodo-api/src/main.rs | 1 + nocodo-tools/src/lib.rs | 1 + nocodo-tools/src/pdftotext/mod.rs | 154 +++++ nocodo-tools/src/tool_executor.rs | 5 + nocodo-tools/src/types/core.rs | 4 + nocodo-tools/src/types/mod.rs | 2 + nocodo-tools/src/types/pdftotext.rs | 57 ++ shared-types/src/agent.rs | 9 + 14 files changed, 1002 insertions(+) create mode 100644 nocodo-agents/src/pdftotext/mod.rs create mode 100644 nocodo-api/src/handlers/agent_execution/pdftotext_agent.rs create mode 100644 nocodo-tools/src/pdftotext/mod.rs create mode 100644 nocodo-tools/src/types/pdftotext.rs diff --git a/nocodo-agents/src/lib.rs b/nocodo-agents/src/lib.rs index 91f24098..724aa2da 100644 --- a/nocodo-agents/src/lib.rs +++ b/nocodo-agents/src/lib.rs @@ -3,6 +3,7 @@ pub mod config; pub mod database; pub mod factory; pub mod imap_email; +pub mod pdftotext; pub mod requirements_gathering; pub mod settings_management; pub mod sqlite_reader; @@ -28,6 +29,7 @@ pub enum AgentTool { AskUser, Sqlite3Reader, ImapReader, + 
PdfToText, } impl AgentTool { @@ -43,6 +45,7 @@ impl AgentTool { AgentTool::AskUser => "ask_user", AgentTool::Sqlite3Reader => "sqlite3_reader", AgentTool::ImapReader => "imap_reader", + AgentTool::PdfToText => "pdftotext", } } @@ -111,6 +114,11 @@ impl AgentTool { serde_json::from_value(arguments)?; ToolRequest::ImapReader(req) } + "pdftotext" => { + let req: nocodo_tools::types::PdfToTextRequest = + serde_json::from_value(arguments)?; + ToolRequest::PdfToText(req) + } _ => anyhow::bail!("Unknown tool: {}", name), }; @@ -151,6 +159,23 @@ pub fn format_tool_response(response: &nocodo_tools::types::ToolResponse) -> Str ) } } + ToolResponse::PdfToText(r) => { + if r.success { + if let Some(content) = &r.content { + format!("PDF text extraction successful:\n{}", content) + } else if let Some(output_path) = &r.output_path { + format!( + "PDF text extraction successful: {} bytes written to {}", + r.bytes_written.unwrap_or(0), + output_path + ) + } else { + r.message.clone() + } + } else { + format!("PDF text extraction failed: {}", r.message) + } + } ToolResponse::Error(e) => format!("Error: {}", e.message), } } diff --git a/nocodo-agents/src/pdftotext/mod.rs b/nocodo-agents/src/pdftotext/mod.rs new file mode 100644 index 00000000..2e78b730 --- /dev/null +++ b/nocodo-agents/src/pdftotext/mod.rs @@ -0,0 +1,569 @@ +use crate::{database::Database, Agent, AgentTool}; +use anyhow::{self, Context}; +use async_trait::async_trait; +use nocodo_llm_sdk::client::LlmClient; +use nocodo_llm_sdk::tools::{ToolCall, ToolChoice}; +use nocodo_llm_sdk::types::{CompletionRequest, ContentBlock, Message, Role}; +use nocodo_tools::{ + bash::{BashExecutor, BashPermissions}, + ToolExecutor, +}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Instant; + +/// Agent specialized in extracting text from PDFs using pdftotext and qpdf +pub struct PdfToTextAgent { + client: Arc, + database: Arc, + tool_executor: Arc, + #[allow(dead_code)] // Stored for reference, used during construction 
+ pdf_path: PathBuf, + #[allow(dead_code)] // Used in system prompt generation during construction + pdf_filename: String, + system_prompt: String, +} + +impl PdfToTextAgent { + /// Create a new PdfToTextAgent + /// + /// # Arguments + /// * `client` - LLM client for AI inference + /// * `database` - Database for session/message tracking + /// * `pdf_path` - Path to the PDF file to process + /// + /// # Security + /// The agent is configured with restricted bash access: + /// - Only the `pdftotext` and `qpdf` commands are allowed + /// - All other bash commands are denied + /// - File operations are restricted to the PDF's directory + /// + /// # Pre-conditions + /// - pdftotext (poppler-utils) must be installed on the system + /// - qpdf must be installed for page extraction operations + /// - Run `pdftotext -v` and `qpdf --version` to verify installation + /// - The PDF file must exist + pub fn new( + client: Arc, + database: Arc, + pdf_path: PathBuf, + ) -> anyhow::Result { + // Validate PDF path exists + if !pdf_path.exists() { + anyhow::bail!("PDF file does not exist: {}", pdf_path.display()); + } + + // Extract filename and directory + let pdf_filename = pdf_path + .file_name() + .ok_or_else(|| anyhow::anyhow!("Invalid PDF path - no filename"))? + .to_string_lossy() + .to_string(); + + let base_path = pdf_path + .parent() + .ok_or_else(|| anyhow::anyhow!("Invalid PDF path - no parent directory"))? 
+ .to_path_buf(); + + // Create restricted bash permissions (only pdftotext and qpdf commands) + let bash_perms = BashPermissions::minimal(vec!["pdftotext", "qpdf"]); + let bash_executor = BashExecutor::new(bash_perms, 120)?; + + // Create tool executor with restricted bash + let tool_executor = Arc::new( + ToolExecutor::builder() + .base_path(base_path) + .bash_executor(Some(Box::new(bash_executor))) + .build(), + ); + + let system_prompt = generate_system_prompt(&pdf_filename); + + Ok(Self { + client, + database, + tool_executor, + pdf_path, + pdf_filename, + system_prompt, + }) + } + + /// Get tool definitions for this agent + fn get_tool_definitions(&self) -> Vec { + self.tools() + .into_iter() + .map(|tool| tool.to_tool_definition()) + .collect() + } + + /// Build messages from session history + fn build_messages(&self, session_id: i64) -> anyhow::Result> { + let db_messages = self.database.get_messages(session_id)?; + + db_messages + .into_iter() + .map(|msg| { + let role = match msg.role.as_str() { + "user" => Role::User, + "assistant" => Role::Assistant, + "system" => Role::System, + "tool" => Role::User, + _ => Role::User, + }; + + Ok(Message { + role, + content: vec![ContentBlock::Text { text: msg.content }], + }) + }) + .collect() + } + + /// Execute a tool call + async fn execute_tool_call( + &self, + session_id: i64, + message_id: Option, + tool_call: &ToolCall, + ) -> anyhow::Result<()> { + // 1. Parse LLM tool call into typed ToolRequest + let tool_request = + AgentTool::parse_tool_call(tool_call.name(), tool_call.arguments().clone())?; + + // 2. Record tool call in database + let call_id = self.database.create_tool_call( + session_id, + message_id, + tool_call.id(), + tool_call.name(), + tool_call.arguments().clone(), + )?; + + // 3. Execute tool + let start = Instant::now(); + let result: anyhow::Result = + self.tool_executor.execute(tool_request).await; + let execution_time = start.elapsed().as_millis() as i64; + + // 4. 
Update database with result + match result { + Ok(response) => { + let response_json = serde_json::to_value(&response)?; + self.database + .complete_tool_call(call_id, response_json.clone(), execution_time)?; + + let result_text = crate::format_tool_response(&response); + let message_to_llm = format!("Tool {} result:\n{}", tool_call.name(), result_text); + + tracing::debug!( + tool_name = tool_call.name(), + tool_id = tool_call.id(), + execution_time_ms = execution_time, + "Tool execution completed successfully" + ); + + self.database + .create_message(session_id, "tool", &message_to_llm)?; + } + Err(e) => { + let error_msg = format!("{:?}", e); + self.database.fail_tool_call(call_id, &error_msg)?; + + let error_message_to_llm = + format!("Tool {} failed: {}", tool_call.name(), error_msg); + + tracing::debug!( + tool_name = tool_call.name(), + tool_id = tool_call.id(), + error = %error_msg, + "Tool execution failed" + ); + + self.database + .create_message(session_id, "tool", &error_message_to_llm)?; + } + } + + Ok(()) + } +} + +#[async_trait] +impl Agent for PdfToTextAgent { + fn objective(&self) -> &str { + "Extract text from PDF files using pdftotext with layout preservation and optional page selection using qpdf" + } + + fn system_prompt(&self) -> String { + self.system_prompt.clone() + } + + fn pre_conditions(&self) -> Option> { + Some(vec![ + "pdftotext (poppler-utils) must be installed on the system".to_string(), + "qpdf must be installed for page extraction operations".to_string(), + "Run 'pdftotext -v' to verify pdftotext installation".to_string(), + "Run 'qpdf --version' to verify qpdf installation".to_string(), + "Install with: brew install poppler qpdf (macOS) or apt-get install poppler-utils qpdf (Linux)" + .to_string(), + ]) + } + + fn tools(&self) -> Vec { + vec![ + AgentTool::Bash, // Only pdftotext and qpdf commands allowed + AgentTool::ReadFile, // Read extracted text + AgentTool::WriteFile, // Write cleaned results + ] + } + + async fn 
execute(&self, user_prompt: &str, _session_id: i64) -> anyhow::Result { + // 1. Create session + let session_id = self.database.create_session( + "pdftotext", + self.client.provider_name(), + self.client.model_name(), + Some(&self.system_prompt), + user_prompt, + None, // No config for PdfToTextAgent + )?; + + // 2. Create initial user message + self.database + .create_message(session_id, "user", user_prompt)?; + + // 3. Get tool definitions + let tools = self.get_tool_definitions(); + + // 4. Execution loop (max 30 iterations) + let mut iteration = 0; + let max_iterations = 30; + + loop { + iteration += 1; + if iteration > max_iterations { + let error = "Maximum iteration limit reached"; + self.database.fail_session(session_id, error)?; + return Err(anyhow::anyhow!(error)); + } + + // 5. Build request with conversation history + let messages = self.build_messages(session_id)?; + + let request = CompletionRequest { + messages, + max_tokens: 4000, + model: self.client.model_name().to_string(), + system: Some(self.system_prompt()), + temperature: Some(0.7), + top_p: None, + stop_sequences: None, + tools: Some(tools.clone()), + tool_choice: Some(ToolChoice::Auto), + response_format: None, + }; + + // 6. Call LLM + let response = self.client.complete(request).await?; + + // 7. Extract text and save assistant message + let text = extract_text_from_content(&response.content); + let message_id = self + .database + .create_message(session_id, "assistant", &text)?; + + // 8. Check for tool calls + if let Some(tool_calls) = response.tool_calls { + if tool_calls.is_empty() { + self.database.complete_session(session_id, &text)?; + return Ok(text); + } + + // 9. 
Execute tools + for tool_call in tool_calls { + self.execute_tool_call(session_id, Some(message_id), &tool_call) + .await?; + } + } else { + self.database.complete_session(session_id, &text)?; + return Ok(text); + } + } + } +} + +/// Generate system prompt for PdfToTextAgent +fn generate_system_prompt(pdf_filename: &str) -> String { + format!( + r#"You are a PDF text extraction specialist. Your task is to extract text from the PDF file "{}" and optionally clean and format the extracted text. + +You have access to these tools: +1. bash - ONLY for running pdftotext and qpdf commands +2. read_file - To read extracted text files +3. write_file - To write cleaned results (optional) + +# PDF File + +The PDF file to process is: {} + +# Available Commands + +## pdftotext - Extract text from PDF + +Basic usage: +pdftotext [options] {} + +Key options: +- -layout : Maintain original physical layout (RECOMMENDED for preserving formatting) +- -f : First page to convert +- -l : Last page to convert +- -nopgbrk : Don't insert page breaks between pages +- -enc : Output text encoding (default: UTF-8) +- -raw : Keep strings in content stream order (alternative to -layout) + +The -layout flag is HIGHLY RECOMMENDED as it preserves the original formatting, tables, and structure. + +Examples: +- Extract all pages with layout: pdftotext -layout {} output.txt +- Extract pages 1-5: pdftotext -layout -f 1 -l 5 {} output.txt +- Extract without page breaks: pdftotext -layout -nopgbrk {} output.txt + +## qpdf - Extract specific pages to a new PDF + +Use qpdf when the user wants to extract specific pages BEFORE text extraction. + +Basic usage: +qpdf {} --pages . -- output.pdf + +Page range syntax: +- Single page: 1 +- Range: 1-5 +- Multiple ranges: 1-3,7-10 +- From end: r1 (last page), r2 (second to last) +- Last page: z + +Examples: +- Extract pages 1-5: qpdf {} --pages . 1-5 -- pages_1-5.pdf +- Extract pages 2,4,6: qpdf {} --pages . 
2,4,6 -- selected_pages.pdf +- Extract last 3 pages: qpdf {} --pages . r3-r1 -- last_3_pages.pdf + +# Workflow + +## Simple extraction (most common): +1. Run: pdftotext -layout {} output.txt +2. Read: output.txt +3. Present the extracted text to the user + +## Extract specific pages (if user requests): +Option A: Use pdftotext -f and -l flags directly +1. Run: pdftotext -layout -f 1 -l 5 {} output.txt +2. Read: output.txt +3. Present the extracted text + +Option B: Use qpdf first, then pdftotext +1. Run: qpdf {} --pages . 1-5 -- pages_1-5.pdf +2. Run: pdftotext -layout pages_1-5.pdf output.txt +3. Read: output.txt +4. Present the extracted text + +## Clean and format (if user requests): +1. Extract text using pdftotext +2. Read the output file +3. Analyze and clean the text: + - Fix common extraction errors + - Improve formatting and structure + - Remove artifacts or noise + - Preserve intended structure (tables, paragraphs, lists) +4. Present cleaned text to user +5. Optionally write cleaned result to a file if requested + +# Example Interactions + +User: "Extract text from this PDF" +1. Run: pdftotext -layout {} output.txt +2. Read: output.txt +3. Present the extracted text + +User: "Extract text from pages 1-10" +1. Run: pdftotext -layout -f 1 -l 10 {} output.txt +2. Read: output.txt +3. Present the extracted text + +User: "Extract and clean the text from pages 5-15" +1. Run: pdftotext -layout -f 5 -l 15 {} output.txt +2. Read: output.txt +3. Analyze and clean the text +4. Present cleaned text to user + +User: "Extract page 3 only" +1. Run: pdftotext -layout -f 3 -l 3 {} page_3.txt +2. Read: page_3.txt +3. 
Present the extracted text + +# Important Notes + +- You can ONLY run pdftotext and qpdf commands (no other bash commands will work) +- The PDF file is: {} +- ALWAYS use -layout flag with pdftotext to preserve formatting (unless user explicitly asks not to) +- pdftotext creates output files automatically (don't need to redirect with >) +- Page numbers start at 1 +- For page extraction, using pdftotext -f/-l is usually simpler than qpdf +- Use qpdf when you need complex page selection (e.g., non-contiguous pages like 1,5,10) +"#, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename, + pdf_filename + ) +} + +/// Helper function to extract text from content blocks +fn extract_text_from_content(content: &[ContentBlock]) -> String { + content + .iter() + .filter_map(|block| match block { + ContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") +} + +/// Verify that pdftotext is installed and accessible +pub fn verify_pdftotext_installation() -> anyhow::Result { + use std::process::Command; + + let output = Command::new("pdftotext") + .arg("-v") + .output() + .context("Failed to execute 'pdftotext -v'. Is pdftotext (poppler-utils) installed?")?; + + // pdftotext -v outputs to stderr + let version_info = String::from_utf8_lossy(&output.stderr).to_string(); + + if version_info.is_empty() { + anyhow::bail!("pdftotext command did not return version information"); + } + + Ok(version_info) +} + +/// Verify that qpdf is installed and accessible +pub fn verify_qpdf_installation() -> anyhow::Result { + use std::process::Command; + + let output = Command::new("qpdf") + .arg("--version") + .output() + .context("Failed to execute 'qpdf --version'. 
Is qpdf installed?")?; + + if !output.status.success() { + anyhow::bail!( + "qpdf command failed. stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + + let version_info = String::from_utf8_lossy(&output.stdout).to_string(); + + Ok(version_info) +} + +impl PdfToTextAgent { + /// Verify pre-conditions before creating agent + pub fn verify_preconditions() -> anyhow::Result<()> { + // Check pdftotext + match verify_pdftotext_installation() { + Ok(version) => { + tracing::info!( + "pdftotext found: {}", + version.lines().next().unwrap_or("") + ); + } + Err(e) => { + anyhow::bail!( + "pdftotext is not installed or not accessible.\n\ + Error: {}\n\ + \n\ + Installation instructions:\n\ + - macOS: brew install poppler\n\ + - Ubuntu/Debian: sudo apt-get install poppler-utils\n\ + - Windows: Download from https://blog.alivate.com.au/poppler-windows/\n\ + \n\ + After installation, verify with: pdftotext -v", + e + ) + } + } + + // Check qpdf + match verify_qpdf_installation() { + Ok(version) => { + tracing::info!("qpdf found: {}", version.lines().next().unwrap_or("")); + } + Err(e) => { + anyhow::bail!( + "qpdf is not installed or not accessible.\n\ + Error: {}\n\ + \n\ + Installation instructions:\n\ + - macOS: brew install qpdf\n\ + - Ubuntu/Debian: sudo apt-get install qpdf\n\ + - Windows: Download from https://github.com/qpdf/qpdf/releases\n\ + \n\ + After installation, verify with: qpdf --version", + e + ) + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_agent_creation() { + // Note: This test requires setting up mock dependencies + // Skipping for now due to complexity of mocking LlmClient and Database + } + + #[test] + fn test_system_prompt_generation() { + let prompt = generate_system_prompt("test.pdf"); + assert!(prompt.contains("pdftotext")); + assert!(prompt.contains("qpdf")); + assert!(prompt.contains("-layout")); + assert!(prompt.contains("test.pdf")); + } + + #[test] + fn 
test_verify_pdftotext_installation() { + // This test will pass if pdftotext is installed + let _result = verify_pdftotext_installation(); + } + + #[test] + fn test_verify_qpdf_installation() { + // This test will pass if qpdf is installed + let _result = verify_qpdf_installation(); + } +} diff --git a/nocodo-agents/src/tools/llm_schemas.rs b/nocodo-agents/src/tools/llm_schemas.rs index d67e68e3..f9017930 100644 --- a/nocodo-agents/src/tools/llm_schemas.rs +++ b/nocodo-agents/src/tools/llm_schemas.rs @@ -122,6 +122,49 @@ pub fn create_tool_definitions() -> Vec { imap_schema, ).expect("Failed to create imap_reader tool schema"); + let pdftotext_schema = serde_json::json!({ + "type": "object", + "required": ["file_path"], + "properties": { + "file_path": { + "type": "string", + "description": "Path to the PDF file to extract text from" + }, + "output_path": { + "type": "string", + "description": "Optional output file path. If not specified, text is returned in the response" + }, + "preserve_layout": { + "type": "boolean", + "description": "Preserve original physical layout (default: true). Uses pdftotext -layout flag", + "default": true + }, + "first_page": { + "type": "integer", + "description": "First page to convert (optional, 1-based index)" + }, + "last_page": { + "type": "integer", + "description": "Last page to convert (optional, 1-based index)" + }, + "encoding": { + "type": "string", + "description": "Output text encoding (default: UTF-8)" + }, + "no_page_breaks": { + "type": "boolean", + "description": "Don't insert page breaks between pages (default: false)", + "default": false + } + } + }); + + let pdftotext_tool = Tool::from_json_schema( + "pdftotext".to_string(), + "Extract text from PDF files using pdftotext. Supports layout preservation, page range selection, and various encoding options. 
Use preserve_layout=true (default) to maintain formatting.".to_string(), + pdftotext_schema, + ).expect("Failed to create pdftotext tool schema"); + vec![ Tool::from_type::() .name("list_files") @@ -155,6 +198,7 @@ pub fn create_tool_definitions() -> Vec { .build(), sqlite_tool, imap_tool, + pdftotext_tool, ] } diff --git a/nocodo-api/src/handlers/agent_execution/mod.rs b/nocodo-api/src/handlers/agent_execution/mod.rs index 6a44b71c..5a6ed8e6 100644 --- a/nocodo-api/src/handlers/agent_execution/mod.rs +++ b/nocodo-api/src/handlers/agent_execution/mod.rs @@ -1,5 +1,6 @@ pub mod codebase_analysis_agent; pub mod imap_email_agent; +pub mod pdftotext_agent; pub mod requirements_gathering_agent; pub mod settings_management_agent; pub mod sqlite_agent; diff --git a/nocodo-api/src/handlers/agent_execution/pdftotext_agent.rs b/nocodo-api/src/handlers/agent_execution/pdftotext_agent.rs new file mode 100644 index 00000000..ebe2ddb1 --- /dev/null +++ b/nocodo-api/src/handlers/agent_execution/pdftotext_agent.rs @@ -0,0 +1,97 @@ +use crate::helpers::agents::create_pdftotext_agent; +use crate::models::ErrorResponse; +use actix_web::{post, web, HttpResponse, Responder}; +use nocodo_agents::Agent; +use serde_json::json; +use shared_types::{AgentConfig, AgentExecutionRequest, AgentExecutionResponse}; +use std::sync::Arc; +use tracing::{error, info}; + +#[post("/agents/pdftotext/execute")] +pub async fn execute_pdftotext_agent( + req: web::Json, + llm_client: web::Data>, + database: web::Data>, +) -> impl Responder { + let pdf_path = match &req.config { + AgentConfig::PdfToText(config) => config.pdf_path.clone(), + _ => { + error!(config_type = ?req.config, "Invalid config type for PdfToText agent"); + return HttpResponse::BadRequest().json(ErrorResponse { + error: "Expected PdfToText agent config".to_string(), + }); + } + }; + + info!( + user_prompt = %req.user_prompt, + pdf_path = %pdf_path, + "Executing PdfToText agent" + ); + + let user_prompt = req.user_prompt.clone(); + let 
agent_name = "pdftotext".to_string(); + + let config = json!(&req.config); + + let provider = llm_client.provider_name().to_string(); + let model = llm_client.model_name().to_string(); + + let session_id = match database.create_session( + &agent_name, + &provider, + &model, + None, + &user_prompt, + Some(config), + ) { + Ok(id) => id, + Err(e) => { + error!(error = %e, "Failed to create session"); + return HttpResponse::InternalServerError().json(ErrorResponse { + error: format!("Failed to create session: {}", e), + }); + } + }; + + // Return immediately with session_id and spawn background task + let llm_client_clone = llm_client.get_ref().clone(); + let database_clone = database.get_ref().clone(); + let pdf_path_clone = pdf_path.clone(); + let user_prompt_clone = user_prompt.clone(); + + tokio::spawn(async move { + let agent = + match create_pdftotext_agent(&llm_client_clone, &database_clone, &pdf_path_clone).await + { + Ok(agent) => agent, + Err(e) => { + error!(error = %e, session_id = session_id, "Failed to create PdfToText agent"); + let _ = database_clone + .fail_session(session_id, &format!("Failed to create agent: {}", e)); + return; + } + }; + + match agent.execute(&user_prompt_clone, session_id).await { + Ok(result) => { + info!(result = %result, session_id = session_id, "Agent execution completed successfully"); + if let Err(e) = database_clone.complete_session(session_id, &result) { + error!(error = %e, session_id = session_id, "Failed to complete session"); + } + } + Err(e) => { + error!(error = %e, session_id = session_id, "Agent execution failed"); + let _ = + database_clone.fail_session(session_id, &format!("Execution failed: {}", e)); + } + } + }); + + HttpResponse::Ok().json(AgentExecutionResponse { + session_id, + agent_name, + status: "running".to_string(), + result: String::new(), + }) +} diff --git a/nocodo-api/src/helpers/agents.rs b/nocodo-api/src/helpers/agents.rs index 038dd963..7cbb0d40 100644 --- a/nocodo-api/src/helpers/agents.rs +++ 
b/nocodo-api/src/helpers/agents.rs @@ -59,6 +59,14 @@ pub fn list_supported_agents() -> Vec { .to_string(), enabled: true, }, + AgentInfo { + id: "pdftotext".to_string(), + name: "PDF to Text Agent".to_string(), + description: + "Agent for extracting text from PDF files using pdftotext with layout preservation and page selection capabilities" + .to_string(), + enabled: true, + }, ] } @@ -258,3 +266,28 @@ pub fn create_imap_agent( Ok(agent) } + +/// Creates a PDF to Text agent +/// +/// # Arguments +/// +/// * `llm_client` - The LLM client to use for the agent +/// * `database` - Shared database for session persistence +/// * `pdf_path` - Path to the PDF file to process +/// +/// # Returns +/// +/// A PDF to Text agent instance +pub async fn create_pdftotext_agent( + llm_client: &Arc, + database: &Arc, + pdf_path: &str, +) -> anyhow::Result { + let agent = nocodo_agents::pdftotext::PdfToTextAgent::new( + llm_client.clone(), + database.clone(), + std::path::PathBuf::from(pdf_path), + )?; + + Ok(agent) +} diff --git a/nocodo-api/src/main.rs b/nocodo-api/src/main.rs index 0818e8a5..6106cfca 100644 --- a/nocodo-api/src/main.rs +++ b/nocodo-api/src/main.rs @@ -116,6 +116,7 @@ async fn main() -> Result<(), anyhow::Error> { handlers::agent_execution::workflow_creation_agent::execute_workflow_creation_agent, ) .service(handlers::agent_execution::imap_email_agent::execute_imap_agent) + .service(handlers::agent_execution::pdftotext_agent::execute_pdftotext_agent) .service(handlers::sessions::list_sessions) .service(handlers::sessions::get_session) .service(handlers::sessions::get_pending_questions) diff --git a/nocodo-tools/src/lib.rs b/nocodo-tools/src/lib.rs index 286a7812..628f22b7 100644 --- a/nocodo-tools/src/lib.rs +++ b/nocodo-tools/src/lib.rs @@ -3,6 +3,7 @@ pub mod filesystem; pub mod grep; pub mod hackernews; pub mod imap; +pub mod pdftotext; pub mod sqlite_reader; pub mod tool_error; pub mod tool_executor; diff --git a/nocodo-tools/src/pdftotext/mod.rs 
b/nocodo-tools/src/pdftotext/mod.rs new file mode 100644 index 00000000..2408c4bc --- /dev/null +++ b/nocodo-tools/src/pdftotext/mod.rs @@ -0,0 +1,154 @@ +use crate::tool_error::ToolError; +use crate::types::{PdfToTextRequest, PdfToTextResponse}; +use std::path::Path; +use std::process::Command; + +/// Execute pdftotext command to extract text from PDF +pub fn execute_pdftotext(request: PdfToTextRequest) -> Result { + // Validate PDF file exists + let pdf_path = Path::new(&request.file_path); + if !pdf_path.exists() { + return Err(ToolError::InvalidInput(format!( + "PDF file does not exist: {}", + request.file_path + ))); + } + + // Build pdftotext command + let mut cmd = Command::new("pdftotext"); + + // Add layout preservation flag (default: true) + if request.preserve_layout { + cmd.arg("-layout"); + } + + // Add page range if specified + if let Some(first_page) = request.first_page { + cmd.arg("-f").arg(first_page.to_string()); + } + if let Some(last_page) = request.last_page { + cmd.arg("-l").arg(last_page.to_string()); + } + + // Add encoding if specified + if let Some(ref encoding) = request.encoding { + cmd.arg("-enc").arg(encoding); + } + + // Add no page breaks flag if specified + if request.no_page_breaks { + cmd.arg("-nopgbrk"); + } + + // Add input file + cmd.arg(&request.file_path); + + // Determine output: file or stdout + let output_to_stdout = request.output_path.is_none(); + if output_to_stdout { + // Output to stdout (use "-" as output file) + cmd.arg("-"); + } else if let Some(ref output_path) = request.output_path { + cmd.arg(output_path); + } + + // Execute command + let output = cmd.output().map_err(|e| { + ToolError::ExecutionError(format!( + "Failed to execute pdftotext command. Is pdftotext installed? 
Error: {}", + e + )) + })?; + + // Check exit status + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(ToolError::ExecutionError(format!( + "pdftotext command failed: {}", + stderr + ))); + } + + // Build response + if output_to_stdout { + let content = String::from_utf8_lossy(&output.stdout).to_string(); + Ok(PdfToTextResponse { + content: Some(content.clone()), + output_path: None, + bytes_written: None, + success: true, + message: format!("Successfully extracted {} bytes of text", content.len()), + }) + } else { + let output_path = request.output_path.unwrap(); + let bytes_written = std::fs::metadata(&output_path) + .map(|m| m.len() as usize) + .unwrap_or(0); + Ok(PdfToTextResponse { + content: None, + output_path: Some(output_path.clone()), + bytes_written: Some(bytes_written), + success: true, + message: format!("Successfully wrote {} bytes to {}", bytes_written, output_path), + }) + } +} + +/// Verify that pdftotext is installed +pub fn verify_pdftotext_installation() -> anyhow::Result { + let output = Command::new("pdftotext") + .arg("-v") + .output() + .map_err(|e| anyhow::anyhow!("Failed to execute 'pdftotext -v'. Is pdftotext installed? Error: {}", e))?; + + // pdftotext -v outputs to stderr + let version_info = String::from_utf8_lossy(&output.stderr).to_string(); + + if version_info.is_empty() { + anyhow::bail!("pdftotext command did not return version information"); + } + + Ok(version_info) +} + +/// Verify that qpdf is installed +pub fn verify_qpdf_installation() -> anyhow::Result { + let output = Command::new("qpdf") + .arg("--version") + .output() + .map_err(|e| anyhow::anyhow!("Failed to execute 'qpdf --version'. Is qpdf installed? Error: {}", e))?; + + if !output.status.success() { + anyhow::bail!( + "qpdf command failed. 
stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + + let version_info = String::from_utf8_lossy(&output.stdout).to_string(); + + Ok(version_info) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_verify_pdftotext_installation() { + // This test will pass if pdftotext is installed + let result = verify_pdftotext_installation(); + if result.is_ok() { + println!("pdftotext version: {}", result.unwrap()); + } + } + + #[test] + fn test_verify_qpdf_installation() { + // This test will pass if qpdf is installed + let result = verify_qpdf_installation(); + if result.is_ok() { + println!("qpdf version: {}", result.unwrap()); + } + } +} diff --git a/nocodo-tools/src/tool_executor.rs b/nocodo-tools/src/tool_executor.rs index 6c097ec1..dfd8a6a8 100644 --- a/nocodo-tools/src/tool_executor.rs +++ b/nocodo-tools/src/tool_executor.rs @@ -10,6 +10,7 @@ use crate::filesystem::{apply_patch, list_files, read_file, write_file}; use crate::grep; use crate::hackernews; use crate::imap; +use crate::pdftotext; use crate::sqlite_reader; use crate::user_interaction; @@ -86,6 +87,9 @@ impl ToolExecutor { ToolRequest::ImapReader(req) => imap::execute_imap_reader(req) .await .map_err(|e| anyhow::anyhow!(e)), + ToolRequest::PdfToText(req) => pdftotext::execute_pdftotext(req) + .map(ToolResponse::PdfToText) + .map_err(|e| anyhow::anyhow!(e)), } } @@ -106,6 +110,7 @@ impl ToolExecutor { ToolResponse::Sqlite3Reader(response) => serde_json::to_value(response)?, ToolResponse::HackerNewsResponse(response) => serde_json::to_value(response)?, ToolResponse::ImapReader(response) => serde_json::to_value(response)?, + ToolResponse::PdfToText(response) => serde_json::to_value(response)?, ToolResponse::Error(response) => serde_json::to_value(response)?, }; diff --git a/nocodo-tools/src/types/core.rs b/nocodo-tools/src/types/core.rs index 68eca6e8..13c5c966 100644 --- a/nocodo-tools/src/types/core.rs +++ b/nocodo-tools/src/types/core.rs @@ -26,6 +26,8 @@ pub enum ToolRequest 
{ HackerNewsRequest(super::hackernews::HackerNewsRequest), #[serde(rename = "imap_reader")] ImapReader(super::imap::ImapReaderRequest), + #[serde(rename = "pdftotext")] + PdfToText(super::pdftotext::PdfToTextRequest), } /// Tool response enum containing all possible tool results @@ -52,6 +54,8 @@ pub enum ToolResponse { HackerNewsResponse(super::hackernews::HackerNewsResponse), #[serde(rename = "imap_reader")] ImapReader(super::imap::ImapReaderResponse), + #[serde(rename = "pdftotext")] + PdfToText(super::pdftotext::PdfToTextResponse), #[serde(rename = "error")] Error(ToolErrorResponse), } diff --git a/nocodo-tools/src/types/mod.rs b/nocodo-tools/src/types/mod.rs index 861ef0f4..70a14efe 100644 --- a/nocodo-tools/src/types/mod.rs +++ b/nocodo-tools/src/types/mod.rs @@ -4,6 +4,7 @@ pub mod filesystem; pub mod grep; pub mod hackernews; pub mod imap; +pub mod pdftotext; pub mod sqlite_reader; // Re-export commonly used types @@ -17,6 +18,7 @@ pub use filesystem::{ pub use grep::{GrepMatch, GrepRequest, GrepResponse}; pub use hackernews::{DownloadState, FetchMode, HackerNewsRequest, HackerNewsResponse, StoryType}; pub use imap::{ImapOperation, ImapReaderRequest, ImapReaderResponse, SearchCriteria}; +pub use pdftotext::{PdfToTextRequest, PdfToTextResponse}; pub use sqlite_reader::{Sqlite3ReaderRequest, Sqlite3ReaderResponse, SqliteMode}; // Re-export user interaction types from shared-types diff --git a/nocodo-tools/src/types/pdftotext.rs b/nocodo-tools/src/types/pdftotext.rs new file mode 100644 index 00000000..35fe646a --- /dev/null +++ b/nocodo-tools/src/types/pdftotext.rs @@ -0,0 +1,57 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Request to extract text from a PDF file using pdftotext +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct PdfToTextRequest { + /// Path to the PDF file to extract text from + pub file_path: String, + + /// Optional output file path. 
If not specified, output is returned in response + #[serde(skip_serializing_if = "Option::is_none")] + pub output_path: Option, + + /// Preserve original physical layout (default: true) + /// Uses pdftotext -layout flag + #[serde(default = "default_true")] + pub preserve_layout: bool, + + /// First page to convert (optional) + #[serde(skip_serializing_if = "Option::is_none")] + pub first_page: Option, + + /// Last page to convert (optional) + #[serde(skip_serializing_if = "Option::is_none")] + pub last_page: Option, + + /// Output text encoding (default: UTF-8) + #[serde(skip_serializing_if = "Option::is_none")] + pub encoding: Option, + + /// Don't insert page breaks between pages + #[serde(default)] + pub no_page_breaks: bool, +} + +fn default_true() -> bool { + true +} + +/// Response from PDF to text extraction +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PdfToTextResponse { + /// Extracted text content (if output_path was not specified) + pub content: Option, + + /// Path to the output file (if output_path was specified) + pub output_path: Option, + + /// Number of bytes written (if output_path was specified) + pub bytes_written: Option, + + /// Success status + pub success: bool, + + /// Any error or informational message + pub message: String, +} diff --git a/shared-types/src/agent.rs b/shared-types/src/agent.rs index 34870f64..e5cc2b1c 100644 --- a/shared-types/src/agent.rs +++ b/shared-types/src/agent.rs @@ -66,6 +66,13 @@ pub struct ImapAgentConfig { pub password: String, } +/// Configuration for PDF to Text agent +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export)] +pub struct PdfToTextAgentConfig { + pub pdf_path: String, +} + /// Type of setting value #[derive(Debug, Clone, Serialize, Deserialize, TS)] #[ts(export)] @@ -118,6 +125,8 @@ pub enum AgentConfig { SettingsManagement(SettingsManagementAgentConfig), #[serde(rename = "imap")] Imap(ImapAgentConfig), + #[serde(rename = "pdftotext")] + 
PdfToText(PdfToTextAgentConfig), } /// Generic agent execution request with type-safe config From 812f1435e6cd8ba9526507a29383ee47c8806859 Mon Sep 17 00:00:00 2001 From: Sumit Datta Date: Sat, 31 Jan 2026 21:50:15 +0530 Subject: [PATCH 3/5] Minor change in vite config --- gui/vite.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/vite.config.ts b/gui/vite.config.ts index d3e3a72f..6203cecc 100644 --- a/gui/vite.config.ts +++ b/gui/vite.config.ts @@ -6,7 +6,7 @@ import tailwindcss from '@tailwindcss/vite'; export default defineConfig({ plugins: [devtools(), tailwindcss(), solidPlugin()], server: { - port: 3000, + port: 9010, }, build: { target: 'esnext', From 7d050b9a526ef999c75d2feaf3667231578c19d4 Mon Sep 17 00:00:00 2001 From: Sumit Datta Date: Tue, 3 Feb 2026 09:06:24 +0530 Subject: [PATCH 4/5] Return error instead of using fallback paths for config and DB directories --- nocodo-api/src/config.rs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/nocodo-api/src/config.rs b/nocodo-api/src/config.rs index 47099d38..ddf5864e 100644 --- a/nocodo-api/src/config.rs +++ b/nocodo-api/src/config.rs @@ -51,7 +51,7 @@ impl Default for ApiConfig { port: 8080, }, database: DatabaseConfig { - path: get_default_db_path(), + path: get_default_db_path().expect("Failed to detect OS data directory"), }, api_keys: None, llm: None, @@ -64,7 +64,7 @@ impl Default for ApiConfig { impl ApiConfig { pub fn load() -> Result<(Self, PathBuf), ConfigError> { - let config_path = get_config_path(); + let config_path = get_config_path()?; // Create config directory if it doesn't exist if let Some(parent) = config_path.parent() { @@ -75,7 +75,7 @@ impl ApiConfig { // Create default config file if it doesn't exist if !config_path.exists() { - let default_db_path = get_default_db_path(); + let default_db_path = get_default_db_path()?; let default_config = format!( r#" [server] @@ -126,18 +126,14 @@ allowed_origins = 
["http://localhost:3000"] } } -fn get_config_path() -> PathBuf { - if let Some(config_dir) = dirs::config_dir() { - config_dir.join("nocodo/api.toml") - } else { - PathBuf::from("api.toml") - } +fn get_config_path() -> Result { + dirs::config_dir() + .map(|dir| dir.join("nocodo/api.toml")) + .ok_or_else(|| ConfigError::Message("Failed to detect OS config directory".to_string())) } -fn get_default_db_path() -> PathBuf { - if let Some(data_dir) = dirs::data_local_dir() { - data_dir.join("nocodo/api.db") - } else { - PathBuf::from("api.db") - } +fn get_default_db_path() -> Result { + dirs::data_local_dir() + .map(|dir| dir.join("nocodo/api.db")) + .ok_or_else(|| ConfigError::Message("Failed to detect OS data directory".to_string())) } From 6804258278f1ec886ae1dd5cf6b8f20d01e7aa0a Mon Sep 17 00:00:00 2001 From: Sumit Datta Date: Tue, 3 Feb 2026 09:19:57 +0530 Subject: [PATCH 5/5] Update Cargo.lock for dirs workspace dependency --- Cargo.lock | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d539ae4a..2fbfe22f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -964,7 +964,7 @@ dependencies = [ "codex-utils-tokenizer", "codex-windows-sandbox", "core-foundation 0.9.4", - "dirs", + "dirs 6.0.0", "dunce", "env-flags", "eventsource-stream", @@ -1106,7 +1106,7 @@ dependencies = [ "axum", "codex-keyring-store", "codex-protocol", - "dirs", + "dirs 6.0.0", "futures", "keyring", "mcp-types", @@ -1488,13 +1488,22 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" dependencies = [ - "dirs-sys", + "dirs-sys 0.5.0", ] 
[[package]] @@ -1507,6 +1516,18 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", +] + [[package]] name = "dirs-sys" version = "0.5.0" @@ -3050,7 +3071,7 @@ dependencies = [ "chrono", "clap", "config", - "dirs", + "dirs 5.0.1", "home", "nocodo-agents", "nocodo-llm-sdk",