diff --git a/Cargo.lock b/Cargo.lock index ab033bf..fa0deac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -494,6 +494,35 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + +[[package]] +name = "cookie_store" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15b2c103cf610ec6cae3da84a766285b42fd16aad564758459e6ecf128c75206" +dependencies = [ + "cookie", + "document-features", + "idna", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "time", + "url", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -701,6 +730,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_builder" version = "0.20.2" @@ -774,6 +812,15 @@ dependencies = [ "syn", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "dyn-clone" version = "1.0.20" @@ -1713,6 +1760,12 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "lock_api" version = "0.4.14" @@ -1950,6 +2003,12 @@ dependencies = [ 
"num-traits", ] +[[package]] +name = "num-conv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" + [[package]] name = "num-derive" version = "0.4.2" @@ -2259,6 +2318,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -2630,6 +2695,7 @@ dependencies = [ "indicatif 0.18.3", "rpg-core", "rpg-encoder", + "rpg-lift", "rpg-nav", "rpg-parser", "serde_json", @@ -2675,6 +2741,23 @@ dependencies = [ "tracing", ] +[[package]] +name = "rpg-lift" +version = "0.6.7" +dependencies = [ + "globset", + "indicatif 0.18.3", + "rpg-core", + "rpg-encoder", + "rpg-parser", + "serde", + "serde_json", + "tempfile", + "thiserror", + "tracing", + "ureq 3.2.0", +] + [[package]] name = "rpg-mcp" version = "0.6.7" @@ -3204,6 +3287,37 @@ dependencies = [ "zune-jpeg 0.4.21", ] +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -3729,15 +3843,21 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" dependencies = [ "base64 0.22.1", + "cookie_store", "der", + "flate2", "log", "native-tls", "percent-encoding", + "rustls", "rustls-pki-types", + "serde", + "serde_json", "socks", "ureq-proto", "utf-8", "webpki-root-certs", + "webpki-roots 1.0.6", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 29bf15a..c772f27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/rpg-nav", "crates/rpg-cli", "crates/rpg-mcp", + "crates/rpg-lift", ] [workspace.package] @@ -138,6 +139,9 @@ rayon = "1.11.0" # Compression zstd = "0.13.3" +# HTTP client +ureq = "3" + # Embeddings fastembed = "5.8.1" @@ -156,3 +160,4 @@ rpg-core = { path = "crates/rpg-core" } rpg-parser = { path = "crates/rpg-parser" } rpg-encoder = { path = "crates/rpg-encoder" } rpg-nav = { path = "crates/rpg-nav" } +rpg-lift = { path = "crates/rpg-lift" } diff --git a/crates/rpg-cli/Cargo.toml b/crates/rpg-cli/Cargo.toml index 4eebdad..032ea84 100644 --- a/crates/rpg-cli/Cargo.toml +++ b/crates/rpg-cli/Cargo.toml @@ -11,11 +11,16 @@ description = "CLI tool for building and querying Repository Planning Graphs" name = "rpg-encoder" path = "src/main.rs" +[features] +default = ["lift"] +lift = ["rpg-lift"] + [dependencies] rpg-core.workspace = true rpg-parser.workspace = true rpg-encoder.workspace = true rpg-nav.workspace = true +rpg-lift = { workspace = true, optional = true } clap.workspace = true serde_json.workspace = true anyhow.workspace = true diff --git a/crates/rpg-cli/src/main.rs b/crates/rpg-cli/src/main.rs index ea5312e..39e718d 100644 --- a/crates/rpg-cli/src/main.rs +++ b/crates/rpg-cli/src/main.rs @@ -131,6 +131,34 @@ enum Commands { action: String, }, + /// Autonomous LLM-driven semantic lifting (fire-and-forget) + #[cfg(feature = "lift")] + Lift { + /// LLM provider: "anthropic" or "openai" + #[arg(long, default_value = "anthropic")] + 
provider: String, + + /// Model override (default: haiku for anthropic, gpt-4o-mini for openai) + #[arg(long)] + model: Option, + + /// API key (or set ANTHROPIC_API_KEY / OPENAI_API_KEY env var) + #[arg(long)] + api_key: Option, + + /// Base URL for OpenAI-compatible endpoints + #[arg(long)] + base_url: Option, + + /// Estimate cost without calling the LLM + #[arg(long)] + dry_run: bool, + + /// Scope: file glob, hierarchy path, or "*" for all unlifted + #[arg(long, default_value = "*")] + scope: String, + }, + /// Start MCP server (use rpg-mcp-server binary instead) #[command(hide = true)] Serve, @@ -193,6 +221,23 @@ fn main() -> Result<()> { } => cmd_reconstruct_plan(&project_root, max_batch_size, &format, include_modules), Commands::Validate => cmd_validate(&project_root), Commands::Hook { action } => cmd_hook(&project_root, &action), + #[cfg(feature = "lift")] + Commands::Lift { + provider, + model, + api_key, + base_url, + dry_run, + scope, + } => cmd_lift( + &project_root, + &provider, + model.as_deref(), + api_key.as_deref(), + base_url.as_deref(), + dry_run, + &scope, + ), Commands::Serve => { eprintln!("MCP server not yet implemented. Use rpg-mcp binary instead."); Ok(()) @@ -1048,6 +1093,103 @@ fn check_hierarchy_orphans( } } +#[cfg(feature = "lift")] +fn cmd_lift( + project_root: &Path, + provider_name: &str, + model: Option<&str>, + api_key: Option<&str>, + base_url: Option<&str>, + dry_run: bool, + scope: &str, +) -> Result<()> { + if !rpg_core::storage::rpg_exists(project_root) { + anyhow::bail!("No RPG found. 
Run `rpg-encoder build` first."); + } + + // Resolve API key from arg or environment + let api_key = api_key + .map(String::from) + .or_else(|| match provider_name { + "anthropic" => std::env::var("ANTHROPIC_API_KEY").ok(), + "openai" => std::env::var("OPENAI_API_KEY").ok(), + _ => None, + }) + .ok_or_else(|| { + let env_var = match provider_name { + "anthropic" => "ANTHROPIC_API_KEY", + "openai" => "OPENAI_API_KEY", + _ => "API_KEY", + }; + anyhow::anyhow!( + "No API key provided. Use --api-key or set {} env var.", + env_var + ) + })?; + + let provider = rpg_lift::create_provider(provider_name, &api_key, model, base_url) + .map_err(|e| anyhow::anyhow!("{}", e))?; + + let mut graph = rpg_core::storage::load(project_root)?; + + // Dry run: estimate cost and exit + if dry_run { + let estimate = rpg_lift::estimate_cost(&graph, provider.as_ref(), project_root); + eprintln!("\n{}", estimate); + return Ok(()); + } + + eprintln!( + "Starting autonomous lift with {} ({})", + provider_name, + provider.model_name() + ); + + let config = rpg_lift::LiftConfig { + provider: provider.as_ref(), + project_root, + scope, + max_retries: 2, + batch_size: 25, + batch_tokens: 8000, + }; + + let report = + rpg_lift::run_pipeline(&mut graph, &config).map_err(|e| anyhow::anyhow!("{}", e))?; + + // Print report + eprintln!("\nLifting complete!"); + eprintln!(" Auto-lifted: {}", report.entities_auto_lifted); + eprintln!(" LLM-lifted: {}", report.entities_llm_lifted); + if report.entities_failed > 0 { + eprintln!(" Failed: {}", report.entities_failed); + } + eprintln!(" Batches processed: {}", report.batches_processed); + eprintln!(" Files synthesized: {}", report.files_synthesized); + eprintln!( + " Hierarchy: {}", + if report.hierarchy_assigned { + "assigned" + } else { + "not assigned" + } + ); + eprintln!( + " Tokens: {} input, {} output", + report.total_input_tokens, report.total_output_tokens + ); + eprintln!(" Cost: ${:.4}", report.total_cost_usd); + + if !report.errors.is_empty() { + 
eprintln!("\n Warnings ({}):", report.errors.len()); + for err in &report.errors { + eprintln!(" - {}", err); + } + } + + Ok(()) +} + fn cmd_info(project_root: &Path) -> Result<()> { if !rpg_core::storage::rpg_exists(project_root) { eprintln!("No RPG found. Run `rpg-encoder build` first."); diff --git a/crates/rpg-lift/Cargo.toml b/crates/rpg-lift/Cargo.toml new file mode 100644 index 0000000..6a33259 --- /dev/null +++ b/crates/rpg-lift/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "rpg-lift" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "Autonomous LLM-driven semantic lifting for RPG" + +[features] +default = ["anthropic", "openai"] +anthropic = [] +openai = [] + +[dependencies] +rpg-core = { workspace = true } +rpg-encoder = { workspace = true } +rpg-parser = { workspace = true } +ureq = { workspace = true, features = ["json"] } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +tracing = { workspace = true } +indicatif = { workspace = true } +globset = { workspace = true } + +[dev-dependencies] +tempfile = { workspace = true } + +[lints] +workspace = true diff --git a/crates/rpg-lift/src/cost.rs b/crates/rpg-lift/src/cost.rs new file mode 100644 index 0000000..deffa3a --- /dev/null +++ b/crates/rpg-lift/src/cost.rs @@ -0,0 +1,166 @@ +//! Cost estimation for autonomous lifting. + +use crate::provider::LlmProvider; +use rpg_core::graph::RPGraph; + +/// Pre-computed cost estimate for a lifting run. +#[derive(Debug, Clone)] +pub struct CostEstimate { + /// Total entities that need LLM lifting (after auto-lift). + pub entities_to_lift: usize, + /// Entities handled by auto-lift (no LLM cost). + pub entities_auto_lifted: usize, + /// Estimated number of LLM batches. + pub estimated_batches: usize, + /// Estimated input tokens across all phases. 
+ pub estimated_input_tokens: u64, + /// Estimated output tokens across all phases. + pub estimated_output_tokens: u64, + /// Estimated total cost in USD. + pub estimated_cost_usd: f64, + /// Model name. + pub model: String, +} + +impl std::fmt::Display for CostEstimate { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Cost Estimate:")?; + writeln!( + f, + " Auto-lifted (free): {} entities", + self.entities_auto_lifted + )?; + writeln!( + f, + " LLM lifting needed: {} entities ({} batches)", + self.entities_to_lift, self.estimated_batches + )?; + writeln!( + f, + " Estimated tokens: ~{} input, ~{} output", + self.estimated_input_tokens, self.estimated_output_tokens + )?; + writeln!(f, " Model: {}", self.model)?; + write!(f, " Estimated cost: ${:.4}", self.estimated_cost_usd) + } +} + +/// Running cost tracker during lifting. +#[derive(Debug, Default)] +pub struct CostTracker { + pub total_input_tokens: u64, + pub total_output_tokens: u64, + input_rate: f64, + output_rate: f64, +} + +impl CostTracker { + pub fn new(provider: &dyn LlmProvider) -> Self { + Self { + total_input_tokens: 0, + total_output_tokens: 0, + input_rate: provider.cost_per_mtok_input(), + output_rate: provider.cost_per_mtok_output(), + } + } + + /// Record token usage from a response. + pub fn record(&mut self, input_tokens: Option, output_tokens: Option) { + if let Some(t) = input_tokens { + self.total_input_tokens += t; + } + if let Some(t) = output_tokens { + self.total_output_tokens += t; + } + } + + /// Current total cost in USD. + pub fn total_cost_usd(&self) -> f64 { + (self.total_input_tokens as f64 / 1_000_000.0) * self.input_rate + + (self.total_output_tokens as f64 / 1_000_000.0) * self.output_rate + } +} + +/// Estimate lifting cost without making API calls. +/// +/// Scans the graph to count entities needing LLM lifting (excluding auto-liftable), +/// estimates token counts using the 4-chars-per-token heuristic, and computes cost. 
+pub fn estimate_cost( + graph: &RPGraph, + provider: &dyn LlmProvider, + project_root: &std::path::Path, +) -> CostEstimate { + let scope = rpg_encoder::lift::resolve_scope(graph, "*"); + + // Try auto-lift on raw entities to estimate how many need LLM + let raw_entities = + rpg_encoder::lift::collect_raw_entities(graph, &scope, project_root).unwrap_or_default(); + + let paradigm_defs = rpg_parser::paradigms::defs::load_builtin_defs().unwrap_or_default(); + let active_paradigms: Vec = graph.metadata.paradigms.clone(); + let engine = rpg_encoder::lift::AutoLiftEngine::new(¶digm_defs, &active_paradigms); + + let mut auto_lifted = 0usize; + let mut llm_needed = Vec::new(); + + for raw in &raw_entities { + match engine.try_lift_with_confidence(raw) { + Some((_, rpg_encoder::lift::LiftConfidence::Accept)) => { + auto_lifted += 1; + } + _ => { + llm_needed.push(raw); + } + } + } + + // Estimate tokens for entity lifting batches + let batches = rpg_encoder::lift::build_token_aware_batches(&raw_entities, 25, 8000); + let llm_batches = if raw_entities.is_empty() { + 0 + } else { + // Scale batches by ratio of LLM-needed entities + let ratio = llm_needed.len() as f64 / raw_entities.len() as f64; + #[allow(clippy::cast_sign_loss)] + { + (batches.len() as f64 * ratio).ceil() as usize + } + }; + + // Estimate input tokens: system prompt (~500 tokens) + entity source per batch + let system_tokens = 500u64; + let avg_source_tokens: u64 = if llm_needed.is_empty() { + 0 + } else { + llm_needed + .iter() + .map(|r| (r.source_text.len() as u64) / 4) + .sum::() + / llm_needed.len() as u64 + }; + let tokens_per_batch = system_tokens + avg_source_tokens * 25; + let lift_input_tokens = tokens_per_batch * llm_batches as u64; + + // Output: ~30 tokens per entity (name + features) + let lift_output_tokens = llm_needed.len() as u64 * 30; + + // Synthesis + hierarchy phases: ~20% overhead on top of lifting + let synthesis_tokens = lift_input_tokens / 5; + let synthesis_output = 
lift_output_tokens / 5; + + let total_input = lift_input_tokens + synthesis_tokens; + let total_output = lift_output_tokens + synthesis_output; + + let cost = (total_input as f64 / 1_000_000.0) * provider.cost_per_mtok_input() + + (total_output as f64 / 1_000_000.0) * provider.cost_per_mtok_output(); + + CostEstimate { + entities_to_lift: llm_needed.len(), + entities_auto_lifted: auto_lifted, + estimated_batches: llm_batches, + estimated_input_tokens: total_input, + estimated_output_tokens: total_output, + estimated_cost_usd: cost, + model: provider.model_name().to_string(), + } +} diff --git a/crates/rpg-lift/src/lib.rs b/crates/rpg-lift/src/lib.rs new file mode 100644 index 0000000..ee139af --- /dev/null +++ b/crates/rpg-lift/src/lib.rs @@ -0,0 +1,21 @@ +//! Autonomous LLM-driven semantic lifting for RPG. +//! +//! This crate provides a fire-and-forget CLI pipeline that calls cheap LLM APIs +//! (Anthropic Haiku, OpenAI GPT-4o-mini) to perform the full semantic lifting +//! workflow without a connected coding agent. +//! +//! # Architecture +//! +//! - **provider**: `LlmProvider` trait with Anthropic and OpenAI implementations +//! - **pipeline**: Orchestrates auto-lift → LLM lifting → synthesis → hierarchy +//! - **cost**: Pre-scan cost estimation and runtime tracking +//! - **progress**: Terminal progress bars via `indicatif` + +pub mod cost; +pub mod pipeline; +pub mod progress; +pub mod provider; + +pub use cost::{CostEstimate, estimate_cost}; +pub use pipeline::{LiftConfig, LiftReport, PipelineError, run_pipeline}; +pub use provider::{LlmProvider, ProviderError, available_providers, create_provider}; diff --git a/crates/rpg-lift/src/pipeline.rs b/crates/rpg-lift/src/pipeline.rs new file mode 100644 index 0000000..3435f0c --- /dev/null +++ b/crates/rpg-lift/src/pipeline.rs @@ -0,0 +1,641 @@ +//! Autonomous lifting pipeline — fire-and-forget semantic analysis. +//! +//! Orchestrates the full lifting flow: auto-lift → LLM entity lifting → finalize → +//! 
file synthesis → domain discovery → hierarchy construction. Each phase reuses +//! existing rpg-encoder utilities with LLM calls handled via the provider trait. + +use crate::cost::CostTracker; +use crate::progress::LiftProgress; +use crate::provider::{LlmProvider, ProviderError}; +use rpg_core::graph::RPGraph; +use rpg_encoder::lift::{ + AutoLiftEngine, LiftConfidence, build_token_aware_batches, collect_raw_entities, resolve_scope, +}; +use rpg_encoder::semantic_lifting::{ + DOMAIN_DISCOVERY_PROMPT, FILE_SYNTHESIS_SYSTEM, HIERARCHY_CONSTRUCTION_PROMPT, + SEMANTIC_PARSING_SYSTEM, aggregate_module_features, parse_line_features, +}; +use rpg_parser::entities::RawEntity; +use std::collections::HashMap; +use std::path::Path; + +/// Configuration for an autonomous lifting run. +/// +/// All fields are borrowed, so the config must not outlive the provider, +/// path, and scope string it points at. +pub struct LiftConfig<'a> { + /// LLM backend used for every completion request; its per-token rates also seed the cost tracker. + pub provider: &'a dyn LlmProvider, + /// Repository root: source files are read from here and the graph is loaded/saved relative to it. + pub project_root: &'a Path, + /// Scope expression handed to `resolve_scope` to select which entities to lift. + pub scope: &'a str, + /// Number of retries after the first attempt, so each LLM call is tried at most `max_retries + 1` times. + pub max_retries: usize, + /// Passed to `build_token_aware_batches`; presumably the maximum entity count per batch — confirm against that function. + pub batch_size: usize, + /// Passed to `build_token_aware_batches`; presumably the token budget per batch — confirm against that function. + pub batch_tokens: usize, +} + +/// Result of a completed lifting run. +/// +/// A run that returns this report may still have partially failed: check +/// `entities_failed` and `errors`. +#[derive(Debug)] +pub struct LiftReport { + /// Entities whose features came from the auto-lift engine (both `Accept` and `Review` confidence). + pub entities_auto_lifted: usize, + /// Entities that received features parsed from an LLM response. + pub entities_llm_lifted: usize, + /// Entities sent to the LLM that got no features: their batch failed or the response did not mention them. + pub entities_failed: usize, + /// Entity-lifting batches attempted, including batches whose LLM call failed. + pub batches_processed: usize, + /// Module entities whose features were replaced by file synthesis. + pub files_synthesized: usize, + /// `true` when at least one hierarchy assignment was parsed and applied to the graph. + pub hierarchy_assigned: bool, + /// Sum of input token counts recorded from LLM responses (responses without a count add nothing). + pub total_input_tokens: u64, + /// Sum of output token counts recorded from LLM responses (responses without a count add nothing). + pub total_output_tokens: u64, + /// Cost in USD computed from the token totals and the provider's per-million-token rates. + pub total_cost_usd: f64, + /// Human-readable messages for non-fatal problems (failed batches, failed saves, empty scope). + pub errors: Vec, +} + +/// Run the full autonomous lifting pipeline. 
+pub fn run_pipeline( + graph: &mut RPGraph, + config: &LiftConfig<'_>, +) -> Result { + let progress = LiftProgress::new(); + let mut tracker = CostTracker::new(config.provider); + let mut errors: Vec = Vec::new(); + + // Phase 1: Resolve scope and collect raw entities + let scope = resolve_scope(graph, config.scope); + if scope.entity_ids.is_empty() { + progress.finish(); + return Ok(LiftReport { + entities_auto_lifted: 0, + entities_llm_lifted: 0, + entities_failed: 0, + batches_processed: 0, + files_synthesized: 0, + hierarchy_assigned: false, + total_input_tokens: 0, + total_output_tokens: 0, + total_cost_usd: 0.0, + errors: vec!["No entities to lift (scope resolved to empty set)".to_string()], + }); + } + + let raw_entities = collect_raw_entities(graph, &scope, config.project_root) + .map_err(|e| PipelineError::Setup(e.to_string()))?; + + if raw_entities.is_empty() { + progress.finish(); + return Ok(LiftReport { + entities_auto_lifted: 0, + entities_llm_lifted: 0, + entities_failed: 0, + batches_processed: 0, + files_synthesized: 0, + hierarchy_assigned: false, + total_input_tokens: 0, + total_output_tokens: 0, + total_cost_usd: 0.0, + errors: vec!["No source files could be read for scoped entities".to_string()], + }); + } + + eprintln!( + " Found {} entities to process ({} in scope)", + raw_entities.len(), + scope.entity_ids.len() + ); + + // Phase 2: Auto-lift trivial entities + let paradigm_defs = rpg_parser::paradigms::defs::load_builtin_defs().unwrap_or_default(); + let active_paradigms: Vec = graph.metadata.paradigms.clone(); + let engine = AutoLiftEngine::new(¶digm_defs, &active_paradigms); + + progress.start_phase("Auto-lift", raw_entities.len() as u64); + + let mut auto_lifted = 0usize; + let mut needs_llm: Vec<&RawEntity> = Vec::new(); + + for raw in &raw_entities { + match engine.try_lift_with_confidence(raw) { + Some((features, LiftConfidence::Accept)) => { + let entity_id = raw.id(); + if let Some(entity) = graph.entities.get_mut(&entity_id) { + 
entity.semantic_features = features; + entity.feature_source = Some("auto".to_string()); + } + auto_lifted += 1; + } + Some((features, LiftConfidence::Review)) => { + // Apply auto-lift features but still queue for LLM review + let entity_id = raw.id(); + if let Some(entity) = graph.entities.get_mut(&entity_id) { + entity.semantic_features = features; + entity.feature_source = Some("auto-review".to_string()); + } + auto_lifted += 1; + // Don't add to needs_llm — accept the auto-lift for autonomous mode + } + _ => { + needs_llm.push(raw); + } + } + progress.tick_phase(); + } + + progress.suspend(|| { + eprintln!( + " Auto-lifted {} entities, {} need LLM", + auto_lifted, + needs_llm.len() + ); + }); + + // Phase 3: LLM entity lifting + let mut llm_lifted = 0usize; + let mut llm_failed = 0usize; + let mut batches_done = 0usize; + + if !needs_llm.is_empty() { + // Build owned copies for batching + let llm_raws: Vec = needs_llm.iter().map(|r| (*r).clone()).collect(); + let batches = build_token_aware_batches(&llm_raws, config.batch_size, config.batch_tokens); + + progress.start_phase("LLM Lift", batches.len() as u64); + + let repo_info = + rpg_encoder::lift::generate_repo_info(graph, &project_name(config.project_root)); + + for (batch_idx, &(start, end)) in batches.iter().enumerate() { + let batch = &llm_raws[start..end]; + let user_prompt = format_entity_batch(batch, batch_idx == 0, &repo_info); + + match call_with_retry( + config.provider, + SEMANTIC_PARSING_SYSTEM, + &user_prompt, + config.max_retries, + ) { + Ok(response) => { + tracker.record(response.input_tokens, response.output_tokens); + + let features = parse_line_features(&response.text); + + // Apply features to graph entities + let mut batch_applied = 0; + for raw in batch { + let entity_id = raw.id(); + // Match by entity name (parse_line_features returns name → features) + let name_key = &raw.name; + if let Some(feats) = features.get(name_key) + && let Some(entity) = graph.entities.get_mut(&entity_id) + { 
+ entity.semantic_features = feats.clone(); + entity.feature_source = Some("llm".to_string()); + batch_applied += 1; + } + } + llm_lifted += batch_applied; + llm_failed += batch.len() - batch_applied; + } + Err(e) => { + let msg = format!("Batch {} failed: {}", batch_idx, e); + errors.push(msg.clone()); + progress.suspend(|| eprintln!(" WARNING: {}", msg)); + llm_failed += batch.len(); + } + } + + batches_done += 1; + progress.tick_phase(); + progress.update_cost(tracker.total_cost_usd(), tracker.total_cost_usd() * 1.2); + + // Save after each batch for crash recovery + let config_storage = rpg_core::config::RpgConfig::load(config.project_root) + .unwrap_or_default() + .storage; + graph.refresh_metadata(); + if let Err(e) = + rpg_core::storage::save_with_config(config.project_root, graph, &config_storage) + { + errors.push(format!("Save after batch {} failed: {}", batch_idx, e)); + } + } + } + + // Phase 4: Finalize — aggregate module features + progress.start_phase("Finalize", 1); + let modules_aggregated = aggregate_module_features(graph); + progress.tick_phase(); + + progress.suspend(|| { + eprintln!( + " Aggregated features for {} file modules", + modules_aggregated + ); + }); + + // Phase 5: File synthesis + let files_synthesized = run_file_synthesis(graph, config, &mut tracker, &mut errors, &progress); + + // Phase 6: Domain discovery + hierarchy construction + let hierarchy_assigned = + run_hierarchy_construction(graph, config, &mut tracker, &mut errors, &progress); + + // Final save + graph.refresh_metadata(); + let config_storage = rpg_core::config::RpgConfig::load(config.project_root) + .unwrap_or_default() + .storage; + let _ = rpg_core::storage::save_with_config(config.project_root, graph, &config_storage); + + progress.finish(); + + Ok(LiftReport { + entities_auto_lifted: auto_lifted, + entities_llm_lifted: llm_lifted, + entities_failed: llm_failed, + batches_processed: batches_done, + files_synthesized, + hierarchy_assigned, + total_input_tokens: 
tracker.total_input_tokens, + total_output_tokens: tracker.total_output_tokens, + total_cost_usd: tracker.total_cost_usd(), + errors, + }) +} + +// --------------------------------------------------------------------------- +// Phase 5: File synthesis +// --------------------------------------------------------------------------- + +fn run_file_synthesis( + graph: &mut RPGraph, + config: &LiftConfig<'_>, + tracker: &mut CostTracker, + errors: &mut Vec, + progress: &LiftProgress, +) -> usize { + // Collect Module entities with aggregated features + let modules: Vec<(String, Vec)> = graph + .entities + .iter() + .filter(|(_, e)| { + e.kind == rpg_core::graph::EntityKind::Module && !e.semantic_features.is_empty() + }) + .map(|(id, e)| (id.clone(), e.semantic_features.clone())) + .collect(); + + if modules.is_empty() { + return 0; + } + + // Batch modules for synthesis (70 per batch) + let batch_size = 70; + let total_batches = modules.len().div_ceil(batch_size); + + progress.start_phase("Synthesis", total_batches as u64); + + let mut synthesized = 0usize; + + for (batch_idx, chunk) in modules.chunks(batch_size).enumerate() { + let user_prompt = format_synthesis_batch(chunk); + + match call_with_retry( + config.provider, + FILE_SYNTHESIS_SYSTEM, + &user_prompt, + config.max_retries, + ) { + Ok(response) => { + tracker.record(response.input_tokens, response.output_tokens); + + // Parse synthesis response — one line per file + for line in response.text.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + // Format: file_path | features OR just comma-separated features (for single-file batches) + if let Some((file_key, features_str)) = line.split_once('|') { + let file_key = file_key.trim(); + let features: Vec = features_str + .split(',') + .map(|f| f.trim().to_lowercase()) + .filter(|f| !f.is_empty()) + .collect(); + + // Find matching module by file path prefix + if let Some((module_id, _)) = + chunk.iter().find(|(id, 
_)| id.contains(file_key)) + && let Some(entity) = graph.entities.get_mut(module_id) + && !features.is_empty() + { + entity.semantic_features = features; + synthesized += 1; + } + } + } + } + Err(e) => { + errors.push(format!("Synthesis batch {} failed: {}", batch_idx, e)); + } + } + + progress.tick_phase(); + progress.update_cost(tracker.total_cost_usd(), tracker.total_cost_usd() * 1.1); + } + + progress.suspend(|| { + eprintln!(" Synthesized features for {} file modules", synthesized); + }); + + synthesized +} + +// --------------------------------------------------------------------------- +// Phase 6: Hierarchy construction +// --------------------------------------------------------------------------- + +fn run_hierarchy_construction( + graph: &mut RPGraph, + config: &LiftConfig<'_>, + tracker: &mut CostTracker, + errors: &mut Vec, + progress: &LiftProgress, +) -> bool { + let clusters = rpg_encoder::hierarchy::cluster_files_for_hierarchy(graph, 70); + + if clusters.is_empty() { + return false; + } + + // Step 1: Domain discovery — identify functional areas + progress.start_phase("Discovery", 1); + + let file_features = collect_file_features(graph); + let discovery_prompt = format_discovery_prompt(&file_features); + + let areas = match call_with_retry( + config.provider, + DOMAIN_DISCOVERY_PROMPT, + &discovery_prompt, + config.max_retries, + ) { + Ok(response) => { + tracker.record(response.input_tokens, response.output_tokens); + + // Parse areas: one per line, PascalCase + let areas: Vec = response + .text + .lines() + .map(|l| l.trim().to_string()) + .filter(|l| !l.is_empty() && !l.starts_with('#') && !l.starts_with("```")) + .collect(); + + if areas.is_empty() { + errors.push("Domain discovery returned no areas".to_string()); + return false; + } + + progress.suspend(|| { + eprintln!( + " Discovered {} functional areas: {}", + areas.len(), + areas.join(", ") + ); + }); + + areas + } + Err(e) => { + errors.push(format!("Domain discovery failed: {}", e)); + 
progress.tick_phase(); + return false; + } + }; + + progress.tick_phase(); + + // Step 2: Hierarchy assignment — assign files to 3-level paths + progress.start_phase("Hierarchy", clusters.len() as u64); + + let mut all_assignments: HashMap = HashMap::new(); + + for (cluster_idx, cluster) in clusters.iter().enumerate() { + let user_prompt = format_hierarchy_prompt(&cluster.files, &areas, &file_features); + + match call_with_retry( + config.provider, + HIERARCHY_CONSTRUCTION_PROMPT, + &user_prompt, + config.max_retries, + ) { + Ok(response) => { + tracker.record(response.input_tokens, response.output_tokens); + + // Parse assignments: file_path | FunctionalArea/category/subcategory + for line in response.text.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') || line.starts_with("```") { + continue; + } + if let Some((file_path, hierarchy_path)) = line.split_once('|') { + let file_path = file_path.trim().to_string(); + let hierarchy_path = hierarchy_path.trim().to_string(); + if !hierarchy_path.is_empty() + && hierarchy_path.contains('/') + && !file_path.is_empty() + { + all_assignments.insert(file_path, hierarchy_path); + } + } + } + } + Err(e) => { + errors.push(format!("Hierarchy batch {} failed: {}", cluster_idx, e)); + } + } + + progress.tick_phase(); + progress.update_cost(tracker.total_cost_usd(), tracker.total_cost_usd()); + } + + if all_assignments.is_empty() { + errors.push("No hierarchy assignments could be parsed".to_string()); + return false; + } + + // Apply hierarchy + rpg_encoder::hierarchy::apply_hierarchy(graph, &all_assignments); + + // Rebuild graph hierarchy metadata + graph.metadata.semantic_hierarchy = true; + graph.assign_hierarchy_ids(); + graph.aggregate_hierarchy_features(); + graph.materialize_containment_edges(); + + progress.suspend(|| { + eprintln!( + " Applied {} hierarchy assignments across {} areas", + all_assignments.len(), + areas.len() + ); + }); + + true +} + +// 
--------------------------------------------------------------------------- +// Formatting helpers +// --------------------------------------------------------------------------- + +/// Format a batch of raw entities for the LLM entity-lifting prompt. +fn format_entity_batch(batch: &[RawEntity], is_first: bool, repo_info: &str) -> String { + let mut prompt = String::new(); + + if is_first { + prompt.push_str(repo_info); + prompt.push_str("\n\n"); + } + + for raw in batch { + let kind = format!("{:?}", raw.kind); + prompt.push_str(&format!("### {} ({})\n", raw.id(), kind)); + if let Some(parent) = &raw.parent_class { + prompt.push_str(&format!("Parent: {}\n", parent)); + } + + // Truncate source to 40 lines + let lines: Vec<&str> = raw.source_text.lines().collect(); + let truncated = lines.len() > 40; + let source = if truncated { + let mut s: String = lines[..40].join("\n"); + s.push_str("\n// ... truncated ..."); + s + } else { + raw.source_text.clone() + }; + + prompt.push_str("```\n"); + prompt.push_str(&source); + prompt.push_str("\n```\n\n"); + } + + prompt +} + +/// Format file modules for synthesis. +fn format_synthesis_batch(modules: &[(String, Vec)]) -> String { + let mut prompt = String::new(); + for (module_id, features) in modules { + // Extract file path from module ID (format: "path/to/file.rs:module") + let file_path = module_id.split(':').next().unwrap_or(module_id); + prompt.push_str(&format!( + "### {}\nEntity features: {}\n\n", + file_path, + features.join(", ") + )); + } + prompt +} + +/// Collect file-level features for domain discovery. +fn collect_file_features(graph: &RPGraph) -> HashMap> { + graph + .entities + .iter() + .filter(|(_, e)| { + e.kind == rpg_core::graph::EntityKind::Module && !e.semantic_features.is_empty() + }) + .map(|(_, e)| { + let path = rpg_core::graph::normalize_path(&e.file); + (path, e.semantic_features.clone()) + }) + .collect() +} + +/// Format the domain discovery prompt with file features. 
+fn format_discovery_prompt(file_features: &HashMap>) -> String { + let mut prompt = String::from( + "Analyze this repository and identify its main functional areas.\n\nFile features:\n", + ); + + // Sort for deterministic output + let mut files: Vec<(&String, &Vec)> = file_features.iter().collect(); + files.sort_by_key(|(path, _)| *path); + + for (path, features) in &files { + prompt.push_str(&format!(" {} — {}\n", path, features.join(", "))); + } + + prompt +} + +/// Format hierarchy assignment prompt for a file cluster. +fn format_hierarchy_prompt( + files: &[String], + areas: &[String], + file_features: &HashMap>, +) -> String { + let mut prompt = String::from("Assign each file to a 3-level hierarchy path.\n\n"); + + prompt.push_str("Functional areas:\n"); + for area in areas { + prompt.push_str(&format!(" {}\n", area)); + } + + prompt.push_str("\nFiles to assign:\n"); + for file in files { + let features = file_features + .get(file) + .map(|f| f.join(", ")) + .unwrap_or_default(); + prompt.push_str(&format!(" {} — {}\n", file, features)); + } + + prompt +} + +// --------------------------------------------------------------------------- +// Retry wrapper +// --------------------------------------------------------------------------- + +/// Call the LLM with retry logic. +fn call_with_retry( + provider: &dyn LlmProvider, + system: &str, + user: &str, + max_retries: usize, +) -> Result { + let mut last_err = None; + + for attempt in 0..=max_retries { + match provider.complete(system, user) { + Ok(response) => return Ok(response), + Err(e) => { + tracing::warn!("LLM call attempt {} failed: {}", attempt + 1, e); + last_err = Some(e); + if attempt < max_retries { + // Brief pause before retry + std::thread::sleep(std::time::Duration::from_secs(2u64.pow(attempt as u32))); + } + } + } + } + + Err(last_err.unwrap()) +} + +/// Extract a project name from the root path. 
+fn project_name(project_root: &Path) -> String { + project_root + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown") + .to_string() +} + +// --------------------------------------------------------------------------- +// Pipeline errors +// --------------------------------------------------------------------------- + +/// Errors from the autonomous lifting pipeline. +#[derive(Debug, thiserror::Error)] +pub enum PipelineError { + #[error("setup error: {0}")] + Setup(String), + #[error("provider error: {0}")] + Provider(#[from] ProviderError), +} diff --git a/crates/rpg-lift/src/progress.rs b/crates/rpg-lift/src/progress.rs new file mode 100644 index 0000000..03b618d --- /dev/null +++ b/crates/rpg-lift/src/progress.rs @@ -0,0 +1,87 @@ +//! Terminal progress display for autonomous lifting. + +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use std::sync::Arc; + +/// Manages progress bars for the lifting pipeline. +pub struct LiftProgress { + multi: Arc, + phase_bar: ProgressBar, + cost_bar: ProgressBar, +} + +impl Default for LiftProgress { + fn default() -> Self { + Self::new() + } +} + +impl LiftProgress { + pub fn new() -> Self { + let multi = Arc::new(MultiProgress::new()); + + let phase_bar = multi.add(ProgressBar::new(0)); + phase_bar.set_style( + ProgressStyle::default_bar() + .template(" {prefix:.bold} [{bar:30.cyan/blue}] {pos}/{len} {msg}") + .expect("valid template") + .progress_chars("##-"), + ); + + let cost_bar = multi.add(ProgressBar::new_spinner()); + cost_bar.set_style( + ProgressStyle::default_spinner() + .template(" {spinner:.green} {msg}") + .expect("valid template"), + ); + + Self { + multi, + phase_bar, + cost_bar, + } + } + + /// Start a new phase with a given name and total count. 
+ pub fn start_phase(&self, name: &str, total: u64) { + self.phase_bar.set_prefix(name.to_string()); + self.phase_bar.set_length(total); + self.phase_bar.set_position(0); + self.phase_bar.set_message(""); + } + + /// Increment the phase progress by 1. + pub fn tick_phase(&self) { + self.phase_bar.inc(1); + } + + /// Increment the phase progress by `n`. + pub fn tick_phase_by(&self, n: u64) { + self.phase_bar.inc(n); + } + + /// Set a message on the phase bar. + pub fn set_phase_message(&self, msg: &str) { + self.phase_bar.set_message(msg.to_string()); + } + + /// Update the cost display. + pub fn update_cost(&self, spent: f64, estimated_total: f64) { + self.cost_bar.set_message(format!( + "${:.4} spent (est. ${:.4} total)", + spent, estimated_total + )); + self.cost_bar.tick(); + } + + /// Finish all bars. + pub fn finish(&self) { + self.phase_bar.finish_and_clear(); + self.cost_bar.finish_and_clear(); + } + + /// Suspend progress bars for clean eprintln output, then resume. + pub fn suspend(&self, f: F) { + self.multi.suspend(f); + } +} diff --git a/crates/rpg-lift/src/provider.rs b/crates/rpg-lift/src/provider.rs new file mode 100644 index 0000000..59ded06 --- /dev/null +++ b/crates/rpg-lift/src/provider.rs @@ -0,0 +1,335 @@ +//! LLM provider abstraction for autonomous lifting. +//! +//! Supports Anthropic (Claude Haiku) and OpenAI-compatible (GPT-4o-mini) APIs. +//! Uses blocking HTTP via `ureq` — the CLI has no async runtime. + +use serde_json::Value; + +/// Errors from LLM provider calls. +#[derive(Debug, thiserror::Error)] +pub enum ProviderError { + #[error("HTTP request failed: {0}")] + Http(String), + #[error("API error ({status}): {message}")] + Api { status: u16, message: String }, + #[error("response parse error: {0}")] + Parse(String), + #[error("empty response from LLM")] + EmptyResponse, +} + +/// A completed LLM response. +pub struct LlmResponse { + /// The text content of the response. 
+ pub text: String, + /// Input tokens used (from API response, if reported). + pub input_tokens: Option, + /// Output tokens used (from API response, if reported). + pub output_tokens: Option, +} + +/// Abstraction over LLM API providers. +pub trait LlmProvider: Send { + /// Send a completion request with system and user messages. + fn complete(&self, system: &str, user: &str) -> Result; + + /// The model name (for display/logging). + fn model_name(&self) -> &str; + + /// Cost per million input tokens (USD). + fn cost_per_mtok_input(&self) -> f64; + + /// Cost per million output tokens (USD). + fn cost_per_mtok_output(&self) -> f64; +} + +// --------------------------------------------------------------------------- +// Anthropic Messages API +// --------------------------------------------------------------------------- + +/// Anthropic provider using the Messages API. +#[cfg(feature = "anthropic")] +pub struct AnthropicProvider { + api_key: String, + model: String, + agent: ureq::Agent, +} + +#[cfg(feature = "anthropic")] +impl AnthropicProvider { + /// Default model: Claude Haiku 4.5 — fast and cheap. 
+ pub const DEFAULT_MODEL: &str = "claude-haiku-4-5-20251001"; + const API_URL: &str = "https://api.anthropic.com/v1/messages"; + + pub fn new(api_key: String, model: Option) -> Self { + Self { + api_key, + model: model.unwrap_or_else(|| Self::DEFAULT_MODEL.to_string()), + agent: ureq::Agent::new_with_config( + ureq::config::Config::builder() + .timeout_global(Some(std::time::Duration::from_secs(120))) + .build(), + ), + } + } +} + +#[cfg(feature = "anthropic")] +impl LlmProvider for AnthropicProvider { + fn complete(&self, system: &str, user: &str) -> Result { + let body = serde_json::json!({ + "model": self.model, + "max_tokens": 4096, + "system": system, + "messages": [ + {"role": "user", "content": user} + ] + }); + + let mut response = self + .agent + .post(Self::API_URL) + .header("x-api-key", &self.api_key) + .header("anthropic-version", "2023-06-01") + .header("content-type", "application/json") + .send_json(&body) + .map_err(|e| ProviderError::Http(e.to_string()))?; + + let json: Value = response + .body_mut() + .read_json() + .map_err(|e| ProviderError::Parse(e.to_string()))?; + + // Check for API error + if let Some(err) = json.get("error") { + return Err(ProviderError::Api { + status: 400, + message: err + .get("message") + .and_then(|m| m.as_str()) + .unwrap_or("unknown error") + .to_string(), + }); + } + + // Extract text from content blocks + let text = json + .get("content") + .and_then(|c| c.as_array()) + .and_then(|arr| { + arr.iter() + .find_map(|block| block.get("text").and_then(|t| t.as_str())) + }) + .ok_or(ProviderError::EmptyResponse)? 
+ .to_string(); + + // Extract usage + let input_tokens = json + .get("usage") + .and_then(|u| u.get("input_tokens")) + .and_then(|t| t.as_u64()); + let output_tokens = json + .get("usage") + .and_then(|u| u.get("output_tokens")) + .and_then(|t| t.as_u64()); + + Ok(LlmResponse { + text, + input_tokens, + output_tokens, + }) + } + + fn model_name(&self) -> &str { + &self.model + } + + fn cost_per_mtok_input(&self) -> f64 { + // Haiku 4.5: $0.80/MTok input + if self.model.contains("haiku") { + 0.80 + } else if self.model.contains("sonnet") { + 3.00 + } else { + 1.00 // conservative default + } + } + + fn cost_per_mtok_output(&self) -> f64 { + // Haiku 4.5: $4.00/MTok output + if self.model.contains("haiku") { + 4.00 + } else if self.model.contains("sonnet") { + 15.00 + } else { + 5.00 + } + } +} + +// --------------------------------------------------------------------------- +// OpenAI Chat Completions API +// --------------------------------------------------------------------------- + +/// OpenAI-compatible provider (works with OpenAI, Azure, local proxies). +#[cfg(feature = "openai")] +pub struct OpenAiProvider { + api_key: String, + model: String, + base_url: String, + agent: ureq::Agent, +} + +#[cfg(feature = "openai")] +impl OpenAiProvider { + /// Default model: GPT-4o-mini — fast and cheap. 
+ pub const DEFAULT_MODEL: &str = "gpt-4o-mini"; + const DEFAULT_BASE_URL: &str = "https://api.openai.com"; + + pub fn new(api_key: String, model: Option, base_url: Option) -> Self { + Self { + api_key, + model: model.unwrap_or_else(|| Self::DEFAULT_MODEL.to_string()), + base_url: base_url.unwrap_or_else(|| Self::DEFAULT_BASE_URL.to_string()), + agent: ureq::Agent::new_with_config( + ureq::config::Config::builder() + .timeout_global(Some(std::time::Duration::from_secs(120))) + .build(), + ), + } + } +} + +#[cfg(feature = "openai")] +impl LlmProvider for OpenAiProvider { + fn complete(&self, system: &str, user: &str) -> Result { + let url = format!( + "{}/v1/chat/completions", + self.base_url.trim_end_matches('/') + ); + + let body = serde_json::json!({ + "model": self.model, + "max_tokens": 4096, + "messages": [ + {"role": "system", "content": system}, + {"role": "user", "content": user} + ] + }); + + let mut response = self + .agent + .post(&url) + .header("Authorization", &format!("Bearer {}", self.api_key)) + .header("content-type", "application/json") + .send_json(&body) + .map_err(|e| ProviderError::Http(e.to_string()))?; + + let json: Value = response + .body_mut() + .read_json() + .map_err(|e| ProviderError::Parse(e.to_string()))?; + + // Check for API error + if let Some(err) = json.get("error") { + return Err(ProviderError::Api { + status: 400, + message: err + .get("message") + .and_then(|m| m.as_str()) + .unwrap_or("unknown error") + .to_string(), + }); + } + + // Extract text from choices + let text = json + .get("choices") + .and_then(|c| c.as_array()) + .and_then(|arr| arr.first()) + .and_then(|choice| choice.get("message")) + .and_then(|msg| msg.get("content")) + .and_then(|c| c.as_str()) + .ok_or(ProviderError::EmptyResponse)? 
+ .to_string(); + + // Extract usage + let input_tokens = json + .get("usage") + .and_then(|u| u.get("prompt_tokens")) + .and_then(|t| t.as_u64()); + let output_tokens = json + .get("usage") + .and_then(|u| u.get("completion_tokens")) + .and_then(|t| t.as_u64()); + + Ok(LlmResponse { + text, + input_tokens, + output_tokens, + }) + } + + fn model_name(&self) -> &str { + &self.model + } + + fn cost_per_mtok_input(&self) -> f64 { + // GPT-4o-mini: $0.15/MTok input + if self.model.contains("4o-mini") { + 0.15 + } else if self.model.contains("4o") { + 2.50 + } else { + 0.50 + } + } + + fn cost_per_mtok_output(&self) -> f64 { + // GPT-4o-mini: $0.60/MTok output + if self.model.contains("4o-mini") { + 0.60 + } else if self.model.contains("4o") { + 10.00 + } else { + 1.50 + } + } +} + +/// Create a provider from CLI arguments. +pub fn create_provider( + provider_name: &str, + api_key: &str, + model: Option<&str>, + base_url: Option<&str>, +) -> Result, ProviderError> { + match provider_name { + #[cfg(feature = "anthropic")] + "anthropic" => Ok(Box::new(AnthropicProvider::new( + api_key.to_string(), + model.map(String::from), + ))), + #[cfg(feature = "openai")] + "openai" => Ok(Box::new(OpenAiProvider::new( + api_key.to_string(), + model.map(String::from), + base_url.map(String::from), + ))), + other => Err(ProviderError::Http(format!( + "unknown provider: '{}'. Available: {}", + other, + available_providers().join(", ") + ))), + } +} + +/// List compiled-in provider names. +pub fn available_providers() -> Vec<&'static str> { + vec![ + #[cfg(feature = "anthropic")] + "anthropic", + #[cfg(feature = "openai")] + "openai", + ] +}